/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 *	Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

#include <vm/vm_domain.h>
_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");
#ifdef VM_NUMA_ALLOC
struct mem_affinity *mem_affinity;
int *mem_locality;
#endif

int vm_ndomains = 1;

struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
int vm_phys_nsegs;
struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);
struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);
static struct rwlock vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
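/*
 * For illustration: assuming 4 KB pages, the queue
 * vm_phys_free_queues[0][0][VM_FREEPOOL_DEFAULT][9] heads the list of
 * free order-9 blocks (2^9 pages == 2 MB each) in the default pool of
 * free list 0 within memory domain 0.
 */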
static int vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int vm_freelist_to_flind[VM_NFREELIST];
CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_ISADMA
#define	VM_ISADMA_BOUNDARY	16777216
#endif
#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY)
CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY);
#endif
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif
static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#ifdef VM_NUMA_ALLOC
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");
static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);
/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}
static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}
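/*
 * For illustration: a lookup key built by vm_phys_fictitious_to_vm_page()
 * has "start" set to the page's address and "end" set to zero, so the
 * comparison above degenerates to vm_phys_fictitious_in_range() and
 * RB_FIND() descends until it reaches the registered segment whose
 * [start, end) interval contains the address.
 */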
int
vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
{
	struct vm_phys_seg *s;
	int idx;

	while ((idx = ffsl(mask)) != 0) {
		idx--;	/* ffsl counts from 1 */
		mask &= ~(1UL << idx);
		s = &vm_phys_segs[idx];
		if (low < s->end && high > s->start)
			return (1);
	}
	return (0);
}
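/*
 * For illustration: bit i of "mask" selects vm_phys_segs[i], so a mask
 * of 0x5 asks whether segment 0 or segment 2 overlaps the half-open
 * range [low, high); the first overlapping segment returns 1.
 */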
/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
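/*
 * For illustration, "sysctl vm.phys_free" renders one table per free
 * list per domain, roughly of the following form (the column count
 * depends on VM_NFREEPOOL; the sizes assume 4 KB pages):
 *
 * DOMAIN 0:
 *
 * FREE LIST 0:
 *
 *   ORDER (SIZE)  |  NUMBER
 *                 |  POOL 0  |  POOL 1
 *   12 ( 16384K)  |       3  |       0
 *   11 (  8192K)  |       1  |       2
 *   ...
 */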
/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef VM_NUMA_ALLOC
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}
#ifdef VM_NUMA_ALLOC
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif
static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
	fl[order].lcnt++;
}
static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}
/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}
static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef VM_NUMA_ALLOC
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}
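/*
 * For illustration: given affinity entries { [0, 8G) -> domain 0,
 * [8G, 16G) -> domain 1 }, a call covering [6G, 10G) creates two
 * segments, [6G, 8G) in domain 0 and [8G, 10G) in domain 1.
 */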
/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef	VM_FREELIST_ISADMA
	if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY);
		paddr = VM_ISADMA_BOUNDARY;
	}
#endif
#ifdef	VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef	VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}
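/*
 * For illustration: on a configuration defining VM_FREELIST_DMA32, a
 * segment spanning [3G, 5G) is recorded as [3G, 4G) and [4G, 5G) so
 * that no segment straddles the 4G free list boundary.
 */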
/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	u_long npages;
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
	npages = 0;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef	VM_FREELIST_ISADMA
		if (seg->end <= VM_ISADMA_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1;
		else
#endif
#ifdef	VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef	VM_FREELIST_DMA32
		if (
#ifdef	VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
			npages += atop(seg->end - seg->start);
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;
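	/*
	 * For illustration: if only VM_FREELIST_DEFAULT (0) and
	 * VM_FREELIST_DMA32 (1) were flagged, the entries { 1, 1 } become
	 * running totals { 1, 2 }, vm_nfreelists is 2, and the decrement
	 * yields flind values { 0, 1 }.  Had DMA32 not been flagged,
	 * { 1, 0 } would yield { 0, 0 }, mapping both names onto the one
	 * free list that exists.
	 */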
	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef	VM_FREELIST_ISADMA
		if (seg->end <= VM_ISADMA_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_ISADMA];
			KASSERT(flind >= 0,
			    ("vm_phys_init: ISADMA flind < 0"));
		} else
#endif
#ifdef	VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef	VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}
	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}
/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, 0);
	}
}
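/*
 * For illustration: splitting an order-3 block (8 pages) down to a
 * requested order of 1 frees the upper order-2 buddy (pages 4-7) and
 * then the order-1 buddy (pages 2-3), leaving pages 0-1 for the
 * caller.
 */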
/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
	vm_page_t m;
	int freelist;

	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}
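/*
 * For illustration: vm_phys_alloc_pages(0, VM_FREEPOOL_DEFAULT, 4)
 * requests a 16-page (order-4) block from domain 0, trying each of
 * the domain's free lists in VM_FREELIST_* order until one succeeds.
 */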
/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int oind, pind, flind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
	    domain));
	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	flind = vm_freelist_to_flind[freelist];
	/* Check if freelist is present */
	if (flind < 0)
		return (NULL);

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}
/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}
vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}
static void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	bzero(range, page_count * sizeof(*range));
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}
int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}
void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only, there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/* Since it's not possible to register such a range, panic. */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif

	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}
/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}
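/*
 * For illustration (assuming 4 KB pages): freeing an order-0 page at
 * 0x3000 computes its buddy as 0x3000 ^ 0x1000 == 0x2000.  If that
 * page is free at order 0, the pair coalesces into an order-1 block at
 * 0x2000 and the loop continues with that block's order-1 buddy at
 * 0x0000.
 */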
/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}
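/*
 * For illustration: freeing 13 pages whose first page is order-3
 * aligned frees an order-3 block (8 pages) in the first loop; the
 * second loop then frees the 5-page residual highest order first, as
 * an order-2 block (4 pages) followed by an order-0 page.
 */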
/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
 */
vm_page_t
vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
	vm_paddr_t pa_end;
	vm_page_t m_end, m_run, m_start;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	if (low >= high)
		return (NULL);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high)
			break;
		if (low >= seg->end)
			continue;
		if (low <= seg->start)
			m_start = seg->first_page;
		else
			m_start = &seg->first_page[atop(low - seg->start)];
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
			continue;
		m_end = &seg->first_page[atop(pa_end - seg->start)];
		m_run = vm_page_scan_contig(npages, m_start, m_end,
		    alignment, boundary, options);
		if (m_run != NULL)
			return (m_run);
	}
	return (NULL);
}
/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}
/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	vm_freelist_rem(fl, m_set, order);
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		vm_freelist_add(fl, m_tmp, order, 0);
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}
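/*
 * For illustration: unfreeing page 5 of a free order-3 block covering
 * pages 0-7 returns the half not containing the page at each step:
 * pages 0-3 at order 2, pages 6-7 at order 1, and page 4 at order 0,
 * leaving only page 5 removed from the free lists.
 */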
/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa_end, pa_start;
	vm_page_t m_run;
	struct vm_phys_seg *seg;
	int segind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	if (low >= high)
		return (NULL);
	m_run = NULL;
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
		if (seg->start >= high || seg->domain != domain)
			continue;
		if (low >= seg->end)
			break;
		if (low <= seg->start)
			pa_start = seg->start;
		else
			pa_start = low;
		if (high < seg->end)
			pa_end = high;
		else
			pa_end = seg->end;
		if (pa_end - pa_start < ptoa(npages))
			continue;
		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
		    alignment, boundary);
		if (m_run != NULL)
			break;
	}
	return (m_run);
}
/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	vm_paddr_t pa, pa_end, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int oind, order, pind;

	KASSERT(npages > 0, ("npages is 0"));
	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	/* Search for a run satisfying the specified conditions. */
	size = npages << PAGE_SHIFT;
	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
	    oind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = (*seg->free_queues)[pind];
			TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
				/*
				 * Is the size of this allocation request
				 * larger than the largest block size?
				 */
				if (order >= VM_NFREEORDER) {
					/*
					 * Determine if a sufficient number of
					 * subsequent blocks to satisfy the
					 * allocation request are free.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					pa_end = pa + size;
					for (;;) {
						pa += 1 << (PAGE_SHIFT +
						    VM_NFREEORDER - 1);
						if (pa >= pa_end ||
						    pa < seg->start ||
						    pa >= seg->end)
							break;
						m = &seg->first_page[atop(pa -
						    seg->start)];
						if (m->order != VM_NFREEORDER -
						    1)
							break;
					}
					/* If not, go to the next block. */
					if (pa < pa_end)
						continue;
				}

				/*
				 * Determine if the blocks are within the
				 * given range, satisfy the given alignment,
				 * and do not cross the given boundary.
				 */
				pa = VM_PAGE_TO_PHYS(m_ret);
				pa_end = pa + size;
				if (pa >= low && pa_end <= high &&
				    (pa & (alignment - 1)) == 0 &&
				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
					goto done;
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, m->order);
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << imin(oind, order));
	if (npages < npages_end)
		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
	return (m_ret);
}
#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			db_printf("FREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("  |  POOL %d", pind);
			db_printf("\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- --      ");
			db_printf("--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf("  %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					db_printf("  |  %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif