sys/vm/vm_map.c

   1 /*-
   2  * Copyright (c) 1991, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * This code is derived from software contributed to Berkeley by
   6  * The Mach Operating System project at Carnegie-Mellon University.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  * 4. Neither the name of the University nor the names of its contributors
  17  *    may be used to endorse or promote products derived from this software
  18  *    without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  *
  32  *      from: @(#)vm_map.c      8.3 (Berkeley) 1/12/94
  33  *
  34  *
  35  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  36  * All rights reserved.
  37  *
  38  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  39  *
  40  * Permission to use, copy, modify and distribute this software and
  41  * its documentation is hereby granted, provided that both the copyright
  42  * notice and this permission notice appear in all copies of the
  43  * software, derivative works or modified versions, and any portions
  44  * thereof, and that both notices appear in supporting documentation.
  45  *
  46  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  47  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  48  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  49  *
  50  * Carnegie Mellon requests users of this software to return to
  51  *
  52  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  53  *  School of Computer Science
  54  *  Carnegie Mellon University
  55  *  Pittsburgh PA 15213-3890
  56  *
  57  * any improvements or extensions that they make and grant Carnegie the
  58  * rights to redistribute these changes.
  59  */
  60
  61 /*
  62  *      Virtual memory mapping module.
  63  */
  64
  65 #include <sys/cdefs.h>
  66 __FBSDID("$FreeBSD$");
  67
  68 #include <sys/param.h>
  69 #include <sys/systm.h>
  70 #include <sys/kernel.h>
  71 #include <sys/ktr.h>
  72 #include <sys/lock.h>
  73 #include <sys/mutex.h>
  74 #include <sys/proc.h>
  75 #include <sys/vmmeter.h>
  76 #include <sys/mman.h>
  77 #include <sys/vnode.h>
  78 #include <sys/resourcevar.h>
  79 #include <sys/file.h>
  80 #include <sys/sysctl.h>
  81 #include <sys/sysent.h>
  82 #include <sys/shm.h>
  83
  84 #include <vm/vm.h>
  85 #include <vm/vm_param.h>
  86 #include <vm/pmap.h>
  87 #include <vm/vm_map.h>
  88 #include <vm/vm_page.h>
  89 #include <vm/vm_object.h>
  90 #include <vm/vm_pager.h>
  91 #include <vm/vm_kern.h>
  92 #include <vm/vm_extern.h>
  93 #include <vm/swap_pager.h>
  94 #include <vm/uma.h>
  95
  96 /*
  97  *      Virtual memory maps provide for the mapping, protection,
  98  *      and sharing of virtual memory objects.  In addition,
  99  *      this module provides for an efficient virtual copy of
 100  *      memory from one map to another.
 101  *
 102  *      Synchronization is required prior to most operations.
 103  *
 104  *      Maps consist of an ordered doubly-linked list of simple
 105  *      entries; a self-adjusting binary search tree of these
 106  *      entries is used to speed up lookups.
 107  *
 108  *      Since portions of maps are specified by start/end addresses,
 109  *      which may not align with existing map entries, all
 110  *      routines merely "clip" entries to these start/end values.
 111  *      [That is, an entry is split into two, bordering at a
 112  *      start or end value.]  Note that these clippings may not
 113  *      always be necessary (as the two resulting entries are then
 114  *      not changed); however, the clipping is done for convenience.
 115  *
 116  *      As mentioned above, virtual copy operations are performed
 117  *      by copying VM object references from one map to
 118  *      another, and then marking both regions as copy-on-write.
 119  */
 120
 121 static struct mtx map_sleep_mtx;
 122 static uma_zone_t mapentzone;
 123 static uma_zone_t kmapentzone;
 124 static uma_zone_t mapzone;
 125 static uma_zone_t vmspace_zone;
 126 static struct vm_object kmapentobj;
 127 static int vmspace_zinit(void *mem, int size, int flags);
 128 static void vmspace_zfini(void *mem, int size);
 129 static int vm_map_zinit(void *mem, int ize, int flags);
 130 static void vm_map_zfini(void *mem, int size);
 131 static void _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max);
 132 static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
 133 static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
 134 #ifdef INVARIANTS
 135 static void vm_map_zdtor(void *mem, int size, void *arg);
 136 static void vmspace_zdtor(void *mem, int size, void *arg);
 137 #endif
 138
 139 #define ENTRY_CHARGED(e) ((e)->uip != NULL || \
 140     ((e)->object.vm_object != NULL && (e)->object.vm_object->uip != NULL && \
 141      !((e)->eflags & MAP_ENTRY_NEEDS_COPY)))
 142
 143 /*
 144  * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type
 145  * stable.
 146  */
 147 #define PROC_VMSPACE_LOCK(p) do { } while (0)
 148 #define PROC_VMSPACE_UNLOCK(p) do { } while (0)
 149
 150 /*
 151  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
 152  *
 153  *      Asserts that the starting and ending region
 154  *      addresses fall within the valid range of the map.
 155  */
 156 #define VM_MAP_RANGE_CHECK(map, start, end)             \
 157                 {                                       \
 158                 if (start < vm_map_min(map))            \
 159                         start = vm_map_min(map);        \
 160                 if (end > vm_map_max(map))              \
 161                         end = vm_map_max(map);          \
 162                 if (start > end)                        \
 163                         start = end;                    \
 164                 }
 165
 166 /*
 167  *      vm_map_startup:
 168  *
 169  *      Initialize the vm_map module.  Must be called before
 170  *      any other vm_map routines.
 171  *
 172  *      Map and entry structures are allocated from the general
 173  *      purpose memory pool with some exceptions:
 174  *
 175  *      - The kernel map and kmem submap are allocated statically.
 176  *      - Kernel map entries are allocated out of a static pool.
 177  *
 178  *      These restrictions are necessary since malloc() uses the
 179  *      maps and requires map entries.
 180  */
 181
 182 void
 183 vm_map_startup(void)
 184 {
 185         mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF);
 186         mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
 187 #ifdef INVARIANTS
 188             vm_map_zdtor,
 189 #else
 190             NULL,
 191 #endif
 192             vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 193         uma_prealloc(mapzone, MAX_KMAP);
 194         kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry),
 195             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 196             UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
 197         uma_prealloc(kmapentzone, MAX_KMAPENT);
 198         mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
 199             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 200 }
 201
 202 static void
 203 vmspace_zfini(void *mem, int size)
 204 {
 205         struct vmspace *vm;
 206
 207         vm = (struct vmspace *)mem;
 208         vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map));
 209 }
 210
 211 static int
 212 vmspace_zinit(void *mem, int size, int flags)
 213 {
 214         struct vmspace *vm;
 215
 216         vm = (struct vmspace *)mem;
 217
 218         vm->vm_map.pmap = NULL;
 219         (void)vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map), flags);
 220         return (0);
 221 }
 222
 223 static void
 224 vm_map_zfini(void *mem, int size)
 225 {
 226         vm_map_t map;
 227
 228         map = (vm_map_t)mem;
 229         mtx_destroy(&map->system_mtx);
 230         sx_destroy(&map->lock);
 231 }
 232
 233 static int
 234 vm_map_zinit(void *mem, int size, int flags)
 235 {
 236         vm_map_t map;
 237
 238         map = (vm_map_t)mem;
 239         map->nentries = 0;
 240         map->size = 0;
 241         mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
 242         sx_init(&map->lock, "user map");
 243         return (0);
 244 }
 245
 246 #ifdef INVARIANTS
 247 static void
 248 vmspace_zdtor(void *mem, int size, void *arg)
 249 {
 250         struct vmspace *vm;
 251
 252         vm = (struct vmspace *)mem;
 253
 254         vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
 255 }
 256 static void
 257 vm_map_zdtor(void *mem, int size, void *arg)
 258 {
 259         vm_map_t map;
 260
 261         map = (vm_map_t)mem;
 262         KASSERT(map->nentries == 0,
 263             ("map %p nentries == %d on free.",
 264             map, map->nentries));
 265         KASSERT(map->size == 0,
 266             ("map %p size == %lu on free.",
 267             map, (unsigned long)map->size));
 268 }
 269 #endif  /* INVARIANTS */
 270
 271 /*
 272  * Allocate a vmspace structure, including a vm_map and pmap,
 273  * and initialize those structures.  The refcnt is set to 1.
 274  */
 275 struct vmspace *
 276 vmspace_alloc(min, max)
 277         vm_offset_t min, max;
 278 {
 279         struct vmspace *vm;
 280
 281         vm = uma_zalloc(vmspace_zone, M_WAITOK);
 282         if (vm->vm_map.pmap == NULL && !pmap_pinit(vmspace_pmap(vm))) {
 283                 uma_zfree(vmspace_zone, vm);
 284                 return (NULL);
 285         }
 286         CTR1(KTR_VM, "vmspace_alloc: %p", vm);
 287         _vm_map_init(&vm->vm_map, min, max);
 288         vm->vm_map.pmap = vmspace_pmap(vm);             /* XXX */
 289         vm->vm_refcnt = 1;
 290         vm->vm_shm = NULL;
 291         vm->vm_swrss = 0;
 292         vm->vm_tsize = 0;
 293         vm->vm_dsize = 0;
 294         vm->vm_ssize = 0;
 295         vm->vm_taddr = 0;
 296         vm->vm_daddr = 0;
 297         vm->vm_maxsaddr = 0;
 298         return (vm);
 299 }
 300
 301 void
 302 vm_init2(void)
 303 {
 304         uma_zone_set_obj(kmapentzone, &kmapentobj, lmin(cnt.v_page_count,
 305             (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE) / 8 +
 306              maxproc * 2 + maxfiles);
 307         vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
 308 #ifdef INVARIANTS
 309             vmspace_zdtor,
 310 #else
 311             NULL,
 312 #endif
 313             vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 314 }
 315
 316 static inline void
 317 vmspace_dofree(struct vmspace *vm)
 318 {
 319
 320         CTR1(KTR_VM, "vmspace_free: %p", vm);
 321
 322         /*
 323          * Make sure any SysV shm is freed, it might not have been in
 324          * exit1().
 325          */
 326         shmexit(vm);
 327
 328         /*
 329          * Lock the map, to wait out all other references to it.
 330          * Delete all of the mappings and pages they hold, then call
 331          * the pmap module to reclaim anything left.
 332          */
 333         (void)vm_map_remove(&vm->vm_map, vm->vm_map.min_offset,
 334             vm->vm_map.max_offset);
 335
 336         pmap_release(vmspace_pmap(vm));
 337         vm->vm_map.pmap = NULL;
 338         uma_zfree(vmspace_zone, vm);
 339 }
 340
 341 void
 342 vmspace_free(struct vmspace *vm)
 343 {
 344
 345         if (vm->vm_refcnt == 0)
 346                 panic("vmspace_free: attempt to free already freed vmspace");
 347
 348         if (atomic_fetchadd_int(&vm->vm_refcnt, -1) == 1)
 349                 vmspace_dofree(vm);
 350 }
 351
 352 void
 353 vmspace_exitfree(struct proc *p)
 354 {
 355         struct vmspace *vm;
 356
 357         PROC_VMSPACE_LOCK(p);
 358         vm = p->p_vmspace;
 359         p->p_vmspace = NULL;
 360         PROC_VMSPACE_UNLOCK(p);
 361         KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace"));
 362         vmspace_free(vm);
 363 }
 364
 365 void
 366 vmspace_exit(struct thread *td)
 367 {
 368         int refcnt;
 369         struct vmspace *vm;
 370         struct proc *p;
 371
 372         /*
 373          * Release user portion of address space.
 374          * This releases references to vnodes,
 375          * which could cause I/O if the file has been unlinked.
 376          * Need to do this early enough that we can still sleep.
 377          *
 378          * The last exiting process to reach this point releases as
 379          * much of the environment as it can. vmspace_dofree() is the
 380          * slower fallback in case another process had a temporary
 381          * reference to the vmspace.
 382          */
 383
 384         p = td->td_proc;
 385         vm = p->p_vmspace;
 386         atomic_add_int(&vmspace0.vm_refcnt, 1);
 387         do {
 388                 refcnt = vm->vm_refcnt;
 389                 if (refcnt > 1 && p->p_vmspace != &vmspace0) {
 390                         /* Switch now since other proc might free vmspace */
 391                         PROC_VMSPACE_LOCK(p);
 392                         p->p_vmspace = &vmspace0;
 393                         PROC_VMSPACE_UNLOCK(p);
 394                         pmap_activate(td);
 395                 }
 396         } while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1));
 397         if (refcnt == 1) {
 398                 if (p->p_vmspace != vm) {
 399                         /* vmspace not yet freed, switch back */
 400                         PROC_VMSPACE_LOCK(p);
 401                         p->p_vmspace = vm;
 402                         PROC_VMSPACE_UNLOCK(p);
 403                         pmap_activate(td);
 404                 }
 405                 pmap_remove_pages(vmspace_pmap(vm));
 406                 /* Switch now since this proc will free vmspace */
 407                 PROC_VMSPACE_LOCK(p);
 408                 p->p_vmspace = &vmspace0;
 409                 PROC_VMSPACE_UNLOCK(p);
 410                 pmap_activate(td);
 411                 vmspace_dofree(vm);
 412         }
 413 }
 414
 415 /* Acquire reference to vmspace owned by another process. */
 416
 417 struct vmspace *
 418 vmspace_acquire_ref(struct proc *p)
 419 {
 420         struct vmspace *vm;
 421         int refcnt;
 422
 423         PROC_VMSPACE_LOCK(p);
 424         vm = p->p_vmspace;
 425         if (vm == NULL) {
 426                 PROC_VMSPACE_UNLOCK(p);
 427                 return (NULL);
 428         }
 429         do {
 430                 refcnt = vm->vm_refcnt;
 431                 if (refcnt <= 0) {      /* Avoid 0->1 transition */
 432                         PROC_VMSPACE_UNLOCK(p);
 433                         return (NULL);
 434                 }
 435         } while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt + 1));
 436         if (vm != p->p_vmspace) {
 437                 PROC_VMSPACE_UNLOCK(p);
 438                 vmspace_free(vm);
 439                 return (NULL);
 440         }
 441         PROC_VMSPACE_UNLOCK(p);
 442         return (vm);
 443 }
 444
 445 void
 446 _vm_map_lock(vm_map_t map, const char *file, int line)
 447 {
 448
 449         if (map->system_map)
 450                 _mtx_lock_flags(&map->system_mtx, 0, file, line);
 451         else
 452                 (void)_sx_xlock(&map->lock, 0, file, line);
 453         map->timestamp++;
 454 }
 455
 456 static void
 457 vm_map_process_deferred(void)
 458 {
 459         struct thread *td;
 460         vm_map_entry_t entry;
 461
 462         td = curthread;
 463
 464         while ((entry = td->td_map_def_user) != NULL) {
 465                 td->td_map_def_user = entry->next;
 466                 vm_map_entry_deallocate(entry, FALSE);
 467         }
 468 }
 469
 470 void
 471 _vm_map_unlock(vm_map_t map, const char *file, int line)
 472 {
 473
 474         if (map->system_map)
 475                 _mtx_unlock_flags(&map->system_mtx, 0, file, line);
 476         else {
 477                 _sx_xunlock(&map->lock, file, line);
 478                 vm_map_process_deferred();
 479         }
 480 }
 481
 482 void
 483 _vm_map_lock_read(vm_map_t map, const char *file, int line)
 484 {
 485
 486         if (map->system_map)
 487                 _mtx_lock_flags(&map->system_mtx, 0, file, line);
 488         else
 489                 (void)_sx_slock(&map->lock, 0, file, line);
 490 }
 491
 492 void
 493 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
 494 {
 495
 496         if (map->system_map)
 497                 _mtx_unlock_flags(&map->system_mtx, 0, file, line);
 498         else {
 499                 _sx_sunlock(&map->lock, file, line);
 500                 vm_map_process_deferred();
 501         }
 502 }
 503
 504 int
 505 _vm_map_trylock(vm_map_t map, const char *file, int line)
 506 {
 507         int error;
 508
 509         error = map->system_map ?
 510             !_mtx_trylock(&map->system_mtx, 0, file, line) :
 511             !_sx_try_xlock(&map->lock, file, line);
 512         if (error == 0)
 513                 map->timestamp++;
 514         return (error == 0);
 515 }
 516
 517 int
 518 _vm_map_trylock_read(vm_map_t map, const char *file, int line)
 519 {
 520         int error;
 521
 522         error = map->system_map ?
 523             !_mtx_trylock(&map->system_mtx, 0, file, line) :
 524             !_sx_try_slock(&map->lock, file, line);
 525         return (error == 0);
 526 }
 527
 528 /*
 529  *      _vm_map_lock_upgrade:   [ internal use only ]
 530  *
 531  *      Tries to upgrade a read (shared) lock on the specified map to a write
 532  *      (exclusive) lock.  Returns the value "0" if the upgrade succeeds and a
 533  *      non-zero value if the upgrade fails.  If the upgrade fails, the map is
 534  *      returned without a read or write lock held.
 535  *
 536  *      Requires that the map be read locked.
 537  */
 538 int
 539 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
 540 {
 541         unsigned int last_timestamp;
 542
 543         if (map->system_map) {
 544 #ifdef INVARIANTS
 545                 _mtx_assert(&map->system_mtx, MA_OWNED, file, line);
 546 #endif
 547         } else {
 548                 if (!_sx_try_upgrade(&map->lock, file, line)) {
 549                         last_timestamp = map->timestamp;
 550                         _sx_sunlock(&map->lock, file, line);
 551                         vm_map_process_deferred();
 552                         /*
 553                          * If the map's timestamp does not change while the
 554                          * map is unlocked, then the upgrade succeeds.
 555                          */
 556                         (void)_sx_xlock(&map->lock, 0, file, line);
 557                         if (last_timestamp != map->timestamp) {
 558                                 _sx_xunlock(&map->lock, file, line);
 559                                 return (1);
 560                         }
 561                 }
 562         }
 563         map->timestamp++;
 564         return (0);
 565 }
 566
 567 void
 568 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
 569 {
 570
 571         if (map->system_map) {
 572 #ifdef INVARIANTS
 573                 _mtx_assert(&map->system_mtx, MA_OWNED, file, line);
 574 #endif
 575         } else
 576                 _sx_downgrade(&map->lock, file, line);
 577 }
 578
 579 /*
 580  *      vm_map_locked:
 581  *
 582  *      Returns a non-zero value if the caller holds a write (exclusive) lock
 583  *      on the specified map and the value "0" otherwise.
 584  */
 585 int
 586 vm_map_locked(vm_map_t map)
 587 {
 588
 589         if (map->system_map)
 590                 return (mtx_owned(&map->system_mtx));
 591         else
 592                 return (sx_xlocked(&map->lock));
 593 }
 594
 595 #ifdef INVARIANTS
 596 static void
 597 _vm_map_assert_locked(vm_map_t map, const char *file, int line)
 598 {
 599
 600         if (map->system_map)
 601                 _mtx_assert(&map->system_mtx, MA_OWNED, file, line);
 602         else
 603                 _sx_assert(&map->lock, SA_XLOCKED, file, line);
 604 }
 605
 606 #if 0
 607 static void
 608 _vm_map_assert_locked_read(vm_map_t map, const char *file, int line)
 609 {
 610
 611         if (map->system_map)
 612                 _mtx_assert(&map->system_mtx, MA_OWNED, file, line);
 613         else
 614                 _sx_assert(&map->lock, SA_SLOCKED, file, line);
 615 }
 616 #endif
 617
 618 #define VM_MAP_ASSERT_LOCKED(map) \
 619     _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE)
 620 #define VM_MAP_ASSERT_LOCKED_READ(map) \
 621     _vm_map_assert_locked_read(map, LOCK_FILE, LOCK_LINE)
 622 #else
 623 #define VM_MAP_ASSERT_LOCKED(map)
 624 #define VM_MAP_ASSERT_LOCKED_READ(map)
 625 #endif
 626
 627 /*
 628  *      _vm_map_unlock_and_wait:
 629  *
 630  *      Atomically releases the lock on the specified map and puts the calling
 631  *      thread to sleep.  The calling thread will remain asleep until either
 632  *      vm_map_wakeup() is performed on the map or the specified timeout is
 633  *      exceeded.
 634  *
 635  *      WARNING!  This function does not perform deferred deallocations of
 636  *      objects and map entries.  Therefore, the calling thread is expected to
 637  *      reacquire the map lock after reawakening and later perform an ordinary
 638  *      unlock operation, such as vm_map_unlock(), before completing its
 639  *      operation on the map.
 640  */
 641 int
 642 _vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line)
 643 {
 644
 645         mtx_lock(&map_sleep_mtx);
 646         if (map->system_map)
 647                 _mtx_unlock_flags(&map->system_mtx, 0, file, line);
 648         else
 649                 _sx_xunlock(&map->lock, file, line);
 650         return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps",
 651             timo));
 652 }
 653
 654 /*
 655  *      vm_map_wakeup:
 656  *
 657  *      Awaken any threads that have slept on the map using
 658  *      vm_map_unlock_and_wait().
 659  */
 660 void
 661 vm_map_wakeup(vm_map_t map)
 662 {
 663
 664         /*
 665          * Acquire and release map_sleep_mtx to prevent a wakeup()
 666          * from being performed (and lost) between the map unlock
 667          * and the msleep() in _vm_map_unlock_and_wait().
 668          */
 669         mtx_lock(&map_sleep_mtx);
 670         mtx_unlock(&map_sleep_mtx);
 671         wakeup(&map->root);
 672 }
 673
 674 void
 675 vm_map_busy(vm_map_t map)
 676 {
 677
 678         VM_MAP_ASSERT_LOCKED(map);
 679         map->busy++;
 680 }
 681
 682 void
 683 vm_map_unbusy(vm_map_t map)
 684 {
 685
 686         VM_MAP_ASSERT_LOCKED(map);
 687         KASSERT(map->busy, ("vm_map_unbusy: not busy"));
 688         if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) {
 689                 vm_map_modflags(map, 0, MAP_BUSY_WAKEUP);
 690                 wakeup(&map->busy);
 691         }
 692 }
 693
 694 void
 695 vm_map_wait_busy(vm_map_t map)
 696 {
 697
 698         VM_MAP_ASSERT_LOCKED(map);
 699         while (map->busy) {
 700                 vm_map_modflags(map, MAP_BUSY_WAKEUP, 0);
 701                 if (map->system_map)
 702                         msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0);
 703                 else
 704                         sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0);
 705         }
 706         map->timestamp++;
 707 }
 708
 709 long
 710 vmspace_resident_count(struct vmspace *vmspace)
 711 {
 712         return pmap_resident_count(vmspace_pmap(vmspace));
 713 }
 714
 715 long
 716 vmspace_wired_count(struct vmspace *vmspace)
 717 {
 718         return pmap_wired_count(vmspace_pmap(vmspace));
 719 }
 720
 721 /*
 722  *      vm_map_create:
 723  *
 724  *      Creates and returns a new empty VM map with
 725  *      the given physical map structure, and having
 726  *      the given lower and upper address bounds.
 727  */
 728 vm_map_t
 729 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
 730 {
 731         vm_map_t result;
 732
 733         result = uma_zalloc(mapzone, M_WAITOK);
 734         CTR1(KTR_VM, "vm_map_create: %p", result);
 735         _vm_map_init(result, min, max);
 736         result->pmap = pmap;
 737         return (result);
 738 }
 739
 740 /*
 741  * Initialize an existing vm_map structure
 742  * such as that in the vmspace structure.
 743  * The pmap is set elsewhere.
 744  */
 745 static void
 746 _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
 747 {
 748
 749         map->header.next = map->header.prev = &map->header;
 750         map->needs_wakeup = FALSE;
 751         map->system_map = 0;
 752         map->min_offset = min;
 753         map->max_offset = max;
 754         map->flags = 0;
 755         map->root = NULL;
 756         map->timestamp = 0;
 757         map->busy = 0;
 758 }
 759
 760 void
 761 vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
 762 {
 763         _vm_map_init(map, min, max);
 764         mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
 765         sx_init(&map->lock, "user map");
 766 }
 767
 768 /*
 769  *      vm_map_entry_dispose:   [ internal use only ]
 770  *
 771  *      Inverse of vm_map_entry_create.
 772  */
 773 static void
 774 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
 775 {
 776         uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
 777 }
 778
 779 /*
 780  *      vm_map_entry_create:    [ internal use only ]
 781  *
 782  *      Allocates a VM map entry for insertion.
 783  *      No entry fields are filled in.
 784  */
 785 static vm_map_entry_t
 786 vm_map_entry_create(vm_map_t map)
 787 {
 788         vm_map_entry_t new_entry;
 789
 790         if (map->system_map)
 791                 new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
 792         else
 793                 new_entry = uma_zalloc(mapentzone, M_WAITOK);
 794         if (new_entry == NULL)
 795                 panic("vm_map_entry_create: kernel resources exhausted");
 796         return (new_entry);
 797 }
 798
 799 /*
 800  *      vm_map_entry_set_behavior:
 801  *
 802  *      Set the expected access behavior, either normal, random, or
 803  *      sequential.
 804  */
 805 static inline void
 806 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
 807 {
 808         entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
 809             (behavior & MAP_ENTRY_BEHAV_MASK);
 810 }
 811
 812 /*
 813  *      vm_map_entry_set_max_free:
 814  *
 815  *      Set the max_free field in a vm_map_entry.
 816  */
 817 static inline void
 818 vm_map_entry_set_max_free(vm_map_entry_t entry)
 819 {
 820
 821         entry->max_free = entry->adj_free;
 822         if (entry->left != NULL && entry->left->max_free > entry->max_free)
 823                 entry->max_free = entry->left->max_free;
 824         if (entry->right != NULL && entry->right->max_free > entry->max_free)
 825                 entry->max_free = entry->right->max_free;
 826 }
 827
 828 /*
 829  *      vm_map_entry_splay:
 830  *
 831  *      The Sleator and Tarjan top-down splay algorithm with the
 832  *      following variation.  Max_free must be computed bottom-up, so
 833  *      on the downward pass, maintain the left and right spines in
 834  *      reverse order.  Then, make a second pass up each side to fix
 835  *      the pointers and compute max_free.  The time bound is O(log n)
 836  *      amortized.
 837  *
 838  *      The new root is the vm_map_entry containing "addr", or else an
 839  *      adjacent entry (lower or higher) if addr is not in the tree.
 840  *
 841  *      The map must be locked, and leaves it so.
 842  *
 843  *      Returns: the new root.
 844  */
 845 static vm_map_entry_t
 846 vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root)
 847 {
 848         vm_map_entry_t llist, rlist;
 849         vm_map_entry_t ltree, rtree;
 850         vm_map_entry_t y;
 851
 852         /* Special case of empty tree. */
 853         if (root == NULL)
 854                 return (root);
 855
 856         /*
 857          * Pass One: Splay down the tree until we find addr or a NULL
 858          * pointer where addr would go.  llist and rlist are the two
 859          * sides in reverse order (bottom-up), with llist linked by
 860          * the right pointer and rlist linked by the left pointer in
 861          * the vm_map_entry.  Wait until Pass Two to set max_free on
 862          * the two spines.
 863          */
 864         llist = NULL;
 865         rlist = NULL;
 866         for (;;) {
 867                 /* root is never NULL in here. */
 868                 if (addr < root->start) {
 869                         y = root->left;
 870                         if (y == NULL)
 871                                 break;
 872                         if (addr < y->start && y->left != NULL) {
 873                                 /* Rotate right and put y on rlist. */
 874                                 root->left = y->right;
 875                                 y->right = root;
 876                                 vm_map_entry_set_max_free(root);
 877                                 root = y->left;
 878                                 y->left = rlist;
 879                                 rlist = y;
 880                         } else {
 881                                 /* Put root on rlist. */
 882                                 root->left = rlist;
 883                                 rlist = root;
 884                                 root = y;
 885                         }
 886                 } else if (addr >= root->end) {
 887                         y = root->right;
 888                         if (y == NULL)
 889                                 break;
 890                         if (addr >= y->end && y->right != NULL) {
 891                                 /* Rotate left and put y on llist. */
 892                                 root->right = y->left;
 893                                 y->left = root;
 894                                 vm_map_entry_set_max_free(root);
 895                                 root = y->right;
 896                                 y->right = llist;
 897                                 llist = y;
 898                         } else {
 899                                 /* Put root on llist. */
 900                                 root->right = llist;
 901                                 llist = root;
 902                                 root = y;
 903                         }
 904                 } else
 905                         break;
 906         }
 907
 908         /*
 909          * Pass Two: Walk back up the two spines, flip the pointers
 910          * and set max_free.  The subtrees of the root go at the
 911          * bottom of llist and rlist.
 912          */
 913         ltree = root->left;
 914         while (llist != NULL) {
 915                 y = llist->right;
 916                 llist->right = ltree;
 917                 vm_map_entry_set_max_free(llist);
 918                 ltree = llist;
 919                 llist = y;
 920         }
 921         rtree = root->right;
 922         while (rlist != NULL) {
 923                 y = rlist->left;
 924                 rlist->left = rtree;
 925                 vm_map_entry_set_max_free(rlist);
 926                 rtree = rlist;
 927                 rlist = y;
 928         }
 929
 930         /*
 931          * Final assembly: add ltree and rtree as subtrees of root.
 932          */
 933         root->left = ltree;
 934         root->right = rtree;
 935         vm_map_entry_set_max_free(root);
 936
 937         return (root);
 938 }
 939
 940 /*
 941  *      vm_map_entry_{un,}link:
 942  *
 943  *      Insert/remove entries from maps.
 944  */
 945 static void
 946 vm_map_entry_link(vm_map_t map,
 947                   vm_map_entry_t after_where,
 948                   vm_map_entry_t entry)
 949 {
 950
 951         CTR4(KTR_VM,
 952             "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
 953             map->nentries, entry, after_where);
 954         VM_MAP_ASSERT_LOCKED(map);
 955         map->nentries++;
 956         entry->prev = after_where;
 957         entry->next = after_where->next;
 958         entry->next->prev = entry;
 959         after_where->next = entry;
 960
 961         if (after_where != &map->header) {
 962                 if (after_where != map->root)
 963                         vm_map_entry_splay(after_where->start, map->root);
 964                 entry->right = after_where->right;
 965                 entry->left = after_where;
 966                 after_where->right = NULL;
 967                 after_where->adj_free = entry->start - after_where->end;
 968                 vm_map_entry_set_max_free(after_where);
 969         } else {
 970                 entry->right = map->root;
 971                 entry->left = NULL;
 972         }
 973         entry->adj_free = (entry->next == &map->header ? map->max_offset :
 974             entry->next->start) - entry->end;
 975         vm_map_entry_set_max_free(entry);
 976         map->root = entry;
 977 }
 978
 979 static void
 980 vm_map_entry_unlink(vm_map_t map,
 981                     vm_map_entry_t entry)
 982 {
 983         vm_map_entry_t next, prev, root;
 984
 985         VM_MAP_ASSERT_LOCKED(map);
 986         if (entry != map->root)
 987                 vm_map_entry_splay(entry->start, map->root);
 988         if (entry->left == NULL)
 989                 root = entry->right;
 990         else {
 991                 root = vm_map_entry_splay(entry->start, entry->left);
 992                 root->right = entry->right;
 993                 root->adj_free = (entry->next == &map->header ? map->max_offset :
 994                     entry->next->start) - root->end;
 995                 vm_map_entry_set_max_free(root);
 996         }
 997         map->root = root;
 998
 999         prev = entry->prev;
1000         next = entry->next;
1001         next->prev = prev;
1002         prev->next = next;
1003         map->nentries--;
1004         CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
1005             map->nentries, entry);
1006 }
1007
1008 /*
1009  *      vm_map_entry_resize_free:
1010  *
1011  *      Recompute the amount of free space following a vm_map_entry
1012  *      and propagate that value up the tree.  Call this function after
1013  *      resizing a map entry in-place, that is, without a call to
1014  *      vm_map_entry_link() or _unlink().
1015  *
1016  *      The map must be locked, and leaves it so.
1017  */
1018 static void
1019 vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry)
1020 {
1021
1022         /*
1023          * Using splay trees without parent pointers, propagating
1024          * max_free up the tree is done by moving the entry to the
1025          * root and making the change there.
1026          */
1027         if (entry != map->root)
1028                 map->root = vm_map_entry_splay(entry->start, map->root);
1029
1030         entry->adj_free = (entry->next == &map->header ? map->max_offset :
1031             entry->next->start) - entry->end;
1032         vm_map_entry_set_max_free(entry);
1033 }
1034
1035 /*
1036  *      vm_map_lookup_entry:    [ internal use only ]
1037  *
1038  *      Finds the map entry containing (or
1039  *      immediately preceding) the specified address
1040  *      in the given map; the entry is returned
1041  *      in the "entry" parameter.  The boolean
1042  *      result indicates whether the address is
1043  *      actually contained in the map.
1044  */
1045 boolean_t
1046 vm_map_lookup_entry(
1047         vm_map_t map,
1048         vm_offset_t address,
1049         vm_map_entry_t *entry)  /* OUT */
1050 {
1051         vm_map_entry_t cur;
1052         boolean_t locked;
1053
1054         /*
1055          * If the map is empty, then the map entry immediately preceding
1056          * "address" is the map's header.
1057          */
1058         cur = map->root;
1059         if (cur == NULL)
1060                 *entry = &map->header;
1061         else if (address >= cur->start && cur->end > address) {
1062                 *entry = cur;
1063                 return (TRUE);
1064         } else if ((locked = vm_map_locked(map)) ||
1065             sx_try_upgrade(&map->lock)) {
1066                 /*
1067                  * Splay requires a write lock on the map.  However, it only
1068                  * restructures the binary search tree; it does not otherwise
1069                  * change the map.  Thus, the map's timestamp need not change
1070                  * on a temporary upgrade.
1071                  */
1072                 map->root = cur = vm_map_entry_splay(address, cur);
1073                 if (!locked)
1074                         sx_downgrade(&map->lock);
1075
1076                 /*
1077                  * If "address" is contained within a map entry, the new root
1078                  * is that map entry.  Otherwise, the new root is a map entry
1079                  * immediately before or after "address".
1080                  */
1081                 if (address >= cur->start) {
1082                         *entry = cur;
1083                         if (cur->end > address)
1084                                 return (TRUE);
1085                 } else
1086                         *entry = cur->prev;
1087         } else
1088                 /*
1089                  * Since the map is only locked for read access, perform a
1090                  * standard binary search tree lookup for "address".
1091                  */
1092                 for (;;) {
1093                         if (address < cur->start) {
1094                                 if (cur->left == NULL) {
1095                                         *entry = cur->prev;
1096                                         break;
1097                                 }
1098                                 cur = cur->left;
1099                         } else if (cur->end > address) {
1100                                 *entry = cur;
1101                                 return (TRUE);
1102                         } else {
1103                                 if (cur->right == NULL) {
1104                                         *entry = cur;
1105                                         break;
1106                                 }
1107                                 cur = cur->right;
1108                         }
1109                 }
1110         return (FALSE);
1111 }
1112
1113 /*
1114  *      vm_map_insert:
1115  *
1116  *      Inserts the given whole VM object into the target
1117  *      map at the specified address range.  The object's
1118  *      size should match that of the address range.
1119  *
1120  *      Requires that the map be locked, and leaves it so.
1121  *
1122  *      If object is non-NULL, ref count must be bumped by caller
1123  *      prior to making call to account for the new entry.
1124  */
1125 int
1126 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1127               vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
1128               int cow)
1129 {
1130         vm_map_entry_t new_entry;
1131         vm_map_entry_t prev_entry;
1132         vm_map_entry_t temp_entry;
1133         vm_eflags_t protoeflags;
1134         struct uidinfo *uip;
1135         boolean_t charge_prev_obj;
1136
1137         VM_MAP_ASSERT_LOCKED(map);
1138
1139         /*
1140          * Check that the start and end points are not bogus.
1141          */
1142         if ((start < map->min_offset) || (end > map->max_offset) ||
1143             (start >= end))
1144                 return (KERN_INVALID_ADDRESS);
1145
1146         /*
1147          * Find the entry prior to the proposed starting address; if it's part
1148          * of an existing entry, this range is bogus.
1149          */
1150         if (vm_map_lookup_entry(map, start, &temp_entry))
1151                 return (KERN_NO_SPACE);
1152
1153         prev_entry = temp_entry;
1154
1155         /*
1156          * Assert that the next entry doesn't overlap the end point.
1157          */
1158         if ((prev_entry->next != &map->header) &&
1159             (prev_entry->next->start < end))
1160                 return (KERN_NO_SPACE);
1161
1162         protoeflags = 0;
1163         charge_prev_obj = FALSE;
1164
1165         if (cow & MAP_COPY_ON_WRITE)
1166                 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
1167
1168         if (cow & MAP_NOFAULT) {
1169                 protoeflags |= MAP_ENTRY_NOFAULT;
1170
1171                 KASSERT(object == NULL,
1172                         ("vm_map_insert: paradoxical MAP_NOFAULT request"));
1173         }
1174         if (cow & MAP_DISABLE_SYNCER)
1175                 protoeflags |= MAP_ENTRY_NOSYNC;
1176         if (cow & MAP_DISABLE_COREDUMP)
1177                 protoeflags |= MAP_ENTRY_NOCOREDUMP;
1178
1179         uip = NULL;
1180         KASSERT((object != kmem_object && object != kernel_object) ||
1181             ((object == kmem_object || object == kernel_object) &&
1182                 !(protoeflags & MAP_ENTRY_NEEDS_COPY)),
1183             ("kmem or kernel object and cow"));
1184         if (cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT))
1185                 goto charged;
1186         if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
1187             ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
1188                 if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start))
1189                         return (KERN_RESOURCE_SHORTAGE);
1190                 KASSERT(object == NULL || (protoeflags & MAP_ENTRY_NEEDS_COPY) ||
1191                     object->uip == NULL,
1192                     ("OVERCOMMIT: vm_map_insert o %p", object));
1193                 uip = curthread->td_ucred->cr_ruidinfo;
1194                 uihold(uip);
1195                 if (object == NULL && !(protoeflags & MAP_ENTRY_NEEDS_COPY))
1196                         charge_prev_obj = TRUE;
1197         }
1198
1199 charged:
1200         /* Expand the kernel pmap, if necessary. */
1201         if (map == kernel_map && end > kernel_vm_end)
1202                 pmap_growkernel(end);
1203         if (object != NULL) {
1204                 /*
1205                  * OBJ_ONEMAPPING must be cleared unless this mapping
1206                  * is trivially proven to be the only mapping for any
1207                  * of the object's pages.  (Object granularity
1208                  * reference counting is insufficient to recognize
1209                  * aliases with precision.)
1210                  */
1211                 VM_OBJECT_LOCK(object);
1212                 if (object->ref_count > 1 || object->shadow_count != 0)
1213                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
1214                 VM_OBJECT_UNLOCK(object);
1215         }
1216         else if ((prev_entry != &map->header) &&
1217                  (prev_entry->eflags == protoeflags) &&
1218                  (prev_entry->end == start) &&
1219                  (prev_entry->wired_count == 0) &&
1220                  (prev_entry->uip == uip ||
1221                   (prev_entry->object.vm_object != NULL &&
1222                    (prev_entry->object.vm_object->uip == uip))) &&
1223                    vm_object_coalesce(prev_entry->object.vm_object,
1224                        prev_entry->offset,
1225                        (vm_size_t)(prev_entry->end - prev_entry->start),
1226                        (vm_size_t)(end - prev_entry->end), charge_prev_obj)) {
1227                 /*
1228                  * We were able to extend the object.  Determine if we
1229                  * can extend the previous map entry to include the
1230                  * new range as well.
1231                  */
1232                 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
1233                     (prev_entry->protection == prot) &&
1234                     (prev_entry->max_protection == max)) {
1235                         map->size += (end - prev_entry->end);
1236                         prev_entry->end = end;
1237                         vm_map_entry_resize_free(map, prev_entry);
1238                         vm_map_simplify_entry(map, prev_entry);
1239                         if (uip != NULL)
1240                                 uifree(uip);
1241                         return (KERN_SUCCESS);
1242                 }
1243
1244                 /*
1245                  * If we can extend the object but cannot extend the
1246                  * map entry, we have to create a new map entry.  We
1247                  * must bump the ref count on the extended object to
1248                  * account for it.  object may be NULL.
1249                  */
1250                 object = prev_entry->object.vm_object;
1251                 offset = prev_entry->offset +
1252                         (prev_entry->end - prev_entry->start);
1253                 vm_object_reference(object);
1254                 if (uip != NULL && object != NULL && object->uip != NULL &&
1255                     !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
1256                         /* Object already accounts for this uid. */
1257                         uifree(uip);
1258                         uip = NULL;
1259                 }
1260         }
1261
1262         /*
1263          * NOTE: if conditionals fail, object can be NULL here.  This occurs
1264          * in things like the buffer map where we manage kva but do not manage
1265          * backing objects.
1266          */
1267
1268         /*
1269          * Create a new entry
1270          */
1271         new_entry = vm_map_entry_create(map);
1272         new_entry->start = start;
1273         new_entry->end = end;
1274         new_entry->uip = NULL;
1275
1276         new_entry->eflags = protoeflags;
1277         new_entry->object.vm_object = object;
1278         new_entry->offset = offset;
1279         new_entry->avail_ssize = 0;
1280
1281         new_entry->inheritance = VM_INHERIT_DEFAULT;
1282         new_entry->protection = prot;
1283         new_entry->max_protection = max;
1284         new_entry->wired_count = 0;
1285
1286         KASSERT(uip == NULL || !ENTRY_CHARGED(new_entry),
1287             ("OVERCOMMIT: vm_map_insert leaks vm_map %p", new_entry));
1288         new_entry->uip = uip;
1289
1290         /*
1291          * Insert the new entry into the list
1292          */
1293         vm_map_entry_link(map, prev_entry, new_entry);
1294         map->size += new_entry->end - new_entry->start;
1295
1296 #if 0
1297         /*
1298          * Temporarily removed to avoid MAP_STACK panic, due to
1299          * MAP_STACK being a huge hack.  Will be added back in
1300          * when MAP_STACK (and the user stack mapping) is fixed.
1301          */
1302         /*
1303          * It may be possible to simplify the entry
1304          */
1305         vm_map_simplify_entry(map, new_entry);
1306 #endif
1307
1308         if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
1309                 vm_map_pmap_enter(map, start, prot,
1310                                     object, OFF_TO_IDX(offset), end - start,
1311                                     cow & MAP_PREFAULT_PARTIAL);
1312         }
1313
1314         return (KERN_SUCCESS);
1315 }
1316
1317 /*
1318  *      vm_map_findspace:
1319  *
1320  *      Find the first fit (lowest VM address) for "length" free bytes
1321  *      beginning at address >= start in the given map.
1322  *
1323  *      In a vm_map_entry, "adj_free" is the amount of free space
1324  *      adjacent (higher address) to this entry, and "max_free" is the
1325  *      maximum amount of contiguous free space in its subtree.  This
1326  *      allows finding a free region in one path down the tree, so
1327  *      O(log n) amortized with splay trees.
1328  *
1329  *      The map must be locked, and leaves it so.
1330  *
1331  *      Returns: 0 on success, and starting address in *addr,
1332  *               1 if insufficient space.
1333  */
1334 int
1335 vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length,
1336     vm_offset_t *addr)  /* OUT */
1337 {
1338         vm_map_entry_t entry;
1339         vm_offset_t st;
1340
1341         /*
1342          * Request must fit within min/max VM address and must avoid
1343          * address wrap.
1344          */
1345         if (start < map->min_offset)
1346                 start = map->min_offset;
1347         if (start + length > map->max_offset || start + length < start)
1348                 return (1);
1349
1350         /* Empty tree means wide open address space. */
1351         if (map->root == NULL) {
1352                 *addr = start;
1353                 return (0);
1354         }
1355
1356         /*
1357          * After splay, if start comes before root node, then there
1358          * must be a gap from start to the root.
1359          */
1360         map->root = vm_map_entry_splay(start, map->root);
1361         if (start + length <= map->root->start) {
1362                 *addr = start;
1363                 return (0);
1364         }
1365
1366         /*
1367          * Root is the last node that might begin its gap before
1368          * start, and this is the last comparison where address
1369          * wrap might be a problem.
1370          */
1371         st = (start > map->root->end) ? start : map->root->end;
1372         if (length <= map->root->end + map->root->adj_free - st) {
1373                 *addr = st;
1374                 return (0);
1375         }
1376
1377         /* With max_free, can immediately tell if no solution. */
1378         entry = map->root->right;
1379         if (entry == NULL || length > entry->max_free)
1380                 return (1);
1381
1382         /*
1383          * Search the right subtree in the order: left subtree, root,
1384          * right subtree (first fit).  The previous splay implies that
1385          * all regions in the right subtree have addresses > start.
1386          */
1387         while (entry != NULL) {
1388                 if (entry->left != NULL && entry->left->max_free >= length)
1389                         entry = entry->left;
1390                 else if (entry->adj_free >= length) {
1391                         *addr = entry->end;
1392                         return (0);
1393                 } else
1394                         entry = entry->right;
1395         }
1396
1397         /* Can't get here, so panic if we do. */
1398         panic("vm_map_findspace: max_free corrupt");
1399 }
1400
1401 int
1402 vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1403     vm_offset_t start, vm_size_t length, vm_prot_t prot,
1404     vm_prot_t max, int cow)
1405 {
1406         vm_offset_t end;
1407         int result;
1408
1409         end = start + length;
1410         vm_map_lock(map);
1411         VM_MAP_RANGE_CHECK(map, start, end);
1412         (void) vm_map_delete(map, start, end);
1413         result = vm_map_insert(map, object, offset, start, end, prot,
1414             max, cow);
1415         vm_map_unlock(map);
1416         return (result);
1417 }
1418
1419 /*
1420  *      vm_map_find finds an unallocated region in the target address
1421  *      map with the given length.  The search is defined to be
1422  *      first-fit from the specified address; the region found is
1423  *      returned in the same parameter.
1424  *
1425  *      If object is non-NULL, ref count must be bumped by caller
1426  *      prior to making call to account for the new entry.
1427  */
1428 int
1429 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1430             vm_offset_t *addr,  /* IN/OUT */
1431             vm_size_t length, int find_space, vm_prot_t prot,
1432             vm_prot_t max, int cow)
1433 {
1434         vm_offset_t start;
1435         int result;
1436
1437         start = *addr;
1438         vm_map_lock(map);
1439         do {
1440                 if (find_space != VMFS_NO_SPACE) {
1441                         if (vm_map_findspace(map, start, length, addr)) {
1442                                 vm_map_unlock(map);
1443                                 return (KERN_NO_SPACE);
1444                         }
1445                         switch (find_space) {
1446                         case VMFS_ALIGNED_SPACE:
1447                                 pmap_align_superpage(object, offset, addr,
1448                                     length);
1449                                 break;
1450 #ifdef VMFS_TLB_ALIGNED_SPACE
1451                         case VMFS_TLB_ALIGNED_SPACE:
1452                                 pmap_align_tlb(addr);
1453                                 break;
1454 #endif
1455                         default:
1456                                 break;
1457                         }
1458
1459                         start = *addr;
1460                 }
1461                 result = vm_map_insert(map, object, offset, start, start +
1462                     length, prot, max, cow);
1463         } while (result == KERN_NO_SPACE && find_space == VMFS_ALIGNED_SPACE);
1464         vm_map_unlock(map);
1465         return (result);
1466 }
1467
1468 /*
1469  *      vm_map_simplify_entry:
1470  *
1471  *      Simplify the given map entry by merging with either neighbor.  This
1472  *      routine also has the ability to merge with both neighbors.
1473  *
1474  *      The map must be locked.
1475  *
1476  *      This routine guarentees that the passed entry remains valid (though
1477  *      possibly extended).  When merging, this routine may delete one or
1478  *      both neighbors.
1479  */
1480 void
1481 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
1482 {
1483         vm_map_entry_t next, prev;
1484         vm_size_t prevsize, esize;
1485
1486         if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP))
1487                 return;
1488
1489         prev = entry->prev;
1490         if (prev != &map->header) {
1491                 prevsize = prev->end - prev->start;
1492                 if ( (prev->end == entry->start) &&
1493                      (prev->object.vm_object == entry->object.vm_object) &&
1494                      (!prev->object.vm_object ||
1495                         (prev->offset + prevsize == entry->offset)) &&
1496                      (prev->eflags == entry->eflags) &&
1497                      (prev->protection == entry->protection) &&
1498                      (prev->max_protection == entry->max_protection) &&
1499                      (prev->inheritance == entry->inheritance) &&
1500                      (prev->wired_count == entry->wired_count) &&
1501                      (prev->uip == entry->uip)) {
1502                         vm_map_entry_unlink(map, prev);
1503                         entry->start = prev->start;
1504                         entry->offset = prev->offset;
1505                         if (entry->prev != &map->header)
1506                                 vm_map_entry_resize_free(map, entry->prev);
1507
1508                         /*
1509                          * If the backing object is a vnode object,
1510                          * vm_object_deallocate() calls vrele().
1511                          * However, vrele() does not lock the vnode
1512                          * because the vnode has additional
1513                          * references.  Thus, the map lock can be kept
1514                          * without causing a lock-order reversal with
1515                          * the vnode lock.
1516                          */
1517                         if (prev->object.vm_object)
1518                                 vm_object_deallocate(prev->object.vm_object);
1519                         if (prev->uip != NULL)
1520                                 uifree(prev->uip);
1521                         vm_map_entry_dispose(map, prev);
1522                 }
1523         }
1524
1525         next = entry->next;
1526         if (next != &map->header) {
1527                 esize = entry->end - entry->start;
1528                 if ((entry->end == next->start) &&
1529                     (next->object.vm_object == entry->object.vm_object) &&
1530                      (!entry->object.vm_object ||
1531                         (entry->offset + esize == next->offset)) &&
1532                     (next->eflags == entry->eflags) &&
1533                     (next->protection == entry->protection) &&
1534                     (next->max_protection == entry->max_protection) &&
1535                     (next->inheritance == entry->inheritance) &&
1536                     (next->wired_count == entry->wired_count) &&
1537                     (next->uip == entry->uip)) {
1538                         vm_map_entry_unlink(map, next);
1539                         entry->end = next->end;
1540                         vm_map_entry_resize_free(map, entry);
1541
1542                         /*
1543                          * See comment above.
1544                          */
1545                         if (next->object.vm_object)
1546                                 vm_object_deallocate(next->object.vm_object);
1547                         if (next->uip != NULL)
1548                                 uifree(next->uip);
1549                         vm_map_entry_dispose(map, next);
1550                 }
1551         }
1552 }
1553 /*
1554  *      vm_map_clip_start:      [ internal use only ]
1555  *
1556  *      Asserts that the given entry begins at or after
1557  *      the specified address; if necessary,
1558  *      it splits the entry into two.
1559  */
1560 #define vm_map_clip_start(map, entry, startaddr) \
1561 { \
1562         if (startaddr > entry->start) \
1563                 _vm_map_clip_start(map, entry, startaddr); \
1564 }
1565
1566 /*
1567  *      This routine is called only when it is known that
1568  *      the entry must be split.
1569  */
1570 static void
1571 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
1572 {
1573         vm_map_entry_t new_entry;
1574
1575         VM_MAP_ASSERT_LOCKED(map);
1576
1577         /*
1578          * Split off the front portion -- note that we must insert the new
1579          * entry BEFORE this one, so that this entry has the specified
1580          * starting address.
1581          */
1582         vm_map_simplify_entry(map, entry);
1583
1584         /*
1585          * If there is no object backing this entry, we might as well create
1586          * one now.  If we defer it, an object can get created after the map
1587          * is clipped, and individual objects will be created for the split-up
1588          * map.  This is a bit of a hack, but is also about the best place to
1589          * put this improvement.
1590          */
1591         if (entry->object.vm_object == NULL && !map->system_map) {
1592                 vm_object_t object;
1593                 object = vm_object_allocate(OBJT_DEFAULT,
1594                                 atop(entry->end - entry->start));
1595                 entry->object.vm_object = object;
1596                 entry->offset = 0;
1597                 if (entry->uip != NULL) {
1598                         object->uip = entry->uip;
1599                         object->charge = entry->end - entry->start;
1600                         entry->uip = NULL;
1601                 }
1602         } else if (entry->object.vm_object != NULL &&
1603                    ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
1604                    entry->uip != NULL) {
1605                 VM_OBJECT_LOCK(entry->object.vm_object);
1606                 KASSERT(entry->object.vm_object->uip == NULL,
1607                     ("OVERCOMMIT: vm_entry_clip_start: both uip e %p", entry));
1608                 entry->object.vm_object->uip = entry->uip;
1609                 entry->object.vm_object->charge = entry->end - entry->start;
1610                 VM_OBJECT_UNLOCK(entry->object.vm_object);
1611                 entry->uip = NULL;
1612         }
1613
1614         new_entry = vm_map_entry_create(map);
1615         *new_entry = *entry;
1616
1617         new_entry->end = start;
1618         entry->offset += (start - entry->start);
1619         entry->start = start;
1620         if (new_entry->uip != NULL)
1621                 uihold(entry->uip);
1622
1623         vm_map_entry_link(map, entry->prev, new_entry);
1624
1625         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1626                 vm_object_reference(new_entry->object.vm_object);
1627         }
1628 }
1629
1630 /*
1631  *      vm_map_clip_end:        [ internal use only ]
1632  *
1633  *      Asserts that the given entry ends at or before
1634  *      the specified address; if necessary,
1635  *      it splits the entry into two.
1636  */
1637 #define vm_map_clip_end(map, entry, endaddr) \
1638 { \
1639         if ((endaddr) < (entry->end)) \
1640                 _vm_map_clip_end((map), (entry), (endaddr)); \
1641 }
1642
1643 /*
1644  *      This routine is called only when it is known that
1645  *      the entry must be split.
1646  */
1647 static void
1648 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1649 {
1650         vm_map_entry_t new_entry;
1651
1652         VM_MAP_ASSERT_LOCKED(map);
1653
1654         /*
1655          * If there is no object backing this entry, we might as well create
1656          * one now.  If we defer it, an object can get created after the map
1657          * is clipped, and individual objects will be created for the split-up
1658          * map.  This is a bit of a hack, but is also about the best place to
1659          * put this improvement.
1660          */
1661         if (entry->object.vm_object == NULL && !map->system_map) {
1662                 vm_object_t object;
1663                 object = vm_object_allocate(OBJT_DEFAULT,
1664                                 atop(entry->end - entry->start));
1665                 entry->object.vm_object = object;
1666                 entry->offset = 0;
1667                 if (entry->uip != NULL) {
1668                         object->uip = entry->uip;
1669                         object->charge = entry->end - entry->start;
1670                         entry->uip = NULL;
1671                 }
1672         } else if (entry->object.vm_object != NULL &&
1673                    ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
1674                    entry->uip != NULL) {
1675                 VM_OBJECT_LOCK(entry->object.vm_object);
1676                 KASSERT(entry->object.vm_object->uip == NULL,
1677                     ("OVERCOMMIT: vm_entry_clip_end: both uip e %p", entry));
1678                 entry->object.vm_object->uip = entry->uip;
1679                 entry->object.vm_object->charge = entry->end - entry->start;
1680                 VM_OBJECT_UNLOCK(entry->object.vm_object);
1681                 entry->uip = NULL;
1682         }
1683
1684         /*
1685          * Create a new entry and insert it AFTER the specified entry
1686          */
1687         new_entry = vm_map_entry_create(map);
1688         *new_entry = *entry;
1689
1690         new_entry->start = entry->end = end;
1691         new_entry->offset += (end - entry->start);
1692         if (new_entry->uip != NULL)
1693                 uihold(entry->uip);
1694
1695         vm_map_entry_link(map, entry, new_entry);
1696
1697         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1698                 vm_object_reference(new_entry->object.vm_object);
1699         }
1700 }
1701
1702 /*
1703  *      vm_map_submap:          [ kernel use only ]
1704  *
1705  *      Mark the given range as handled by a subordinate map.
1706  *
1707  *      This range must have been created with vm_map_find,
1708  *      and no other operations may have been performed on this
1709  *      range prior to calling vm_map_submap.
1710  *
1711  *      Only a limited number of operations can be performed
1712  *      within this rage after calling vm_map_submap:
1713  *              vm_fault
1714  *      [Don't try vm_map_copy!]
1715  *
1716  *      To remove a submapping, one must first remove the
1717  *      range from the superior map, and then destroy the
1718  *      submap (if desired).  [Better yet, don't try it.]
1719  */
1720 int
1721 vm_map_submap(
1722         vm_map_t map,
1723         vm_offset_t start,
1724         vm_offset_t end,
1725         vm_map_t submap)
1726 {
1727         vm_map_entry_t entry;
1728         int result = KERN_INVALID_ARGUMENT;
1729
1730         vm_map_lock(map);
1731
1732         VM_MAP_RANGE_CHECK(map, start, end);
1733
1734         if (vm_map_lookup_entry(map, start, &entry)) {
1735                 vm_map_clip_start(map, entry, start);
1736         } else
1737                 entry = entry->next;
1738
1739         vm_map_clip_end(map, entry, end);
1740
1741         if ((entry->start == start) && (entry->end == end) &&
1742             ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1743             (entry->object.vm_object == NULL)) {
1744                 entry->object.sub_map = submap;
1745                 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1746                 result = KERN_SUCCESS;
1747         }
1748         vm_map_unlock(map);
1749
1750         return (result);
1751 }
1752
1753 /*
1754  * The maximum number of pages to map
1755  */
1756 #define MAX_INIT_PT     96
1757
1758 /*
1759  *      vm_map_pmap_enter:
1760  *
1761  *      Preload read-only mappings for the given object's resident pages into
1762  *      the given map.  This eliminates the soft faults on process startup and
1763  *      immediately after an mmap(2).  Because these are speculative mappings,
1764  *      cached pages are not reactivated and mapped.
1765  */
1766 void
1767 vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
1768     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags)
1769 {
1770         vm_offset_t start;
1771         vm_page_t p, p_start;
1772         vm_pindex_t psize, tmpidx;
1773         boolean_t are_queues_locked;
1774
1775         if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL)
1776                 return;
1777         VM_OBJECT_LOCK(object);
1778         if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
1779                 pmap_object_init_pt(map->pmap, addr, object, pindex, size);
1780                 goto unlock_return;
1781         }
1782
1783         psize = atop(size);
1784
1785         if ((flags & MAP_PREFAULT_PARTIAL) && psize > MAX_INIT_PT &&
1786             object->resident_page_count > MAX_INIT_PT)
1787                 goto unlock_return;
1788
1789         if (psize + pindex > object->size) {
1790                 if (object->size < pindex)
1791                         goto unlock_return;
1792                 psize = object->size - pindex;
1793         }
1794
1795         are_queues_locked = FALSE;
1796         start = 0;
1797         p_start = NULL;
1798
1799         p = vm_page_find_least(object, pindex);
1800         /*
1801          * Assert: the variable p is either (1) the page with the
1802          * least pindex greater than or equal to the parameter pindex
1803          * or (2) NULL.
1804          */
1805         for (;
1806              p != NULL && (tmpidx = p->pindex - pindex) < psize;
1807              p = TAILQ_NEXT(p, listq)) {
1808                 /*
1809                  * don't allow an madvise to blow away our really
1810                  * free pages allocating pv entries.
1811                  */
1812                 if ((flags & MAP_PREFAULT_MADVISE) &&
1813                     cnt.v_free_count < cnt.v_free_reserved) {
1814                         psize = tmpidx;
1815                         break;
1816                 }
1817                 if (p->valid == VM_PAGE_BITS_ALL) {
1818                         if (p_start == NULL) {
1819                                 start = addr + ptoa(tmpidx);
1820                                 p_start = p;
1821                         }
1822                 } else if (p_start != NULL) {
1823                         if (!are_queues_locked) {
1824                                 are_queues_locked = TRUE;
1825                                 vm_page_lock_queues();
1826                         }
1827                         pmap_enter_object(map->pmap, start, addr +
1828                             ptoa(tmpidx), p_start, prot);
1829                         p_start = NULL;
1830                 }
1831         }
1832         if (p_start != NULL) {
1833                 if (!are_queues_locked) {
1834                         are_queues_locked = TRUE;
1835                         vm_page_lock_queues();
1836                 }
1837                 pmap_enter_object(map->pmap, start, addr + ptoa(psize),
1838                     p_start, prot);
1839         }
1840         if (are_queues_locked)
1841                 vm_page_unlock_queues();
1842 unlock_return:
1843         VM_OBJECT_UNLOCK(object);
1844 }
1845
1846 /*
1847  *      vm_map_protect:
1848  *
1849  *      Sets the protection of the specified address
1850  *      region in the target map.  If "set_max" is
1851  *      specified, the maximum protection is to be set;
1852  *      otherwise, only the current protection is affected.
1853  */
1854 int
1855 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1856                vm_prot_t new_prot, boolean_t set_max)
1857 {
1858         vm_map_entry_t current, entry;
1859         vm_object_t obj;
1860         struct uidinfo *uip;
1861         vm_prot_t old_prot;
1862
1863         vm_map_lock(map);
1864
1865         VM_MAP_RANGE_CHECK(map, start, end);
1866
1867         if (vm_map_lookup_entry(map, start, &entry)) {
1868                 vm_map_clip_start(map, entry, start);
1869         } else {
1870                 entry = entry->next;
1871         }
1872
1873         /*
1874          * Make a first pass to check for protection violations.
1875          */
1876         current = entry;
1877         while ((current != &map->header) && (current->start < end)) {
1878                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1879                         vm_map_unlock(map);
1880                         return (KERN_INVALID_ARGUMENT);
1881                 }
1882                 if ((new_prot & current->max_protection) != new_prot) {
1883                         vm_map_unlock(map);
1884                         return (KERN_PROTECTION_FAILURE);
1885                 }
1886                 current = current->next;
1887         }
1888
1889
1890         /*
1891          * Do an accounting pass for private read-only mappings that
1892          * now will do cow due to allowed write (e.g. debugger sets
1893          * breakpoint on text segment)
1894          */
1895         for (current = entry; (current != &map->header) &&
1896              (current->start < end); current = current->next) {
1897
1898                 vm_map_clip_end(map, current, end);
1899
1900                 if (set_max ||
1901                     ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 ||
1902                     ENTRY_CHARGED(current)) {
1903                         continue;
1904                 }
1905
1906                 uip = curthread->td_ucred->cr_ruidinfo;
1907                 obj = current->object.vm_object;
1908
1909                 if (obj == NULL || (current->eflags & MAP_ENTRY_NEEDS_COPY)) {
1910                         if (!swap_reserve(current->end - current->start)) {
1911                                 vm_map_unlock(map);
1912                                 return (KERN_RESOURCE_SHORTAGE);
1913                         }
1914                         uihold(uip);
1915                         current->uip = uip;
1916                         continue;
1917                 }
1918
1919                 VM_OBJECT_LOCK(obj);
1920                 if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP) {
1921                         VM_OBJECT_UNLOCK(obj);
1922                         continue;
1923                 }
1924
1925                 /*
1926                  * Charge for the whole object allocation now, since
1927                  * we cannot distinguish between non-charged and
1928                  * charged clipped mapping of the same object later.
1929                  */
1930                 KASSERT(obj->charge == 0,
1931                     ("vm_map_protect: object %p overcharged\n", obj));
1932                 if (!swap_reserve(ptoa(obj->size))) {
1933                         VM_OBJECT_UNLOCK(obj);
1934                         vm_map_unlock(map);
1935                         return (KERN_RESOURCE_SHORTAGE);
1936                 }
1937
1938                 uihold(uip);
1939                 obj->uip = uip;
1940                 obj->charge = ptoa(obj->size);
1941                 VM_OBJECT_UNLOCK(obj);
1942         }
1943
1944         /*
1945          * Go back and fix up protections. [Note that clipping is not
1946          * necessary the second time.]
1947          */
1948         current = entry;
1949         while ((current != &map->header) && (current->start < end)) {
1950                 old_prot = current->protection;
1951
1952                 if (set_max)
1953                         current->protection =
1954                             (current->max_protection = new_prot) &
1955                             old_prot;
1956                 else
1957                         current->protection = new_prot;
1958
1959                 if ((current->eflags & (MAP_ENTRY_COW | MAP_ENTRY_USER_WIRED))
1960                      == (MAP_ENTRY_COW | MAP_ENTRY_USER_WIRED) &&
1961                     (current->protection & VM_PROT_WRITE) != 0 &&
1962                     (old_prot & VM_PROT_WRITE) == 0) {
1963                         vm_fault_copy_entry(map, map, current, current, NULL);
1964                 }
1965
1966                 /*
1967                  * Update physical map if necessary. Worry about copy-on-write
1968                  * here.
1969                  */
1970                 if (current->protection != old_prot) {
1971 #define MASK(entry)     (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1972                                                         VM_PROT_ALL)
1973                         pmap_protect(map->pmap, current->start,
1974                             current->end,
1975                             current->protection & MASK(current));
1976 #undef  MASK
1977                 }
1978                 vm_map_simplify_entry(map, current);
1979                 current = current->next;
1980         }
1981         vm_map_unlock(map);
1982         return (KERN_SUCCESS);
1983 }
1984
1985 /*
1986  *      vm_map_madvise:
1987  *
1988  *      This routine traverses a processes map handling the madvise
1989  *      system call.  Advisories are classified as either those effecting
1990  *      the vm_map_entry structure, or those effecting the underlying
1991  *      objects.
1992  */
1993 int
1994 vm_map_madvise(
1995         vm_map_t map,
1996         vm_offset_t start,
1997         vm_offset_t end,
1998         int behav)
1999 {
2000         vm_map_entry_t current, entry;
2001         int modify_map = 0;
2002
2003         /*
2004          * Some madvise calls directly modify the vm_map_entry, in which case
2005          * we need to use an exclusive lock on the map and we need to perform
2006          * various clipping operations.  Otherwise we only need a read-lock
2007          * on the map.
2008          */
2009         switch(behav) {
2010         case MADV_NORMAL:
2011         case MADV_SEQUENTIAL:
2012         case MADV_RANDOM:
2013         case MADV_NOSYNC:
2014         case MADV_AUTOSYNC:
2015         case MADV_NOCORE:
2016         case MADV_CORE:
2017                 modify_map = 1;
2018                 vm_map_lock(map);
2019                 break;
2020         case MADV_WILLNEED:
2021         case MADV_DONTNEED:
2022         case MADV_FREE:
2023                 vm_map_lock_read(map);
2024                 break;
2025         default:
2026                 return (KERN_INVALID_ARGUMENT);
2027         }
2028
2029         /*
2030          * Locate starting entry and clip if necessary.
2031          */
2032         VM_MAP_RANGE_CHECK(map, start, end);
2033
2034         if (vm_map_lookup_entry(map, start, &entry)) {
2035                 if (modify_map)
2036                         vm_map_clip_start(map, entry, start);
2037         } else {
2038                 entry = entry->next;
2039         }
2040
2041         if (modify_map) {
2042                 /*
2043                  * madvise behaviors that are implemented in the vm_map_entry.
2044                  *
2045                  * We clip the vm_map_entry so that behavioral changes are
2046                  * limited to the specified address range.
2047                  */
2048                 for (current = entry;
2049                      (current != &map->header) && (current->start < end);
2050                      current = current->next
2051                 ) {
2052                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
2053                                 continue;
2054
2055                         vm_map_clip_end(map, current, end);
2056
2057                         switch (behav) {
2058                         case MADV_NORMAL:
2059                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
2060                                 break;
2061                         case MADV_SEQUENTIAL:
2062                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
2063                                 break;
2064                         case MADV_RANDOM:
2065                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
2066                                 break;
2067                         case MADV_NOSYNC:
2068                                 current->eflags |= MAP_ENTRY_NOSYNC;
2069                                 break;
2070                         case MADV_AUTOSYNC:
2071                                 current->eflags &= ~MAP_ENTRY_NOSYNC;
2072                                 break;
2073                         case MADV_NOCORE:
2074                                 current->eflags |= MAP_ENTRY_NOCOREDUMP;
2075                                 break;
2076                         case MADV_CORE:
2077                                 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
2078                                 break;
2079                         default:
2080                                 break;
2081                         }
2082                         vm_map_simplify_entry(map, current);
2083                 }
2084                 vm_map_unlock(map);
2085         } else {
2086                 vm_pindex_t pindex;
2087                 int count;
2088
2089                 /*
2090                  * madvise behaviors that are implemented in the underlying
2091                  * vm_object.
2092                  *
2093                  * Since we don't clip the vm_map_entry, we have to clip
2094                  * the vm_object pindex and count.
2095                  */
2096                 for (current = entry;
2097                      (current != &map->header) && (current->start < end);
2098                      current = current->next
2099                 ) {
2100                         vm_offset_t useStart;
2101
2102                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
2103                                 continue;
2104
2105                         pindex = OFF_TO_IDX(current->offset);
2106                         count = atop(current->end - current->start);
2107                         useStart = current->start;
2108
2109                         if (current->start < start) {
2110                                 pindex += atop(start - current->start);
2111                                 count -= atop(start - current->start);
2112                                 useStart = start;
2113                         }
2114                         if (current->end > end)
2115                                 count -= atop(current->end - end);
2116
2117                         if (count <= 0)
2118                                 continue;
2119
2120                         vm_object_madvise(current->object.vm_object,
2121                                           pindex, count, behav);
2122                         if (behav == MADV_WILLNEED) {
2123                                 vm_map_pmap_enter(map,
2124                                     useStart,
2125                                     current->protection,
2126                                     current->object.vm_object,
2127                                     pindex,
2128                                     (count << PAGE_SHIFT),
2129                                     MAP_PREFAULT_MADVISE
2130                                 );
2131                         }
2132                 }
2133                 vm_map_unlock_read(map);
2134         }
2135         return (0);
2136 }
2137
2138
2139 /*
2140  *      vm_map_inherit:
2141  *
2142  *      Sets the inheritance of the specified address
2143  *      range in the target map.  Inheritance
2144  *      affects how the map will be shared with
2145  *      child maps at the time of vmspace_fork.
2146  */
2147 int
2148 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
2149                vm_inherit_t new_inheritance)
2150 {
2151         vm_map_entry_t entry;
2152         vm_map_entry_t temp_entry;
2153
2154         switch (new_inheritance) {
2155         case VM_INHERIT_NONE:
2156         case VM_INHERIT_COPY:
2157         case VM_INHERIT_SHARE:
2158                 break;
2159         default:
2160                 return (KERN_INVALID_ARGUMENT);
2161         }
2162         vm_map_lock(map);
2163         VM_MAP_RANGE_CHECK(map, start, end);
2164         if (vm_map_lookup_entry(map, start, &temp_entry)) {
2165                 entry = temp_entry;
2166                 vm_map_clip_start(map, entry, start);
2167         } else
2168                 entry = temp_entry->next;
2169         while ((entry != &map->header) && (entry->start < end)) {
2170                 vm_map_clip_end(map, entry, end);
2171                 entry->inheritance = new_inheritance;
2172                 vm_map_simplify_entry(map, entry);
2173                 entry = entry->next;
2174         }
2175         vm_map_unlock(map);
2176         return (KERN_SUCCESS);
2177 }
2178
2179 /*
2180  *      vm_map_unwire:
2181  *
2182  *      Implements both kernel and user unwiring.
2183  */
2184 int
2185 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
2186     int flags)
2187 {
2188         vm_map_entry_t entry, first_entry, tmp_entry;
2189         vm_offset_t saved_start;
2190         unsigned int last_timestamp;
2191         int rv;
2192         boolean_t need_wakeup, result, user_unwire;
2193
2194         user_unwire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE;
2195         vm_map_lock(map);
2196         VM_MAP_RANGE_CHECK(map, start, end);
2197         if (!vm_map_lookup_entry(map, start, &first_entry)) {
2198                 if (flags & VM_MAP_WIRE_HOLESOK)
2199                         first_entry = first_entry->next;
2200                 else {
2201                         vm_map_unlock(map);
2202                         return (KERN_INVALID_ADDRESS);
2203                 }
2204         }
2205         last_timestamp = map->timestamp;
2206         entry = first_entry;
2207         while (entry != &map->header && entry->start < end) {
2208                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
2209                         /*
2210                          * We have not yet clipped the entry.
2211                          */
2212                         saved_start = (start >= entry->start) ? start :
2213                             entry->start;
2214                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2215                         if (vm_map_unlock_and_wait(map, 0)) {
2216                                 /*
2217                                  * Allow interruption of user unwiring?
2218                                  */
2219                         }
2220                         vm_map_lock(map);
2221                         if (last_timestamp+1 != map->timestamp) {
2222                                 /*
2223                                  * Look again for the entry because the map was
2224                                  * modified while it was unlocked.
2225                                  * Specifically, the entry may have been
2226                                  * clipped, merged, or deleted.
2227                                  */
2228                                 if (!vm_map_lookup_entry(map, saved_start,
2229                                     &tmp_entry)) {
2230                                         if (flags & VM_MAP_WIRE_HOLESOK)
2231                                                 tmp_entry = tmp_entry->next;
2232                                         else {
2233                                                 if (saved_start == start) {
2234                                                         /*
2235                                                          * First_entry has been deleted.
2236                                                          */
2237                                                         vm_map_unlock(map);
2238                                                         return (KERN_INVALID_ADDRESS);
2239                                                 }
2240                                                 end = saved_start;
2241                                                 rv = KERN_INVALID_ADDRESS;
2242                                                 goto done;
2243                                         }
2244                                 }
2245                                 if (entry == first_entry)
2246                                         first_entry = tmp_entry;
2247                                 else
2248                                         first_entry = NULL;
2249                                 entry = tmp_entry;
2250                         }
2251                         last_timestamp = map->timestamp;
2252                         continue;
2253                 }
2254                 vm_map_clip_start(map, entry, start);
2255                 vm_map_clip_end(map, entry, end);
2256                 /*
2257                  * Mark the entry in case the map lock is released.  (See
2258                  * above.)
2259                  */
2260                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
2261                 /*
2262                  * Check the map for holes in the specified region.
2263                  * If VM_MAP_WIRE_HOLESOK was specified, skip this check.
2264                  */
2265                 if (((flags & VM_MAP_WIRE_HOLESOK) == 0) &&
2266                     (entry->end < end && (entry->next == &map->header ||
2267                     entry->next->start > entry->end))) {
2268                         end = entry->end;
2269                         rv = KERN_INVALID_ADDRESS;
2270                         goto done;
2271                 }
2272                 /*
2273                  * If system unwiring, require that the entry is system wired.
2274                  */
2275                 if (!user_unwire &&
2276                     vm_map_entry_system_wired_count(entry) == 0) {
2277                         end = entry->end;
2278                         rv = KERN_INVALID_ARGUMENT;
2279                         goto done;
2280                 }
2281                 entry = entry->next;
2282         }
2283         rv = KERN_SUCCESS;
2284 done:
2285         need_wakeup = FALSE;
2286         if (first_entry == NULL) {
2287                 result = vm_map_lookup_entry(map, start, &first_entry);
2288                 if (!result && (flags & VM_MAP_WIRE_HOLESOK))
2289                         first_entry = first_entry->next;
2290                 else
2291                         KASSERT(result, ("vm_map_unwire: lookup failed"));
2292         }
2293         entry = first_entry;
2294         while (entry != &map->header && entry->start < end) {
2295                 if (rv == KERN_SUCCESS && (!user_unwire ||
2296                     (entry->eflags & MAP_ENTRY_USER_WIRED))) {
2297                         if (user_unwire)
2298                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2299                         entry->wired_count--;
2300                         if (entry->wired_count == 0) {
2301                                 /*
2302                                  * Retain the map lock.
2303                                  */
2304                                 vm_fault_unwire(map, entry->start, entry->end,
2305                                     entry->object.vm_object != NULL &&
2306                                     (entry->object.vm_object->type == OBJT_DEVICE ||
2307                                     entry->object.vm_object->type == OBJT_SG));
2308                         }
2309                 }
2310                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
2311                         ("vm_map_unwire: in-transition flag missing"));
2312                 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
2313                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
2314                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
2315                         need_wakeup = TRUE;
2316                 }
2317                 vm_map_simplify_entry(map, entry);
2318                 entry = entry->next;
2319         }
2320         vm_map_unlock(map);
2321         if (need_wakeup)
2322                 vm_map_wakeup(map);
2323         return (rv);
2324 }
2325
2326 /*
2327  *      vm_map_wire:
2328  *
2329  *      Implements both kernel and user wiring.
2330  */
2331 int
2332 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
2333     int flags)
2334 {
2335         vm_map_entry_t entry, first_entry, tmp_entry;
2336         vm_offset_t saved_end, saved_start;
2337         unsigned int last_timestamp;
2338         int rv;
2339         boolean_t fictitious, need_wakeup, result, user_wire;
2340
2341         user_wire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE;
2342         vm_map_lock(map);
2343         VM_MAP_RANGE_CHECK(map, start, end);
2344         if (!vm_map_lookup_entry(map, start, &first_entry)) {
2345                 if (flags & VM_MAP_WIRE_HOLESOK)
2346                         first_entry = first_entry->next;
2347                 else {
2348                         vm_map_unlock(map);
2349                         return (KERN_INVALID_ADDRESS);
2350                 }
2351         }
2352         last_timestamp = map->timestamp;
2353         entry = first_entry;
2354         while (entry != &map->header && entry->start < end) {
2355                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
2356                         /*
2357                          * We have not yet clipped the entry.
2358                          */
2359                         saved_start = (start >= entry->start) ? start :
2360                             entry->start;
2361                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2362                         if (vm_map_unlock_and_wait(map, 0)) {
2363                                 /*
2364                                  * Allow interruption of user wiring?
2365                                  */
2366                         }
2367                         vm_map_lock(map);
2368                         if (last_timestamp + 1 != map->timestamp) {
2369                                 /*
2370                                  * Look again for the entry because the map was
2371                                  * modified while it was unlocked.
2372                                  * Specifically, the entry may have been
2373                                  * clipped, merged, or deleted.
2374                                  */
2375                                 if (!vm_map_lookup_entry(map, saved_start,
2376                                     &tmp_entry)) {
2377                                         if (flags & VM_MAP_WIRE_HOLESOK)
2378                                                 tmp_entry = tmp_entry->next;
2379                                         else {
2380                                                 if (saved_start == start) {
2381                                                         /*
2382                                                          * first_entry has been deleted.
2383                                                          */
2384                                                         vm_map_unlock(map);
2385                                                         return (KERN_INVALID_ADDRESS);
2386                                                 }
2387                                                 end = saved_start;
2388                                                 rv = KERN_INVALID_ADDRESS;
2389                                                 goto done;
2390                                         }
2391                                 }
2392                                 if (entry == first_entry)
2393                                         first_entry = tmp_entry;
2394                                 else
2395                                         first_entry = NULL;
2396                                 entry = tmp_entry;
2397                         }
2398                         last_timestamp = map->timestamp;
2399                         continue;
2400                 }
2401                 vm_map_clip_start(map, entry, start);
2402                 vm_map_clip_end(map, entry, end);
2403                 /*
2404                  * Mark the entry in case the map lock is released.  (See
2405                  * above.)
2406                  */
2407                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
2408                 /*
2409                  *
2410                  */
2411                 if (entry->wired_count == 0) {
2412                         if ((entry->protection & (VM_PROT_READ|VM_PROT_EXECUTE))
2413                             == 0) {
2414                                 entry->eflags |= MAP_ENTRY_WIRE_SKIPPED;
2415                                 if ((flags & VM_MAP_WIRE_HOLESOK) == 0) {
2416                                         end = entry->end;
2417                                         rv = KERN_INVALID_ADDRESS;
2418                                         goto done;
2419                                 }
2420                                 goto next_entry;
2421                         }
2422                         entry->wired_count++;
2423                         saved_start = entry->start;
2424                         saved_end = entry->end;
2425                         fictitious = entry->object.vm_object != NULL &&
2426                             (entry->object.vm_object->type == OBJT_DEVICE ||
2427                             entry->object.vm_object->type == OBJT_SG);
2428                         /*
2429                          * Release the map lock, relying on the in-transition
2430                          * mark.  Mark the map busy for fork.
2431                          */
2432                         vm_map_busy(map);
2433                         vm_map_unlock(map);
2434                         rv = vm_fault_wire(map, saved_start, saved_end,
2435                             user_wire, fictitious);
2436                         vm_map_lock(map);
2437                         vm_map_unbusy(map);
2438                         if (last_timestamp + 1 != map->timestamp) {
2439                                 /*
2440                                  * Look again for the entry because the map was
2441                                  * modified while it was unlocked.  The entry
2442                                  * may have been clipped, but NOT merged or
2443                                  * deleted.
2444                                  */
2445                                 result = vm_map_lookup_entry(map, saved_start,
2446                                     &tmp_entry);
2447                                 KASSERT(result, ("vm_map_wire: lookup failed"));
2448                                 if (entry == first_entry)
2449                                         first_entry = tmp_entry;
2450                                 else
2451                                         first_entry = NULL;
2452                                 entry = tmp_entry;
2453                                 while (entry->end < saved_end) {
2454                                         if (rv != KERN_SUCCESS) {
2455                                                 KASSERT(entry->wired_count == 1,
2456                                                     ("vm_map_wire: bad count"));
2457                                                 entry->wired_count = -1;
2458                                         }
2459                                         entry = entry->next;
2460                                 }
2461                         }
2462                         last_timestamp = map->timestamp;
2463                         if (rv != KERN_SUCCESS) {
2464                                 KASSERT(entry->wired_count == 1,
2465                                     ("vm_map_wire: bad count"));
2466                                 /*
2467                                  * Assign an out-of-range value to represent
2468                                  * the failure to wire this entry.
2469                                  */
2470                                 entry->wired_count = -1;
2471                                 end = entry->end;
2472                                 goto done;
2473                         }
2474                 } else if (!user_wire ||
2475                            (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
2476                         entry->wired_count++;
2477                 }
2478                 /*
2479                  * Check the map for holes in the specified region.
2480                  * If VM_MAP_WIRE_HOLESOK was specified, skip this check.
2481                  */
2482         next_entry:
2483                 if (((flags & VM_MAP_WIRE_HOLESOK) == 0) &&
2484                     (entry->end < end && (entry->next == &map->header ||
2485                     entry->next->start > entry->end))) {
2486                         end = entry->end;
2487                         rv = KERN_INVALID_ADDRESS;
2488                         goto done;
2489                 }
2490                 entry = entry->next;
2491         }
2492         rv = KERN_SUCCESS;
2493 done:
2494         need_wakeup = FALSE;
2495         if (first_entry == NULL) {
2496                 result = vm_map_lookup_entry(map, start, &first_entry);
2497                 if (!result && (flags & VM_MAP_WIRE_HOLESOK))
2498                         first_entry = first_entry->next;
2499                 else
2500                         KASSERT(result, ("vm_map_wire: lookup failed"));
2501         }
2502         entry = first_entry;
2503         while (entry != &map->header && entry->start < end) {
2504                 if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0)
2505                         goto next_entry_done;
2506                 if (rv == KERN_SUCCESS) {
2507                         if (user_wire)
2508                                 entry->eflags |= MAP_ENTRY_USER_WIRED;
2509                 } else if (entry->wired_count == -1) {
2510                         /*
2511                          * Wiring failed on this entry.  Thus, unwiring is
2512                          * unnecessary.
2513                          */
2514                         entry->wired_count = 0;
2515                 } else {
2516                         if (!user_wire ||
2517                             (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
2518                                 entry->wired_count--;
2519                         if (entry->wired_count == 0) {
2520                                 /*
2521                                  * Retain the map lock.
2522                                  */
2523                                 vm_fault_unwire(map, entry->start, entry->end,
2524                                     entry->object.vm_object != NULL &&
2525                                     (entry->object.vm_object->type == OBJT_DEVICE ||
2526                                     entry->object.vm_object->type == OBJT_SG));
2527                         }
2528                 }
2529         next_entry_done:
2530                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
2531                         ("vm_map_wire: in-transition flag missing"));
2532                 entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION|MAP_ENTRY_WIRE_SKIPPED);
2533                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
2534                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
2535                         need_wakeup = TRUE;
2536                 }
2537                 vm_map_simplify_entry(map, entry);
2538                 entry = entry->next;
2539         }
2540         vm_map_unlock(map);
2541         if (need_wakeup)
2542                 vm_map_wakeup(map);
2543         return (rv);
2544 }
2545
2546 /*
2547  * vm_map_sync
2548  *
2549  * Push any dirty cached pages in the address range to their pager.
2550  * If syncio is TRUE, dirty pages are written synchronously.
2551  * If invalidate is TRUE, any cached pages are freed as well.
2552  *
2553  * If the size of the region from start to end is zero, we are
2554  * supposed to flush all modified pages within the region containing
2555  * start.  Unfortunately, a region can be split or coalesced with
2556  * neighboring regions, making it difficult to determine what the
2557  * original region was.  Therefore, we approximate this requirement by
2558  * flushing the current region containing start.
2559  *
2560  * Returns an error if any part of the specified range is not mapped.
2561  */
2562 int
2563 vm_map_sync(
2564         vm_map_t map,
2565         vm_offset_t start,
2566         vm_offset_t end,
2567         boolean_t syncio,
2568         boolean_t invalidate)
2569 {
2570         vm_map_entry_t current;
2571         vm_map_entry_t entry;
2572         vm_size_t size;
2573         vm_object_t object;
2574         vm_ooffset_t offset;
2575         unsigned int last_timestamp;
2576
2577         vm_map_lock_read(map);
2578         VM_MAP_RANGE_CHECK(map, start, end);
2579         if (!vm_map_lookup_entry(map, start, &entry)) {
2580                 vm_map_unlock_read(map);
2581                 return (KERN_INVALID_ADDRESS);
2582         } else if (start == end) {
2583                 start = entry->start;
2584                 end = entry->end;
2585         }
2586         /*
2587          * Make a first pass to check for user-wired memory and holes.
2588          */
2589         for (current = entry; current != &map->header && current->start < end;
2590             current = current->next) {
2591                 if (invalidate && (current->eflags & MAP_ENTRY_USER_WIRED)) {
2592                         vm_map_unlock_read(map);
2593                         return (KERN_INVALID_ARGUMENT);
2594                 }
2595                 if (end > current->end &&
2596                     (current->next == &map->header ||
2597                         current->end != current->next->start)) {
2598                         vm_map_unlock_read(map);
2599                         return (KERN_INVALID_ADDRESS);
2600                 }
2601         }
2602
2603         if (invalidate)
2604                 pmap_remove(map->pmap, start, end);
2605
2606         /*
2607          * Make a second pass, cleaning/uncaching pages from the indicated
2608          * objects as we go.
2609          */
2610         for (current = entry; current != &map->header && current->start < end;) {
2611                 offset = current->offset + (start - current->start);
2612                 size = (end <= current->end ? end : current->end) - start;
2613                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
2614                         vm_map_t smap;
2615                         vm_map_entry_t tentry;
2616                         vm_size_t tsize;
2617
2618                         smap = current->object.sub_map;
2619                         vm_map_lock_read(smap);
2620                         (void) vm_map_lookup_entry(smap, offset, &tentry);
2621                         tsize = tentry->end - offset;
2622                         if (tsize < size)
2623                                 size = tsize;
2624                         object = tentry->object.vm_object;
2625                         offset = tentry->offset + (offset - tentry->start);
2626                         vm_map_unlock_read(smap);
2627                 } else {
2628                         object = current->object.vm_object;
2629                 }
2630                 vm_object_reference(object);
2631                 last_timestamp = map->timestamp;
2632                 vm_map_unlock_read(map);
2633                 vm_object_sync(object, offset, size, syncio, invalidate);
2634                 start += size;
2635                 vm_object_deallocate(object);
2636                 vm_map_lock_read(map);
2637                 if (last_timestamp == map->timestamp ||
2638                     !vm_map_lookup_entry(map, start, &current))
2639                         current = current->next;
2640         }
2641
2642         vm_map_unlock_read(map);
2643         return (KERN_SUCCESS);
2644 }
2645
2646 /*
2647  *      vm_map_entry_unwire:    [ internal use only ]
2648  *
2649  *      Make the region specified by this entry pageable.
2650  *
2651  *      The map in question should be locked.
2652  *      [This is the reason for this routine's existence.]
2653  */
2654 static void
2655 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
2656 {
2657         vm_fault_unwire(map, entry->start, entry->end,
2658             entry->object.vm_object != NULL &&
2659             (entry->object.vm_object->type == OBJT_DEVICE ||
2660             entry->object.vm_object->type == OBJT_SG));
2661         entry->wired_count = 0;
2662 }
2663
2664 static void
2665 vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map)
2666 {
2667
2668         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0)
2669                 vm_object_deallocate(entry->object.vm_object);
2670         uma_zfree(system_map ? kmapentzone : mapentzone, entry);
2671 }
2672
2673 /*
2674  *      vm_map_entry_delete:    [ internal use only ]
2675  *
2676  *      Deallocate the given entry from the target map.
2677  */
2678 static void
2679 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
2680 {
2681         vm_object_t object;
2682         vm_pindex_t offidxstart, offidxend, count, size1;
2683         vm_ooffset_t size;
2684
2685         vm_map_entry_unlink(map, entry);
2686         object = entry->object.vm_object;
2687         size = entry->end - entry->start;
2688         map->size -= size;
2689
2690         if (entry->uip != NULL) {
2691                 swap_release_by_uid(size, entry->uip);
2692                 uifree(entry->uip);
2693         }
2694
2695         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
2696             (object != NULL)) {
2697                 KASSERT(entry->uip == NULL || object->uip == NULL ||
2698                     (entry->eflags & MAP_ENTRY_NEEDS_COPY),
2699                     ("OVERCOMMIT vm_map_entry_delete: both uip %p", entry));
2700                 count = OFF_TO_IDX(size);
2701                 offidxstart = OFF_TO_IDX(entry->offset);
2702                 offidxend = offidxstart + count;
2703                 VM_OBJECT_LOCK(object);
2704                 if (object->ref_count != 1 &&
2705                     ((object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING ||
2706                     object == kernel_object || object == kmem_object)) {
2707                         vm_object_collapse(object);
2708                         vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2709                         if (object->type == OBJT_SWAP)
2710                                 swap_pager_freespace(object, offidxstart, count);
2711                         if (offidxend >= object->size &&
2712                             offidxstart < object->size) {
2713                                 size1 = object->size;
2714                                 object->size = offidxstart;
2715                                 if (object->uip != NULL) {
2716                                         size1 -= object->size;
2717                                         KASSERT(object->charge >= ptoa(size1),
2718                                             ("vm_map_entry_delete: object->charge < 0"));
2719                                         swap_release_by_uid(ptoa(size1), object->uip);
2720                                         object->charge -= ptoa(size1);
2721                                 }
2722                         }
2723                 }
2724                 VM_OBJECT_UNLOCK(object);
2725         } else
2726                 entry->object.vm_object = NULL;
2727         if (map->system_map)
2728                 vm_map_entry_deallocate(entry, TRUE);
2729         else {
2730                 entry->next = curthread->td_map_def_user;
2731                 curthread->td_map_def_user = entry;
2732         }
2733 }
2734
2735 /*
2736  *      vm_map_delete:  [ internal use only ]
2737  *
2738  *      Deallocates the given address range from the target
2739  *      map.
2740  */
2741 int
2742 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
2743 {
2744         vm_map_entry_t entry;
2745         vm_map_entry_t first_entry;
2746
2747         VM_MAP_ASSERT_LOCKED(map);
2748
2749         /*
2750          * Find the start of the region, and clip it
2751          */
2752         if (!vm_map_lookup_entry(map, start, &first_entry))
2753                 entry = first_entry->next;
2754         else {
2755                 entry = first_entry;
2756                 vm_map_clip_start(map, entry, start);
2757         }
2758
2759         /*
2760          * Step through all entries in this region
2761          */
2762         while ((entry != &map->header) && (entry->start < end)) {
2763                 vm_map_entry_t next;
2764
2765                 /*
2766                  * Wait for wiring or unwiring of an entry to complete.
2767                  * Also wait for any system wirings to disappear on
2768                  * user maps.
2769                  */
2770                 if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 ||
2771                     (vm_map_pmap(map) != kernel_pmap &&
2772                     vm_map_entry_system_wired_count(entry) != 0)) {
2773                         unsigned int last_timestamp;
2774                         vm_offset_t saved_start;
2775                         vm_map_entry_t tmp_entry;
2776
2777                         saved_start = entry->start;
2778                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2779                         last_timestamp = map->timestamp;
2780                         (void) vm_map_unlock_and_wait(map, 0);
2781                         vm_map_lock(map);
2782                         if (last_timestamp + 1 != map->timestamp) {
2783                                 /*
2784                                  * Look again for the entry because the map was
2785                                  * modified while it was unlocked.
2786                                  * Specifically, the entry may have been
2787                                  * clipped, merged, or deleted.
2788                                  */
2789                                 if (!vm_map_lookup_entry(map, saved_start,
2790                                                          &tmp_entry))
2791                                         entry = tmp_entry->next;
2792                                 else {
2793                                         entry = tmp_entry;
2794                                         vm_map_clip_start(map, entry,
2795                                                           saved_start);
2796                                 }
2797                         }
2798                         continue;
2799                 }
2800                 vm_map_clip_end(map, entry, end);
2801
2802                 next = entry->next;
2803
2804                 /*
2805                  * Unwire before removing addresses from the pmap; otherwise,
2806                  * unwiring will put the entries back in the pmap.
2807                  */
2808                 if (entry->wired_count != 0) {
2809                         vm_map_entry_unwire(map, entry);
2810                 }
2811
2812                 pmap_remove(map->pmap, entry->start, entry->end);
2813
2814                 /*
2815                  * Delete the entry only after removing all pmap
2816                  * entries pointing to its pages.  (Otherwise, its
2817                  * page frames may be reallocated, and any modify bits
2818                  * will be set in the wrong object!)
2819                  */
2820                 vm_map_entry_delete(map, entry);
2821                 entry = next;
2822         }
2823         return (KERN_SUCCESS);
2824 }
2825
2826 /*
2827  *      vm_map_remove:
2828  *
2829  *      Remove the given address range from the target map.
2830  *      This is the exported form of vm_map_delete.
2831  */
2832 int
2833 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2834 {
2835         int result;
2836
2837         vm_map_lock(map);
2838         VM_MAP_RANGE_CHECK(map, start, end);
2839         result = vm_map_delete(map, start, end);
2840         vm_map_unlock(map);
2841         return (result);
2842 }
2843
2844 /*
2845  *      vm_map_check_protection:
2846  *
2847  *      Assert that the target map allows the specified privilege on the
2848  *      entire address region given.  The entire region must be allocated.
2849  *
2850  *      WARNING!  This code does not and should not check whether the
2851  *      contents of the region is accessible.  For example a smaller file
2852  *      might be mapped into a larger address space.
2853  *
2854  *      NOTE!  This code is also called by munmap().
2855  *
2856  *      The map must be locked.  A read lock is sufficient.
2857  */
2858 boolean_t
2859 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2860                         vm_prot_t protection)
2861 {
2862         vm_map_entry_t entry;
2863         vm_map_entry_t tmp_entry;
2864
2865         if (!vm_map_lookup_entry(map, start, &tmp_entry))
2866                 return (FALSE);
2867         entry = tmp_entry;
2868
2869         while (start < end) {
2870                 if (entry == &map->header)
2871                         return (FALSE);
2872                 /*
2873                  * No holes allowed!
2874                  */
2875                 if (start < entry->start)
2876                         return (FALSE);
2877                 /*
2878                  * Check protection associated with entry.
2879                  */
2880                 if ((entry->protection & protection) != protection)
2881                         return (FALSE);
2882                 /* go to next entry */
2883                 start = entry->end;
2884                 entry = entry->next;
2885         }
2886         return (TRUE);
2887 }
2888
2889 /*
2890  *      vm_map_copy_entry:
2891  *
2892  *      Copies the contents of the source entry to the destination
2893  *      entry.  The entries *must* be aligned properly.
2894  */
2895 static void
2896 vm_map_copy_entry(
2897         vm_map_t src_map,
2898         vm_map_t dst_map,
2899         vm_map_entry_t src_entry,
2900         vm_map_entry_t dst_entry,
2901         vm_ooffset_t *fork_charge)
2902 {
2903         vm_object_t src_object;
2904         vm_offset_t size;
2905         struct uidinfo *uip;
2906         int charged;
2907
2908         VM_MAP_ASSERT_LOCKED(dst_map);
2909
2910         if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2911                 return;
2912
2913         if (src_entry->wired_count == 0) {
2914
2915                 /*
2916                  * If the source entry is marked needs_copy, it is already
2917                  * write-protected.
2918                  */
2919                 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2920                         pmap_protect(src_map->pmap,
2921                             src_entry->start,
2922                             src_entry->end,
2923                             src_entry->protection & ~VM_PROT_WRITE);
2924                 }
2925
2926                 /*
2927                  * Make a copy of the object.
2928                  */
2929                 size = src_entry->end - src_entry->start;
2930                 if ((src_object = src_entry->object.vm_object) != NULL) {
2931                         VM_OBJECT_LOCK(src_object);
2932                         charged = ENTRY_CHARGED(src_entry);
2933                         if ((src_object->handle == NULL) &&
2934                                 (src_object->type == OBJT_DEFAULT ||
2935                                  src_object->type == OBJT_SWAP)) {
2936                                 vm_object_collapse(src_object);
2937                                 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2938                                         vm_object_split(src_entry);
2939                                         src_object = src_entry->object.vm_object;
2940                                 }
2941                         }
2942                         vm_object_reference_locked(src_object);
2943                         vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2944                         if (src_entry->uip != NULL &&
2945                             !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
2946                                 KASSERT(src_object->uip == NULL,
2947                                     ("OVERCOMMIT: vm_map_copy_entry: uip %p",
2948                                      src_object));
2949                                 src_object->uip = src_entry->uip;
2950                                 src_object->charge = size;
2951                         }
2952                         VM_OBJECT_UNLOCK(src_object);
2953                         dst_entry->object.vm_object = src_object;
2954                         if (charged) {
2955                                 uip = curthread->td_ucred->cr_ruidinfo;
2956                                 uihold(uip);
2957                                 dst_entry->uip = uip;
2958                                 *fork_charge += size;
2959                                 if (!(src_entry->eflags &
2960                                       MAP_ENTRY_NEEDS_COPY)) {
2961                                         uihold(uip);
2962                                         src_entry->uip = uip;
2963                                         *fork_charge += size;
2964                                 }
2965                         }
2966                         src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2967                         dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2968                         dst_entry->offset = src_entry->offset;
2969                 } else {
2970                         dst_entry->object.vm_object = NULL;
2971                         dst_entry->offset = 0;
2972                         if (src_entry->uip != NULL) {
2973                                 dst_entry->uip = curthread->td_ucred->cr_ruidinfo;
2974                                 uihold(dst_entry->uip);
2975                                 *fork_charge += size;
2976                         }
2977                 }
2978
2979                 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2980                     dst_entry->end - dst_entry->start, src_entry->start);
2981         } else {
2982                 /*
2983                  * Of course, wired down pages can't be set copy-on-write.
2984                  * Cause wired pages to be copied into the new map by
2985                  * simulating faults (the new pages are pageable)
2986                  */
2987                 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry,
2988                     fork_charge);
2989         }
2990 }
2991
2992 /*
2993  * vmspace_map_entry_forked:
2994  * Update the newly-forked vmspace each time a map entry is inherited
2995  * or copied.  The values for vm_dsize and vm_tsize are approximate
2996  * (and mostly-obsolete ideas in the face of mmap(2) et al.)
2997  */
2998 static void
2999 vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2,
3000     vm_map_entry_t entry)
3001 {
3002         vm_size_t entrysize;
3003         vm_offset_t newend;
3004
3005         entrysize = entry->end - entry->start;
3006         vm2->vm_map.size += entrysize;
3007         if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
3008                 vm2->vm_ssize += btoc(entrysize);
3009         } else if (entry->start >= (vm_offset_t)vm1->vm_daddr &&
3010             entry->start < (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)) {
3011                 newend = MIN(entry->end,
3012                     (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize));
3013                 vm2->vm_dsize += btoc(newend - entry->start);
3014         } else if (entry->start >= (vm_offset_t)vm1->vm_taddr &&
3015             entry->start < (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)) {
3016                 newend = MIN(entry->end,
3017                     (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize));
3018                 vm2->vm_tsize += btoc(newend - entry->start);
3019         }
3020 }
3021
3022 /*
3023  * vmspace_fork:
3024  * Create a new process vmspace structure and vm_map
3025  * based on those of an existing process.  The new map
3026  * is based on the old map, according to the inheritance
3027  * values on the regions in that map.
3028  *
3029  * XXX It might be worth coalescing the entries added to the new vmspace.
3030  *
3031  * The source map must not be locked.
3032  */
3033 struct vmspace *
3034 vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
3035 {
3036         struct vmspace *vm2;
3037         vm_map_t old_map = &vm1->vm_map;
3038         vm_map_t new_map;
3039         vm_map_entry_t old_entry;
3040         vm_map_entry_t new_entry;
3041         vm_object_t object;
3042         int locked;
3043
3044         vm_map_lock(old_map);
3045         if (old_map->busy)
3046                 vm_map_wait_busy(old_map);
3047         vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
3048         if (vm2 == NULL)
3049                 goto unlock_and_return;
3050         vm2->vm_taddr = vm1->vm_taddr;
3051         vm2->vm_daddr = vm1->vm_daddr;
3052         vm2->vm_maxsaddr = vm1->vm_maxsaddr;
3053         new_map = &vm2->vm_map; /* XXX */
3054         locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
3055         KASSERT(locked, ("vmspace_fork: lock failed"));
3056         new_map->timestamp = 1;
3057
3058         old_entry = old_map->header.next;
3059
3060         while (old_entry != &old_map->header) {
3061                 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
3062                         panic("vm_map_fork: encountered a submap");
3063
3064                 switch (old_entry->inheritance) {
3065                 case VM_INHERIT_NONE:
3066                         break;
3067
3068                 case VM_INHERIT_SHARE:
3069                         /*
3070                          * Clone the entry, creating the shared object if necessary.
3071                          */
3072                         object = old_entry->object.vm_object;
3073                         if (object == NULL) {
3074                                 object = vm_object_allocate(OBJT_DEFAULT,
3075                                         atop(old_entry->end - old_entry->start));
3076                                 old_entry->object.vm_object = object;
3077                                 old_entry->offset = 0;
3078                                 if (old_entry->uip != NULL) {
3079                                         object->uip = old_entry->uip;
3080                                         object->charge = old_entry->end -
3081                                             old_entry->start;
3082                                         old_entry->uip = NULL;
3083                                 }
3084                         }
3085
3086                         /*
3087                          * Add the reference before calling vm_object_shadow
3088                          * to insure that a shadow object is created.
3089                          */
3090                         vm_object_reference(object);
3091                         if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
3092                                 vm_object_shadow(&old_entry->object.vm_object,
3093                                         &old_entry->offset,
3094                                         atop(old_entry->end - old_entry->start));
3095                                 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
3096                                 /* Transfer the second reference too. */
3097                                 vm_object_reference(
3098                                     old_entry->object.vm_object);
3099
3100                                 /*
3101                                  * As in vm_map_simplify_entry(), the
3102                                  * vnode lock will not be acquired in
3103                                  * this call to vm_object_deallocate().
3104                                  */
3105                                 vm_object_deallocate(object);
3106                                 object = old_entry->object.vm_object;
3107                         }
3108                         VM_OBJECT_LOCK(object);
3109                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
3110                         if (old_entry->uip != NULL) {
3111                                 KASSERT(object->uip == NULL, ("vmspace_fork both uip"));
3112                                 object->uip = old_entry->uip;
3113                                 object->charge = old_entry->end - old_entry->start;
3114                                 old_entry->uip = NULL;
3115                         }
3116                         VM_OBJECT_UNLOCK(object);
3117
3118                         /*
3119                          * Clone the entry, referencing the shared object.
3120                          */
3121                         new_entry = vm_map_entry_create(new_map);
3122                         *new_entry = *old_entry;
3123                         new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
3124                             MAP_ENTRY_IN_TRANSITION);
3125                         new_entry->wired_count = 0;
3126
3127                         /*
3128                          * Insert the entry into the new map -- we know we're
3129                          * inserting at the end of the new map.
3130                          */
3131                         vm_map_entry_link(new_map, new_map->header.prev,
3132                             new_entry);
3133                         vmspace_map_entry_forked(vm1, vm2, new_entry);
3134
3135                         /*
3136                          * Update the physical map
3137                          */
3138                         pmap_copy(new_map->pmap, old_map->pmap,
3139                             new_entry->start,
3140                             (old_entry->end - old_entry->start),
3141                             old_entry->start);
3142                         break;
3143
3144                 case VM_INHERIT_COPY:
3145                         /*
3146                          * Clone the entry and link into the map.
3147                          */
3148                         new_entry = vm_map_entry_create(new_map);
3149                         *new_entry = *old_entry;
3150                         new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
3151                             MAP_ENTRY_IN_TRANSITION);
3152                         new_entry->wired_count = 0;
3153                         new_entry->object.vm_object = NULL;
3154                         new_entry->uip = NULL;
3155                         vm_map_entry_link(new_map, new_map->header.prev,
3156                             new_entry);
3157                         vmspace_map_entry_forked(vm1, vm2, new_entry);
3158                         vm_map_copy_entry(old_map, new_map, old_entry,
3159                             new_entry, fork_charge);
3160                         break;
3161                 }
3162                 old_entry = old_entry->next;
3163         }
3164 unlock_and_return:
3165         vm_map_unlock(old_map);
3166         if (vm2 != NULL)
3167                 vm_map_unlock(new_map);
3168
3169         return (vm2);
3170 }
3171
3172 int
3173 vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
3174     vm_prot_t prot, vm_prot_t max, int cow)
3175 {
3176         vm_map_entry_t new_entry, prev_entry;
3177         vm_offset_t bot, top;
3178         vm_size_t init_ssize;
3179         int orient, rv;
3180         rlim_t vmemlim;
3181
3182         /*
3183          * The stack orientation is piggybacked with the cow argument.
3184          * Extract it into orient and mask the cow argument so that we
3185          * don't pass it around further.
3186          * NOTE: We explicitly allow bi-directional stacks.
3187          */
3188         orient = cow & (MAP_STACK_GROWS_DOWN|MAP_STACK_GROWS_UP);
3189         cow &= ~orient;
3190         KASSERT(orient != 0, ("No stack grow direction"));
3191
3192         if (addrbos < vm_map_min(map) ||
3193             addrbos > vm_map_max(map) ||
3194             addrbos + max_ssize < addrbos)
3195                 return (KERN_NO_SPACE);
3196
3197         init_ssize = (max_ssize < sgrowsiz) ? max_ssize : sgrowsiz;
3198
3199         PROC_LOCK(curthread->td_proc);
3200         vmemlim = lim_cur(curthread->td_proc, RLIMIT_VMEM);
3201         PROC_UNLOCK(curthread->td_proc);
3202
3203         vm_map_lock(map);
3204
3205         /* If addr is already mapped, no go */
3206         if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
3207                 vm_map_unlock(map);
3208                 return (KERN_NO_SPACE);
3209         }
3210
3211         /* If we would blow our VMEM resource limit, no go */
3212         if (map->size + init_ssize > vmemlim) {
3213                 vm_map_unlock(map);
3214                 return (KERN_NO_SPACE);
3215         }
3216
3217         /*
3218          * If we can't accomodate max_ssize in the current mapping, no go.
3219          * However, we need to be aware that subsequent user mappings might
3220          * map into the space we have reserved for stack, and currently this
3221          * space is not protected.
3222          *
3223          * Hopefully we will at least detect this condition when we try to
3224          * grow the stack.
3225          */
3226         if ((prev_entry->next != &map->header) &&
3227             (prev_entry->next->start < addrbos + max_ssize)) {
3228                 vm_map_unlock(map);
3229                 return (KERN_NO_SPACE);
3230         }
3231
3232         /*
3233          * We initially map a stack of only init_ssize.  We will grow as
3234          * needed later.  Depending on the orientation of the stack (i.e.
3235          * the grow direction) we either map at the top of the range, the
3236          * bottom of the range or in the middle.
3237          *
3238          * Note: we would normally expect prot and max to be VM_PROT_ALL,
3239          * and cow to be 0.  Possibly we should eliminate these as input
3240          * parameters, and just pass these values here in the insert call.
3241          */
3242         if (orient == MAP_STACK_GROWS_DOWN)
3243                 bot = addrbos + max_ssize - init_ssize;
3244         else if (orient == MAP_STACK_GROWS_UP)
3245                 bot = addrbos;
3246         else
3247                 bot = round_page(addrbos + max_ssize/2 - init_ssize/2);
3248         top = bot + init_ssize;
3249         rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
3250
3251         /* Now set the avail_ssize amount. */
3252         if (rv == KERN_SUCCESS) {
3253                 if (prev_entry != &map->header)
3254                         vm_map_clip_end(map, prev_entry, bot);
3255                 new_entry = prev_entry->next;
3256                 if (new_entry->end != top || new_entry->start != bot)
3257                         panic("Bad entry start/end for new stack entry");
3258
3259                 new_entry->avail_ssize = max_ssize - init_ssize;
3260                 if (orient & MAP_STACK_GROWS_DOWN)
3261                         new_entry->eflags |= MAP_ENTRY_GROWS_DOWN;
3262                 if (orient & MAP_STACK_GROWS_UP)
3263                         new_entry->eflags |= MAP_ENTRY_GROWS_UP;
3264         }
3265
3266         vm_map_unlock(map);
3267         return (rv);
3268 }
3269
3270 static int stack_guard_page = 0;
3271 TUNABLE_INT("security.bsd.stack_guard_page", &stack_guard_page);
3272 SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RW,
3273     &stack_guard_page, 0,
3274     "Insert stack guard page ahead of the growable segments.");
3275
3276 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
3277  * desired address is already mapped, or if we successfully grow
3278  * the stack.  Also returns KERN_SUCCESS if addr is outside the
3279  * stack range (this is strange, but preserves compatibility with
3280  * the grow function in vm_machdep.c).
3281  */
3282 int
3283 vm_map_growstack(struct proc *p, vm_offset_t addr)
3284 {
3285         vm_map_entry_t next_entry, prev_entry;
3286         vm_map_entry_t new_entry, stack_entry;
3287         struct vmspace *vm = p->p_vmspace;
3288         vm_map_t map = &vm->vm_map;
3289         vm_offset_t end;
3290         size_t grow_amount, max_grow;
3291         rlim_t stacklim, vmemlim;
3292         int is_procstack, rv;
3293         struct uidinfo *uip;
3294
3295 Retry:
3296         PROC_LOCK(p);
3297         stacklim = lim_cur(p, RLIMIT_STACK);
3298         vmemlim = lim_cur(p, RLIMIT_VMEM);
3299         PROC_UNLOCK(p);
3300
3301         vm_map_lock_read(map);
3302
3303         /* If addr is already in the entry range, no need to grow.*/
3304         if (vm_map_lookup_entry(map, addr, &prev_entry)) {
3305                 vm_map_unlock_read(map);
3306                 return (KERN_SUCCESS);
3307         }
3308
3309         next_entry = prev_entry->next;
3310         if (!(prev_entry->eflags & MAP_ENTRY_GROWS_UP)) {
3311                 /*
3312                  * This entry does not grow upwards. Since the address lies
3313                  * beyond this entry, the next entry (if one exists) has to
3314                  * be a downward growable entry. The entry list header is
3315                  * never a growable entry, so it suffices to check the flags.
3316                  */
3317                 if (!(next_entry->eflags & MAP_ENTRY_GROWS_DOWN)) {
3318                         vm_map_unlock_read(map);
3319                         return (KERN_SUCCESS);
3320                 }
3321                 stack_entry = next_entry;
3322         } else {
3323                 /*
3324                  * This entry grows upward. If the next entry does not at
3325                  * least grow downwards, this is the entry we need to grow.
3326                  * otherwise we have two possible choices and we have to
3327                  * select one.
3328                  */
3329                 if (next_entry->eflags & MAP_ENTRY_GROWS_DOWN) {
3330                         /*
3331                          * We have two choices; grow the entry closest to
3332                          * the address to minimize the amount of growth.
3333                          */
3334                         if (addr - prev_entry->end <= next_entry->start - addr)
3335                                 stack_entry = prev_entry;
3336                         else
3337                                 stack_entry = next_entry;
3338                 } else
3339                         stack_entry = prev_entry;
3340         }
3341
3342         if (stack_entry == next_entry) {
3343                 KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_DOWN, ("foo"));
3344                 KASSERT(addr < stack_entry->start, ("foo"));
3345                 end = (prev_entry != &map->header) ? prev_entry->end :
3346                     stack_entry->start - stack_entry->avail_ssize;
3347                 grow_amount = roundup(stack_entry->start - addr, PAGE_SIZE);
3348                 max_grow = stack_entry->start - end;
3349         } else {
3350                 KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_UP, ("foo"));
3351                 KASSERT(addr >= stack_entry->end, ("foo"));
3352                 end = (next_entry != &map->header) ? next_entry->start :
3353                     stack_entry->end + stack_entry->avail_ssize;
3354                 grow_amount = roundup(addr + 1 - stack_entry->end, PAGE_SIZE);
3355                 max_grow = end - stack_entry->end;
3356         }
3357
3358         if (grow_amount > stack_entry->avail_ssize) {
3359                 vm_map_unlock_read(map);
3360                 return (KERN_NO_SPACE);
3361         }
3362
3363         /*
3364          * If there is no longer enough space between the entries nogo, and
3365          * adjust the available space.  Note: this  should only happen if the
3366          * user has mapped into the stack area after the stack was created,
3367          * and is probably an error.
3368          *
3369          * This also effectively destroys any guard page the user might have
3370          * intended by limiting the stack size.
3371          */
3372         if (grow_amount + (stack_guard_page ? PAGE_SIZE : 0) > max_grow) {
3373                 if (vm_map_lock_upgrade(map))
3374                         goto Retry;
3375
3376                 stack_entry->avail_ssize = max_grow;
3377
3378                 vm_map_unlock(map);
3379                 return (KERN_NO_SPACE);
3380         }
3381
3382         is_procstack = (addr >= (vm_offset_t)vm->vm_maxsaddr) ? 1 : 0;
3383
3384         /*
3385          * If this is the main process stack, see if we're over the stack
3386          * limit.
3387          */
3388         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
3389                 vm_map_unlock_read(map);
3390                 return (KERN_NO_SPACE);
3391         }
3392
3393         /* Round up the grow amount modulo SGROWSIZ */
3394         grow_amount = roundup (grow_amount, sgrowsiz);
3395         if (grow_amount > stack_entry->avail_ssize)
3396                 grow_amount = stack_entry->avail_ssize;
3397         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
3398                 grow_amount = trunc_page((vm_size_t)stacklim) -
3399                     ctob(vm->vm_ssize);
3400         }
3401
3402         /* If we would blow our VMEM resource limit, no go */
3403         if (map->size + grow_amount > vmemlim) {
3404                 vm_map_unlock_read(map);
3405                 return (KERN_NO_SPACE);
3406         }
3407
3408         if (vm_map_lock_upgrade(map))
3409                 goto Retry;
3410
3411         if (stack_entry == next_entry) {
3412                 /*
3413                  * Growing downward.
3414                  */
3415                 /* Get the preliminary new entry start value */
3416                 addr = stack_entry->start - grow_amount;
3417
3418                 /*
3419                  * If this puts us into the previous entry, cut back our
3420                  * growth to the available space. Also, see the note above.
3421                  */
3422                 if (addr < end) {
3423                         stack_entry->avail_ssize = max_grow;
3424                         addr = end;
3425                         if (stack_guard_page)
3426                                 addr += PAGE_SIZE;
3427                 }
3428
3429                 rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
3430                     p->p_sysent->sv_stackprot, VM_PROT_ALL, 0);
3431
3432                 /* Adjust the available stack space by the amount we grew. */
3433                 if (rv == KERN_SUCCESS) {
3434                         if (prev_entry != &map->header)
3435                                 vm_map_clip_end(map, prev_entry, addr);
3436                         new_entry = prev_entry->next;
3437                         KASSERT(new_entry == stack_entry->prev, ("foo"));
3438                         KASSERT(new_entry->end == stack_entry->start, ("foo"));
3439                         KASSERT(new_entry->start == addr, ("foo"));
3440                         grow_amount = new_entry->end - new_entry->start;
3441                         new_entry->avail_ssize = stack_entry->avail_ssize -
3442                             grow_amount;
3443                         stack_entry->eflags &= ~MAP_ENTRY_GROWS_DOWN;
3444                         new_entry->eflags |= MAP_ENTRY_GROWS_DOWN;
3445                 }
3446         } else {
3447                 /*
3448                  * Growing upward.
3449                  */
3450                 addr = stack_entry->end + grow_amount;
3451
3452                 /*
3453                  * If this puts us into the next entry, cut back our growth
3454                  * to the available space. Also, see the note above.
3455                  */
3456                 if (addr > end) {
3457                         stack_entry->avail_ssize = end - stack_entry->end;
3458                         addr = end;
3459                         if (stack_guard_page)
3460                                 addr -= PAGE_SIZE;
3461                 }
3462
3463                 grow_amount = addr - stack_entry->end;
3464                 uip = stack_entry->uip;
3465                 if (uip == NULL && stack_entry->object.vm_object != NULL)
3466                         uip = stack_entry->object.vm_object->uip;
3467                 if (uip != NULL && !swap_reserve_by_uid(grow_amount, uip))
3468                         rv = KERN_NO_SPACE;
3469                 /* Grow the underlying object if applicable. */
3470                 else if (stack_entry->object.vm_object == NULL ||
3471                          vm_object_coalesce(stack_entry->object.vm_object,
3472                          stack_entry->offset,
3473                          (vm_size_t)(stack_entry->end - stack_entry->start),
3474                          (vm_size_t)grow_amount, uip != NULL)) {
3475                         map->size += (addr - stack_entry->end);
3476                         /* Update the current entry. */
3477                         stack_entry->end = addr;
3478                         stack_entry->avail_ssize -= grow_amount;
3479                         vm_map_entry_resize_free(map, stack_entry);
3480                         rv = KERN_SUCCESS;
3481
3482                         if (next_entry != &map->header)
3483                                 vm_map_clip_start(map, next_entry, addr);
3484                 } else
3485                         rv = KERN_FAILURE;
3486         }
3487
3488         if (rv == KERN_SUCCESS && is_procstack)
3489                 vm->vm_ssize += btoc(grow_amount);
3490
3491         vm_map_unlock(map);
3492
3493         /*
3494          * Heed the MAP_WIREFUTURE flag if it was set for this process.
3495          */
3496         if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE)) {
3497                 vm_map_wire(map,
3498                     (stack_entry == next_entry) ? addr : addr - grow_amount,
3499                     (stack_entry == next_entry) ? stack_entry->start : addr,
3500                     (p->p_flag & P_SYSTEM)
3501                     ? VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES
3502                     : VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES);
3503         }
3504
3505         return (rv);
3506 }
3507
3508 /*
3509  * Unshare the specified VM space for exec.  If other processes are
3510  * mapped to it, then create a new one.  The new vmspace is null.
3511  */
3512 int
3513 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
3514 {
3515         struct vmspace *oldvmspace = p->p_vmspace;
3516         struct vmspace *newvmspace;
3517
3518         newvmspace = vmspace_alloc(minuser, maxuser);
3519         if (newvmspace == NULL)
3520                 return (ENOMEM);
3521         newvmspace->vm_swrss = oldvmspace->vm_swrss;
3522         /*
3523          * This code is written like this for prototype purposes.  The
3524          * goal is to avoid running down the vmspace here, but let the
3525          * other process's that are still using the vmspace to finally
3526          * run it down.  Even though there is little or no chance of blocking
3527          * here, it is a good idea to keep this form for future mods.
3528          */
3529         PROC_VMSPACE_LOCK(p);
3530         p->p_vmspace = newvmspace;
3531         PROC_VMSPACE_UNLOCK(p);
3532         if (p == curthread->td_proc)
3533                 pmap_activate(curthread);
3534         vmspace_free(oldvmspace);
3535         return (0);
3536 }
3537
3538 /*
3539  * Unshare the specified VM space for forcing COW.  This
3540  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
3541  */
3542 int
3543 vmspace_unshare(struct proc *p)
3544 {
3545         struct vmspace *oldvmspace = p->p_vmspace;
3546         struct vmspace *newvmspace;
3547         vm_ooffset_t fork_charge;
3548
3549         if (oldvmspace->vm_refcnt == 1)
3550                 return (0);
3551         fork_charge = 0;
3552         newvmspace = vmspace_fork(oldvmspace, &fork_charge);
3553         if (newvmspace == NULL)
3554                 return (ENOMEM);
3555         if (!swap_reserve_by_uid(fork_charge, p->p_ucred->cr_ruidinfo)) {
3556                 vmspace_free(newvmspace);
3557                 return (ENOMEM);
3558         }
3559         PROC_VMSPACE_LOCK(p);
3560         p->p_vmspace = newvmspace;
3561         PROC_VMSPACE_UNLOCK(p);
3562         if (p == curthread->td_proc)
3563                 pmap_activate(curthread);
3564         vmspace_free(oldvmspace);
3565         return (0);
3566 }
3567
3568 /*
3569  *      vm_map_lookup:
3570  *
3571  *      Finds the VM object, offset, and
3572  *      protection for a given virtual address in the
3573  *      specified map, assuming a page fault of the
3574  *      type specified.
3575  *
3576  *      Leaves the map in question locked for read; return
3577  *      values are guaranteed until a vm_map_lookup_done
3578  *      call is performed.  Note that the map argument
3579  *      is in/out; the returned map must be used in
3580  *      the call to vm_map_lookup_done.
3581  *
3582  *      A handle (out_entry) is returned for use in
3583  *      vm_map_lookup_done, to make that fast.
3584  *
3585  *      If a lookup is requested with "write protection"
3586  *      specified, the map may be changed to perform virtual
3587  *      copying operations, although the data referenced will
3588  *      remain the same.
3589  */
3590 int
3591 vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
3592               vm_offset_t vaddr,
3593               vm_prot_t fault_typea,
3594               vm_map_entry_t *out_entry,        /* OUT */
3595               vm_object_t *object,              /* OUT */
3596               vm_pindex_t *pindex,              /* OUT */
3597               vm_prot_t *out_prot,              /* OUT */
3598               boolean_t *wired)                 /* OUT */
3599 {
3600         vm_map_entry_t entry;
3601         vm_map_t map = *var_map;
3602         vm_prot_t prot;
3603         vm_prot_t fault_type = fault_typea;
3604         vm_object_t eobject;
3605         struct uidinfo *uip;
3606         vm_ooffset_t size;
3607
3608 RetryLookup:;
3609
3610         vm_map_lock_read(map);
3611
3612         /*
3613          * Lookup the faulting address.
3614          */
3615         if (!vm_map_lookup_entry(map, vaddr, out_entry)) {
3616                 vm_map_unlock_read(map);
3617                 return (KERN_INVALID_ADDRESS);
3618         }
3619
3620         entry = *out_entry;
3621
3622         /*
3623          * Handle submaps.
3624          */
3625         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3626                 vm_map_t old_map = map;
3627
3628                 *var_map = map = entry->object.sub_map;
3629                 vm_map_unlock_read(old_map);
3630                 goto RetryLookup;
3631         }
3632
3633         /*
3634          * Check whether this task is allowed to have this page.
3635          * Note the special case for MAP_ENTRY_COW
3636          * pages with an override.  This is to implement a forced
3637          * COW for debuggers.
3638          */
3639         if (fault_type & VM_PROT_OVERRIDE_WRITE)
3640                 prot = entry->max_protection;
3641         else
3642                 prot = entry->protection;
3643         fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
3644         if ((fault_type & prot) != fault_type) {
3645                 vm_map_unlock_read(map);
3646                 return (KERN_PROTECTION_FAILURE);
3647         }
3648         if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
3649             (entry->eflags & MAP_ENTRY_COW) &&
3650             (fault_type & VM_PROT_WRITE) &&
3651             (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
3652                 vm_map_unlock_read(map);
3653                 return (KERN_PROTECTION_FAILURE);
3654         }
3655
3656         /*
3657          * If this page is not pageable, we have to get it for all possible
3658          * accesses.
3659          */
3660         *wired = (entry->wired_count != 0);
3661         if (*wired)
3662                 prot = fault_type = entry->protection;
3663         size = entry->end - entry->start;
3664         /*
3665          * If the entry was copy-on-write, we either ...
3666          */
3667         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
3668                 /*
3669                  * If we want to write the page, we may as well handle that
3670                  * now since we've got the map locked.
3671                  *
3672                  * If we don't need to write the page, we just demote the
3673                  * permissions allowed.
3674                  */
3675                 if (fault_type & VM_PROT_WRITE) {
3676                         /*
3677                          * Make a new object, and place it in the object
3678                          * chain.  Note that no new references have appeared
3679                          * -- one just moved from the map to the new
3680                          * object.
3681                          */
3682                         if (vm_map_lock_upgrade(map))
3683                                 goto RetryLookup;
3684
3685                         if (entry->uip == NULL) {
3686                                 /*
3687                                  * The debugger owner is charged for
3688                                  * the memory.
3689                                  */
3690                                 uip = curthread->td_ucred->cr_ruidinfo;
3691                                 uihold(uip);
3692                                 if (!swap_reserve_by_uid(size, uip)) {
3693                                         uifree(uip);
3694                                         vm_map_unlock(map);
3695                                         return (KERN_RESOURCE_SHORTAGE);
3696                                 }
3697                                 entry->uip = uip;
3698                         }
3699                         vm_object_shadow(
3700                             &entry->object.vm_object,
3701                             &entry->offset,
3702                             atop(size));
3703                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
3704                         eobject = entry->object.vm_object;
3705                         if (eobject->uip != NULL) {
3706                                 /*
3707                                  * The object was not shadowed.
3708                                  */
3709                                 swap_release_by_uid(size, entry->uip);
3710                                 uifree(entry->uip);
3711                                 entry->uip = NULL;
3712                         } else if (entry->uip != NULL) {
3713                                 VM_OBJECT_LOCK(eobject);
3714                                 eobject->uip = entry->uip;
3715                                 eobject->charge = size;
3716                                 VM_OBJECT_UNLOCK(eobject);
3717                                 entry->uip = NULL;
3718                         }
3719
3720                         vm_map_lock_downgrade(map);
3721                 } else {
3722                         /*
3723                          * We're attempting to read a copy-on-write page --
3724                          * don't allow writes.
3725                          */
3726                         prot &= ~VM_PROT_WRITE;
3727                 }
3728         }
3729
3730         /*
3731          * Create an object if necessary.
3732          */
3733         if (entry->object.vm_object == NULL &&
3734             !map->system_map) {
3735                 if (vm_map_lock_upgrade(map))
3736                         goto RetryLookup;
3737                 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
3738                     atop(size));
3739                 entry->offset = 0;
3740                 if (entry->uip != NULL) {
3741                         VM_OBJECT_LOCK(entry->object.vm_object);
3742                         entry->object.vm_object->uip = entry->uip;
3743                         entry->object.vm_object->charge = size;
3744                         VM_OBJECT_UNLOCK(entry->object.vm_object);
3745                         entry->uip = NULL;
3746                 }
3747                 vm_map_lock_downgrade(map);
3748         }
3749
3750         /*
3751          * Return the object/offset from this entry.  If the entry was
3752          * copy-on-write or empty, it has been fixed up.
3753          */
3754         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
3755         *object = entry->object.vm_object;
3756
3757         *out_prot = prot;
3758         return (KERN_SUCCESS);
3759 }
3760
3761 /*
3762  *      vm_map_lookup_locked:
3763  *
3764  *      Lookup the faulting address.  A version of vm_map_lookup that returns
3765  *      KERN_FAILURE instead of blocking on map lock or memory allocation.
3766  */
3767 int
3768 vm_map_lookup_locked(vm_map_t *var_map,         /* IN/OUT */
3769                      vm_offset_t vaddr,
3770                      vm_prot_t fault_typea,
3771                      vm_map_entry_t *out_entry, /* OUT */
3772                      vm_object_t *object,       /* OUT */
3773                      vm_pindex_t *pindex,       /* OUT */
3774                      vm_prot_t *out_prot,       /* OUT */
3775                      boolean_t *wired)          /* OUT */
3776 {
3777         vm_map_entry_t entry;
3778         vm_map_t map = *var_map;
3779         vm_prot_t prot;
3780         vm_prot_t fault_type = fault_typea;
3781
3782         /*
3783          * Lookup the faulting address.
3784          */
3785         if (!vm_map_lookup_entry(map, vaddr, out_entry))
3786                 return (KERN_INVALID_ADDRESS);
3787
3788         entry = *out_entry;
3789
3790         /*
3791          * Fail if the entry refers to a submap.
3792          */
3793         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
3794                 return (KERN_FAILURE);
3795
3796         /*
3797          * Check whether this task is allowed to have this page.
3798          * Note the special case for MAP_ENTRY_COW
3799          * pages with an override.  This is to implement a forced
3800          * COW for debuggers.
3801          */
3802         if (fault_type & VM_PROT_OVERRIDE_WRITE)
3803                 prot = entry->max_protection;
3804         else
3805                 prot = entry->protection;
3806         fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
3807         if ((fault_type & prot) != fault_type)
3808                 return (KERN_PROTECTION_FAILURE);
3809         if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
3810             (entry->eflags & MAP_ENTRY_COW) &&
3811             (fault_type & VM_PROT_WRITE) &&
3812             (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0)
3813                 return (KERN_PROTECTION_FAILURE);
3814
3815         /*
3816          * If this page is not pageable, we have to get it for all possible
3817          * accesses.
3818          */
3819         *wired = (entry->wired_count != 0);
3820         if (*wired)
3821                 prot = fault_type = entry->protection;
3822
3823         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
3824                 /*
3825                  * Fail if the entry was copy-on-write for a write fault.
3826                  */
3827                 if (fault_type & VM_PROT_WRITE)
3828                         return (KERN_FAILURE);
3829                 /*
3830                  * We're attempting to read a copy-on-write page --
3831                  * don't allow writes.
3832                  */
3833                 prot &= ~VM_PROT_WRITE;
3834         }
3835
3836         /*
3837          * Fail if an object should be created.
3838          */
3839         if (entry->object.vm_object == NULL && !map->system_map)
3840                 return (KERN_FAILURE);
3841
3842         /*
3843          * Return the object/offset from this entry.  If the entry was
3844          * copy-on-write or empty, it has been fixed up.
3845          */
3846         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
3847         *object = entry->object.vm_object;
3848
3849         *out_prot = prot;
3850         return (KERN_SUCCESS);
3851 }
3852
3853 /*
3854  *      vm_map_lookup_done:
3855  *
3856  *      Releases locks acquired by a vm_map_lookup
3857  *      (according to the handle returned by that lookup).
3858  */
3859 void
3860 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
3861 {
3862         /*
3863          * Unlock the main-level map
3864          */
3865         vm_map_unlock_read(map);
3866 }
3867
3868 #include "opt_ddb.h"
3869 #ifdef DDB
3870 #include <sys/kernel.h>
3871
3872 #include <ddb/ddb.h>
3873
3874 /*
3875  *      vm_map_print:   [ debug ]
3876  */
3877 DB_SHOW_COMMAND(map, vm_map_print)
3878 {
3879         static int nlines;
3880         /* XXX convert args. */
3881         vm_map_t map = (vm_map_t)addr;
3882         boolean_t full = have_addr;
3883
3884         vm_map_entry_t entry;
3885
3886         db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3887             (void *)map,
3888             (void *)map->pmap, map->nentries, map->timestamp);
3889         nlines++;
3890
3891         if (!full && db_indent)
3892                 return;
3893
3894         db_indent += 2;
3895         for (entry = map->header.next; entry != &map->header;
3896             entry = entry->next) {
3897                 db_iprintf("map entry %p: start=%p, end=%p\n",
3898                     (void *)entry, (void *)entry->start, (void *)entry->end);
3899                 nlines++;
3900                 {
3901                         static char *inheritance_name[4] =
3902                         {"share", "copy", "none", "donate_copy"};
3903
3904                         db_iprintf(" prot=%x/%x/%s",
3905                             entry->protection,
3906                             entry->max_protection,
3907                             inheritance_name[(int)(unsigned char)entry->inheritance]);
3908                         if (entry->wired_count != 0)
3909                                 db_printf(", wired");
3910                 }
3911                 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3912                         db_printf(", share=%p, offset=0x%jx\n",
3913                             (void *)entry->object.sub_map,
3914                             (uintmax_t)entry->offset);
3915                         nlines++;
3916                         if ((entry->prev == &map->header) ||
3917                             (entry->prev->object.sub_map !=
3918                                 entry->object.sub_map)) {
3919                                 db_indent += 2;
3920                                 vm_map_print((db_expr_t)(intptr_t)
3921                                              entry->object.sub_map,
3922                                              full, 0, (char *)0);
3923                                 db_indent -= 2;
3924                         }
3925                 } else {
3926                         if (entry->uip != NULL)
3927                                 db_printf(", uip %d", entry->uip->ui_uid);
3928                         db_printf(", object=%p, offset=0x%jx",
3929                             (void *)entry->object.vm_object,
3930                             (uintmax_t)entry->offset);
3931                         if (entry->object.vm_object && entry->object.vm_object->uip)
3932                                 db_printf(", obj uip %d charge %jx",
3933                                     entry->object.vm_object->uip->ui_uid,
3934                                     (uintmax_t)entry->object.vm_object->charge);
3935                         if (entry->eflags & MAP_ENTRY_COW)
3936                                 db_printf(", copy (%s)",
3937                                     (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3938                         db_printf("\n");
3939                         nlines++;
3940
3941                         if ((entry->prev == &map->header) ||
3942                             (entry->prev->object.vm_object !=
3943                                 entry->object.vm_object)) {
3944                                 db_indent += 2;
3945                                 vm_object_print((db_expr_t)(intptr_t)
3946                                                 entry->object.vm_object,
3947                                                 full, 0, (char *)0);
3948                                 nlines += 4;
3949                                 db_indent -= 2;
3950                         }
3951                 }
3952         }
3953         db_indent -= 2;
3954         if (db_indent == 0)
3955                 nlines = 0;
3956 }
3957
3958
3959 DB_SHOW_COMMAND(procvm, procvm)
3960 {
3961         struct proc *p;
3962
3963         if (have_addr) {
3964                 p = (struct proc *) addr;
3965         } else {
3966                 p = curproc;
3967         }
3968
3969         db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3970             (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3971             (void *)vmspace_pmap(p->p_vmspace));
3972
3973         vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3974 }
3975
3976 #endif /* DDB */