1 /*
2  * Copyright (c) 1991, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      from: @(#)vm_map.c      8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66
67 /*
68  *      Virtual memory mapping module.
69  */
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/ktr.h>
74 #include <sys/lock.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/vmmeter.h>
78 #include <sys/mman.h>
79 #include <sys/vnode.h>
80 #include <sys/resourcevar.h>
81
82 #include <vm/vm.h>
83 #include <vm/vm_param.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_pager.h>
89 #include <vm/vm_kern.h>
90 #include <vm/vm_extern.h>
91 #include <vm/swap_pager.h>
92 #include <vm/uma.h>
93
94 /*
95  *      Virtual memory maps provide for the mapping, protection,
96  *      and sharing of virtual memory objects.  In addition,
97  *      this module provides for an efficient virtual copy of
98  *      memory from one map to another.
99  *
100  *      Synchronization is required prior to most operations.
101  *
102  *      Maps consist of an ordered doubly-linked list of simple
103  *      entries; a single hint is used to speed up lookups.
104  *
105  *      Since portions of maps are specified by start/end addresses,
106  *      which may not align with existing map entries, all
107  *      routines merely "clip" entries to these start/end values.
108  *      [That is, an entry is split into two, bordering at a
109  *      start or end value.]  Note that these clippings may not
110  *      always be necessary (as the two resulting entries are then
111  *      not changed); however, the clipping is done for convenience.
112  *
113  *      As mentioned above, virtual copy operations are performed
114  *      by copying VM object references from one map to
115  *      another, and then marking both regions as copy-on-write.
116  */
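
/*
 *      For example, if a map holds a single entry covering [0x2000, 0x6000)
 *      and an operation targets the range [0x3000, 0x5000), clipping splits
 *      that entry into [0x2000, 0x3000), [0x3000, 0x5000), and
 *      [0x5000, 0x6000); the operation then modifies only the middle entry,
 *      and the two outer entries are left untouched.
 */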
117
118 /*
119  *      vm_map_startup:
120  *
121  *      Initialize the vm_map module.  Must be called before
122  *      any other vm_map routines.
123  *
124  *      Map and entry structures are allocated from the general
125  *      purpose memory pool with some exceptions:
126  *
127  *      - The kernel map and kmem submap are allocated statically.
128  *      - Kernel map entries are allocated out of a static pool.
129  *
130  *      These restrictions are necessary since malloc() uses the
131  *      maps and requires map entries.
132  */
133
134 static uma_zone_t mapentzone;
135 static uma_zone_t kmapentzone;
136 static uma_zone_t mapzone;
137 static uma_zone_t vmspace_zone;
138 static struct vm_object kmapentobj;
139 static void vmspace_zinit(void *mem, int size);
140 static void vmspace_zfini(void *mem, int size);
141 static void vm_map_zinit(void *mem, int size);
142 static void vm_map_zfini(void *mem, int size);
143 static void _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max);
144
145 #ifdef INVARIANTS
146 static void vm_map_zdtor(void *mem, int size, void *arg);
147 static void vmspace_zdtor(void *mem, int size, void *arg);
148 #endif
149
150 void
151 vm_map_startup(void)
152 {
153         mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
154 #ifdef INVARIANTS
155             vm_map_zdtor,
156 #else
157             NULL,
158 #endif
159             vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
160         uma_prealloc(mapzone, MAX_KMAP);
161         kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry), 
162             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
163             UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
164         uma_prealloc(kmapentzone, MAX_KMAPENT);
165         mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry), 
166             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
167         uma_prealloc(mapentzone, MAX_MAPENT);
168 }
169
170 static void
171 vmspace_zfini(void *mem, int size)
172 {
173         struct vmspace *vm;
174
175         vm = (struct vmspace *)mem;
176
177         vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map));
178 }
179
180 static void
181 vmspace_zinit(void *mem, int size)
182 {
183         struct vmspace *vm;
184
185         vm = (struct vmspace *)mem;
186
187         vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map));
188 }
189
190 static void
191 vm_map_zfini(void *mem, int size)
192 {
193         vm_map_t map;
194
195         map = (vm_map_t)mem;
196
197         lockdestroy(&map->lock);
198 }
199
200 static void
201 vm_map_zinit(void *mem, int size)
202 {
203         vm_map_t map;
204
205         map = (vm_map_t)mem;
206         map->nentries = 0;
207         map->size = 0;
208         map->infork = 0;
209         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
210 }
211
212 #ifdef INVARIANTS
213 static void
214 vmspace_zdtor(void *mem, int size, void *arg)
215 {
216         struct vmspace *vm;
217
218         vm = (struct vmspace *)mem;
219
220         vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
221 }
222 static void
223 vm_map_zdtor(void *mem, int size, void *arg)
224 {
225         vm_map_t map;
226
227         map = (vm_map_t)mem;
228         KASSERT(map->nentries == 0,
229             ("map %p nentries == %d on free.", 
230             map, map->nentries));
231         KASSERT(map->size == 0,
232             ("map %p size == %lu on free.",
233             map, (unsigned long)map->size));
234         KASSERT(map->infork == 0,
235             ("map %p infork == %d on free.",
236             map, map->infork));
237 }
238 #endif  /* INVARIANTS */
239
240 /*
241  * Allocate a vmspace structure, including a vm_map and pmap,
242  * and initialize those structures.  The refcnt is set to 1.
243  * The remaining fields must be initialized by the caller.
244  */
245 struct vmspace *
246 vmspace_alloc(min, max)
247         vm_offset_t min, max;
248 {
249         struct vmspace *vm;
250
251         GIANT_REQUIRED;
252         vm = uma_zalloc(vmspace_zone, M_WAITOK);
253         CTR1(KTR_VM, "vmspace_alloc: %p", vm);
254         _vm_map_init(&vm->vm_map, min, max);
255         pmap_pinit(vmspace_pmap(vm));
256         vm->vm_map.pmap = vmspace_pmap(vm);             /* XXX */
257         vm->vm_refcnt = 1;
258         vm->vm_shm = NULL;
259         vm->vm_freer = NULL;
260         return (vm);
261 }
262
263 void
264 vm_init2(void) 
265 {
266         uma_zone_set_obj(kmapentzone, &kmapentobj, lmin(cnt.v_page_count,
267             (VM_MAX_KERNEL_ADDRESS - KERNBASE) / PAGE_SIZE) / 8);
268         vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
269 #ifdef INVARIANTS
270             vmspace_zdtor,
271 #else
272             NULL,
273 #endif
274             vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
275         pmap_init2();
276         vm_object_init2();
277 }
278
279 static __inline void
280 vmspace_dofree(struct vmspace *vm)
281 {
282         CTR1(KTR_VM, "vmspace_free: %p", vm);
283         /*
284          * Lock the map, to wait out all other references to it.
285          * Delete all of the mappings and pages they hold, then call
286          * the pmap module to reclaim anything left.
287          */
288         vm_map_lock(&vm->vm_map);
289         (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
290             vm->vm_map.max_offset);
291         vm_map_unlock(&vm->vm_map);
292
293         pmap_release(vmspace_pmap(vm));
294         uma_zfree(vmspace_zone, vm);
295 }
296
297 void
298 vmspace_free(struct vmspace *vm)
299 {
300         GIANT_REQUIRED;
301
302         if (vm->vm_refcnt == 0)
303                 panic("vmspace_free: attempt to free already freed vmspace");
304
305         if (--vm->vm_refcnt == 0)
306                 vmspace_dofree(vm);
307 }
308
309 void
310 vmspace_exitfree(struct proc *p)
311 {
312         struct vmspace *vm;
313
314         GIANT_REQUIRED;
315         if (p == p->p_vmspace->vm_freer) {
316                 vm = p->p_vmspace;
317                 p->p_vmspace = NULL;
318                 vmspace_dofree(vm);
319         }
320 }
321
322 /*
323  * vmspace_swap_count() - count the approximate swap usage in pages for a
324  *                        vmspace.
325  *
326  *      Swap usage is determined by taking the proportional swap used by
327  *      VM objects backing the VM map.  To make up for fractional losses,
328  *      if the VM object has any swap use at all, the associated map entries
329  *      count for at least 1 swap page.
330  */
331 int
332 vmspace_swap_count(struct vmspace *vmspace)
333 {
334         vm_map_t map = &vmspace->vm_map;
335         vm_map_entry_t cur;
336         int count = 0;
337
338         vm_map_lock_read(map);
339         for (cur = map->header.next; cur != &map->header; cur = cur->next) {
340                 vm_object_t object;
341
342                 if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
343                     (object = cur->object.vm_object) != NULL &&
344                     object->type == OBJT_SWAP
345                 ) {
346                         int n = (cur->end - cur->start) / PAGE_SIZE;
347
348                         if (object->un_pager.swp.swp_bcount) {
349                                 count += object->un_pager.swp.swp_bcount *
350                                     SWAP_META_PAGES * n / object->size + 1;
351                         }
352                 }
353         }
354         vm_map_unlock_read(map);
355         return (count);
356 }
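
/*
 *      A worked example of the estimate above (a sketch only; SWAP_META_PAGES
 *      is assumed to be 16, its usual value): a 1MB entry with 4KB pages
 *      gives n = 256.  If the backing object is 1024 pages long and has
 *      swp_bcount = 4, the entry contributes
 *      4 * 16 * 256 / 1024 + 1 = 17 pages to the count.
 */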
357
358 void
359 _vm_map_lock(vm_map_t map, const char *file, int line)
360 {
361         int error;
362
363         if (map->system_map)
364                 GIANT_REQUIRED;
365         error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
366         KASSERT(error == 0, ("%s: failed to get lock", __func__));
367         map->timestamp++;
368 }
369
370 void
371 _vm_map_unlock(vm_map_t map, const char *file, int line)
372 {
373
374         lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
375 }
376
377 void
378 _vm_map_lock_read(vm_map_t map, const char *file, int line)
379 {
380         int error;
381
382         if (map->system_map)
383                 GIANT_REQUIRED;
384         error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
385         KASSERT(error == 0, ("%s: failed to get lock", __func__));
386 }
387
388 void
389 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
390 {
391
392         lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
393 }
394
395 int
396 _vm_map_trylock(vm_map_t map, const char *file, int line)
397 {
398         int error;
399
400         if (map->system_map)
401                 GIANT_REQUIRED;
402         error = lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, NULL, curthread);
403         return (error == 0);
404 }
405
406 int
407 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
408 {
409
410         KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
411                 ("%s: lock not held", __func__));
412         map->timestamp++;
413         return (0);
414 }
415
416 void
417 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
418 {
419
420         KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
421                 ("%s: lock not held", __func__));
422 }
423
424 /*
425  *      vm_map_unlock_and_wait:
426  */
427 int
428 vm_map_unlock_and_wait(vm_map_t map, boolean_t user_wait)
429 {
430         int retval;
431
432         mtx_lock(&Giant);
433         vm_map_unlock(map);
434         retval = tsleep(&map->root, PVM, "vmmapw", 0);
435         mtx_unlock(&Giant);
436         return (retval);
437 }
438
439 /*
440  *      vm_map_wakeup:
441  */
442 void
443 vm_map_wakeup(vm_map_t map)
444 {
445
446         /*
447          * Acquire and release Giant to prevent a wakeup() from being
448          * performed (and lost) between the vm_map_unlock() and the
449          * tsleep() in vm_map_unlock_and_wait().
450          */
451         mtx_lock(&Giant);
452         mtx_unlock(&Giant);
453         wakeup(&map->root);
454 }
455
456 long
457 vmspace_resident_count(struct vmspace *vmspace)
458 {
459         return pmap_resident_count(vmspace_pmap(vmspace));
460 }
461
462 /*
463  *      vm_map_create:
464  *
465  *      Creates and returns a new empty VM map with
466  *      the given physical map structure, and having
467  *      the given lower and upper address bounds.
468  */
469 vm_map_t
470 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
471 {
472         vm_map_t result;
473
474         result = uma_zalloc(mapzone, M_WAITOK);
475         CTR1(KTR_VM, "vm_map_create: %p", result);
476         _vm_map_init(result, min, max);
477         result->pmap = pmap;
478         return (result);
479 }
480
481 /*
482  * Initialize an existing vm_map structure
483  * such as that in the vmspace structure.
484  * The pmap is set elsewhere.
485  */
486 static void
487 _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
488 {
489
490         map->header.next = map->header.prev = &map->header;
491         map->needs_wakeup = FALSE;
492         map->system_map = 0;
493         map->min_offset = min;
494         map->max_offset = max;
495         map->first_free = &map->header;
496         map->root = NULL;
497         map->timestamp = 0;
498 }
499
500 void
501 vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
502 {
503         _vm_map_init(map, min, max);
504         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
505 }
506
507 /*
508  *      vm_map_entry_dispose:   [ internal use only ]
509  *
510  *      Inverse of vm_map_entry_create.
511  */
512 static void
513 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
514 {
515         uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
516 }
517
518 /*
519  *      vm_map_entry_create:    [ internal use only ]
520  *
521  *      Allocates a VM map entry for insertion.
522  *      No entry fields are filled in.
523  */
524 static vm_map_entry_t
525 vm_map_entry_create(vm_map_t map)
526 {
527         vm_map_entry_t new_entry;
528
529         if (map->system_map)
530                 new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
531         else
532                 new_entry = uma_zalloc(mapentzone, M_WAITOK);
533         if (new_entry == NULL)
534                 panic("vm_map_entry_create: kernel resources exhausted");
535         return (new_entry);
536 }
537
538 /*
539  *      vm_map_entry_set_behavior:
540  *
541  *      Set the expected access behavior, either normal, random, or
542  *      sequential.
543  */
544 static __inline void
545 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
546 {
547         entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
548             (behavior & MAP_ENTRY_BEHAV_MASK);
549 }
550
551 /*
552  *      vm_map_entry_splay:
553  *
554  *      Implements Sleator and Tarjan's top-down splay algorithm.  Returns
555  *      the vm_map_entry containing the given address.  If, however, that
556  *      address is not found in the vm_map, returns a vm_map_entry that is
557  *      adjacent to the address, coming before or after it.
558  */
559 static vm_map_entry_t
560 vm_map_entry_splay(vm_offset_t address, vm_map_entry_t root)
561 {
562         struct vm_map_entry dummy;
563         vm_map_entry_t lefttreemax, righttreemin, y;
564
565         if (root == NULL)
566                 return (root);
567         lefttreemax = righttreemin = &dummy;
568         for (;; root = y) {
569                 if (address < root->start) {
570                         if ((y = root->left) == NULL)
571                                 break;
572                         if (address < y->start) {
573                                 /* Rotate right. */
574                                 root->left = y->right;
575                                 y->right = root;
576                                 root = y;
577                                 if ((y = root->left) == NULL)
578                                         break;
579                         }
580                         /* Link into the new root's right tree. */
581                         righttreemin->left = root;
582                         righttreemin = root;
583                 } else if (address >= root->end) {
584                         if ((y = root->right) == NULL)
585                                 break;
586                         if (address >= y->end) {
587                                 /* Rotate left. */
588                                 root->right = y->left;
589                                 y->left = root;
590                                 root = y;
591                                 if ((y = root->right) == NULL)
592                                         break;
593                         }
594                         /* Link into the new root's left tree. */
595                         lefttreemax->right = root;
596                         lefttreemax = root;
597                 } else
598                         break;
599         }
600         /* Assemble the new root. */
601         lefttreemax->right = root->left;
602         righttreemin->left = root->right;
603         root->left = dummy.right;
604         root->right = dummy.left;
605         return (root);
606 }
607
608 /*
609  *      vm_map_entry_{un,}link:
610  *
611  *      Insert/remove entries from maps.
612  */
613 static void
614 vm_map_entry_link(vm_map_t map,
615                   vm_map_entry_t after_where,
616                   vm_map_entry_t entry)
617 {
618
619         CTR4(KTR_VM,
620             "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
621             map->nentries, entry, after_where);
622         map->nentries++;
623         entry->prev = after_where;
624         entry->next = after_where->next;
625         entry->next->prev = entry;
626         after_where->next = entry;
627
628         if (after_where != &map->header) {
629                 if (after_where != map->root)
630                         vm_map_entry_splay(after_where->start, map->root);
631                 entry->right = after_where->right;
632                 entry->left = after_where;
633                 after_where->right = NULL;
634         } else {
635                 entry->right = map->root;
636                 entry->left = NULL;
637         }
638         map->root = entry;
639 }
640
641 static void
642 vm_map_entry_unlink(vm_map_t map,
643                     vm_map_entry_t entry)
644 {
645         vm_map_entry_t next, prev, root;
646
647         if (entry != map->root)
648                 vm_map_entry_splay(entry->start, map->root);
649         if (entry->left == NULL)
650                 root = entry->right;
651         else {
652                 root = vm_map_entry_splay(entry->start, entry->left);
653                 root->right = entry->right;
654         }
655         map->root = root;
656
657         prev = entry->prev;
658         next = entry->next;
659         next->prev = prev;
660         prev->next = next;
661         map->nentries--;
662         CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
663             map->nentries, entry);
664 }
665
666 /*
667  *      vm_map_lookup_entry:    [ internal use only ]
668  *
669  *      Finds the map entry containing (or
670  *      immediately preceding) the specified address
671  *      in the given map; the entry is returned
672  *      in the "entry" parameter.  The boolean
673  *      result indicates whether the address is
674  *      actually contained in the map.
675  */
676 boolean_t
677 vm_map_lookup_entry(
678         vm_map_t map,
679         vm_offset_t address,
680         vm_map_entry_t *entry)  /* OUT */
681 {
682         vm_map_entry_t cur;
683
684         cur = vm_map_entry_splay(address, map->root);
685         if (cur == NULL)
686                 *entry = &map->header;
687         else {
688                 map->root = cur;
689
690                 if (address >= cur->start) {
691                         *entry = cur;
692                         if (cur->end > address)
693                                 return (TRUE);
694                 } else
695                         *entry = cur->prev;
696         }
697         return (FALSE);
698 }
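
/*
 *      For example, looking up an address that falls in a gap between two
 *      entries returns FALSE and sets *entry to the entry immediately
 *      preceding the gap; if the address precedes every entry (or the map
 *      is empty), *entry is set to &map->header.
 */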
699
700 /*
701  *      vm_map_insert:
702  *
703  *      Inserts the given whole VM object into the target
704  *      map at the specified address range.  The object's
705  *      size should match that of the address range.
706  *
707  *      Requires that the map be locked, and leaves it so.
708  *
709  *      If object is non-NULL, ref count must be bumped by caller
710  *      prior to making call to account for the new entry.
711  */
712 int
713 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
714               vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
715               int cow)
716 {
717         vm_map_entry_t new_entry;
718         vm_map_entry_t prev_entry;
719         vm_map_entry_t temp_entry;
720         vm_eflags_t protoeflags;
721
722         /*
723          * Check that the start and end points are not bogus.
724          */
725         if ((start < map->min_offset) || (end > map->max_offset) ||
726             (start >= end))
727                 return (KERN_INVALID_ADDRESS);
728
729         /*
730          * Find the entry prior to the proposed starting address; if it's part
731          * of an existing entry, this range is bogus.
732          */
733         if (vm_map_lookup_entry(map, start, &temp_entry))
734                 return (KERN_NO_SPACE);
735
736         prev_entry = temp_entry;
737
738         /*
739          * Assert that the next entry doesn't overlap the end point.
740          */
741         if ((prev_entry->next != &map->header) &&
742             (prev_entry->next->start < end))
743                 return (KERN_NO_SPACE);
744
745         protoeflags = 0;
746
747         if (cow & MAP_COPY_ON_WRITE)
748                 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
749
750         if (cow & MAP_NOFAULT) {
751                 protoeflags |= MAP_ENTRY_NOFAULT;
752
753                 KASSERT(object == NULL,
754                         ("vm_map_insert: paradoxical MAP_NOFAULT request"));
755         }
756         if (cow & MAP_DISABLE_SYNCER)
757                 protoeflags |= MAP_ENTRY_NOSYNC;
758         if (cow & MAP_DISABLE_COREDUMP)
759                 protoeflags |= MAP_ENTRY_NOCOREDUMP;
760
761         if (object) {
762                 /*
763                  * When object is non-NULL, it could be shared with another
764                  * process.  We have to set or clear OBJ_ONEMAPPING 
765                  * appropriately.
766                  */
767                 mtx_lock(&Giant);
768                 if ((object->ref_count > 1) || (object->shadow_count != 0)) {
769                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
770                 }
771                 mtx_unlock(&Giant);
772         }
773         else if ((prev_entry != &map->header) &&
774                  (prev_entry->eflags == protoeflags) &&
775                  (prev_entry->end == start) &&
776                  (prev_entry->wired_count == 0) &&
777                  ((prev_entry->object.vm_object == NULL) ||
778                   vm_object_coalesce(prev_entry->object.vm_object,
779                                      OFF_TO_IDX(prev_entry->offset),
780                                      (vm_size_t)(prev_entry->end - prev_entry->start),
781                                      (vm_size_t)(end - prev_entry->end)))) {
782                 /*
783                  * We were able to extend the object.  Determine if we
784                  * can extend the previous map entry to include the 
785                  * new range as well.
786                  */
787                 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
788                     (prev_entry->protection == prot) &&
789                     (prev_entry->max_protection == max)) {
790                         map->size += (end - prev_entry->end);
791                         prev_entry->end = end;
792                         vm_map_simplify_entry(map, prev_entry);
793                         return (KERN_SUCCESS);
794                 }
795
796                 /*
797                  * If we can extend the object but cannot extend the
798                  * map entry, we have to create a new map entry.  We
799                  * must bump the ref count on the extended object to
800                  * account for it.  object may be NULL.
801                  */
802                 object = prev_entry->object.vm_object;
803                 offset = prev_entry->offset +
804                         (prev_entry->end - prev_entry->start);
805                 vm_object_reference(object);
806         }
807
808         /*
809          * NOTE: if conditionals fail, object can be NULL here.  This occurs
810          * in things like the buffer map where we manage kva but do not manage
811          * backing objects.
812          */
813
814         /*
815          * Create a new entry
816          */
817         new_entry = vm_map_entry_create(map);
818         new_entry->start = start;
819         new_entry->end = end;
820
821         new_entry->eflags = protoeflags;
822         new_entry->object.vm_object = object;
823         new_entry->offset = offset;
824         new_entry->avail_ssize = 0;
825
826         new_entry->inheritance = VM_INHERIT_DEFAULT;
827         new_entry->protection = prot;
828         new_entry->max_protection = max;
829         new_entry->wired_count = 0;
830
831         /*
832          * Insert the new entry into the list
833          */
834         vm_map_entry_link(map, prev_entry, new_entry);
835         map->size += new_entry->end - new_entry->start;
836
837         /*
838          * Update the free space hint
839          */
840         if ((map->first_free == prev_entry) &&
841             (prev_entry->end >= new_entry->start)) {
842                 map->first_free = new_entry;
843         }
844
845 #if 0
846         /*
847          * Temporarily removed to avoid MAP_STACK panic, due to
848          * MAP_STACK being a huge hack.  Will be added back in
849          * when MAP_STACK (and the user stack mapping) is fixed.
850          */
851         /*
852          * It may be possible to simplify the entry
853          */
854         vm_map_simplify_entry(map, new_entry);
855 #endif
856
857         if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
858                 mtx_lock(&Giant);
859                 pmap_object_init_pt(map->pmap, start,
860                                     object, OFF_TO_IDX(offset), end - start,
861                                     cow & MAP_PREFAULT_PARTIAL);
862                 mtx_unlock(&Giant);
863         }
864
865         return (KERN_SUCCESS);
866 }
867
868 /*
869  * Find sufficient space for `length' bytes in the given map, starting at
870  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
871  */
872 int
873 vm_map_findspace(
874         vm_map_t map,
875         vm_offset_t start,
876         vm_size_t length,
877         vm_offset_t *addr)
878 {
879         vm_map_entry_t entry, next;
880         vm_offset_t end;
881
882         if (start < map->min_offset)
883                 start = map->min_offset;
884         if (start > map->max_offset)
885                 return (1);
886
887         /*
888          * Look for the first possible address; if there's already something
889          * at this address, we have to start after it.
890          */
891         if (start == map->min_offset) {
892                 if ((entry = map->first_free) != &map->header)
893                         start = entry->end;
894         } else {
895                 vm_map_entry_t tmp;
896
897                 if (vm_map_lookup_entry(map, start, &tmp))
898                         start = tmp->end;
899                 entry = tmp;
900         }
901
902         /*
903          * Look through the rest of the map, trying to fit a new region in the
904          * gap between existing regions, or after the very last region.
905          */
906         for (;; start = (entry = next)->end) {
907                 /*
908                  * Find the end of the proposed new region.  Be sure we didn't
909                  * go beyond the end of the map, or wrap around the address;
910                  * if so, we lose.  Otherwise, if this is the last entry, or
911                  * if the proposed new region fits before the next entry, we
912                  * win.
913                  */
914                 end = start + length;
915                 if (end > map->max_offset || end < start)
916                         return (1);
917                 next = entry->next;
918                 if (next == &map->header || next->start >= end)
919                         break;
920         }
921         *addr = start;
922         if (map == kernel_map) {
923                 vm_offset_t ksize;
924                 if ((ksize = round_page(start + length)) > kernel_vm_end) {
925                         mtx_lock(&Giant);
926                         pmap_growkernel(ksize);
927                         mtx_unlock(&Giant);
928                 }
929         }
930         return (0);
931 }
932
933 /*
934  *      vm_map_find finds an unallocated region in the target address
935  *      map with the given length.  The search is defined to be
936  *      first-fit from the specified address; the region found is
937  *      returned in the same parameter.
938  *
939  *      If object is non-NULL, ref count must be bumped by caller
940  *      prior to making call to account for the new entry.
941  */
942 int
943 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
944             vm_offset_t *addr,  /* IN/OUT */
945             vm_size_t length, boolean_t find_space, vm_prot_t prot,
946             vm_prot_t max, int cow)
947 {
948         vm_offset_t start;
949         int result, s = 0;
950
951         start = *addr;
952
953         if (map == kmem_map)
954                 s = splvm();
955
956         vm_map_lock(map);
957         if (find_space) {
958                 if (vm_map_findspace(map, start, length, addr)) {
959                         vm_map_unlock(map);
960                         if (map == kmem_map)
961                                 splx(s);
962                         return (KERN_NO_SPACE);
963                 }
964                 start = *addr;
965         }
966         result = vm_map_insert(map, object, offset,
967                 start, start + length, prot, max, cow);
968         vm_map_unlock(map);
969
970         if (map == kmem_map)
971                 splx(s);
972
973         return (result);
974 }
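
/*
 *      A minimal usage sketch (hypothetical caller, not taken from the
 *      tree; "map", "object", and "size" are assumed to be in scope): map
 *      the object at any free address, letting vm_map_findspace() pick the
 *      location.  As noted above, the caller must already hold a reference
 *      on the object to account for the new entry.
 *
 *              vm_offset_t addr = vm_map_min(map);
 *
 *              if (vm_map_find(map, object, 0, &addr, size, TRUE,
 *                  VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)
 *                      vm_object_deallocate(object);
 *
 *      On success, "addr" holds the start of the new mapping.
 */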
975
976 /*
977  *      vm_map_simplify_entry:
978  *
979  *      Simplify the given map entry by merging with either neighbor.  This
980  *      routine also has the ability to merge with both neighbors.
981  *
982  *      The map must be locked.
983  *
984  *      This routine guarantees that the passed entry remains valid (though
985  *      possibly extended).  When merging, this routine may delete one or
986  *      both neighbors.
987  */
988 void
989 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
990 {
991         vm_map_entry_t next, prev;
992         vm_size_t prevsize, esize;
993
994         if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP))
995                 return;
996
997         prev = entry->prev;
998         if (prev != &map->header) {
999                 prevsize = prev->end - prev->start;
1000                 if ( (prev->end == entry->start) &&
1001                      (prev->object.vm_object == entry->object.vm_object) &&
1002                      (!prev->object.vm_object ||
1003                         (prev->offset + prevsize == entry->offset)) &&
1004                      (prev->eflags == entry->eflags) &&
1005                      (prev->protection == entry->protection) &&
1006                      (prev->max_protection == entry->max_protection) &&
1007                      (prev->inheritance == entry->inheritance) &&
1008                      (prev->wired_count == entry->wired_count)) {
1009                         if (map->first_free == prev)
1010                                 map->first_free = entry;
1011                         vm_map_entry_unlink(map, prev);
1012                         entry->start = prev->start;
1013                         entry->offset = prev->offset;
1014                         if (prev->object.vm_object)
1015                                 vm_object_deallocate(prev->object.vm_object);
1016                         vm_map_entry_dispose(map, prev);
1017                 }
1018         }
1019
1020         next = entry->next;
1021         if (next != &map->header) {
1022                 esize = entry->end - entry->start;
1023                 if ((entry->end == next->start) &&
1024                     (next->object.vm_object == entry->object.vm_object) &&
1025                      (!entry->object.vm_object ||
1026                         (entry->offset + esize == next->offset)) &&
1027                     (next->eflags == entry->eflags) &&
1028                     (next->protection == entry->protection) &&
1029                     (next->max_protection == entry->max_protection) &&
1030                     (next->inheritance == entry->inheritance) &&
1031                     (next->wired_count == entry->wired_count)) {
1032                         if (map->first_free == next)
1033                                 map->first_free = entry;
1034                         vm_map_entry_unlink(map, next);
1035                         entry->end = next->end;
1036                         if (next->object.vm_object)
1037                                 vm_object_deallocate(next->object.vm_object);
1038                         vm_map_entry_dispose(map, next);
1039                 }
1040         }
1041 }
1042 /*
1043  *      vm_map_clip_start:      [ internal use only ]
1044  *
1045  *      Asserts that the given entry begins at or after
1046  *      the specified address; if necessary,
1047  *      it splits the entry into two.
1048  */
1049 #define vm_map_clip_start(map, entry, startaddr) \
1050 { \
1051         if (startaddr > entry->start) \
1052                 _vm_map_clip_start(map, entry, startaddr); \
1053 }
1054
1055 /*
1056  *      This routine is called only when it is known that
1057  *      the entry must be split.
1058  */
1059 static void
1060 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
1061 {
1062         vm_map_entry_t new_entry;
1063
1064         /*
1065          * Split off the front portion -- note that we must insert the new
1066          * entry BEFORE this one, so that this entry has the specified
1067          * starting address.
1068          */
1069         vm_map_simplify_entry(map, entry);
1070
1071         /*
1072          * If there is no object backing this entry, we might as well create
1073          * one now.  If we defer it, an object can get created after the map
1074          * is clipped, and individual objects will be created for the split-up
1075          * map.  This is a bit of a hack, but is also about the best place to
1076          * put this improvement.
1077          */
1078         if (entry->object.vm_object == NULL && !map->system_map) {
1079                 vm_object_t object;
1080                 object = vm_object_allocate(OBJT_DEFAULT,
1081                                 atop(entry->end - entry->start));
1082                 entry->object.vm_object = object;
1083                 entry->offset = 0;
1084         }
1085
1086         new_entry = vm_map_entry_create(map);
1087         *new_entry = *entry;
1088
1089         new_entry->end = start;
1090         entry->offset += (start - entry->start);
1091         entry->start = start;
1092
1093         vm_map_entry_link(map, entry->prev, new_entry);
1094
1095         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1096                 vm_object_reference(new_entry->object.vm_object);
1097         }
1098 }
1099
1100 /*
1101  *      vm_map_clip_end:        [ internal use only ]
1102  *
1103  *      Asserts that the given entry ends at or before
1104  *      the specified address; if necessary,
1105  *      it splits the entry into two.
1106  */
1107 #define vm_map_clip_end(map, entry, endaddr) \
1108 { \
1109         if (endaddr < entry->end) \
1110                 _vm_map_clip_end(map, entry, endaddr); \
1111 }
1112
1113 /*
1114  *      This routine is called only when it is known that
1115  *      the entry must be split.
1116  */
1117 static void
1118 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1119 {
1120         vm_map_entry_t new_entry;
1121
1122         /*
1123          * If there is no object backing this entry, we might as well create
1124          * one now.  If we defer it, an object can get created after the map
1125          * is clipped, and individual objects will be created for the split-up
1126          * map.  This is a bit of a hack, but is also about the best place to
1127          * put this improvement.
1128          */
1129         if (entry->object.vm_object == NULL && !map->system_map) {
1130                 vm_object_t object;
1131                 object = vm_object_allocate(OBJT_DEFAULT,
1132                                 atop(entry->end - entry->start));
1133                 entry->object.vm_object = object;
1134                 entry->offset = 0;
1135         }
1136
1137         /*
1138          * Create a new entry and insert it AFTER the specified entry
1139          */
1140         new_entry = vm_map_entry_create(map);
1141         *new_entry = *entry;
1142
1143         new_entry->start = entry->end = end;
1144         new_entry->offset += (end - entry->start);
1145
1146         vm_map_entry_link(map, entry, new_entry);
1147
1148         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1149                 vm_object_reference(new_entry->object.vm_object);
1150         }
1151 }
1152
1153 /*
1154  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
1155  *
1156  *      Asserts that the starting and ending region
1157  *      addresses fall within the valid range of the map.
1158  */
1159 #define VM_MAP_RANGE_CHECK(map, start, end)             \
1160                 {                                       \
1161                 if (start < vm_map_min(map))            \
1162                         start = vm_map_min(map);        \
1163                 if (end > vm_map_max(map))              \
1164                         end = vm_map_max(map);          \
1165                 if (start > end)                        \
1166                         start = end;                    \
1167                 }
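
/*
 *      For example, with vm_map_min(map) == 0x1000 and vm_map_max(map) ==
 *      0x9000, a request for [0x0, 0xA000) is clamped to [0x1000, 0x9000);
 *      a request whose clamped start exceeds its clamped end collapses to
 *      an empty range.
 */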
1168
1169 /*
1170  *      vm_map_submap:          [ kernel use only ]
1171  *
1172  *      Mark the given range as handled by a subordinate map.
1173  *
1174  *      This range must have been created with vm_map_find,
1175  *      and no other operations may have been performed on this
1176  *      range prior to calling vm_map_submap.
1177  *
1178  *      Only a limited number of operations can be performed
1179  *      within this range after calling vm_map_submap:
1180  *              vm_fault
1181  *      [Don't try vm_map_copy!]
1182  *
1183  *      To remove a submapping, one must first remove the
1184  *      range from the superior map, and then destroy the
1185  *      submap (if desired).  [Better yet, don't try it.]
1186  */
1187 int
1188 vm_map_submap(
1189         vm_map_t map,
1190         vm_offset_t start,
1191         vm_offset_t end,
1192         vm_map_t submap)
1193 {
1194         vm_map_entry_t entry;
1195         int result = KERN_INVALID_ARGUMENT;
1196
1197         vm_map_lock(map);
1198
1199         VM_MAP_RANGE_CHECK(map, start, end);
1200
1201         if (vm_map_lookup_entry(map, start, &entry)) {
1202                 vm_map_clip_start(map, entry, start);
1203         } else
1204                 entry = entry->next;
1205
1206         vm_map_clip_end(map, entry, end);
1207
1208         if ((entry->start == start) && (entry->end == end) &&
1209             ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1210             (entry->object.vm_object == NULL)) {
1211                 entry->object.sub_map = submap;
1212                 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1213                 result = KERN_SUCCESS;
1214         }
1215         vm_map_unlock(map);
1216
1217         return (result);
1218 }
1219
1220 /*
1221  *      vm_map_protect:
1222  *
1223  *      Sets the protection of the specified address
1224  *      region in the target map.  If "set_max" is
1225  *      specified, the maximum protection is to be set;
1226  *      otherwise, only the current protection is affected.
1227  */
1228 int
1229 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1230                vm_prot_t new_prot, boolean_t set_max)
1231 {
1232         vm_map_entry_t current;
1233         vm_map_entry_t entry;
1234
1235         vm_map_lock(map);
1236
1237         VM_MAP_RANGE_CHECK(map, start, end);
1238
1239         if (vm_map_lookup_entry(map, start, &entry)) {
1240                 vm_map_clip_start(map, entry, start);
1241         } else {
1242                 entry = entry->next;
1243         }
1244
1245         /*
1246          * Make a first pass to check for protection violations.
1247          */
1248         current = entry;
1249         while ((current != &map->header) && (current->start < end)) {
1250                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1251                         vm_map_unlock(map);
1252                         return (KERN_INVALID_ARGUMENT);
1253                 }
1254                 if ((new_prot & current->max_protection) != new_prot) {
1255                         vm_map_unlock(map);
1256                         return (KERN_PROTECTION_FAILURE);
1257                 }
1258                 current = current->next;
1259         }
1260
1261         /*
1262          * Go back and fix up protections. [Note that clipping is not
1263          * necessary the second time.]
1264          */
1265         current = entry;
1266         while ((current != &map->header) && (current->start < end)) {
1267                 vm_prot_t old_prot;
1268
1269                 vm_map_clip_end(map, current, end);
1270
1271                 old_prot = current->protection;
1272                 if (set_max)
1273                         current->protection =
1274                             (current->max_protection = new_prot) &
1275                             old_prot;
1276                 else
1277                         current->protection = new_prot;
1278
1279                 /*
1280                  * Update physical map if necessary. Worry about copy-on-write
1281                  * here -- CHECK THIS XXX
1282                  */
1283                 if (current->protection != old_prot) {
1284                         mtx_lock(&Giant);
1285 #define MASK(entry)     (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1286                                                         VM_PROT_ALL)
1287                         pmap_protect(map->pmap, current->start,
1288                             current->end,
1289                             current->protection & MASK(current));
1290 #undef  MASK
1291                         mtx_unlock(&Giant);
1292                 }
1293                 vm_map_simplify_entry(map, current);
1294                 current = current->next;
1295         }
1296         vm_map_unlock(map);
1297         return (KERN_SUCCESS);
1298 }
1299
1300 /*
1301  *      vm_map_madvise:
1302  *
1303  *      This routine traverses a process's map handling the madvise
1304  *      system call.  Advisories are classified as either those affecting
1305  *      the vm_map_entry structure, or those affecting the underlying
1306  *      objects.
1307  */
1308 int
1309 vm_map_madvise(
1310         vm_map_t map,
1311         vm_offset_t start, 
1312         vm_offset_t end,
1313         int behav)
1314 {
1315         vm_map_entry_t current, entry;
1316         int modify_map = 0;
1317
1318         /*
1319          * Some madvise calls directly modify the vm_map_entry, in which case
1320          * we need to use an exclusive lock on the map and we need to perform 
1321          * various clipping operations.  Otherwise we only need a read-lock
1322          * on the map.
1323          */
1324         switch(behav) {
1325         case MADV_NORMAL:
1326         case MADV_SEQUENTIAL:
1327         case MADV_RANDOM:
1328         case MADV_NOSYNC:
1329         case MADV_AUTOSYNC:
1330         case MADV_NOCORE:
1331         case MADV_CORE:
1332                 modify_map = 1;
1333                 vm_map_lock(map);
1334                 break;
1335         case MADV_WILLNEED:
1336         case MADV_DONTNEED:
1337         case MADV_FREE:
1338                 vm_map_lock_read(map);
1339                 break;
1340         default:
1341                 return (KERN_INVALID_ARGUMENT);
1342         }
1343
1344         /*
1345          * Locate starting entry and clip if necessary.
1346          */
1347         VM_MAP_RANGE_CHECK(map, start, end);
1348
1349         if (vm_map_lookup_entry(map, start, &entry)) {
1350                 if (modify_map)
1351                         vm_map_clip_start(map, entry, start);
1352         } else {
1353                 entry = entry->next;
1354         }
1355
1356         if (modify_map) {
1357                 /*
1358                  * madvise behaviors that are implemented in the vm_map_entry.
1359                  *
1360                  * We clip the vm_map_entry so that behavioral changes are
1361                  * limited to the specified address range.
1362                  */
1363                 for (current = entry;
1364                      (current != &map->header) && (current->start < end);
1365                      current = current->next
1366                 ) {
1367                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1368                                 continue;
1369
1370                         vm_map_clip_end(map, current, end);
1371
1372                         switch (behav) {
1373                         case MADV_NORMAL:
1374                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1375                                 break;
1376                         case MADV_SEQUENTIAL:
1377                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1378                                 break;
1379                         case MADV_RANDOM:
1380                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1381                                 break;
1382                         case MADV_NOSYNC:
1383                                 current->eflags |= MAP_ENTRY_NOSYNC;
1384                                 break;
1385                         case MADV_AUTOSYNC:
1386                                 current->eflags &= ~MAP_ENTRY_NOSYNC;
1387                                 break;
1388                         case MADV_NOCORE:
1389                                 current->eflags |= MAP_ENTRY_NOCOREDUMP;
1390                                 break;
1391                         case MADV_CORE:
1392                                 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1393                                 break;
1394                         default:
1395                                 break;
1396                         }
1397                         vm_map_simplify_entry(map, current);
1398                 }
1399                 vm_map_unlock(map);
1400         } else {
1401                 vm_pindex_t pindex;
1402                 int count;
1403
1404                 /*
1405                  * madvise behaviors that are implemented in the underlying
1406                  * vm_object.
1407                  *
1408                  * Since we don't clip the vm_map_entry, we have to clip
1409                  * the vm_object pindex and count.
1410                  */
1411                 for (current = entry;
1412                      (current != &map->header) && (current->start < end);
1413                      current = current->next
1414                 ) {
1415                         vm_offset_t useStart;
1416
1417                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1418                                 continue;
1419
1420                         pindex = OFF_TO_IDX(current->offset);
1421                         count = atop(current->end - current->start);
1422                         useStart = current->start;
1423
1424                         if (current->start < start) {
1425                                 pindex += atop(start - current->start);
1426                                 count -= atop(start - current->start);
1427                                 useStart = start;
1428                         }
1429                         if (current->end > end)
1430                                 count -= atop(current->end - end);
1431
1432                         if (count <= 0)
1433                                 continue;
1434
1435                         vm_object_madvise(current->object.vm_object,
1436                                           pindex, count, behav);
1437                         if (behav == MADV_WILLNEED) {
1438                                 mtx_lock(&Giant);
1439                                 pmap_object_init_pt(
1440                                     map->pmap, 
1441                                     useStart,
1442                                     current->object.vm_object,
1443                                     pindex, 
1444                                     (count << PAGE_SHIFT),
1445                                     MAP_PREFAULT_MADVISE
1446                                 );
1447                                 mtx_unlock(&Giant);
1448                         }
1449                 }
1450                 vm_map_unlock_read(map);
1451         }
1452         return (0);
1453 }       
1454
1455
1456 /*
1457  *      vm_map_inherit:
1458  *
1459  *      Sets the inheritance of the specified address
1460  *      range in the target map.  Inheritance
1461  *      affects how the map will be shared with
1462  *      child maps at the time of vm_map_fork.
1463  */
1464 int
1465 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1466                vm_inherit_t new_inheritance)
1467 {
1468         vm_map_entry_t entry;
1469         vm_map_entry_t temp_entry;
1470
1471         switch (new_inheritance) {
1472         case VM_INHERIT_NONE:
1473         case VM_INHERIT_COPY:
1474         case VM_INHERIT_SHARE:
1475                 break;
1476         default:
1477                 return (KERN_INVALID_ARGUMENT);
1478         }
1479         vm_map_lock(map);
1480         VM_MAP_RANGE_CHECK(map, start, end);
1481         if (vm_map_lookup_entry(map, start, &temp_entry)) {
1482                 entry = temp_entry;
1483                 vm_map_clip_start(map, entry, start);
1484         } else
1485                 entry = temp_entry->next;
1486         while ((entry != &map->header) && (entry->start < end)) {
1487                 vm_map_clip_end(map, entry, end);
1488                 entry->inheritance = new_inheritance;
1489                 vm_map_simplify_entry(map, entry);
1490                 entry = entry->next;
1491         }
1492         vm_map_unlock(map);
1493         return (KERN_SUCCESS);
1494 }
1495
1496 /*
1497  *      vm_map_unwire:
1498  *
1499  *      Implements both kernel and user unwiring.
1500  */
1501 int
1502 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
1503         boolean_t user_unwire)
1504 {
1505         vm_map_entry_t entry, first_entry, tmp_entry;
1506         vm_offset_t saved_start;
1507         unsigned int last_timestamp;
1508         int rv;
1509         boolean_t need_wakeup, result;
1510
1511         vm_map_lock(map);
1512         VM_MAP_RANGE_CHECK(map, start, end);
1513         if (!vm_map_lookup_entry(map, start, &first_entry)) {
1514                 vm_map_unlock(map);
1515                 return (KERN_INVALID_ADDRESS);
1516         }
1517         last_timestamp = map->timestamp;
1518         entry = first_entry;
1519         while (entry != &map->header && entry->start < end) {
1520                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1521                         /*
1522                          * We have not yet clipped the entry.
1523                          */
1524                         saved_start = (start >= entry->start) ? start :
1525                             entry->start;
1526                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1527                         if (vm_map_unlock_and_wait(map, user_unwire)) {
1528                                 /*
1529                                  * Allow interruption of user unwiring?
1530                                  */
1531                         }
1532                         vm_map_lock(map);
1533                         if (last_timestamp+1 != map->timestamp) {
1534                                 /*
1535                                  * Look again for the entry because the map was
1536                                  * modified while it was unlocked.
1537                                  * Specifically, the entry may have been
1538                                  * clipped, merged, or deleted.
1539                                  */
1540                                 if (!vm_map_lookup_entry(map, saved_start,
1541                                     &tmp_entry)) {
1542                                         if (saved_start == start) {
1543                                                 /*
1544                                                  * first_entry has been deleted.
1545                                                  */
1546                                                 vm_map_unlock(map);
1547                                                 return (KERN_INVALID_ADDRESS);
1548                                         }
1549                                         end = saved_start;
1550                                         rv = KERN_INVALID_ADDRESS;
1551                                         goto done;
1552                                 }
1553                                 if (entry == first_entry)
1554                                         first_entry = tmp_entry;
1555                                 else
1556                                         first_entry = NULL;
1557                                 entry = tmp_entry;
1558                         }
1559                         last_timestamp = map->timestamp;
1560                         continue;
1561                 }
1562                 vm_map_clip_start(map, entry, start);
1563                 vm_map_clip_end(map, entry, end);
1564                 /*
1565                  * Mark the entry in case the map lock is released.  (See
1566                  * above.)
1567                  */
1568                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1569                 /*
1570                  * Check the map for holes in the specified region.
1571                  */
1572                 if (entry->end < end && (entry->next == &map->header ||
1573                     entry->next->start > entry->end)) {
1574                         end = entry->end;
1575                         rv = KERN_INVALID_ADDRESS;
1576                         goto done;
1577                 }
1578                 /*
1579                  * Require that the entry is wired.
1580                  */
1581                 if (entry->wired_count == 0 || (user_unwire &&
1582                     (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)) {
1583                         end = entry->end;
1584                         rv = KERN_INVALID_ARGUMENT;
1585                         goto done;
1586                 }
1587                 entry = entry->next;
1588         }
1589         rv = KERN_SUCCESS;
1590 done:
1591         need_wakeup = FALSE;
1592         if (first_entry == NULL) {
1593                 result = vm_map_lookup_entry(map, start, &first_entry);
1594                 KASSERT(result, ("vm_map_unwire: lookup failed"));
1595         }
1596         entry = first_entry;
1597         while (entry != &map->header && entry->start < end) {
1598                 if (rv == KERN_SUCCESS) {
1599                         if (user_unwire)
1600                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1601                         entry->wired_count--;
1602                         if (entry->wired_count == 0) {
1603                                 /*
1604                                  * Retain the map lock.
1605                                  */
1606                                 vm_fault_unwire(map, entry->start, entry->end);
1607                         }
1608                 }
1609                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
1610                         ("vm_map_unwire: in-transition flag missing"));
1611                 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1612                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1613                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1614                         need_wakeup = TRUE;
1615                 }
1616                 vm_map_simplify_entry(map, entry);
1617                 entry = entry->next;
1618         }
1619         vm_map_unlock(map);
1620         if (need_wakeup)
1621                 vm_map_wakeup(map);
1622         return (rv);
1623 }
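
/*
 * Usage sketch (editorial addition, not from the original source): user
 * unwiring, e.g. a munlock(2)-style path, passes user_unwire == TRUE, which
 * requires the entries to be user-wired and clears MAP_ENTRY_USER_WIRED;
 * kernel callers pass FALSE.  With "map", "addr" and "len" assumed from the
 * caller:
 *
 *      rv = vm_map_unwire(map, trunc_page(addr),
 *          round_page(addr + len), TRUE);
 */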
1624
1625 /*
1626  *      vm_map_wire:
1627  *
1628  *      Implements both kernel and user wiring.
1629  */
1630 int
1631 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
1632         boolean_t user_wire)
1633 {
1634         vm_map_entry_t entry, first_entry, tmp_entry;
1635         vm_offset_t saved_end, saved_start;
1636         unsigned int last_timestamp;
1637         int rv;
1638         boolean_t need_wakeup, result;
1639
1640         vm_map_lock(map);
1641         VM_MAP_RANGE_CHECK(map, start, end);
1642         if (!vm_map_lookup_entry(map, start, &first_entry)) {
1643                 vm_map_unlock(map);
1644                 return (KERN_INVALID_ADDRESS);
1645         }
1646         last_timestamp = map->timestamp;
1647         entry = first_entry;
1648         while (entry != &map->header && entry->start < end) {
1649                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1650                         /*
1651                          * We have not yet clipped the entry.
1652                          */
1653                         saved_start = (start >= entry->start) ? start :
1654                             entry->start;
1655                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1656                         if (vm_map_unlock_and_wait(map, user_wire)) {
1657                                 /*
1658                                  * Allow interruption of user wiring?
1659                                  */
1660                         }
1661                         vm_map_lock(map);
1662                         if (last_timestamp + 1 != map->timestamp) {
1663                                 /*
1664                                  * Look again for the entry because the map was
1665                                  * modified while it was unlocked.
1666                                  * Specifically, the entry may have been
1667                                  * clipped, merged, or deleted.
1668                                  */
1669                                 if (!vm_map_lookup_entry(map, saved_start,
1670                                     &tmp_entry)) {
1671                                         if (saved_start == start) {
1672                                                 /*
1673                                                  * first_entry has been deleted.
1674                                                  */
1675                                                 vm_map_unlock(map);
1676                                                 return (KERN_INVALID_ADDRESS);
1677                                         }
1678                                         end = saved_start;
1679                                         rv = KERN_INVALID_ADDRESS;
1680                                         goto done;
1681                                 }
1682                                 if (entry == first_entry)
1683                                         first_entry = tmp_entry;
1684                                 else
1685                                         first_entry = NULL;
1686                                 entry = tmp_entry;
1687                         }
1688                         last_timestamp = map->timestamp;
1689                         continue;
1690                 }
1691                 vm_map_clip_start(map, entry, start);
1692                 vm_map_clip_end(map, entry, end);
1693                 /*
1694                  * Mark the entry in case the map lock is released.  (See
1695                  * above.)
1696                  */
1697                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1698                 /*
1699                  * If the entry is not already wired, wire it in now.
1700                  */
1701                 if (entry->wired_count == 0) {
1702                         entry->wired_count++;
1703                         saved_start = entry->start;
1704                         saved_end = entry->end;
1705                         /*
1706                          * Release the map lock, relying on the in-transition
1707                          * mark.
1708                          */
1709                         vm_map_unlock(map);
1710                         if (user_wire)
1711                                 rv = vm_fault_user_wire(map, saved_start,
1712                                     saved_end);
1713                         else
1714                                 rv = vm_fault_wire(map, saved_start, saved_end);
1715                         vm_map_lock(map);
1716                         if (last_timestamp + 1 != map->timestamp) {
1717                                 /*
1718                                  * Look again for the entry because the map was
1719                                  * modified while it was unlocked.  The entry
1720                                  * may have been clipped, but NOT merged or
1721                                  * deleted.
1722                                  */
1723                                 result = vm_map_lookup_entry(map, saved_start,
1724                                     &tmp_entry);
1725                                 KASSERT(result, ("vm_map_wire: lookup failed"));
1726                                 if (entry == first_entry)
1727                                         first_entry = tmp_entry;
1728                                 else
1729                                         first_entry = NULL;
1730                                 entry = tmp_entry;
1731                                 while (entry->end < saved_end) {
1732                                         if (rv != KERN_SUCCESS) {
1733                                                 KASSERT(entry->wired_count == 1,
1734                                                     ("vm_map_wire: bad count"));
1735                                                 entry->wired_count = -1;
1736                                         }
1737                                         entry = entry->next;
1738                                 }
1739                         }
1740                         last_timestamp = map->timestamp;
1741                         if (rv != KERN_SUCCESS) {
1742                                 KASSERT(entry->wired_count == 1,
1743                                     ("vm_map_wire: bad count"));
1744                                 /*
1745                                  * Assign an out-of-range value to represent
1746                                  * the failure to wire this entry.
1747                                  */
1748                                 entry->wired_count = -1;
1749                                 end = entry->end;
1750                                 goto done;
1751                         }
1752                 } else if (!user_wire ||
1753                            (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
1754                         entry->wired_count++;
1755                 }
1756                 /*
1757                  * Check the map for holes in the specified region.
1758                  */
1759                 if (entry->end < end && (entry->next == &map->header ||
1760                     entry->next->start > entry->end)) {
1761                         end = entry->end;
1762                         rv = KERN_INVALID_ADDRESS;
1763                         goto done;
1764                 }
1765                 entry = entry->next;
1766         }
1767         rv = KERN_SUCCESS;
1768 done:
1769         need_wakeup = FALSE;
1770         if (first_entry == NULL) {
1771                 result = vm_map_lookup_entry(map, start, &first_entry);
1772                 KASSERT(result, ("vm_map_wire: lookup failed"));
1773         }
1774         entry = first_entry;
1775         while (entry != &map->header && entry->start < end) {
1776                 if (rv == KERN_SUCCESS) {
1777                         if (user_wire)
1778                                 entry->eflags |= MAP_ENTRY_USER_WIRED;
1779                 } else if (entry->wired_count == -1) {
1780                         /*
1781                          * Wiring failed on this entry.  Thus, unwiring is
1782                          * unnecessary.
1783                          */
1784                         entry->wired_count = 0;
1785                 } else {
1786                         if (!user_wire || (entry->wired_count == 1 &&
1787                             (entry->eflags & MAP_ENTRY_USER_WIRED) == 0))
1788                                 entry->wired_count--;
1789                         if (entry->wired_count == 0) {
1790                                 /*
1791                                  * Retain the map lock.
1792                                  */
1793                                 vm_fault_unwire(map, entry->start, entry->end);
1794                         }
1795                 }
1796                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
1797                         ("vm_map_wire: in-transition flag missing"));
1798                 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1799                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1800                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1801                         need_wakeup = TRUE;
1802                 }
1803                 vm_map_simplify_entry(map, entry);
1804                 entry = entry->next;
1805         }
1806         vm_map_unlock(map);
1807         if (need_wakeup)
1808                 vm_map_wakeup(map);
1809         return (rv);
1810 }
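
/*
 * Usage sketch (editorial addition, not from the original source): an
 * mlock(2)-style caller would pass user_wire == TRUE so the entries are
 * marked MAP_ENTRY_USER_WIRED; kernel wiring passes FALSE.  With "map",
 * "addr" and "len" assumed from the caller:
 *
 *      rv = vm_map_wire(map, trunc_page(addr),
 *          round_page(addr + len), TRUE);
 */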
1811
1812 /*
1813  * vm_map_clean
1814  *
1815  * Push any dirty cached pages in the address range to their pager.
1816  * If syncio is TRUE, dirty pages are written synchronously.
1817  * If invalidate is TRUE, any cached pages are freed as well.
1818  *
1819  * Returns an error if any part of the specified range is not mapped.
1820  */
1821 int
1822 vm_map_clean(
1823         vm_map_t map,
1824         vm_offset_t start,
1825         vm_offset_t end,
1826         boolean_t syncio,
1827         boolean_t invalidate)
1828 {
1829         vm_map_entry_t current;
1830         vm_map_entry_t entry;
1831         vm_size_t size;
1832         vm_object_t object;
1833         vm_ooffset_t offset;
1834
1835         GIANT_REQUIRED;
1836
1837         vm_map_lock_read(map);
1838         VM_MAP_RANGE_CHECK(map, start, end);
1839         if (!vm_map_lookup_entry(map, start, &entry)) {
1840                 vm_map_unlock_read(map);
1841                 return (KERN_INVALID_ADDRESS);
1842         }
1843         /*
1844          * Make a first pass to check for holes.
1845          */
1846         for (current = entry; current->start < end; current = current->next) {
1847                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1848                         vm_map_unlock_read(map);
1849                         return (KERN_INVALID_ARGUMENT);
1850                 }
1851                 if (end > current->end &&
1852                     (current->next == &map->header ||
1853                         current->end != current->next->start)) {
1854                         vm_map_unlock_read(map);
1855                         return (KERN_INVALID_ADDRESS);
1856                 }
1857         }
1858
1859         if (invalidate)
1860                 pmap_remove(vm_map_pmap(map), start, end);
1861         /*
1862          * Make a second pass, cleaning/uncaching pages from the indicated
1863          * objects as we go.
1864          */
1865         for (current = entry; current->start < end; current = current->next) {
1866                 offset = current->offset + (start - current->start);
1867                 size = (end <= current->end ? end : current->end) - start;
1868                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1869                         vm_map_t smap;
1870                         vm_map_entry_t tentry;
1871                         vm_size_t tsize;
1872
1873                         smap = current->object.sub_map;
1874                         vm_map_lock_read(smap);
1875                         (void) vm_map_lookup_entry(smap, offset, &tentry);
1876                         tsize = tentry->end - offset;
1877                         if (tsize < size)
1878                                 size = tsize;
1879                         object = tentry->object.vm_object;
1880                         offset = tentry->offset + (offset - tentry->start);
1881                         vm_map_unlock_read(smap);
1882                 } else {
1883                         object = current->object.vm_object;
1884                 }
1885                 /*
1886                  * Note that there is absolutely no sense in writing out
1887                  * anonymous objects, so we track down the vnode object
1888                  * to write out.
1889                  * We invalidate (remove) all pages from the address space
1890                  * anyway, for semantic correctness.
1891                  *
1892                  * note: certain anonymous maps, such as MAP_NOSYNC maps,
1893                  * may start out with a NULL object.
1894                  */
1895                 while (object && object->backing_object) {
1896                         object = object->backing_object;
1897                         offset += object->backing_object_offset;
1898                         if (object->size < OFF_TO_IDX(offset + size))
1899                                 size = IDX_TO_OFF(object->size) - offset;
1900                 }
1901                 if (object && (object->type == OBJT_VNODE) && 
1902                     (current->protection & VM_PROT_WRITE)) {
1903                         /*
1904                          * Flush pages if writing is allowed, invalidate them
1905                          * if invalidation requested.  Pages undergoing I/O
1906                          * will be ignored by vm_object_page_remove().
1907                          *
1908                          * We cannot lock the vnode and then wait for paging
1909                          * to complete without deadlocking against vm_fault.
1910                          * Instead we simply call vm_object_page_remove() and
1911                          * allow it to block internally on a page-by-page 
1912                          * basis when it encounters pages undergoing async 
1913                          * I/O.
1914                          */
1915                         int flags;
1916
1917                         vm_object_reference(object);
1918                         vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread);
1919                         flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1920                         flags |= invalidate ? OBJPC_INVAL : 0;
1921                         vm_object_page_clean(object,
1922                             OFF_TO_IDX(offset),
1923                             OFF_TO_IDX(offset + size + PAGE_MASK),
1924                             flags);
1925                         if (invalidate) {
1926                                 /*vm_object_pip_wait(object, "objmcl");*/
1927                                 vm_object_page_remove(object,
1928                                     OFF_TO_IDX(offset),
1929                                     OFF_TO_IDX(offset + size + PAGE_MASK),
1930                                     FALSE);
1931                         }
1932                         VOP_UNLOCK(object->handle, 0, curthread);
1933                         vm_object_deallocate(object);
1934                 }
1935                 start += size;
1936         }
1937
1938         vm_map_unlock_read(map);
1939         return (KERN_SUCCESS);
1940 }
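
/*
 * Usage sketch (editorial addition, not from the original source): an
 * msync(2)-style caller holding Giant (see GIANT_REQUIRED above), with
 * "map", "addr" and "len" assumed from the caller, could flush a range
 * synchronously and drop the cached pages with:
 *
 *      rv = vm_map_clean(map, trunc_page(addr),
 *          round_page(addr + len), TRUE, TRUE);
 */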
1941
1942 /*
1943  *      vm_map_entry_unwire:    [ internal use only ]
1944  *
1945  *      Make the region specified by this entry pageable.
1946  *
1947  *      The map in question should be locked.
1948  *      [This is the reason for this routine's existence.]
1949  */
1950 static void 
1951 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
1952 {
1953         vm_fault_unwire(map, entry->start, entry->end);
1954         entry->wired_count = 0;
1955 }
1956
1957 /*
1958  *      vm_map_entry_delete:    [ internal use only ]
1959  *
1960  *      Deallocate the given entry from the target map.
1961  */
1962 static void
1963 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
1964 {
1965         vm_map_entry_unlink(map, entry);
1966         map->size -= entry->end - entry->start;
1967
1968         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1969                 vm_object_deallocate(entry->object.vm_object);
1970         }
1971
1972         vm_map_entry_dispose(map, entry);
1973 }
1974
1975 /*
1976  *      vm_map_delete:  [ internal use only ]
1977  *
1978  *      Deallocates the given address range from the target
1979  *      map.
1980  */
1981 int
1982 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
1983 {
1984         vm_object_t object;
1985         vm_map_entry_t entry;
1986         vm_map_entry_t first_entry;
1987
1988         /*
1989          * Find the start of the region, and clip it
1990          */
1991         if (!vm_map_lookup_entry(map, start, &first_entry))
1992                 entry = first_entry->next;
1993         else {
1994                 entry = first_entry;
1995                 vm_map_clip_start(map, entry, start);
1996         }
1997
1998         /*
1999          * Save the free space hint
2000          */
2001         if (entry == &map->header) {
2002                 map->first_free = &map->header;
2003         } else if (map->first_free->start >= start) {
2004                 map->first_free = entry->prev;
2005         }
2006
2007         /*
2008          * Step through all entries in this region
2009          */
2010         while ((entry != &map->header) && (entry->start < end)) {
2011                 vm_map_entry_t next;
2012                 vm_offset_t s, e;
2013                 vm_pindex_t offidxstart, offidxend, count;
2014
2015                 /*
2016                  * Wait for wiring or unwiring of an entry to complete.
2017                  */
2018                 if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0) {
2019                         unsigned int last_timestamp;
2020                         vm_offset_t saved_start;
2021                         vm_map_entry_t tmp_entry;
2022
2023                         saved_start = entry->start;
2024                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2025                         last_timestamp = map->timestamp;
2026                         (void) vm_map_unlock_and_wait(map, FALSE);
2027                         vm_map_lock(map);
2028                         if (last_timestamp + 1 != map->timestamp) {
2029                                 /*
2030                                  * Look again for the entry because the map was
2031                                  * modified while it was unlocked.
2032                                  * Specifically, the entry may have been
2033                                  * clipped, merged, or deleted.
2034                                  */
2035                                 if (!vm_map_lookup_entry(map, saved_start,
2036                                                          &tmp_entry))
2037                                         entry = tmp_entry->next;
2038                                 else {
2039                                         entry = tmp_entry;
2040                                         vm_map_clip_start(map, entry,
2041                                                           saved_start);
2042                                 }
2043                         }
2044                         continue;
2045                 }
2046                 vm_map_clip_end(map, entry, end);
2047
2048                 s = entry->start;
2049                 e = entry->end;
2050                 next = entry->next;
2051
2052                 offidxstart = OFF_TO_IDX(entry->offset);
2053                 count = OFF_TO_IDX(e - s);
2054                 object = entry->object.vm_object;
2055
2056                 /*
2057                  * Unwire before removing addresses from the pmap; otherwise,
2058                  * unwiring will put the entries back in the pmap.
2059                  */
2060                 if (entry->wired_count != 0) {
2061                         vm_map_entry_unwire(map, entry);
2062                 }
2063
2064                 offidxend = offidxstart + count;
2065
2066                 if ((object == kernel_object) || (object == kmem_object)) {
2067                         vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2068                 } else {
2069                         mtx_lock(&Giant);
2070                         pmap_remove(map->pmap, s, e);
2071                         if (object != NULL &&
2072                             object->ref_count != 1 &&
2073                             (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2074                             (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2075                                 vm_object_collapse(object);
2076                                 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2077                                 if (object->type == OBJT_SWAP) {
2078                                         swap_pager_freespace(object, offidxstart, count);
2079                                 }
2080                                 if (offidxend >= object->size &&
2081                                     offidxstart < object->size) {
2082                                         object->size = offidxstart;
2083                                 }
2084                         }
2085                         mtx_unlock(&Giant);
2086                 }
2087
2088                 /*
2089                  * Delete the entry (which may delete the object) only after
2090                  * removing all pmap entries pointing to its pages.
2091                  * (Otherwise, its page frames may be reallocated, and any
2092                  * modify bits will be set in the wrong object!)
2093                  */
2094                 vm_map_entry_delete(map, entry);
2095                 entry = next;
2096         }
2097         return (KERN_SUCCESS);
2098 }
2099
2100 /*
2101  *      vm_map_remove:
2102  *
2103  *      Remove the given address range from the target map.
2104  *      This is the exported form of vm_map_delete.
2105  */
2106 int
2107 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2108 {
2109         int result, s = 0;
2110
2111         if (map == kmem_map)
2112                 s = splvm();
2113
2114         vm_map_lock(map);
2115         VM_MAP_RANGE_CHECK(map, start, end);
2116         result = vm_map_delete(map, start, end);
2117         vm_map_unlock(map);
2118
2119         if (map == kmem_map)
2120                 splx(s);
2121
2122         return (result);
2123 }
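
/*
 * Usage sketch (editorial addition, not from the original source): a
 * munmap(2)-style caller, with "map", "addr" and "len" assumed from the
 * caller, would tear down a range through this exported interface:
 *
 *      (void) vm_map_remove(map, trunc_page(addr),
 *          round_page(addr + len));
 */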
2124
2125 /*
2126  *      vm_map_check_protection:
2127  *
2128  *      Assert that the target map allows the specified
2129  *      privilege on the entire address region given.
2130  *      The entire region must be allocated.
2131  */
2132 boolean_t
2133 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2134                         vm_prot_t protection)
2135 {
2136         vm_map_entry_t entry;
2137         vm_map_entry_t tmp_entry;
2138
2139         vm_map_lock_read(map);
2140         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2141                 vm_map_unlock_read(map);
2142                 return (FALSE);
2143         }
2144         entry = tmp_entry;
2145
2146         while (start < end) {
2147                 if (entry == &map->header) {
2148                         vm_map_unlock_read(map);
2149                         return (FALSE);
2150                 }
2151                 /*
2152                  * No holes allowed!
2153                  */
2154                 if (start < entry->start) {
2155                         vm_map_unlock_read(map);
2156                         return (FALSE);
2157                 }
2158                 /*
2159                  * Check protection associated with entry.
2160                  */
2161                 if ((entry->protection & protection) != protection) {
2162                         vm_map_unlock_read(map);
2163                         return (FALSE);
2164                 }
2165                 /* go to next entry */
2166                 start = entry->end;
2167                 entry = entry->next;
2168         }
2169         vm_map_unlock_read(map);
2170         return (TRUE);
2171 }
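
/*
 * Usage sketch (editorial addition, not from the original source): a caller
 * can verify that an entire range is mapped with at least read access before
 * operating on it; "map", "addr" and "len" are assumed from the caller:
 *
 *      if (!vm_map_check_protection(map, trunc_page(addr),
 *          round_page(addr + len), VM_PROT_READ))
 *              return (EFAULT);
 */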
2172
2173 /*
2174  *      vm_map_copy_entry:
2175  *
2176  *      Copies the contents of the source entry to the destination
2177  *      entry.  The entries *must* be aligned properly.
2178  */
2179 static void
2180 vm_map_copy_entry(
2181         vm_map_t src_map,
2182         vm_map_t dst_map,
2183         vm_map_entry_t src_entry, 
2184         vm_map_entry_t dst_entry)
2185 {
2186         vm_object_t src_object;
2187
2188         if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2189                 return;
2190
2191         if (src_entry->wired_count == 0) {
2192
2193                 /*
2194                  * If the source entry is marked needs_copy, it is already
2195                  * write-protected.
2196                  */
2197                 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2198                         pmap_protect(src_map->pmap,
2199                             src_entry->start,
2200                             src_entry->end,
2201                             src_entry->protection & ~VM_PROT_WRITE);
2202                 }
2203
2204                 /*
2205                  * Make a copy of the object.
2206                  */
2207                 if ((src_object = src_entry->object.vm_object) != NULL) {
2208
2209                         if ((src_object->handle == NULL) &&
2210                                 (src_object->type == OBJT_DEFAULT ||
2211                                  src_object->type == OBJT_SWAP)) {
2212                                 vm_object_collapse(src_object);
2213                                 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2214                                         vm_object_split(src_entry);
2215                                         src_object = src_entry->object.vm_object;
2216                                 }
2217                         }
2218
2219                         vm_object_reference(src_object);
2220                         vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2221                         dst_entry->object.vm_object = src_object;
2222                         src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2223                         dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2224                         dst_entry->offset = src_entry->offset;
2225                 } else {
2226                         dst_entry->object.vm_object = NULL;
2227                         dst_entry->offset = 0;
2228                 }
2229
2230                 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2231                     dst_entry->end - dst_entry->start, src_entry->start);
2232         } else {
2233                 /*
2234                  * Of course, wired down pages can't be set copy-on-write.
2235                  * Cause wired pages to be copied into the new map by
2236                  * simulating faults (the new pages are pageable)
2237                  */
2238                 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2239         }
2240 }
2241
2242 /*
2243  * vmspace_fork:
2244  * Create a new process vmspace structure and vm_map
2245  * based on those of an existing process.  The new map
2246  * is based on the old map, according to the inheritance
2247  * values on the regions in that map.
2248  *
2249  * The source map must not be locked.
2250  */
2251 struct vmspace *
2252 vmspace_fork(struct vmspace *vm1)
2253 {
2254         struct vmspace *vm2;
2255         vm_map_t old_map = &vm1->vm_map;
2256         vm_map_t new_map;
2257         vm_map_entry_t old_entry;
2258         vm_map_entry_t new_entry;
2259         vm_object_t object;
2260
2261         GIANT_REQUIRED;
2262
2263         vm_map_lock(old_map);
2264         old_map->infork = 1;
2265
2266         vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2267         bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2268             (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
2269         new_map = &vm2->vm_map; /* XXX */
2270         new_map->timestamp = 1;
2271
2272         old_entry = old_map->header.next;
2273
2274         while (old_entry != &old_map->header) {
2275                 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2276                         panic("vm_map_fork: encountered a submap");
2277
2278                 switch (old_entry->inheritance) {
2279                 case VM_INHERIT_NONE:
2280                         break;
2281
2282                 case VM_INHERIT_SHARE:
2283                         /*
2284                          * Clone the entry, creating the shared object if necessary.
2285                          */
2286                         object = old_entry->object.vm_object;
2287                         if (object == NULL) {
2288                                 object = vm_object_allocate(OBJT_DEFAULT,
2289                                         atop(old_entry->end - old_entry->start));
2290                                 old_entry->object.vm_object = object;
2291                                 old_entry->offset = (vm_offset_t) 0;
2292                         }
2293
2294                         /*
2295                          * Add the reference before calling vm_object_shadow
2296                          * to ensure that a shadow object is created.
2297                          */
2298                         vm_object_reference(object);
2299                         if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2300                                 vm_object_shadow(&old_entry->object.vm_object,
2301                                         &old_entry->offset,
2302                                         atop(old_entry->end - old_entry->start));
2303                                 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2304                                 /* Transfer the second reference too. */
2305                                 vm_object_reference(
2306                                     old_entry->object.vm_object);
2307                                 vm_object_deallocate(object);
2308                                 object = old_entry->object.vm_object;
2309                         }
2310                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
2311
2312                         /*
2313                          * Clone the entry, referencing the shared object.
2314                          */
2315                         new_entry = vm_map_entry_create(new_map);
2316                         *new_entry = *old_entry;
2317                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2318                         new_entry->wired_count = 0;
2319
2320                         /*
2321                          * Insert the entry into the new map -- we know we're
2322                          * inserting at the end of the new map.
2323                          */
2324                         vm_map_entry_link(new_map, new_map->header.prev,
2325                             new_entry);
2326
2327                         /*
2328                          * Update the physical map
2329                          */
2330                         pmap_copy(new_map->pmap, old_map->pmap,
2331                             new_entry->start,
2332                             (old_entry->end - old_entry->start),
2333                             old_entry->start);
2334                         break;
2335
2336                 case VM_INHERIT_COPY:
2337                         /*
2338                          * Clone the entry and link into the map.
2339                          */
2340                         new_entry = vm_map_entry_create(new_map);
2341                         *new_entry = *old_entry;
2342                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2343                         new_entry->wired_count = 0;
2344                         new_entry->object.vm_object = NULL;
2345                         vm_map_entry_link(new_map, new_map->header.prev,
2346                             new_entry);
2347                         vm_map_copy_entry(old_map, new_map, old_entry,
2348                             new_entry);
2349                         break;
2350                 }
2351                 old_entry = old_entry->next;
2352         }
2353
2354         new_map->size = old_map->size;
2355         old_map->infork = 0;
2356         vm_map_unlock(old_map);
2357
2358         return (vm2);
2359 }
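
/*
 * Usage sketch (editorial addition, not from the original source): the fork
 * path is the expected caller; with parent "p1" and child "p2" assumed, a
 * hypothetical use is:
 *
 *      p2->p_vmspace = vmspace_fork(p1->p_vmspace);
 *
 * Giant must be held (GIANT_REQUIRED above) and the source map must not be
 * locked by the caller.
 */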
2360
2361 int
2362 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2363               vm_prot_t prot, vm_prot_t max, int cow)
2364 {
2365         vm_map_entry_t prev_entry;
2366         vm_map_entry_t new_stack_entry;
2367         vm_size_t      init_ssize;
2368         int            rv;
2369
2370         if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2371                 return (KERN_NO_SPACE);
2372
2373         if (max_ssize < sgrowsiz)
2374                 init_ssize = max_ssize;
2375         else
2376                 init_ssize = sgrowsiz;
2377
2378         vm_map_lock(map);
2379
2380         /* If addr is already mapped, no go */
2381         if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2382                 vm_map_unlock(map);
2383                 return (KERN_NO_SPACE);
2384         }
2385
2386         /* If we would blow our VMEM resource limit, no go */
2387         if (map->size + init_ssize >
2388             curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2389                 vm_map_unlock(map);
2390                 return (KERN_NO_SPACE);
2391         }
2392
2393         /* If we can't accommodate max_ssize in the current mapping,
2394          * no go.  However, we need to be aware that subsequent user
2395          * mappings might map into the space we have reserved for
2396          * the stack, and currently this space is not protected.
2397          *
2398          * Hopefully we will at least detect this condition
2399          * when we try to grow the stack.
2400          */
2401         if ((prev_entry->next != &map->header) &&
2402             (prev_entry->next->start < addrbos + max_ssize)) {
2403                 vm_map_unlock(map);
2404                 return (KERN_NO_SPACE);
2405         }
2406
2407         /* We initially map a stack of only init_ssize.  We will
2408          * grow it as needed later.  Since this is to be a
2409          * grow-down stack, we map at the top of the range.
2410          *
2411          * Note: we would normally expect prot and max to be
2412          * VM_PROT_ALL, and cow to be 0.  Possibly we should
2413          * eliminate these as input parameters, and just
2414          * pass these values here in the insert call.
2415          */
2416         rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2417                            addrbos + max_ssize, prot, max, cow);
2418
2419         /* Now set the avail_ssize amount */
2420         if (rv == KERN_SUCCESS) {
2421                 if (prev_entry != &map->header)
2422                         vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2423                 new_stack_entry = prev_entry->next;
2424                 if (new_stack_entry->end   != addrbos + max_ssize ||
2425                     new_stack_entry->start != addrbos + max_ssize - init_ssize)
2426                         panic ("Bad entry start/end for new stack entry");
2427                 else 
2428                         new_stack_entry->avail_ssize = max_ssize - init_ssize;
2429         }
2430
2431         vm_map_unlock(map);
2432         return (rv);
2433 }
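
/*
 * Usage sketch (editorial addition, not from the original source): exec-time
 * stack setup is the typical caller.  With "map", a bottom-of-stack address
 * "bos", and a maximum stack size "maxsize" assumed from the caller, a
 * grow-down stack could be reserved with:
 *
 *      rv = vm_map_stack(map, bos, maxsize, VM_PROT_ALL, VM_PROT_ALL, 0);
 */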
2434
2435 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2436  * desired address is already mapped, or if we successfully grow
2437  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2438  * stack range (this is strange, but preserves compatibility with
2439  * the grow function in vm_machdep.c).
2440  */
2441 int
2442 vm_map_growstack (struct proc *p, vm_offset_t addr)
2443 {
2444         vm_map_entry_t prev_entry;
2445         vm_map_entry_t stack_entry;
2446         vm_map_entry_t new_stack_entry;
2447         struct vmspace *vm = p->p_vmspace;
2448         vm_map_t map = &vm->vm_map;
2449         vm_offset_t    end;
2450         int      grow_amount;
2451         int      rv;
2452         int      is_procstack;
2453
2454         GIANT_REQUIRED;
2455         
2456 Retry:
2457         vm_map_lock_read(map);
2458
2459         /* If addr is already in the entry range, no need to grow. */
2460         if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2461                 vm_map_unlock_read(map);
2462                 return (KERN_SUCCESS);
2463         }
2464
2465         if ((stack_entry = prev_entry->next) == &map->header) {
2466                 vm_map_unlock_read(map);
2467                 return (KERN_SUCCESS);
2468         } 
2469         if (prev_entry == &map->header) 
2470                 end = stack_entry->start - stack_entry->avail_ssize;
2471         else
2472                 end = prev_entry->end;
2473
2474         /* This next test mimics the old grow function in vm_machdep.c.
2475          * It really doesn't quite make sense, but we do it anyway
2476          * for compatibility.
2477          *
2478          * If the stack is not growable, return success.  This signals
2479          * the caller to proceed as it normally would with ordinary VM.
2480          */
2481         if (stack_entry->avail_ssize < 1 ||
2482             addr >= stack_entry->start ||
2483             addr <  stack_entry->start - stack_entry->avail_ssize) {
2484                 vm_map_unlock_read(map);
2485                 return (KERN_SUCCESS);
2486         } 
2487         
2488         /* Find the minimum grow amount */
2489         grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2490         if (grow_amount > stack_entry->avail_ssize) {
2491                 vm_map_unlock_read(map);
2492                 return (KERN_NO_SPACE);
2493         }
2494
2495         /* If there is no longer enough space between the entries,
2496          * fail, and adjust the available space.  Note: this
2497          * should only happen if the user has mapped into the
2498          * stack area after the stack was created, and is
2499          * probably an error.
2500          *
2501          * This also effectively destroys any guard page the user
2502          * might have intended by limiting the stack size.
2503          */
2504         if (grow_amount > stack_entry->start - end) {
2505                 if (vm_map_lock_upgrade(map))
2506                         goto Retry;
2507
2508                 stack_entry->avail_ssize = stack_entry->start - end;
2509
2510                 vm_map_unlock(map);
2511                 return (KERN_NO_SPACE);
2512         }
2513
2514         is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2515
2516         /* If this is the main process stack, see if we're over the 
2517          * stack limit.
2518          */
2519         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2520                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2521                 vm_map_unlock_read(map);
2522                 return (KERN_NO_SPACE);
2523         }
2524
2525         /* Round up the grow amount to a multiple of sgrowsiz */
2526         grow_amount = roundup (grow_amount, sgrowsiz);
2527         if (grow_amount > stack_entry->avail_ssize) {
2528                 grow_amount = stack_entry->avail_ssize;
2529         }
2530         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2531                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2532                 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2533                               ctob(vm->vm_ssize);
2534         }
2535
2536         /* If we would blow our VMEM resource limit, no go */
2537         if (map->size + grow_amount >
2538             curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2539                 vm_map_unlock_read(map);
2540                 return (KERN_NO_SPACE);
2541         }
2542
2543         if (vm_map_lock_upgrade(map))
2544                 goto Retry;
2545
2546         /* Get the preliminary new entry start value */
2547         addr = stack_entry->start - grow_amount;
2548
2549         /* If this puts us into the previous entry, cut back our growth
2550          * to the available space.  Also, see the note above.
2551          */
2552         if (addr < end) {
2553                 stack_entry->avail_ssize = stack_entry->start - end;
2554                 addr = end;
2555         }
2556
2557         rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2558                            VM_PROT_ALL,
2559                            VM_PROT_ALL,
2560                            0);
2561
2562         /* Adjust the available stack space by the amount we grew. */
2563         if (rv == KERN_SUCCESS) {
2564                 if (prev_entry != &map->header)
2565                         vm_map_clip_end(map, prev_entry, addr);
2566                 new_stack_entry = prev_entry->next;
2567                 if (new_stack_entry->end   != stack_entry->start  ||
2568                     new_stack_entry->start != addr)
2569                         panic ("Bad stack grow start/end in new stack entry");
2570                 else {
2571                         new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2572                                                         (new_stack_entry->end -
2573                                                          new_stack_entry->start);
2574                         if (is_procstack)
2575                                 vm->vm_ssize += btoc(new_stack_entry->end -
2576                                                      new_stack_entry->start);
2577                 }
2578         }
2579
2580         vm_map_unlock(map);
2581         return (rv);
2582 }
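
/*
 * Usage sketch (editorial addition, not from the original source): the page
 * fault path would typically try to grow the stack before treating a fault
 * below it as fatal; with "p" and the faulting address "va" assumed from the
 * caller:
 *
 *      if (vm_map_growstack(p, va) != KERN_SUCCESS)
 *              (deliver the fault to the process)
 */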
2583
2584 /*
2585  * Unshare the specified VM space for exec.  If other processes are
2586  * mapped to it, then create a new one.  The new vmspace starts out empty.
2587  */
2588 void
2589 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
2590 {
2591         struct vmspace *oldvmspace = p->p_vmspace;
2592         struct vmspace *newvmspace;
2593
2594         GIANT_REQUIRED;
2595         newvmspace = vmspace_alloc(minuser, maxuser);
2596         bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2597             (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2598         /*
2599          * This code is written like this for prototype purposes.  The
2600          * goal is to avoid running down the vmspace here, but let the
2601          * other processes that are still using the vmspace finally
2602          * run it down.  Even though there is little or no chance of blocking
2603          * here, it is a good idea to keep this form for future mods.
2604          */
2605         p->p_vmspace = newvmspace;
2606         pmap_pinit2(vmspace_pmap(newvmspace));
2607         vmspace_free(oldvmspace);
2608         if (p == curthread->td_proc)            /* XXXKSE ? */
2609                 pmap_activate(curthread);
2610 }
2611
2612 /*
2613  * Unshare the specified VM space for forcing COW.  This
2614  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2615  */
2616 void
2617 vmspace_unshare(struct proc *p)
2618 {
2619         struct vmspace *oldvmspace = p->p_vmspace;
2620         struct vmspace *newvmspace;
2621
2622         GIANT_REQUIRED;
2623         if (oldvmspace->vm_refcnt == 1)
2624                 return;
2625         newvmspace = vmspace_fork(oldvmspace);
2626         p->p_vmspace = newvmspace;
2627         pmap_pinit2(vmspace_pmap(newvmspace));
2628         vmspace_free(oldvmspace);
2629         if (p == curthread->td_proc)            /* XXXKSE ? */
2630                 pmap_activate(curthread);
2631 }
2632
2633 /*
2634  *      vm_map_lookup:
2635  *
2636  *      Finds the VM object, offset, and
2637  *      protection for a given virtual address in the
2638  *      specified map, assuming a page fault of the
2639  *      type specified.
2640  *
2641  *      Leaves the map in question locked for read; return
2642  *      values are guaranteed until a vm_map_lookup_done
2643  *      call is performed.  Note that the map argument
2644  *      is in/out; the returned map must be used in
2645  *      the call to vm_map_lookup_done.
2646  *
2647  *      A handle (out_entry) is returned for use in
2648  *      vm_map_lookup_done, to make that fast.
2649  *
2650  *      If a lookup is requested with "write protection"
2651  *      specified, the map may be changed to perform virtual
2652  *      copying operations, although the data referenced will
2653  *      remain the same.
2654  */
2655 int
2656 vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
2657               vm_offset_t vaddr,
2658               vm_prot_t fault_typea,
2659               vm_map_entry_t *out_entry,        /* OUT */
2660               vm_object_t *object,              /* OUT */
2661               vm_pindex_t *pindex,              /* OUT */
2662               vm_prot_t *out_prot,              /* OUT */
2663               boolean_t *wired)                 /* OUT */
2664 {
2665         vm_map_entry_t entry;
2666         vm_map_t map = *var_map;
2667         vm_prot_t prot;
2668         vm_prot_t fault_type = fault_typea;
2669
2670 RetryLookup:;
2671         /*
2672          * Lookup the faulting address.
2673          */
2674
2675         vm_map_lock_read(map);
2676 #define RETURN(why) \
2677                 { \
2678                 vm_map_unlock_read(map); \
2679                 return (why); \
2680                 }
2681
2682         /*
2683          * If the map has an interesting hint, try it before calling the
2684          * full-blown lookup routine.
2685          */
2686         entry = map->root;
2687         *out_entry = entry;
2688         if (entry == NULL ||
2689             (vaddr < entry->start) || (vaddr >= entry->end)) {
2690                 /*
2691                  * Entry was either not a valid hint, or the vaddr was not
2692                  * contained in the entry, so do a full lookup.
2693                  */
2694                 if (!vm_map_lookup_entry(map, vaddr, out_entry))
2695                         RETURN(KERN_INVALID_ADDRESS);
2696
2697                 entry = *out_entry;
2698         }
2699         
2700         /*
2701          * Handle submaps.
2702          */
2703         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2704                 vm_map_t old_map = map;
2705
2706                 *var_map = map = entry->object.sub_map;
2707                 vm_map_unlock_read(old_map);
2708                 goto RetryLookup;
2709         }
2710
2711         /*
2712          * Check whether this task is allowed to have this page.
2713          * Note the special case for MAP_ENTRY_COW
2714          * pages with an override.  This is to implement a forced
2715          * COW for debuggers.
2716          */
2717         if (fault_type & VM_PROT_OVERRIDE_WRITE)
2718                 prot = entry->max_protection;
2719         else
2720                 prot = entry->protection;
2721         fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2722         if ((fault_type & prot) != fault_type) {
2723                         RETURN(KERN_PROTECTION_FAILURE);
2724         }
2725         if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2726             (entry->eflags & MAP_ENTRY_COW) &&
2727             (fault_type & VM_PROT_WRITE) &&
2728             (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2729                 RETURN(KERN_PROTECTION_FAILURE);
2730         }
2731
2732         /*
2733          * If this page is not pageable, we have to get it for all possible
2734          * accesses.
2735          */
2736         *wired = (entry->wired_count != 0);
2737         if (*wired)
2738                 prot = fault_type = entry->protection;
2739
2740         /*
2741          * If the entry was copy-on-write, we either copy it now or demote access.
2742          */
2743         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2744                 /*
2745                  * If we want to write the page, we may as well handle that
2746                  * now since we've got the map locked.
2747                  *
2748                  * If we don't need to write the page, we just demote the
2749                  * permissions allowed.
2750                  */
2751                 if (fault_type & VM_PROT_WRITE) {
2752                         /*
2753                          * Make a new object, and place it in the object
2754                          * chain.  Note that no new references have appeared
2755                          * -- one just moved from the map to the new
2756                          * object.
2757                          */
2758                         if (vm_map_lock_upgrade(map))
2759                                 goto RetryLookup;
2760
2761                         vm_object_shadow(
2762                             &entry->object.vm_object,
2763                             &entry->offset,
2764                             atop(entry->end - entry->start));
2765                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2766
2767                         vm_map_lock_downgrade(map);
2768                 } else {
2769                         /*
2770                          * We're attempting to read a copy-on-write page --
2771                          * don't allow writes.
2772                          */
2773                         prot &= ~VM_PROT_WRITE;
2774                 }
2775         }
2776
2777         /*
2778          * Create an object if necessary.
2779          */
2780         if (entry->object.vm_object == NULL &&
2781             !map->system_map) {
2782                 if (vm_map_lock_upgrade(map)) 
2783                         goto RetryLookup;
2784                 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2785                     atop(entry->end - entry->start));
2786                 entry->offset = 0;
2787                 vm_map_lock_downgrade(map);
2788         }
2789
2790         /*
2791          * Return the object/offset from this entry.  If the entry was
2792          * copy-on-write or empty, it has been fixed up.
2793          */
2794         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2795         *object = entry->object.vm_object;
2796
2797         /*
2798          * Return whether this is the only map sharing this data.
2799          */
2800         *out_prot = prot;
2801         return (KERN_SUCCESS);
2802
2803 #undef  RETURN
2804 }
2805
2806 /*
2807  *      vm_map_lookup_done:
2808  *
2809  *      Releases locks acquired by a vm_map_lookup
2810  *      (according to the handle returned by that lookup).
2811  */
2812 void
2813 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
2814 {
2815         /*
2816          * Unlock the main-level map
2817          */
2818         vm_map_unlock_read(map);
2819 }
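
/*
 * Usage sketch (editorial addition, not from the original source):
 * vm_map_lookup() and vm_map_lookup_done() bracket a fault-style lookup.
 * With "map" and "va" assumed from the caller:
 *
 *      rv = vm_map_lookup(&map, va, VM_PROT_READ, &entry, &object,
 *          &pindex, &prot, &wired);
 *      if (rv == KERN_SUCCESS) {
 *              (use object and pindex; the map stays read-locked)
 *              vm_map_lookup_done(map, entry);
 *      }
 */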
2820
2821 #ifdef ENABLE_VFS_IOOPT
2822 /*
2823  * Experimental support for zero-copy I/O
2824  *
2825  * Implement uiomove with VM operations.  This (and its collateral changes)
2826  * supports every combination of source object modification and COW-type
2827  * operations.
2828  */
2829 int
2830 vm_uiomove(
2831         vm_map_t mapa,
2832         vm_object_t srcobject,
2833         off_t cp,
2834         int cnta,
2835         vm_offset_t uaddra,
2836         int *npages)
2837 {
2838         vm_map_t map;
2839         vm_object_t first_object, oldobject, object;
2840         vm_map_entry_t entry;
2841         vm_prot_t prot;
2842         boolean_t wired;
2843         int tcnt, rv;
2844         vm_offset_t uaddr, start, end, tend;
2845         vm_pindex_t first_pindex, oindex;
2846         vm_size_t osize;
2847         off_t ooffset;
2848         int cnt;
2849
2850         GIANT_REQUIRED;
2851
2852         if (npages)
2853                 *npages = 0;
2854
2855         cnt = cnta;
2856         uaddr = uaddra;
2857
2858         while (cnt > 0) {
2859                 map = mapa;
2860
2861                 if ((vm_map_lookup(&map, uaddr,
2862                         VM_PROT_READ, &entry, &first_object,
2863                         &first_pindex, &prot, &wired)) != KERN_SUCCESS) {
2864                         return EFAULT;
2865                 }
2866
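                     /*
                      * Clip the entry so that it covers only the part of
                      * the user range handled on this pass; at most one
                      * map entry is processed per iteration.
                      */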
2867                 vm_map_clip_start(map, entry, uaddr);
2868
2869                 tcnt = cnt;
2870                 tend = uaddr + tcnt;
2871                 if (tend > entry->end) {
2872                         tcnt = entry->end - uaddr;
2873                         tend = entry->end;
2874                 }
2875
2876                 vm_map_clip_end(map, entry, tend);
2877
2878                 start = entry->start;
2879                 end = entry->end;
2880
2881                 osize = atop(tcnt);
2882
2883                 oindex = OFF_TO_IDX(cp);
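                     /*
                      * When the caller wants a page count, only proceed if
                      * every source page is resident, fully valid, and not
                      * PG_BUSY; otherwise return 0 so the caller may fall
                      * back to an ordinary copy.
                      */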
2884                 if (npages) {
2885                         vm_size_t idx;
2886                         for (idx = 0; idx < osize; idx++) {
2887                                 vm_page_t m;
2888                                 if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
2889                                         vm_map_lookup_done(map, entry);
2890                                         return 0;
2891                                 }
2892                                 /*
2893                                  * Disallow PG_BUSY or partially valid pages; accept
2894                                  * soft-busy (m->busy) pages that are entirely valid.
2895                                  */
2896                                 if ((m->flags & PG_BUSY) ||
2897                                         ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
2898                                         vm_map_lookup_done(map, entry);
2899                                         return 0;
2900                                 }
2901                         }
2902                 }
2903
2904 /*
2905  * If we are changing an existing map entry, just redirect
2906  * the object, and change mappings.
2907  */
2908                 if ((first_object->type == OBJT_VNODE) &&
2909                         ((oldobject = entry->object.vm_object) == first_object)) {
2910
2911                         if ((entry->offset != cp) || (oldobject != srcobject)) {
2912                                 /*
2913                                  * Remove the old window into the file.
2914                                  */
2915                                 pmap_remove(map->pmap, uaddr, tend);
2916
2917                                 /*
2918                                  * Force copy-on-write for mmapped regions.
2919                                  */
2920                                 vm_object_pmap_copy_1(srcobject, oindex, oindex + osize);
2921
2922                                 /*
2923                                  * Point the object appropriately.
2924                                  */
2925                                 if (oldobject != srcobject) {
2926
2927                                         /*
2928                                          * Set the object optimization hint flag.
2929                                          */
2930                                         vm_object_set_flag(srcobject, OBJ_OPT);
2931                                         vm_object_reference(srcobject);
2932                                         entry->object.vm_object = srcobject;
2933
2934                                         if (oldobject) {
2935                                                 vm_object_deallocate(oldobject);
2936                                         }
2937                                 }
2938
2939                                 entry->offset = cp;
2940                                 map->timestamp++;
2941                         } else {
2942                                 pmap_remove(map->pmap, uaddr, tend);
2943                         }
2944
2945                 } else if ((first_object->ref_count == 1) &&
2946                         (first_object->size == osize) &&
2947                         ((first_object->type == OBJT_DEFAULT) ||
2948                                 (first_object->type == OBJT_SWAP)) ) {
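                             /*
                              * The entry is backed by a small, unshared
                              * anonymous (default or swap) object: splice the
                              * source object in underneath it as its backing
                              * object.
                              */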
2949
2950                         oldobject = first_object->backing_object;
2951
2952                         if ((first_object->backing_object_offset != cp) ||
2953                                 (oldobject != srcobject)) {
2954                                 /*
2955                                  * Remove the old window into the file.
2956                                  */
2957                                 pmap_remove(map->pmap, uaddr, tend);
2958
2959                                 /*
2960                                  * Remove unneeded old pages
2961                                  */
2962                                 vm_object_page_remove(first_object, 0, 0, 0);
2963
2964                                 /*
2965                                  * Invalidate swap space
2966                                  */
2967                                 if (first_object->type == OBJT_SWAP) {
2968                                         swap_pager_freespace(first_object,
2969                                                 0,
2970                                                 first_object->size);
2971                                 }
2972
2973                                 /*
2974                                  * Force copy-on-write for mmapped regions.
2975                                  */
2976                                 vm_object_pmap_copy_1(srcobject, oindex, oindex + osize);
2977
2978                                 /*
2979                                  * Point the object appropriately
2980                                  */
2981                                 if (oldobject != srcobject) {
2982                                         /*
2983                                          * Set the object optimization hint flag
2984                                          */
2985                                         vm_object_set_flag(srcobject, OBJ_OPT);
2986                                         vm_object_reference(srcobject);
2987
2988                                         if (oldobject) {
2989                                                 TAILQ_REMOVE(&oldobject->shadow_head,
2990                                                         first_object, shadow_list);
2991                                                 oldobject->shadow_count--;
2992                                                 /* XXX bump generation? */
2993                                                 vm_object_deallocate(oldobject);
2994                                         }
2995
2996                                         TAILQ_INSERT_TAIL(&srcobject->shadow_head,
2997                                                 first_object, shadow_list);
2998                                         srcobject->shadow_count++;
2999                                         /* XXX bump generation? */
3000
3001                                         first_object->backing_object = srcobject;
3002                                 }
3003                                 first_object->backing_object_offset = cp;
3004                                 map->timestamp++;
3005                         } else {
3006                                 pmap_remove(map->pmap, uaddr, tend);
3007                         }
3008 /*
3009  * Otherwise, we have to do a logical mmap.
3010  */
3011                 } else {
3012
3013                         vm_object_set_flag(srcobject, OBJ_OPT);
3014                         vm_object_reference(srcobject);
3015
3016                         pmap_remove(map->pmap, uaddr, tend);
3017
3018                         vm_object_pmap_copy_1(srcobject, oindex, oindex + osize);
3019                         vm_map_lock_upgrade(map);
3020
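                             /*
                              * The entry is about to be deleted, so back the
                              * free-space hint off if it points at or beyond
                              * the entry's start.
                              */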
3021                         if (entry == &map->header) {
3022                                 map->first_free = &map->header;
3023                         } else if (map->first_free->start >= start) {
3024                                 map->first_free = entry->prev;
3025                         }
3026
3027                         vm_map_entry_delete(map, entry);
3028
3029                         object = srcobject;
3030                         ooffset = cp;
3031
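                             /*
                              * Replace the deleted entry with a copy-on-write
                              * mapping of the source object.
                              */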
3032                         rv = vm_map_insert(map, object, ooffset, start, tend,
3033                                 VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
3034
3035                         if (rv != KERN_SUCCESS)
3036                                 panic("vm_uiomove: could not insert new entry: %d", rv);
3037                 }
3038
3039 /*
3040  * Map the window directly, if it is already in memory
3041  */
3042                 pmap_object_init_pt(map->pmap, uaddr,
3043                         srcobject, oindex, tcnt, 0);
3044
3045                 map->timestamp++;
3046                 vm_map_unlock(map);
3047
3048                 cnt -= tcnt;
3049                 uaddr += tcnt;
3050                 cp += tcnt;
3051                 if (npages)
3052                         *npages += osize;
3053         }
3054         return 0;
3055 }
3056 #endif
3057
3058 #include "opt_ddb.h"
3059 #ifdef DDB
3060 #include <sys/kernel.h>
3061
3062 #include <ddb/ddb.h>
3063
3064 /*
3065  *      vm_map_print:   [ debug ]
3066  */
3067 DB_SHOW_COMMAND(map, vm_map_print)
3068 {
3069         static int nlines;
3070         /* XXX convert args. */
3071         vm_map_t map = (vm_map_t)addr;
3072         boolean_t full = have_addr;
3073
3074         vm_map_entry_t entry;
3075
3076         db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3077             (void *)map,
3078             (void *)map->pmap, map->nentries, map->timestamp);
3079         nlines++;
3080
3081         if (!full && db_indent)
3082                 return;
3083
3084         db_indent += 2;
3085         for (entry = map->header.next; entry != &map->header;
3086             entry = entry->next) {
3087                 db_iprintf("map entry %p: start=%p, end=%p\n",
3088                     (void *)entry, (void *)entry->start, (void *)entry->end);
3089                 nlines++;
3090                 {
3091                         static char *inheritance_name[4] =
3092                         {"share", "copy", "none", "donate_copy"};
3093
3094                         db_iprintf(" prot=%x/%x/%s",
3095                             entry->protection,
3096                             entry->max_protection,
3097                             inheritance_name[(int)(unsigned char)entry->inheritance]);
3098                         if (entry->wired_count != 0)
3099                                 db_printf(", wired");
3100                 }
3101                 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3102                         /* XXX no %qd in kernel.  Truncate entry->offset. */
3103                         db_printf(", share=%p, offset=0x%lx\n",
3104                             (void *)entry->object.sub_map,
3105                             (long)entry->offset);
3106                         nlines++;
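                             /*
                              * Recurse into the submap only for the first of
                              * a run of entries that share the same submap.
                              */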
3107                         if ((entry->prev == &map->header) ||
3108                             (entry->prev->object.sub_map !=
3109                                 entry->object.sub_map)) {
3110                                 db_indent += 2;
3111                                 vm_map_print((db_expr_t)(intptr_t)
3112                                              entry->object.sub_map,
3113                                              full, 0, (char *)0);
3114                                 db_indent -= 2;
3115                         }
3116                 } else {
3117                         /* XXX no %qd in kernel.  Truncate entry->offset. */
3118                         db_printf(", object=%p, offset=0x%lx",
3119                             (void *)entry->object.vm_object,
3120                             (long)entry->offset);
3121                         if (entry->eflags & MAP_ENTRY_COW)
3122                                 db_printf(", copy (%s)",
3123                                     (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3124                         db_printf("\n");
3125                         nlines++;
3126
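                             /*
                              * Likewise, dump a backing VM object only once
                              * for consecutive entries that reference it.
                              */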
3127                         if ((entry->prev == &map->header) ||
3128                             (entry->prev->object.vm_object !=
3129                                 entry->object.vm_object)) {
3130                                 db_indent += 2;
3131                                 vm_object_print((db_expr_t)(intptr_t)
3132                                                 entry->object.vm_object,
3133                                                 full, 0, (char *)0);
3134                                 nlines += 4;
3135                                 db_indent -= 2;
3136                         }
3137                 }
3138         }
3139         db_indent -= 2;
3140         if (db_indent == 0)
3141                 nlines = 0;
3142 }
3143
3144
3145 DB_SHOW_COMMAND(procvm, procvm)
3146 {
3147         struct proc *p;
3148
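             /*
              * Use the process given as the address argument, or default to
              * the current process.
              */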
3149         if (have_addr) {
3150                 p = (struct proc *) addr;
3151         } else {
3152                 p = curproc;
3153         }
3154
3155         db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3156             (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3157             (void *)vmspace_pmap(p->p_vmspace));
3158
3159         vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3160 }
3161
3162 #endif /* DDB */