sys/vm/vm_map.c
1 /*-
2  * Copyright (c) 1991, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *      from: @(#)vm_map.c      8.3 (Berkeley) 1/12/94
33  *
34  *
35  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
36  * All rights reserved.
37  *
38  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
39  *
40  * Permission to use, copy, modify and distribute this software and
41  * its documentation is hereby granted, provided that both the copyright
42  * notice and this permission notice appear in all copies of the
43  * software, derivative works or modified versions, and any portions
44  * thereof, and that both notices appear in supporting documentation.
45  *
46  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
47  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
48  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
49  *
50  * Carnegie Mellon requests users of this software to return to
51  *
52  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
53  *  School of Computer Science
54  *  Carnegie Mellon University
55  *  Pittsburgh PA 15213-3890
56  *
57  * any improvements or extensions that they make and grant Carnegie the
58  * rights to redistribute these changes.
59  */
60
61 /*
62  *      Virtual memory mapping module.
63  */
64
65 #include <sys/cdefs.h>
66 __FBSDID("$FreeBSD$");
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/kernel.h>
71 #include <sys/ktr.h>
72 #include <sys/lock.h>
73 #include <sys/mutex.h>
74 #include <sys/proc.h>
75 #include <sys/vmmeter.h>
76 #include <sys/mman.h>
77 #include <sys/vnode.h>
78 #include <sys/racct.h>
79 #include <sys/resourcevar.h>
80 #include <sys/file.h>
81 #include <sys/sysctl.h>
82 #include <sys/sysent.h>
83 #include <sys/shm.h>
84
85 #include <vm/vm.h>
86 #include <vm/vm_param.h>
87 #include <vm/pmap.h>
88 #include <vm/vm_map.h>
89 #include <vm/vm_page.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_pager.h>
92 #include <vm/vm_kern.h>
93 #include <vm/vm_extern.h>
94 #include <vm/swap_pager.h>
95 #include <vm/uma.h>
96
97 /*
98  *      Virtual memory maps provide for the mapping, protection,
99  *      and sharing of virtual memory objects.  In addition,
100  *      this module provides for an efficient virtual copy of
101  *      memory from one map to another.
102  *
103  *      Synchronization is required prior to most operations.
104  *
105  *      Maps consist of an ordered doubly-linked list of simple
106  *      entries; a self-adjusting binary search tree of these
107  *      entries is used to speed up lookups.
108  *
109  *      Since portions of maps are specified by start/end addresses,
110  *      which may not align with existing map entries, all
111  *      routines merely "clip" entries to these start/end values.
112  *      [That is, an entry is split into two, bordering at a
113  *      start or end value.]  Note that these clippings may not
114  *      always be necessary (as the two resulting entries are then
115  *      not changed); however, the clipping is done for convenience.
116  *
117  *      As mentioned above, virtual copy operations are performed
118  *      by copying VM object references from one map to
119  *      another, and then marking both regions as copy-on-write.
120  */
121
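/*
 *      Example (sketch): a typical read-only traversal of a map's
 *      entries walks the circular doubly-linked list rooted at the
 *      header sentinel; each entry covers the half-open address range
 *      [entry->start, entry->end).  The map lock must be held across
 *      the walk:
 *
 *              vm_map_entry_t entry;
 *
 *              vm_map_lock_read(map);
 *              for (entry = map->header.next; entry != &map->header;
 *                  entry = entry->next) {
 *                      (operate on entry)
 *              }
 *              vm_map_unlock_read(map);
 */
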
122 static struct mtx map_sleep_mtx;
123 static uma_zone_t mapentzone;
124 static uma_zone_t kmapentzone;
125 static uma_zone_t mapzone;
126 static uma_zone_t vmspace_zone;
127 static struct vm_object kmapentobj;
128 static int vmspace_zinit(void *mem, int size, int flags);
129 static void vmspace_zfini(void *mem, int size);
130 static int vm_map_zinit(void *mem, int size, int flags);
131 static void vm_map_zfini(void *mem, int size);
132 static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min,
133     vm_offset_t max);
134 static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
135 static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
136 #ifdef INVARIANTS
137 static void vm_map_zdtor(void *mem, int size, void *arg);
138 static void vmspace_zdtor(void *mem, int size, void *arg);
139 #endif
140
141 #define ENTRY_CHARGED(e) ((e)->cred != NULL || \
142     ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \
143      !((e)->eflags & MAP_ENTRY_NEEDS_COPY)))
144
145 /*
146  * PROC_VMSPACE_{UN,}LOCK() can be a no-op as long as vmspaces are
147  * type-stable.
148  */
149 #define PROC_VMSPACE_LOCK(p) do { } while (0)
150 #define PROC_VMSPACE_UNLOCK(p) do { } while (0)
151
152 /*
153  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
154  *
155  *      Asserts that the starting and ending region
156  *      addresses fall within the valid range of the map.
157  */
158 #define VM_MAP_RANGE_CHECK(map, start, end)             \
159                 {                                       \
160                 if (start < vm_map_min(map))            \
161                         start = vm_map_min(map);        \
162                 if (end > vm_map_max(map))              \
163                         end = vm_map_max(map);          \
164                 if (start > end)                        \
165                         start = end;                    \
166                 }
167
168 /*
169  *      vm_map_startup:
170  *
171  *      Initialize the vm_map module.  Must be called before
172  *      any other vm_map routines.
173  *
174  *      Map and entry structures are allocated from the general
175  *      purpose memory pool with some exceptions:
176  *
177  *      - The kernel map and kmem submap are allocated statically.
178  *      - Kernel map entries are allocated out of a static pool.
179  *
180  *      These restrictions are necessary since malloc() uses the
181  *      maps and requires map entries.
182  */
183
184 void
185 vm_map_startup(void)
186 {
187         mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF);
188         mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
189 #ifdef INVARIANTS
190             vm_map_zdtor,
191 #else
192             NULL,
193 #endif
194             vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
195         uma_prealloc(mapzone, MAX_KMAP);
196         kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry),
197             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
198             UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
199         uma_prealloc(kmapentzone, MAX_KMAPENT);
200         mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
201             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
202 }
203
204 static void
205 vmspace_zfini(void *mem, int size)
206 {
207         struct vmspace *vm;
208
209         vm = (struct vmspace *)mem;
210         vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map));
211 }
212
213 static int
214 vmspace_zinit(void *mem, int size, int flags)
215 {
216         struct vmspace *vm;
217
218         vm = (struct vmspace *)mem;
219
220         vm->vm_map.pmap = NULL;
221         (void)vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map), flags);
222         return (0);
223 }
224
225 static void
226 vm_map_zfini(void *mem, int size)
227 {
228         vm_map_t map;
229
230         map = (vm_map_t)mem;
231         mtx_destroy(&map->system_mtx);
232         sx_destroy(&map->lock);
233 }
234
235 static int
236 vm_map_zinit(void *mem, int size, int flags)
237 {
238         vm_map_t map;
239
240         map = (vm_map_t)mem;
241         map->nentries = 0;
242         map->size = 0;
243         mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
244         sx_init(&map->lock, "user map");
245         return (0);
246 }
247
248 #ifdef INVARIANTS
249 static void
250 vmspace_zdtor(void *mem, int size, void *arg)
251 {
252         struct vmspace *vm;
253
254         vm = (struct vmspace *)mem;
255
256         vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
257 }
258 static void
259 vm_map_zdtor(void *mem, int size, void *arg)
260 {
261         vm_map_t map;
262
263         map = (vm_map_t)mem;
264         KASSERT(map->nentries == 0,
265             ("map %p nentries == %d on free.",
266             map, map->nentries));
267         KASSERT(map->size == 0,
268             ("map %p size == %lu on free.",
269             map, (unsigned long)map->size));
270 }
271 #endif  /* INVARIANTS */
272
273 /*
274  * Allocate a vmspace structure, including a vm_map and pmap,
275  * and initialize those structures.  The refcnt is set to 1.
276  */
277 struct vmspace *
278 vmspace_alloc(vm_offset_t min, vm_offset_t max)
279 {
281         struct vmspace *vm;
282
283         vm = uma_zalloc(vmspace_zone, M_WAITOK);
284         if (vm->vm_map.pmap == NULL && !pmap_pinit(vmspace_pmap(vm))) {
285                 uma_zfree(vmspace_zone, vm);
286                 return (NULL);
287         }
288         CTR1(KTR_VM, "vmspace_alloc: %p", vm);
289         _vm_map_init(&vm->vm_map, vmspace_pmap(vm), min, max);
290         vm->vm_refcnt = 1;
291         vm->vm_shm = NULL;
292         vm->vm_swrss = 0;
293         vm->vm_tsize = 0;
294         vm->vm_dsize = 0;
295         vm->vm_ssize = 0;
296         vm->vm_taddr = 0;
297         vm->vm_daddr = 0;
298         vm->vm_maxsaddr = 0;
299         return (vm);
300 }
301
302 void
303 vm_init2(void)
304 {
305         uma_zone_set_obj(kmapentzone, &kmapentobj, lmin(cnt.v_page_count,
306             (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE) / 8 +
307              maxproc * 2 + maxfiles);
308         vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
309 #ifdef INVARIANTS
310             vmspace_zdtor,
311 #else
312             NULL,
313 #endif
314             vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
315 }
316
317 static void
318 vmspace_container_reset(struct proc *p)
319 {
320
321 #ifdef RACCT
322         PROC_LOCK(p);
323         racct_set(p, RACCT_DATA, 0);
324         racct_set(p, RACCT_STACK, 0);
325         racct_set(p, RACCT_RSS, 0);
326         racct_set(p, RACCT_MEMLOCK, 0);
327         racct_set(p, RACCT_VMEM, 0);
328         PROC_UNLOCK(p);
329 #endif
330 }
331
332 static inline void
333 vmspace_dofree(struct vmspace *vm)
334 {
335
336         CTR1(KTR_VM, "vmspace_free: %p", vm);
337
338         /*
339          * Make sure any SysV shm is freed, it might not have been in
340          * exit1().
341          */
342         shmexit(vm);
343
344         /*
345          * Lock the map, to wait out all other references to it.
346          * Delete all of the mappings and pages they hold, then call
347          * the pmap module to reclaim anything left.
348          */
349         (void)vm_map_remove(&vm->vm_map, vm->vm_map.min_offset,
350             vm->vm_map.max_offset);
351
352         pmap_release(vmspace_pmap(vm));
353         vm->vm_map.pmap = NULL;
354         uma_zfree(vmspace_zone, vm);
355 }
356
357 void
358 vmspace_free(struct vmspace *vm)
359 {
360
361         if (vm->vm_refcnt == 0)
362                 panic("vmspace_free: attempt to free already freed vmspace");
363
364         if (atomic_fetchadd_int(&vm->vm_refcnt, -1) == 1)
365                 vmspace_dofree(vm);
366 }
367
368 void
369 vmspace_exitfree(struct proc *p)
370 {
371         struct vmspace *vm;
372
373         PROC_VMSPACE_LOCK(p);
374         vm = p->p_vmspace;
375         p->p_vmspace = NULL;
376         PROC_VMSPACE_UNLOCK(p);
377         KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace"));
378         vmspace_free(vm);
379 }
380
381 void
382 vmspace_exit(struct thread *td)
383 {
384         int refcnt;
385         struct vmspace *vm;
386         struct proc *p;
387
388         /*
389          * Release user portion of address space.
390          * This releases references to vnodes,
391          * which could cause I/O if the file has been unlinked.
392          * Need to do this early enough that we can still sleep.
393          *
394          * The last exiting process to reach this point releases as
395          * much of the environment as it can. vmspace_dofree() is the
396          * slower fallback in case another process had a temporary
397          * reference to the vmspace.
398          */
399
400         p = td->td_proc;
401         vm = p->p_vmspace;
402         atomic_add_int(&vmspace0.vm_refcnt, 1);
403         do {
404                 refcnt = vm->vm_refcnt;
405                 if (refcnt > 1 && p->p_vmspace != &vmspace0) {
406                         /* Switch now since other proc might free vmspace */
407                         PROC_VMSPACE_LOCK(p);
408                         p->p_vmspace = &vmspace0;
409                         PROC_VMSPACE_UNLOCK(p);
410                         pmap_activate(td);
411                 }
412         } while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1));
413         if (refcnt == 1) {
414                 if (p->p_vmspace != vm) {
415                         /* vmspace not yet freed, switch back */
416                         PROC_VMSPACE_LOCK(p);
417                         p->p_vmspace = vm;
418                         PROC_VMSPACE_UNLOCK(p);
419                         pmap_activate(td);
420                 }
421                 pmap_remove_pages(vmspace_pmap(vm));
422                 /* Switch now since this proc will free vmspace */
423                 PROC_VMSPACE_LOCK(p);
424                 p->p_vmspace = &vmspace0;
425                 PROC_VMSPACE_UNLOCK(p);
426                 pmap_activate(td);
427                 vmspace_dofree(vm);
428         }
429         vmspace_container_reset(p);
430 }
431
432 /* Acquire reference to vmspace owned by another process. */
433
434 struct vmspace *
435 vmspace_acquire_ref(struct proc *p)
436 {
437         struct vmspace *vm;
438         int refcnt;
439
440         PROC_VMSPACE_LOCK(p);
441         vm = p->p_vmspace;
442         if (vm == NULL) {
443                 PROC_VMSPACE_UNLOCK(p);
444                 return (NULL);
445         }
446         do {
447                 refcnt = vm->vm_refcnt;
448                 if (refcnt <= 0) {      /* Avoid 0->1 transition */
449                         PROC_VMSPACE_UNLOCK(p);
450                         return (NULL);
451                 }
452         } while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt + 1));
453         if (vm != p->p_vmspace) {
454                 PROC_VMSPACE_UNLOCK(p);
455                 vmspace_free(vm);
456                 return (NULL);
457         }
458         PROC_VMSPACE_UNLOCK(p);
459         return (vm);
460 }
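
/*
 *      Example (sketch): a thread that must operate on another
 *      process's address space (e.g., debugger-style memory access)
 *      pins the vmspace around the operation:
 *
 *              struct vmspace *vm;
 *
 *              vm = vmspace_acquire_ref(p);
 *              if (vm == NULL)
 *                      return (EFAULT);
 *              (operate on &vm->vm_map)
 *              vmspace_free(vm);
 */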
461
462 void
463 _vm_map_lock(vm_map_t map, const char *file, int line)
464 {
465
466         if (map->system_map)
467                 mtx_lock_flags_(&map->system_mtx, 0, file, line);
468         else
469                 sx_xlock_(&map->lock, file, line);
470         map->timestamp++;
471 }
472
473 static void
474 vm_map_process_deferred(void)
475 {
476         struct thread *td;
477         vm_map_entry_t entry;
478
479         td = curthread;
480
481         while ((entry = td->td_map_def_user) != NULL) {
482                 td->td_map_def_user = entry->next;
483                 vm_map_entry_deallocate(entry, FALSE);
484         }
485 }
486
487 void
488 _vm_map_unlock(vm_map_t map, const char *file, int line)
489 {
490
491         if (map->system_map)
492                 mtx_unlock_flags_(&map->system_mtx, 0, file, line);
493         else {
494                 sx_xunlock_(&map->lock, file, line);
495                 vm_map_process_deferred();
496         }
497 }
498
499 void
500 _vm_map_lock_read(vm_map_t map, const char *file, int line)
501 {
502
503         if (map->system_map)
504                 mtx_lock_flags_(&map->system_mtx, 0, file, line);
505         else
506                 sx_slock_(&map->lock, file, line);
507 }
508
509 void
510 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
511 {
512
513         if (map->system_map)
514                 mtx_unlock_flags_(&map->system_mtx, 0, file, line);
515         else {
516                 sx_sunlock_(&map->lock, file, line);
517                 vm_map_process_deferred();
518         }
519 }
520
521 int
522 _vm_map_trylock(vm_map_t map, const char *file, int line)
523 {
524         int error;
525
526         error = map->system_map ?
527             !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
528             !sx_try_xlock_(&map->lock, file, line);
529         if (error == 0)
530                 map->timestamp++;
531         return (error == 0);
532 }
533
534 int
535 _vm_map_trylock_read(vm_map_t map, const char *file, int line)
536 {
537         int error;
538
539         error = map->system_map ?
540             !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
541             !sx_try_slock_(&map->lock, file, line);
542         return (error == 0);
543 }
544
545 /*
546  *      _vm_map_lock_upgrade:   [ internal use only ]
547  *
548  *      Tries to upgrade a read (shared) lock on the specified map to a write
549  *      (exclusive) lock.  Returns the value "0" if the upgrade succeeds and a
550  *      non-zero value if the upgrade fails.  If the upgrade fails, the map is
551  *      returned without a read or write lock held.
552  *
553  *      Requires that the map be read locked.
554  */
555 int
556 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
557 {
558         unsigned int last_timestamp;
559
560         if (map->system_map) {
561                 mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
562         } else {
563                 if (!sx_try_upgrade_(&map->lock, file, line)) {
564                         last_timestamp = map->timestamp;
565                         sx_sunlock_(&map->lock, file, line);
566                         vm_map_process_deferred();
567                         /*
568                          * If the map's timestamp does not change while the
569                          * map is unlocked, then the upgrade succeeds.
570                          */
571                         sx_xlock_(&map->lock, file, line);
572                         if (last_timestamp != map->timestamp) {
573                                 sx_xunlock_(&map->lock, file, line);
574                                 return (1);
575                         }
576                 }
577         }
578         map->timestamp++;
579         return (0);
580 }
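
/*
 *      Example (sketch): because a failed upgrade leaves the map
 *      entirely unlocked, callers typically reacquire the write lock
 *      and redo any lookup before continuing:
 *
 *              if (vm_map_lock_upgrade(map) != 0) {
 *                      vm_map_lock(map);
 *                      (revalidate; the map may have changed)
 *              }
 */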
581
582 void
583 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
584 {
585
586         if (map->system_map) {
587                 mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
588         } else
589                 sx_downgrade_(&map->lock, file, line);
590 }
591
592 /*
593  *      vm_map_locked:
594  *
595  *      Returns a non-zero value if the caller holds a write (exclusive) lock
596  *      on the specified map and the value "0" otherwise.
597  */
598 int
599 vm_map_locked(vm_map_t map)
600 {
601
602         if (map->system_map)
603                 return (mtx_owned(&map->system_mtx));
604         else
605                 return (sx_xlocked(&map->lock));
606 }
607
608 #ifdef INVARIANTS
609 static void
610 _vm_map_assert_locked(vm_map_t map, const char *file, int line)
611 {
612
613         if (map->system_map)
614                 mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
615         else
616                 sx_assert_(&map->lock, SA_XLOCKED, file, line);
617 }
618
619 #define VM_MAP_ASSERT_LOCKED(map) \
620     _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE)
621 #else
622 #define VM_MAP_ASSERT_LOCKED(map)
623 #endif
624
625 /*
626  *      _vm_map_unlock_and_wait:
627  *
628  *      Atomically releases the lock on the specified map and puts the calling
629  *      thread to sleep.  The calling thread will remain asleep until either
630  *      vm_map_wakeup() is performed on the map or the specified timeout is
631  *      exceeded.
632  *
633  *      WARNING!  This function does not perform deferred deallocations of
634  *      objects and map entries.  Therefore, the calling thread is expected to
635  *      reacquire the map lock after reawakening and later perform an ordinary
636  *      unlock operation, such as vm_map_unlock(), before completing its
637  *      operation on the map.
638  */
639 int
640 _vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line)
641 {
642
643         mtx_lock(&map_sleep_mtx);
644         if (map->system_map)
645                 mtx_unlock_flags_(&map->system_mtx, 0, file, line);
646         else
647                 sx_xunlock_(&map->lock, file, line);
648         return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps",
649             timo));
650 }
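
/*
 *      Example (sketch): a caller that finds an entry marked
 *      MAP_ENTRY_IN_TRANSITION requests a wakeup, sleeps, and then
 *      reacquires the lock before looking the entry up again:
 *
 *              entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 *              (void)vm_map_unlock_and_wait(map, 0);
 *              vm_map_lock(map);
 *              (revalidate; the map may have changed while unlocked)
 */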
651
652 /*
653  *      vm_map_wakeup:
654  *
655  *      Awaken any threads that have slept on the map using
656  *      vm_map_unlock_and_wait().
657  */
658 void
659 vm_map_wakeup(vm_map_t map)
660 {
661
662         /*
663          * Acquire and release map_sleep_mtx to prevent a wakeup()
664          * from being performed (and lost) between the map unlock
665          * and the msleep() in _vm_map_unlock_and_wait().
666          */
667         mtx_lock(&map_sleep_mtx);
668         mtx_unlock(&map_sleep_mtx);
669         wakeup(&map->root);
670 }
671
672 void
673 vm_map_busy(vm_map_t map)
674 {
675
676         VM_MAP_ASSERT_LOCKED(map);
677         map->busy++;
678 }
679
680 void
681 vm_map_unbusy(vm_map_t map)
682 {
683
684         VM_MAP_ASSERT_LOCKED(map);
685         KASSERT(map->busy, ("vm_map_unbusy: not busy"));
686         if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) {
687                 vm_map_modflags(map, 0, MAP_BUSY_WAKEUP);
688                 wakeup(&map->busy);
689         }
690 }
691
692 void 
693 vm_map_wait_busy(vm_map_t map)
694 {
695
696         VM_MAP_ASSERT_LOCKED(map);
697         while (map->busy) {
698                 vm_map_modflags(map, MAP_BUSY_WAKEUP, 0);
699                 if (map->system_map)
700                         msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0);
701                 else
702                         sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0);
703         }
704         map->timestamp++;
705 }
706
707 long
708 vmspace_resident_count(struct vmspace *vmspace)
709 {
710         return (pmap_resident_count(vmspace_pmap(vmspace)));
711 }
712
713 long
714 vmspace_wired_count(struct vmspace *vmspace)
715 {
716         return (pmap_wired_count(vmspace_pmap(vmspace)));
717 }
718
719 /*
720  *      vm_map_create:
721  *
722  *      Creates and returns a new empty VM map with
723  *      the given physical map structure, and having
724  *      the given lower and upper address bounds.
725  */
726 vm_map_t
727 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
728 {
729         vm_map_t result;
730
731         result = uma_zalloc(mapzone, M_WAITOK);
732         CTR1(KTR_VM, "vm_map_create: %p", result);
733         _vm_map_init(result, pmap, min, max);
734         return (result);
735 }
736
737 /*
738  * Initialize an existing vm_map structure
739  * such as that in the vmspace structure.
740  */
741 static void
742 _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
743 {
744
745         map->header.next = map->header.prev = &map->header;
746         map->needs_wakeup = FALSE;
747         map->system_map = 0;
748         map->pmap = pmap;
749         map->min_offset = min;
750         map->max_offset = max;
751         map->flags = 0;
752         map->root = NULL;
753         map->timestamp = 0;
754         map->busy = 0;
755 }
756
757 void
758 vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
759 {
760
761         _vm_map_init(map, pmap, min, max);
762         mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
763         sx_init(&map->lock, "user map");
764 }
765
766 /*
767  *      vm_map_entry_dispose:   [ internal use only ]
768  *
769  *      Inverse of vm_map_entry_create.
770  */
771 static void
772 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
773 {
774         uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
775 }
776
777 /*
778  *      vm_map_entry_create:    [ internal use only ]
779  *
780  *      Allocates a VM map entry for insertion.
781  *      No entry fields are filled in.
782  */
783 static vm_map_entry_t
784 vm_map_entry_create(vm_map_t map)
785 {
786         vm_map_entry_t new_entry;
787
788         if (map->system_map)
789                 new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
790         else
791                 new_entry = uma_zalloc(mapentzone, M_WAITOK);
792         if (new_entry == NULL)
793                 panic("vm_map_entry_create: kernel resources exhausted");
794         return (new_entry);
795 }
796
797 /*
798  *      vm_map_entry_set_behavior:
799  *
800  *      Set the expected access behavior, either normal, random, or
801  *      sequential.
802  */
803 static inline void
804 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
805 {
806         entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
807             (behavior & MAP_ENTRY_BEHAV_MASK);
808 }
809
810 /*
811  *      vm_map_entry_set_max_free:
812  *
813  *      Set the max_free field in a vm_map_entry.
814  */
815 static inline void
816 vm_map_entry_set_max_free(vm_map_entry_t entry)
817 {
818
819         entry->max_free = entry->adj_free;
820         if (entry->left != NULL && entry->left->max_free > entry->max_free)
821                 entry->max_free = entry->left->max_free;
822         if (entry->right != NULL && entry->right->max_free > entry->max_free)
823                 entry->max_free = entry->right->max_free;
824 }
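
/*
 *      Example: if an entry's own adj_free is 2 pages while its left
 *      subtree's max_free is 5 pages and its right subtree's is 3
 *      pages, the entry's max_free becomes 5 pages.  A search for a
 *      4-page hole can therefore skip any subtree whose max_free is
 *      less than 4 pages (see vm_map_findspace()).
 */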
825
826 /*
827  *      vm_map_entry_splay:
828  *
829  *      The Sleator and Tarjan top-down splay algorithm with the
830  *      following variation.  Max_free must be computed bottom-up, so
831  *      on the downward pass, maintain the left and right spines in
832  *      reverse order.  Then, make a second pass up each side to fix
833  *      the pointers and compute max_free.  The time bound is O(log n)
834  *      amortized.
835  *
836  *      The new root is the vm_map_entry containing "addr", or else an
837  *      adjacent entry (lower or higher) if addr is not in the tree.
838  *
839  *      The map must be locked, and leaves it so.
840  *
841  *      Returns: the new root.
842  */
843 static vm_map_entry_t
844 vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root)
845 {
846         vm_map_entry_t llist, rlist;
847         vm_map_entry_t ltree, rtree;
848         vm_map_entry_t y;
849
850         /* Special case of empty tree. */
851         if (root == NULL)
852                 return (root);
853
854         /*
855          * Pass One: Splay down the tree until we find addr or a NULL
856          * pointer where addr would go.  llist and rlist are the two
857          * sides in reverse order (bottom-up), with llist linked by
858          * the right pointer and rlist linked by the left pointer in
859          * the vm_map_entry.  Wait until Pass Two to set max_free on
860          * the two spines.
861          */
862         llist = NULL;
863         rlist = NULL;
864         for (;;) {
865                 /* root is never NULL in here. */
866                 if (addr < root->start) {
867                         y = root->left;
868                         if (y == NULL)
869                                 break;
870                         if (addr < y->start && y->left != NULL) {
871                                 /* Rotate right and put y on rlist. */
872                                 root->left = y->right;
873                                 y->right = root;
874                                 vm_map_entry_set_max_free(root);
875                                 root = y->left;
876                                 y->left = rlist;
877                                 rlist = y;
878                         } else {
879                                 /* Put root on rlist. */
880                                 root->left = rlist;
881                                 rlist = root;
882                                 root = y;
883                         }
884                 } else if (addr >= root->end) {
885                         y = root->right;
886                         if (y == NULL)
887                                 break;
888                         if (addr >= y->end && y->right != NULL) {
889                                 /* Rotate left and put y on llist. */
890                                 root->right = y->left;
891                                 y->left = root;
892                                 vm_map_entry_set_max_free(root);
893                                 root = y->right;
894                                 y->right = llist;
895                                 llist = y;
896                         } else {
897                                 /* Put root on llist. */
898                                 root->right = llist;
899                                 llist = root;
900                                 root = y;
901                         }
902                 } else
903                         break;
904         }
905
906         /*
907          * Pass Two: Walk back up the two spines, flip the pointers
908          * and set max_free.  The subtrees of the root go at the
909          * bottom of llist and rlist.
910          */
911         ltree = root->left;
912         while (llist != NULL) {
913                 y = llist->right;
914                 llist->right = ltree;
915                 vm_map_entry_set_max_free(llist);
916                 ltree = llist;
917                 llist = y;
918         }
919         rtree = root->right;
920         while (rlist != NULL) {
921                 y = rlist->left;
922                 rlist->left = rtree;
923                 vm_map_entry_set_max_free(rlist);
924                 rtree = rlist;
925                 rlist = y;
926         }
927
928         /*
929          * Final assembly: add ltree and rtree as subtrees of root.
930          */
931         root->left = ltree;
932         root->right = rtree;
933         vm_map_entry_set_max_free(root);
934
935         return (root);
936 }
937
938 /*
939  *      vm_map_entry_{un,}link:
940  *
941  *      Insert/remove entries from maps.
942  */
943 static void
944 vm_map_entry_link(vm_map_t map,
945                   vm_map_entry_t after_where,
946                   vm_map_entry_t entry)
947 {
948
949         CTR4(KTR_VM,
950             "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
951             map->nentries, entry, after_where);
952         VM_MAP_ASSERT_LOCKED(map);
953         map->nentries++;
954         entry->prev = after_where;
955         entry->next = after_where->next;
956         entry->next->prev = entry;
957         after_where->next = entry;
958
959         if (after_where != &map->header) {
960                 if (after_where != map->root)
961                         vm_map_entry_splay(after_where->start, map->root);
962                 entry->right = after_where->right;
963                 entry->left = after_where;
964                 after_where->right = NULL;
965                 after_where->adj_free = entry->start - after_where->end;
966                 vm_map_entry_set_max_free(after_where);
967         } else {
968                 entry->right = map->root;
969                 entry->left = NULL;
970         }
971         entry->adj_free = (entry->next == &map->header ? map->max_offset :
972             entry->next->start) - entry->end;
973         vm_map_entry_set_max_free(entry);
974         map->root = entry;
975 }
976
977 static void
978 vm_map_entry_unlink(vm_map_t map,
979                     vm_map_entry_t entry)
980 {
981         vm_map_entry_t next, prev, root;
982
983         VM_MAP_ASSERT_LOCKED(map);
984         if (entry != map->root)
985                 vm_map_entry_splay(entry->start, map->root);
986         if (entry->left == NULL)
987                 root = entry->right;
988         else {
989                 root = vm_map_entry_splay(entry->start, entry->left);
990                 root->right = entry->right;
991                 root->adj_free = (entry->next == &map->header ? map->max_offset :
992                     entry->next->start) - root->end;
993                 vm_map_entry_set_max_free(root);
994         }
995         map->root = root;
996
997         prev = entry->prev;
998         next = entry->next;
999         next->prev = prev;
1000         prev->next = next;
1001         map->nentries--;
1002         CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
1003             map->nentries, entry);
1004 }
1005
1006 /*
1007  *      vm_map_entry_resize_free:
1008  *
1009  *      Recompute the amount of free space following a vm_map_entry
1010  *      and propagate that value up the tree.  Call this function after
1011  *      resizing a map entry in-place, that is, without a call to
1012  *      vm_map_entry_link() or _unlink().
1013  *
1014  *      The map must be locked, and leaves it so.
1015  */
1016 static void
1017 vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry)
1018 {
1019
1020         /*
1021          * Using splay trees without parent pointers, propagating
1022          * max_free up the tree is done by moving the entry to the
1023          * root and making the change there.
1024          */
1025         if (entry != map->root)
1026                 map->root = vm_map_entry_splay(entry->start, map->root);
1027
1028         entry->adj_free = (entry->next == &map->header ? map->max_offset :
1029             entry->next->start) - entry->end;
1030         vm_map_entry_set_max_free(entry);
1031 }
1032
1033 /*
1034  *      vm_map_lookup_entry:    [ internal use only ]
1035  *
1036  *      Finds the map entry containing (or
1037  *      immediately preceding) the specified address
1038  *      in the given map; the entry is returned
1039  *      in the "entry" parameter.  The boolean
1040  *      result indicates whether the address is
1041  *      actually contained in the map.
1042  */
1043 boolean_t
1044 vm_map_lookup_entry(
1045         vm_map_t map,
1046         vm_offset_t address,
1047         vm_map_entry_t *entry)  /* OUT */
1048 {
1049         vm_map_entry_t cur;
1050         boolean_t locked;
1051
1052         /*
1053          * If the map is empty, then the map entry immediately preceding
1054          * "address" is the map's header.
1055          */
1056         cur = map->root;
1057         if (cur == NULL)
1058                 *entry = &map->header;
1059         else if (address >= cur->start && cur->end > address) {
1060                 *entry = cur;
1061                 return (TRUE);
1062         } else if ((locked = vm_map_locked(map)) ||
1063             sx_try_upgrade(&map->lock)) {
1064                 /*
1065                  * Splay requires a write lock on the map.  However, it only
1066                  * restructures the binary search tree; it does not otherwise
1067                  * change the map.  Thus, the map's timestamp need not change
1068                  * on a temporary upgrade.
1069                  */
1070                 map->root = cur = vm_map_entry_splay(address, cur);
1071                 if (!locked)
1072                         sx_downgrade(&map->lock);
1073
1074                 /*
1075                  * If "address" is contained within a map entry, the new root
1076                  * is that map entry.  Otherwise, the new root is a map entry
1077                  * immediately before or after "address".
1078                  */
1079                 if (address >= cur->start) {
1080                         *entry = cur;
1081                         if (cur->end > address)
1082                                 return (TRUE);
1083                 } else
1084                         *entry = cur->prev;
1085         } else
1086                 /*
1087                  * Since the map is only locked for read access, perform a
1088                  * standard binary search tree lookup for "address".
1089                  */
1090                 for (;;) {
1091                         if (address < cur->start) {
1092                                 if (cur->left == NULL) {
1093                                         *entry = cur->prev;
1094                                         break;
1095                                 }
1096                                 cur = cur->left;
1097                         } else if (cur->end > address) {
1098                                 *entry = cur;
1099                                 return (TRUE);
1100                         } else {
1101                                 if (cur->right == NULL) {
1102                                         *entry = cur;
1103                                         break;
1104                                 }
1105                                 cur = cur->right;
1106                         }
1107                 }
1108         return (FALSE);
1109 }
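
/*
 *      Example (sketch): a caller holding the map lock distinguishes
 *      the two outcomes as follows:
 *
 *              vm_map_entry_t entry;
 *
 *              if (vm_map_lookup_entry(map, addr, &entry)) {
 *                      (addr lies in [entry->start, entry->end))
 *              } else {
 *                      (entry precedes addr, possibly &map->header)
 *              }
 */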
1110
1111 /*
1112  *      vm_map_insert:
1113  *
1114  *      Inserts the given whole VM object into the target
1115  *      map at the specified address range.  The object's
1116  *      size should match that of the address range.
1117  *
1118  *      Requires that the map be locked, and leaves it so.
1119  *
1120  *      If object is non-NULL, ref count must be bumped by caller
1121  *      prior to making call to account for the new entry.
1122  */
1123 int
1124 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1125               vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
1126               int cow)
1127 {
1128         vm_map_entry_t new_entry;
1129         vm_map_entry_t prev_entry;
1130         vm_map_entry_t temp_entry;
1131         vm_eflags_t protoeflags;
1132         struct ucred *cred;
1133         boolean_t charge_prev_obj;
1134
1135         VM_MAP_ASSERT_LOCKED(map);
1136
1137         /*
1138          * Check that the start and end points are not bogus.
1139          */
1140         if ((start < map->min_offset) || (end > map->max_offset) ||
1141             (start >= end))
1142                 return (KERN_INVALID_ADDRESS);
1143
1144         /*
1145          * Find the entry prior to the proposed starting address; if it's part
1146          * of an existing entry, this range is bogus.
1147          */
1148         if (vm_map_lookup_entry(map, start, &temp_entry))
1149                 return (KERN_NO_SPACE);
1150
1151         prev_entry = temp_entry;
1152
1153         /*
1154          * Assert that the next entry doesn't overlap the end point.
1155          */
1156         if ((prev_entry->next != &map->header) &&
1157             (prev_entry->next->start < end))
1158                 return (KERN_NO_SPACE);
1159
1160         protoeflags = 0;
1161         charge_prev_obj = FALSE;
1162
1163         if (cow & MAP_COPY_ON_WRITE)
1164                 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
1165
1166         if (cow & MAP_NOFAULT) {
1167                 protoeflags |= MAP_ENTRY_NOFAULT;
1168
1169                 KASSERT(object == NULL,
1170                         ("vm_map_insert: paradoxical MAP_NOFAULT request"));
1171         }
1172         if (cow & MAP_DISABLE_SYNCER)
1173                 protoeflags |= MAP_ENTRY_NOSYNC;
1174         if (cow & MAP_DISABLE_COREDUMP)
1175                 protoeflags |= MAP_ENTRY_NOCOREDUMP;
1176
1177         cred = NULL;
1178         KASSERT((object != kmem_object && object != kernel_object) ||
1179             ((object == kmem_object || object == kernel_object) &&
1180                 !(protoeflags & MAP_ENTRY_NEEDS_COPY)),
1181             ("kmem or kernel object and cow"));
1182         if (cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT))
1183                 goto charged;
1184         if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
1185             ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
1186                 if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start))
1187                         return (KERN_RESOURCE_SHORTAGE);
1188                 KASSERT(object == NULL || (protoeflags & MAP_ENTRY_NEEDS_COPY) ||
1189                     object->cred == NULL,
1190                     ("OVERCOMMIT: vm_map_insert o %p", object));
1191                 cred = curthread->td_ucred;
1192                 crhold(cred);
1193                 if (object == NULL && !(protoeflags & MAP_ENTRY_NEEDS_COPY))
1194                         charge_prev_obj = TRUE;
1195         }
1196
1197 charged:
1198         /* Expand the kernel pmap, if necessary. */
1199         if (map == kernel_map && end > kernel_vm_end)
1200                 pmap_growkernel(end);
1201         if (object != NULL) {
1202                 /*
1203                  * OBJ_ONEMAPPING must be cleared unless this mapping
1204                  * is trivially proven to be the only mapping for any
1205                  * of the object's pages.  (Object granularity
1206                  * reference counting is insufficient to recognize
1207                  * aliases with precision.)
1208                  */
1209                 VM_OBJECT_LOCK(object);
1210                 if (object->ref_count > 1 || object->shadow_count != 0)
1211                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
1212                 VM_OBJECT_UNLOCK(object);
1213         }
1214         else if ((prev_entry != &map->header) &&
1215                  (prev_entry->eflags == protoeflags) &&
1216                  (prev_entry->end == start) &&
1217                  (prev_entry->wired_count == 0) &&
1218                  (prev_entry->cred == cred ||
1219                   (prev_entry->object.vm_object != NULL &&
1220                    (prev_entry->object.vm_object->cred == cred))) &&
1221                    vm_object_coalesce(prev_entry->object.vm_object,
1222                        prev_entry->offset,
1223                        (vm_size_t)(prev_entry->end - prev_entry->start),
1224                        (vm_size_t)(end - prev_entry->end), charge_prev_obj)) {
1225                 /*
1226                  * We were able to extend the object.  Determine if we
1227                  * can extend the previous map entry to include the
1228                  * new range as well.
1229                  */
1230                 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
1231                     (prev_entry->protection == prot) &&
1232                     (prev_entry->max_protection == max)) {
1233                         map->size += (end - prev_entry->end);
1234                         prev_entry->end = end;
1235                         vm_map_entry_resize_free(map, prev_entry);
1236                         vm_map_simplify_entry(map, prev_entry);
1237                         if (cred != NULL)
1238                                 crfree(cred);
1239                         return (KERN_SUCCESS);
1240                 }
1241
1242                 /*
1243                  * If we can extend the object but cannot extend the
1244                  * map entry, we have to create a new map entry.  We
1245                  * must bump the ref count on the extended object to
1246                  * account for it.  object may be NULL.
1247                  */
1248                 object = prev_entry->object.vm_object;
1249                 offset = prev_entry->offset +
1250                         (prev_entry->end - prev_entry->start);
1251                 vm_object_reference(object);
1252                 if (cred != NULL && object != NULL && object->cred != NULL &&
1253                     !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
1254                         /* Object already accounts for this uid. */
1255                         crfree(cred);
1256                         cred = NULL;
1257                 }
1258         }
1259
1260         /*
1261          * NOTE: if conditionals fail, object can be NULL here.  This occurs
1262          * in things like the buffer map where we manage kva but do not manage
1263          * backing objects.
1264          */
1265
1266         /*
1267          * Create a new entry
1268          */
1269         new_entry = vm_map_entry_create(map);
1270         new_entry->start = start;
1271         new_entry->end = end;
1272         new_entry->cred = NULL;
1273
1274         new_entry->eflags = protoeflags;
1275         new_entry->object.vm_object = object;
1276         new_entry->offset = offset;
1277         new_entry->avail_ssize = 0;
1278
1279         new_entry->inheritance = VM_INHERIT_DEFAULT;
1280         new_entry->protection = prot;
1281         new_entry->max_protection = max;
1282         new_entry->wired_count = 0;
1283
1284         KASSERT(cred == NULL || !ENTRY_CHARGED(new_entry),
1285             ("OVERCOMMIT: vm_map_insert leaks vm_map %p", new_entry));
1286         new_entry->cred = cred;
1287
1288         /*
1289          * Insert the new entry into the list
1290          */
1291         vm_map_entry_link(map, prev_entry, new_entry);
1292         map->size += new_entry->end - new_entry->start;
1293
1294         /*
1295          * It may be possible to merge the new entry with the next and/or
1296          * previous entries.  However, due to MAP_STACK_* being a hack, a
1297          * panic can result from merging such entries.
1298          */
1299         if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0)
1300                 vm_map_simplify_entry(map, new_entry);
1301
1302         if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
1303                 vm_map_pmap_enter(map, start, prot,
1304                                     object, OFF_TO_IDX(offset), end - start,
1305                                     cow & MAP_PREFAULT_PARTIAL);
1306         }
1307
1308         return (KERN_SUCCESS);
1309 }
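
/*
 *      Example (sketch): per the contract above, a caller mapping an
 *      existing object takes a reference before calling with the map
 *      write-locked, and drops it again if the insertion fails:
 *
 *              vm_object_reference(object);
 *              rv = vm_map_insert(map, object, offset, start, end,
 *                  prot, max, cow);
 *              if (rv != KERN_SUCCESS)
 *                      vm_object_deallocate(object);
 */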
1310
1311 /*
1312  *      vm_map_findspace:
1313  *
1314  *      Find the first fit (lowest VM address) for "length" free bytes
1315  *      beginning at address >= start in the given map.
1316  *
1317  *      In a vm_map_entry, "adj_free" is the amount of free space
1318  *      adjacent (higher address) to this entry, and "max_free" is the
1319  *      maximum amount of contiguous free space in its subtree.  This
1320  *      allows finding a free region in one path down the tree, so
1321  *      O(log n) amortized with splay trees.
1322  *
1323  *      The map must be locked, and leaves it so.
1324  *
1325  *      Returns: 0 on success, and starting address in *addr,
1326  *               1 if insufficient space.
1327  */
1328 int
1329 vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length,
1330     vm_offset_t *addr)  /* OUT */
1331 {
1332         vm_map_entry_t entry;
1333         vm_offset_t st;
1334
1335         /*
1336          * Request must fit within min/max VM address and must avoid
1337          * address wrap.
1338          */
1339         if (start < map->min_offset)
1340                 start = map->min_offset;
1341         if (start + length > map->max_offset || start + length < start)
1342                 return (1);
1343
1344         /* Empty tree means wide open address space. */
1345         if (map->root == NULL) {
1346                 *addr = start;
1347                 return (0);
1348         }
1349
1350         /*
1351          * After splay, if start comes before root node, then there
1352          * must be a gap from start to the root.
1353          */
1354         map->root = vm_map_entry_splay(start, map->root);
1355         if (start + length <= map->root->start) {
1356                 *addr = start;
1357                 return (0);
1358         }
1359
1360         /*
1361          * Root is the last node that might begin its gap before
1362          * start, and this is the last comparison where address
1363          * wrap might be a problem.
1364          */
1365         st = (start > map->root->end) ? start : map->root->end;
1366         if (length <= map->root->end + map->root->adj_free - st) {
1367                 *addr = st;
1368                 return (0);
1369         }
1370
1371         /* With max_free, can immediately tell if no solution. */
1372         entry = map->root->right;
1373         if (entry == NULL || length > entry->max_free)
1374                 return (1);
1375
1376         /*
1377          * Search the right subtree in the order: left subtree, root,
1378          * right subtree (first fit).  The previous splay implies that
1379          * all regions in the right subtree have addresses > start.
1380          */
1381         while (entry != NULL) {
1382                 if (entry->left != NULL && entry->left->max_free >= length)
1383                         entry = entry->left;
1384                 else if (entry->adj_free >= length) {
1385                         *addr = entry->end;
1386                         return (0);
1387                 } else
1388                         entry = entry->right;
1389         }
1390
1391         /* Can't get here, so panic if we do. */
1392         panic("vm_map_findspace: max_free corrupt");
1393 }
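
/*
 *      Example: suppose the only entries are [0x1000, 0x2000) and
 *      [0x5000, 0x6000), so the first entry's adj_free is 0x3000.  A
 *      request for 0x2000 bytes starting at 0x1800 splays the first
 *      entry to the root, computes st = 0x2000 (the root's end), and
 *      succeeds immediately because 0x2000 <= 0x2000 + 0x3000 - 0x2000,
 *      returning *addr = 0x2000.
 */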
1394
1395 int
1396 vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1397     vm_offset_t start, vm_size_t length, vm_prot_t prot,
1398     vm_prot_t max, int cow)
1399 {
1400         vm_offset_t end;
1401         int result;
1402
1403         end = start + length;
1404         vm_map_lock(map);
1405         VM_MAP_RANGE_CHECK(map, start, end);
1406         (void) vm_map_delete(map, start, end);
1407         result = vm_map_insert(map, object, offset, start, end, prot,
1408             max, cow);
1409         vm_map_unlock(map);
1410         return (result);
1411 }
1412
1413 /*
1414  *      vm_map_find finds an unallocated region in the target address
1415  *      map with the given length.  The search is defined to be
1416  *      first-fit from the specified address; the region found is
1417  *      returned in the same parameter.
1418  *
1419  *      If object is non-NULL, ref count must be bumped by caller
1420  *      prior to making call to account for the new entry.
1421  */
1422 int
1423 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1424             vm_offset_t *addr,  /* IN/OUT */
1425             vm_size_t length, int find_space, vm_prot_t prot,
1426             vm_prot_t max, int cow)
1427 {
1428         vm_offset_t start;
1429         int result;
1430
1431         start = *addr;
1432         vm_map_lock(map);
1433         do {
1434                 if (find_space != VMFS_NO_SPACE) {
1435                         if (vm_map_findspace(map, start, length, addr)) {
1436                                 vm_map_unlock(map);
1437                                 return (KERN_NO_SPACE);
1438                         }
1439                         switch (find_space) {
1440                         case VMFS_ALIGNED_SPACE:
1441                                 pmap_align_superpage(object, offset, addr,
1442                                     length);
1443                                 break;
1444 #ifdef VMFS_TLB_ALIGNED_SPACE
1445                         case VMFS_TLB_ALIGNED_SPACE:
1446                                 pmap_align_tlb(addr);
1447                                 break;
1448 #endif
1449                         default:
1450                                 break;
1451                         }
1452
1453                         start = *addr;
1454                 }
1455                 result = vm_map_insert(map, object, offset, start, start +
1456                     length, prot, max, cow);
1457         } while (result == KERN_NO_SPACE && (find_space == VMFS_ALIGNED_SPACE
1458 #ifdef VMFS_TLB_ALIGNED_SPACE
1459             || find_space == VMFS_TLB_ALIGNED_SPACE
1460 #endif
1461             ));
1462         vm_map_unlock(map);
1463         return (result);
1464 }
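
/*
 *      Example (sketch, assuming the VMFS_ANY_SPACE policy from
 *      vm_map.h): a typical anonymous allocation lets the map choose
 *      the address, starting the first-fit search at the map minimum:
 *
 *              vm_offset_t addr;
 *              int rv;
 *
 *              addr = vm_map_min(map);
 *              rv = vm_map_find(map, NULL, 0, &addr, size,
 *                  VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0);
 */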
1465
1466 /*
1467  *      vm_map_simplify_entry:
1468  *
1469  *      Simplify the given map entry by merging with either neighbor.  This
1470  *      routine also has the ability to merge with both neighbors.
1471  *
1472  *      The map must be locked.
1473  *
1474  *      This routine guarantees that the passed entry remains valid (though
1475  *      possibly extended).  When merging, this routine may delete one or
1476  *      both neighbors.
1477  */
1478 void
1479 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
1480 {
1481         vm_map_entry_t next, prev;
1482         vm_size_t prevsize, esize;
1483
1484         if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP))
1485                 return;
1486
1487         prev = entry->prev;
1488         if (prev != &map->header) {
1489                 prevsize = prev->end - prev->start;
1490                 if ( (prev->end == entry->start) &&
1491                      (prev->object.vm_object == entry->object.vm_object) &&
1492                      (!prev->object.vm_object ||
1493                         (prev->offset + prevsize == entry->offset)) &&
1494                      (prev->eflags == entry->eflags) &&
1495                      (prev->protection == entry->protection) &&
1496                      (prev->max_protection == entry->max_protection) &&
1497                      (prev->inheritance == entry->inheritance) &&
1498                      (prev->wired_count == entry->wired_count) &&
1499                      (prev->cred == entry->cred)) {
1500                         vm_map_entry_unlink(map, prev);
1501                         entry->start = prev->start;
1502                         entry->offset = prev->offset;
1503                         if (entry->prev != &map->header)
1504                                 vm_map_entry_resize_free(map, entry->prev);
1505
1506                         /*
1507                          * If the backing object is a vnode object,
1508                          * vm_object_deallocate() calls vrele().
1509                          * However, vrele() does not lock the vnode
1510                          * because the vnode has additional
1511                          * references.  Thus, the map lock can be kept
1512                          * without causing a lock-order reversal with
1513                          * the vnode lock.
1514                          */
1515                         if (prev->object.vm_object)
1516                                 vm_object_deallocate(prev->object.vm_object);
1517                         if (prev->cred != NULL)
1518                                 crfree(prev->cred);
1519                         vm_map_entry_dispose(map, prev);
1520                 }
1521         }
1522
1523         next = entry->next;
1524         if (next != &map->header) {
1525                 esize = entry->end - entry->start;
1526                 if ((entry->end == next->start) &&
1527                     (next->object.vm_object == entry->object.vm_object) &&
1528                      (!entry->object.vm_object ||
1529                         (entry->offset + esize == next->offset)) &&
1530                     (next->eflags == entry->eflags) &&
1531                     (next->protection == entry->protection) &&
1532                     (next->max_protection == entry->max_protection) &&
1533                     (next->inheritance == entry->inheritance) &&
1534                     (next->wired_count == entry->wired_count) &&
1535                     (next->cred == entry->cred)) {
1536                         vm_map_entry_unlink(map, next);
1537                         entry->end = next->end;
1538                         vm_map_entry_resize_free(map, entry);
1539
1540                         /*
1541                          * See comment above.
1542                          */
1543                         if (next->object.vm_object)
1544                                 vm_object_deallocate(next->object.vm_object);
1545                         if (next->cred != NULL)
1546                                 crfree(next->cred);
1547                         vm_map_entry_dispose(map, next);
1548                 }
1549         }
1550 }
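
/*
 * A minimal worked sketch of the merge above, using hypothetical
 * entries backed by the same object:
 *
 *	prev:  [0x1000, 0x2000), offset 0x0000
 *	entry: [0x2000, 0x3000), offset 0x1000
 *
 * Since prev->end == entry->start, the offsets are contiguous
 * (0x0000 + (0x2000 - 0x1000) == 0x1000), and all other attributes
 * match, "prev" is unlinked and "entry" grows to cover
 * [0x1000, 0x3000) with offset 0x0000.
 */
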
1551 /*
1552  *      vm_map_clip_start:      [ internal use only ]
1553  *
1554  *      Asserts that the given entry begins at or after
1555  *      the specified address; if necessary,
1556  *      it splits the entry into two.
1557  */
1558 #define vm_map_clip_start(map, entry, startaddr) \
1559 { \
1560         if (startaddr > entry->start) \
1561                 _vm_map_clip_start(map, entry, startaddr); \
1562 }
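
/*
 * A minimal worked sketch with hypothetical addresses: given an entry
 * spanning [0x1000, 0x4000) and startaddr == 0x2000, _vm_map_clip_start()
 * inserts a new entry covering [0x1000, 0x2000) before the original,
 * which then covers [0x2000, 0x4000) and begins exactly at startaddr.
 */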
1563
1564 /*
1565  *      This routine is called only when it is known that
1566  *      the entry must be split.
1567  */
1568 static void
1569 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
1570 {
1571         vm_map_entry_t new_entry;
1572
1573         VM_MAP_ASSERT_LOCKED(map);
1574
1575         /*
1576          * Split off the front portion -- note that we must insert the new
1577          * entry BEFORE this one, so that this entry has the specified
1578          * starting address.
1579          */
1580         vm_map_simplify_entry(map, entry);
1581
1582         /*
1583          * If there is no object backing this entry, we might as well create
1584          * one now.  If we defer it, an object can get created after the map
1585          * is clipped, and individual objects will be created for the split-up
1586          * map.  This is a bit of a hack, but is also about the best place to
1587          * put this improvement.
1588          */
1589         if (entry->object.vm_object == NULL && !map->system_map) {
1590                 vm_object_t object;
1591                 object = vm_object_allocate(OBJT_DEFAULT,
1592                                 atop(entry->end - entry->start));
1593                 entry->object.vm_object = object;
1594                 entry->offset = 0;
1595                 if (entry->cred != NULL) {
1596                         object->cred = entry->cred;
1597                         object->charge = entry->end - entry->start;
1598                         entry->cred = NULL;
1599                 }
1600         } else if (entry->object.vm_object != NULL &&
1601                    ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
1602                    entry->cred != NULL) {
1603                 VM_OBJECT_LOCK(entry->object.vm_object);
1604                 KASSERT(entry->object.vm_object->cred == NULL,
1605                     ("OVERCOMMIT: vm_entry_clip_start: both cred e %p", entry));
1606                 entry->object.vm_object->cred = entry->cred;
1607                 entry->object.vm_object->charge = entry->end - entry->start;
1608                 VM_OBJECT_UNLOCK(entry->object.vm_object);
1609                 entry->cred = NULL;
1610         }
1611
1612         new_entry = vm_map_entry_create(map);
1613         *new_entry = *entry;
1614
1615         new_entry->end = start;
1616         entry->offset += (start - entry->start);
1617         entry->start = start;
1618         if (new_entry->cred != NULL)
1619                 crhold(entry->cred);
1620
1621         vm_map_entry_link(map, entry->prev, new_entry);
1622
1623         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1624                 vm_object_reference(new_entry->object.vm_object);
1625         }
1626 }
1627
1628 /*
1629  *      vm_map_clip_end:        [ internal use only ]
1630  *
1631  *      Asserts that the given entry ends at or before
1632  *      the specified address; if necessary,
1633  *      it splits the entry into two.
1634  */
1635 #define vm_map_clip_end(map, entry, endaddr) \
1636 { \
1637         if ((endaddr) < (entry->end)) \
1638                 _vm_map_clip_end((map), (entry), (endaddr)); \
1639 }
1640
1641 /*
1642  *      This routine is called only when it is known that
1643  *      the entry must be split.
1644  */
1645 static void
1646 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1647 {
1648         vm_map_entry_t new_entry;
1649
1650         VM_MAP_ASSERT_LOCKED(map);
1651
1652         /*
1653          * If there is no object backing this entry, we might as well create
1654          * one now.  If we defer it, an object can get created after the map
1655          * is clipped, and individual objects will be created for the split-up
1656          * map.  This is a bit of a hack, but is also about the best place to
1657          * put this improvement.
1658          */
1659         if (entry->object.vm_object == NULL && !map->system_map) {
1660                 vm_object_t object;
1661                 object = vm_object_allocate(OBJT_DEFAULT,
1662                                 atop(entry->end - entry->start));
1663                 entry->object.vm_object = object;
1664                 entry->offset = 0;
1665                 if (entry->cred != NULL) {
1666                         object->cred = entry->cred;
1667                         object->charge = entry->end - entry->start;
1668                         entry->cred = NULL;
1669                 }
1670         } else if (entry->object.vm_object != NULL &&
1671                    ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
1672                    entry->cred != NULL) {
1673                 VM_OBJECT_LOCK(entry->object.vm_object);
1674                 KASSERT(entry->object.vm_object->cred == NULL,
1675                     ("OVERCOMMIT: vm_entry_clip_end: both cred e %p", entry));
1676                 entry->object.vm_object->cred = entry->cred;
1677                 entry->object.vm_object->charge = entry->end - entry->start;
1678                 VM_OBJECT_UNLOCK(entry->object.vm_object);
1679                 entry->cred = NULL;
1680         }
1681
1682         /*
1683          * Create a new entry and insert it AFTER the specified entry
1684          */
1685         new_entry = vm_map_entry_create(map);
1686         *new_entry = *entry;
1687
1688         new_entry->start = entry->end = end;
1689         new_entry->offset += (end - entry->start);
1690         if (new_entry->cred != NULL)
1691                 crhold(entry->cred);
1692
1693         vm_map_entry_link(map, entry, new_entry);
1694
1695         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1696                 vm_object_reference(new_entry->object.vm_object);
1697         }
1698 }
1699
1700 /*
1701  *      vm_map_submap:          [ kernel use only ]
1702  *
1703  *      Mark the given range as handled by a subordinate map.
1704  *
1705  *      This range must have been created with vm_map_find,
1706  *      and no other operations may have been performed on this
1707  *      range prior to calling vm_map_submap.
1708  *
1709  *      Only a limited number of operations can be performed
1710  *      within this range after calling vm_map_submap:
1711  *              vm_fault
1712  *      [Don't try vm_map_copy!]
1713  *
1714  *      To remove a submapping, one must first remove the
1715  *      range from the superior map, and then destroy the
1716  *      submap (if desired).  [Better yet, don't try it.]
1717  */
1718 int
1719 vm_map_submap(
1720         vm_map_t map,
1721         vm_offset_t start,
1722         vm_offset_t end,
1723         vm_map_t submap)
1724 {
1725         vm_map_entry_t entry;
1726         int result = KERN_INVALID_ARGUMENT;
1727
1728         vm_map_lock(map);
1729
1730         VM_MAP_RANGE_CHECK(map, start, end);
1731
1732         if (vm_map_lookup_entry(map, start, &entry)) {
1733                 vm_map_clip_start(map, entry, start);
1734         } else
1735                 entry = entry->next;
1736
1737         vm_map_clip_end(map, entry, end);
1738
1739         if ((entry->start == start) && (entry->end == end) &&
1740             ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1741             (entry->object.vm_object == NULL)) {
1742                 entry->object.sub_map = submap;
1743                 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1744                 result = KERN_SUCCESS;
1745         }
1746         vm_map_unlock(map);
1747
1748         return (result);
1749 }
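
/*
 * A minimal usage sketch, assuming a caller that already allocated
 * [start, end) in "parent" with vm_map_find() and created "submap";
 * the function name is hypothetical and the block is not compiled.
 */
#if 0
static int
example_install_submap(vm_map_t parent, vm_offset_t start, vm_offset_t end,
    vm_map_t submap)
{

        /* Delegate vm_fault() handling in [start, end) to "submap". */
        return (vm_map_submap(parent, start, end, submap));
}
#endif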
1750
1751 /*
1752  * The maximum number of pages to map
1753  */
1754 #define MAX_INIT_PT     96
1755
1756 /*
1757  *      vm_map_pmap_enter:
1758  *
1759  *      Preload read-only mappings for the given object's resident pages into
1760  *      the given map.  This eliminates the soft faults on process startup and
1761  *      immediately after an mmap(2).  Because these are speculative mappings,
1762  *      cached pages are not reactivated and mapped.
1763  */
1764 void
1765 vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
1766     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags)
1767 {
1768         vm_offset_t start;
1769         vm_page_t p, p_start;
1770         vm_pindex_t psize, tmpidx;
1771
1772         if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL)
1773                 return;
1774         VM_OBJECT_LOCK(object);
1775         if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
1776                 pmap_object_init_pt(map->pmap, addr, object, pindex, size);
1777                 goto unlock_return;
1778         }
1779
1780         psize = atop(size);
1781
1782         if ((flags & MAP_PREFAULT_PARTIAL) && psize > MAX_INIT_PT &&
1783             object->resident_page_count > MAX_INIT_PT)
1784                 goto unlock_return;
1785
1786         if (psize + pindex > object->size) {
1787                 if (object->size < pindex)
1788                         goto unlock_return;
1789                 psize = object->size - pindex;
1790         }
1791
1792         start = 0;
1793         p_start = NULL;
1794
1795         p = vm_page_find_least(object, pindex);
1796         /*
1797          * Assert: the variable p is either (1) the page with the
1798          * least pindex greater than or equal to the parameter pindex
1799          * or (2) NULL.
1800          */
1801         for (;
1802              p != NULL && (tmpidx = p->pindex - pindex) < psize;
1803              p = TAILQ_NEXT(p, listq)) {
1804                 /*
1805                  * Don't allow a madvise call to blow away our really
1806                  * free pages by allocating pv entries.
1807                  */
1808                 if ((flags & MAP_PREFAULT_MADVISE) &&
1809                     cnt.v_free_count < cnt.v_free_reserved) {
1810                         psize = tmpidx;
1811                         break;
1812                 }
1813                 if (p->valid == VM_PAGE_BITS_ALL) {
1814                         if (p_start == NULL) {
1815                                 start = addr + ptoa(tmpidx);
1816                                 p_start = p;
1817                         }
1818                 } else if (p_start != NULL) {
1819                         pmap_enter_object(map->pmap, start, addr +
1820                             ptoa(tmpidx), p_start, prot);
1821                         p_start = NULL;
1822                 }
1823         }
1824         if (p_start != NULL)
1825                 pmap_enter_object(map->pmap, start, addr + ptoa(psize),
1826                     p_start, prot);
1827 unlock_return:
1828         VM_OBJECT_UNLOCK(object);
1829 }
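
/*
 * A minimal worked sketch of the batching above, with hypothetical
 * pages: if the pages at pindex 0 through 3 are fully valid and the
 * page at pindex 4 is not, the loop records the start of the run at
 * "addr" and issues a single pmap_enter_object() call covering pages
 * 0 through 3 upon reaching page 4, instead of four separate calls.
 */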
1830
1831 /*
1832  *      vm_map_protect:
1833  *
1834  *      Sets the protection of the specified address
1835  *      region in the target map.  If "set_max" is
1836  *      specified, the maximum protection is to be set;
1837  *      otherwise, only the current protection is affected.
1838  */
1839 int
1840 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1841                vm_prot_t new_prot, boolean_t set_max)
1842 {
1843         vm_map_entry_t current, entry;
1844         vm_object_t obj;
1845         struct ucred *cred;
1846         vm_prot_t old_prot;
1847
1848         vm_map_lock(map);
1849
1850         VM_MAP_RANGE_CHECK(map, start, end);
1851
1852         if (vm_map_lookup_entry(map, start, &entry)) {
1853                 vm_map_clip_start(map, entry, start);
1854         } else {
1855                 entry = entry->next;
1856         }
1857
1858         /*
1859          * Make a first pass to check for protection violations.
1860          */
1861         current = entry;
1862         while ((current != &map->header) && (current->start < end)) {
1863                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1864                         vm_map_unlock(map);
1865                         return (KERN_INVALID_ARGUMENT);
1866                 }
1867                 if ((new_prot & current->max_protection) != new_prot) {
1868                         vm_map_unlock(map);
1869                         return (KERN_PROTECTION_FAILURE);
1870                 }
1871                 current = current->next;
1872         }
1873
1874
1875         /*
1876          * Do an accounting pass for private read-only mappings that
1877          * will now do copy-on-write due to the newly allowed write
1878          * access (e.g., a debugger setting a breakpoint on a text segment).
1879          */
1880         for (current = entry; (current != &map->header) &&
1881              (current->start < end); current = current->next) {
1882
1883                 vm_map_clip_end(map, current, end);
1884
1885                 if (set_max ||
1886                     ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 ||
1887                     ENTRY_CHARGED(current)) {
1888                         continue;
1889                 }
1890
1891                 cred = curthread->td_ucred;
1892                 obj = current->object.vm_object;
1893
1894                 if (obj == NULL || (current->eflags & MAP_ENTRY_NEEDS_COPY)) {
1895                         if (!swap_reserve(current->end - current->start)) {
1896                                 vm_map_unlock(map);
1897                                 return (KERN_RESOURCE_SHORTAGE);
1898                         }
1899                         crhold(cred);
1900                         current->cred = cred;
1901                         continue;
1902                 }
1903
1904                 VM_OBJECT_LOCK(obj);
1905                 if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP) {
1906                         VM_OBJECT_UNLOCK(obj);
1907                         continue;
1908                 }
1909
1910                 /*
1911                  * Charge for the whole object allocation now, since
1912                  * we cannot distinguish between non-charged and
1913                  * charged clipped mapping of the same object later.
1914                  */
1915                 KASSERT(obj->charge == 0,
1916                     ("vm_map_protect: object %p overcharged\n", obj));
1917                 if (!swap_reserve(ptoa(obj->size))) {
1918                         VM_OBJECT_UNLOCK(obj);
1919                         vm_map_unlock(map);
1920                         return (KERN_RESOURCE_SHORTAGE);
1921                 }
1922
1923                 crhold(cred);
1924                 obj->cred = cred;
1925                 obj->charge = ptoa(obj->size);
1926                 VM_OBJECT_UNLOCK(obj);
1927         }
1928
1929         /*
1930          * Go back and fix up protections. [Note that clipping is not
1931          * necessary the second time.]
1932          */
1933         current = entry;
1934         while ((current != &map->header) && (current->start < end)) {
1935                 old_prot = current->protection;
1936
1937                 if (set_max)
1938                         current->protection =
1939                             (current->max_protection = new_prot) &
1940                             old_prot;
1941                 else
1942                         current->protection = new_prot;
1943
1944                 if ((current->eflags & (MAP_ENTRY_COW | MAP_ENTRY_USER_WIRED))
1945                      == (MAP_ENTRY_COW | MAP_ENTRY_USER_WIRED) &&
1946                     (current->protection & VM_PROT_WRITE) != 0 &&
1947                     (old_prot & VM_PROT_WRITE) == 0) {
1948                         vm_fault_copy_entry(map, map, current, current, NULL);
1949                 }
1950
1951                 /*
1952                  * When restricting access, update the physical map.  Worry
1953                  * about copy-on-write here.
1954                  */
1955                 if ((old_prot & ~current->protection) != 0) {
1956 #define MASK(entry)     (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1957                                                         VM_PROT_ALL)
1958                         pmap_protect(map->pmap, current->start,
1959                             current->end,
1960                             current->protection & MASK(current));
1961 #undef  MASK
1962                 }
1963                 vm_map_simplify_entry(map, current);
1964                 current = current->next;
1965         }
1966         vm_map_unlock(map);
1967         return (KERN_SUCCESS);
1968 }
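
/*
 * A minimal usage sketch with a hypothetical caller: make [start, end)
 * read-only while leaving the maximum protection alone.  Not compiled.
 */
#if 0
static int
example_make_readonly(vm_map_t map, vm_offset_t start, vm_offset_t end)
{

        /* set_max == FALSE, so only the current protection changes. */
        return (vm_map_protect(map, start, end, VM_PROT_READ, FALSE));
}
#endif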
1969
1970 /*
1971  *      vm_map_madvise:
1972  *
1973  *      This routine traverses a process's map handling the madvise
1974  *      system call.  Advisories are classified as either those affecting
1975  *      the vm_map_entry structure, or those affecting the underlying
1976  *      objects.
1977  */
1978 int
1979 vm_map_madvise(
1980         vm_map_t map,
1981         vm_offset_t start,
1982         vm_offset_t end,
1983         int behav)
1984 {
1985         vm_map_entry_t current, entry;
1986         int modify_map = 0;
1987
1988         /*
1989          * Some madvise calls directly modify the vm_map_entry, in which case
1990          * we need to use an exclusive lock on the map and we need to perform
1991          * various clipping operations.  Otherwise we only need a read-lock
1992          * on the map.
1993          */
1994         switch (behav) {
1995         case MADV_NORMAL:
1996         case MADV_SEQUENTIAL:
1997         case MADV_RANDOM:
1998         case MADV_NOSYNC:
1999         case MADV_AUTOSYNC:
2000         case MADV_NOCORE:
2001         case MADV_CORE:
2002                 modify_map = 1;
2003                 vm_map_lock(map);
2004                 break;
2005         case MADV_WILLNEED:
2006         case MADV_DONTNEED:
2007         case MADV_FREE:
2008                 vm_map_lock_read(map);
2009                 break;
2010         default:
2011                 return (KERN_INVALID_ARGUMENT);
2012         }
2013
2014         /*
2015          * Locate starting entry and clip if necessary.
2016          */
2017         VM_MAP_RANGE_CHECK(map, start, end);
2018
2019         if (vm_map_lookup_entry(map, start, &entry)) {
2020                 if (modify_map)
2021                         vm_map_clip_start(map, entry, start);
2022         } else {
2023                 entry = entry->next;
2024         }
2025
2026         if (modify_map) {
2027                 /*
2028                  * madvise behaviors that are implemented in the vm_map_entry.
2029                  *
2030                  * We clip the vm_map_entry so that behavioral changes are
2031                  * limited to the specified address range.
2032                  */
2033                 for (current = entry;
2034                      (current != &map->header) && (current->start < end);
2035                      current = current->next
2036                 ) {
2037                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
2038                                 continue;
2039
2040                         vm_map_clip_end(map, current, end);
2041
2042                         switch (behav) {
2043                         case MADV_NORMAL:
2044                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
2045                                 break;
2046                         case MADV_SEQUENTIAL:
2047                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
2048                                 break;
2049                         case MADV_RANDOM:
2050                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
2051                                 break;
2052                         case MADV_NOSYNC:
2053                                 current->eflags |= MAP_ENTRY_NOSYNC;
2054                                 break;
2055                         case MADV_AUTOSYNC:
2056                                 current->eflags &= ~MAP_ENTRY_NOSYNC;
2057                                 break;
2058                         case MADV_NOCORE:
2059                                 current->eflags |= MAP_ENTRY_NOCOREDUMP;
2060                                 break;
2061                         case MADV_CORE:
2062                                 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
2063                                 break;
2064                         default:
2065                                 break;
2066                         }
2067                         vm_map_simplify_entry(map, current);
2068                 }
2069                 vm_map_unlock(map);
2070         } else {
2071                 vm_pindex_t pindex;
2072                 int count;
2073
2074                 /*
2075                  * madvise behaviors that are implemented in the underlying
2076                  * vm_object.
2077                  *
2078                  * Since we don't clip the vm_map_entry, we have to clip
2079                  * the vm_object pindex and count.
2080                  */
2081                 for (current = entry;
2082                      (current != &map->header) && (current->start < end);
2083                      current = current->next
2084                 ) {
2085                         vm_offset_t useStart;
2086
2087                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
2088                                 continue;
2089
2090                         pindex = OFF_TO_IDX(current->offset);
2091                         count = atop(current->end - current->start);
2092                         useStart = current->start;
2093
2094                         if (current->start < start) {
2095                                 pindex += atop(start - current->start);
2096                                 count -= atop(start - current->start);
2097                                 useStart = start;
2098                         }
2099                         if (current->end > end)
2100                                 count -= atop(current->end - end);
2101
2102                         if (count <= 0)
2103                                 continue;
2104
2105                         vm_object_madvise(current->object.vm_object,
2106                                           pindex, count, behav);
2107                         if (behav == MADV_WILLNEED) {
2108                                 vm_map_pmap_enter(map,
2109                                     useStart,
2110                                     current->protection,
2111                                     current->object.vm_object,
2112                                     pindex,
2113                                     (count << PAGE_SHIFT),
2114                                     MAP_PREFAULT_MADVISE
2115                                 );
2116                         }
2117                 }
2118                 vm_map_unlock_read(map);
2119         }
2120         return (0);
2121 }
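
/*
 * A minimal usage sketch with a hypothetical caller: request that
 * [start, end) be prefaulted.  MADV_WILLNEED takes only the map's read
 * lock and ends in vm_map_pmap_enter() above.  Not compiled.
 */
#if 0
static int
example_prefault(vm_map_t map, vm_offset_t start, vm_offset_t end)
{

        return (vm_map_madvise(map, start, end, MADV_WILLNEED));
}
#endif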
2122
2123
2124 /*
2125  *      vm_map_inherit:
2126  *
2127  *      Sets the inheritance of the specified address
2128  *      range in the target map.  Inheritance
2129  *      affects how the map will be shared with
2130  *      child maps at the time of vmspace_fork.
2131  */
2132 int
2133 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
2134                vm_inherit_t new_inheritance)
2135 {
2136         vm_map_entry_t entry;
2137         vm_map_entry_t temp_entry;
2138
2139         switch (new_inheritance) {
2140         case VM_INHERIT_NONE:
2141         case VM_INHERIT_COPY:
2142         case VM_INHERIT_SHARE:
2143                 break;
2144         default:
2145                 return (KERN_INVALID_ARGUMENT);
2146         }
2147         vm_map_lock(map);
2148         VM_MAP_RANGE_CHECK(map, start, end);
2149         if (vm_map_lookup_entry(map, start, &temp_entry)) {
2150                 entry = temp_entry;
2151                 vm_map_clip_start(map, entry, start);
2152         } else
2153                 entry = temp_entry->next;
2154         while ((entry != &map->header) && (entry->start < end)) {
2155                 vm_map_clip_end(map, entry, end);
2156                 entry->inheritance = new_inheritance;
2157                 vm_map_simplify_entry(map, entry);
2158                 entry = entry->next;
2159         }
2160         vm_map_unlock(map);
2161         return (KERN_SUCCESS);
2162 }
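
/*
 * A minimal usage sketch with a hypothetical caller: have [start, end)
 * be shared with children created by vmspace_fork().  Not compiled.
 */
#if 0
static int
example_share_on_fork(vm_map_t map, vm_offset_t start, vm_offset_t end)
{

        return (vm_map_inherit(map, start, end, VM_INHERIT_SHARE));
}
#endif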
2163
2164 /*
2165  *      vm_map_unwire:
2166  *
2167  *      Implements both kernel and user unwiring.
2168  */
2169 int
2170 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
2171     int flags)
2172 {
2173         vm_map_entry_t entry, first_entry, tmp_entry;
2174         vm_offset_t saved_start;
2175         unsigned int last_timestamp;
2176         int rv;
2177         boolean_t need_wakeup, result, user_unwire;
2178
2179         user_unwire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE;
2180         vm_map_lock(map);
2181         VM_MAP_RANGE_CHECK(map, start, end);
2182         if (!vm_map_lookup_entry(map, start, &first_entry)) {
2183                 if (flags & VM_MAP_WIRE_HOLESOK)
2184                         first_entry = first_entry->next;
2185                 else {
2186                         vm_map_unlock(map);
2187                         return (KERN_INVALID_ADDRESS);
2188                 }
2189         }
2190         last_timestamp = map->timestamp;
2191         entry = first_entry;
2192         while (entry != &map->header && entry->start < end) {
2193                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
2194                         /*
2195                          * We have not yet clipped the entry.
2196                          */
2197                         saved_start = (start >= entry->start) ? start :
2198                             entry->start;
2199                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2200                         if (vm_map_unlock_and_wait(map, 0)) {
2201                                 /*
2202                                  * Allow interruption of user unwiring?
2203                                  */
2204                         }
2205                         vm_map_lock(map);
2206                         if (last_timestamp+1 != map->timestamp) {
2207                                 /*
2208                                  * Look again for the entry because the map was
2209                                  * modified while it was unlocked.
2210                                  * Specifically, the entry may have been
2211                                  * clipped, merged, or deleted.
2212                                  */
2213                                 if (!vm_map_lookup_entry(map, saved_start,
2214                                     &tmp_entry)) {
2215                                         if (flags & VM_MAP_WIRE_HOLESOK)
2216                                                 tmp_entry = tmp_entry->next;
2217                                         else {
2218                                                 if (saved_start == start) {
2219                                                         /*
2220                                                          * First_entry has been deleted.
2221                                                          */
2222                                                         vm_map_unlock(map);
2223                                                         return (KERN_INVALID_ADDRESS);
2224                                                 }
2225                                                 end = saved_start;
2226                                                 rv = KERN_INVALID_ADDRESS;
2227                                                 goto done;
2228                                         }
2229                                 }
2230                                 if (entry == first_entry)
2231                                         first_entry = tmp_entry;
2232                                 else
2233                                         first_entry = NULL;
2234                                 entry = tmp_entry;
2235                         }
2236                         last_timestamp = map->timestamp;
2237                         continue;
2238                 }
2239                 vm_map_clip_start(map, entry, start);
2240                 vm_map_clip_end(map, entry, end);
2241                 /*
2242                  * Mark the entry in case the map lock is released.  (See
2243                  * above.)
2244                  */
2245                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
2246                 /*
2247                  * Check the map for holes in the specified region.
2248                  * If VM_MAP_WIRE_HOLESOK was specified, skip this check.
2249                  */
2250                 if (((flags & VM_MAP_WIRE_HOLESOK) == 0) &&
2251                     (entry->end < end && (entry->next == &map->header ||
2252                     entry->next->start > entry->end))) {
2253                         end = entry->end;
2254                         rv = KERN_INVALID_ADDRESS;
2255                         goto done;
2256                 }
2257                 /*
2258                  * If system unwiring, require that the entry is system wired.
2259                  */
2260                 if (!user_unwire &&
2261                     vm_map_entry_system_wired_count(entry) == 0) {
2262                         end = entry->end;
2263                         rv = KERN_INVALID_ARGUMENT;
2264                         goto done;
2265                 }
2266                 entry = entry->next;
2267         }
2268         rv = KERN_SUCCESS;
2269 done:
2270         need_wakeup = FALSE;
2271         if (first_entry == NULL) {
2272                 result = vm_map_lookup_entry(map, start, &first_entry);
2273                 if (!result && (flags & VM_MAP_WIRE_HOLESOK))
2274                         first_entry = first_entry->next;
2275                 else
2276                         KASSERT(result, ("vm_map_unwire: lookup failed"));
2277         }
2278         entry = first_entry;
2279         while (entry != &map->header && entry->start < end) {
2280                 if (rv == KERN_SUCCESS && (!user_unwire ||
2281                     (entry->eflags & MAP_ENTRY_USER_WIRED))) {
2282                         if (user_unwire)
2283                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2284                         entry->wired_count--;
2285                         if (entry->wired_count == 0) {
2286                                 /*
2287                                  * Retain the map lock.
2288                                  */
2289                                 vm_fault_unwire(map, entry->start, entry->end,
2290                                     entry->object.vm_object != NULL &&
2291                                     (entry->object.vm_object->type == OBJT_DEVICE ||
2292                                     entry->object.vm_object->type == OBJT_SG));
2293                         }
2294                 }
2295                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
2296                         ("vm_map_unwire: in-transition flag missing"));
2297                 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
2298                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
2299                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
2300                         need_wakeup = TRUE;
2301                 }
2302                 vm_map_simplify_entry(map, entry);
2303                 entry = entry->next;
2304         }
2305         vm_map_unlock(map);
2306         if (need_wakeup)
2307                 vm_map_wakeup(map);
2308         return (rv);
2309 }
2310
2311 /*
2312  *      vm_map_wire:
2313  *
2314  *      Implements both kernel and user wiring.
2315  */
2316 int
2317 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
2318     int flags)
2319 {
2320         vm_map_entry_t entry, first_entry, tmp_entry;
2321         vm_offset_t saved_end, saved_start;
2322         unsigned int last_timestamp;
2323         int rv;
2324         boolean_t fictitious, need_wakeup, result, user_wire;
2325         vm_prot_t prot;
2326
2327         prot = 0;
2328         if (flags & VM_MAP_WIRE_WRITE)
2329                 prot |= VM_PROT_WRITE;
2330         user_wire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE;
2331         vm_map_lock(map);
2332         VM_MAP_RANGE_CHECK(map, start, end);
2333         if (!vm_map_lookup_entry(map, start, &first_entry)) {
2334                 if (flags & VM_MAP_WIRE_HOLESOK)
2335                         first_entry = first_entry->next;
2336                 else {
2337                         vm_map_unlock(map);
2338                         return (KERN_INVALID_ADDRESS);
2339                 }
2340         }
2341         last_timestamp = map->timestamp;
2342         entry = first_entry;
2343         while (entry != &map->header && entry->start < end) {
2344                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
2345                         /*
2346                          * We have not yet clipped the entry.
2347                          */
2348                         saved_start = (start >= entry->start) ? start :
2349                             entry->start;
2350                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2351                         if (vm_map_unlock_and_wait(map, 0)) {
2352                                 /*
2353                                  * Allow interruption of user wiring?
2354                                  */
2355                         }
2356                         vm_map_lock(map);
2357                         if (last_timestamp + 1 != map->timestamp) {
2358                                 /*
2359                                  * Look again for the entry because the map was
2360                                  * modified while it was unlocked.
2361                                  * Specifically, the entry may have been
2362                                  * clipped, merged, or deleted.
2363                                  */
2364                                 if (!vm_map_lookup_entry(map, saved_start,
2365                                     &tmp_entry)) {
2366                                         if (flags & VM_MAP_WIRE_HOLESOK)
2367                                                 tmp_entry = tmp_entry->next;
2368                                         else {
2369                                                 if (saved_start == start) {
2370                                                         /*
2371                                                          * first_entry has been deleted.
2372                                                          */
2373                                                         vm_map_unlock(map);
2374                                                         return (KERN_INVALID_ADDRESS);
2375                                                 }
2376                                                 end = saved_start;
2377                                                 rv = KERN_INVALID_ADDRESS;
2378                                                 goto done;
2379                                         }
2380                                 }
2381                                 if (entry == first_entry)
2382                                         first_entry = tmp_entry;
2383                                 else
2384                                         first_entry = NULL;
2385                                 entry = tmp_entry;
2386                         }
2387                         last_timestamp = map->timestamp;
2388                         continue;
2389                 }
2390                 vm_map_clip_start(map, entry, start);
2391                 vm_map_clip_end(map, entry, end);
2392                 /*
2393                  * Mark the entry in case the map lock is released.  (See
2394                  * above.)
2395                  */
2396                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
2397                 if ((entry->protection & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0
2398                     || (entry->protection & prot) != prot) {
2399                         entry->eflags |= MAP_ENTRY_WIRE_SKIPPED;
2400                         if ((flags & VM_MAP_WIRE_HOLESOK) == 0) {
2401                                 end = entry->end;
2402                                 rv = KERN_INVALID_ADDRESS;
2403                                 goto done;
2404                         }
2405                         goto next_entry;
2406                 }
2407                 if (entry->wired_count == 0) {
2408                         entry->wired_count++;
2409                         saved_start = entry->start;
2410                         saved_end = entry->end;
2411                         fictitious = entry->object.vm_object != NULL &&
2412                             (entry->object.vm_object->type == OBJT_DEVICE ||
2413                             entry->object.vm_object->type == OBJT_SG);
2414                         /*
2415                          * Release the map lock, relying on the in-transition
2416                          * mark.  Mark the map busy for fork.
2417                          */
2418                         vm_map_busy(map);
2419                         vm_map_unlock(map);
2420                         rv = vm_fault_wire(map, saved_start, saved_end,
2421                             fictitious);
2422                         vm_map_lock(map);
2423                         vm_map_unbusy(map);
2424                         if (last_timestamp + 1 != map->timestamp) {
2425                                 /*
2426                                  * Look again for the entry because the map was
2427                                  * modified while it was unlocked.  The entry
2428                                  * may have been clipped, but NOT merged or
2429                                  * deleted.
2430                                  */
2431                                 result = vm_map_lookup_entry(map, saved_start,
2432                                     &tmp_entry);
2433                                 KASSERT(result, ("vm_map_wire: lookup failed"));
2434                                 if (entry == first_entry)
2435                                         first_entry = tmp_entry;
2436                                 else
2437                                         first_entry = NULL;
2438                                 entry = tmp_entry;
2439                                 while (entry->end < saved_end) {
2440                                         if (rv != KERN_SUCCESS) {
2441                                                 KASSERT(entry->wired_count == 1,
2442                                                     ("vm_map_wire: bad count"));
2443                                                 entry->wired_count = -1;
2444                                         }
2445                                         entry = entry->next;
2446                                 }
2447                         }
2448                         last_timestamp = map->timestamp;
2449                         if (rv != KERN_SUCCESS) {
2450                                 KASSERT(entry->wired_count == 1,
2451                                     ("vm_map_wire: bad count"));
2452                                 /*
2453                                  * Assign an out-of-range value to represent
2454                                  * the failure to wire this entry.
2455                                  */
2456                                 entry->wired_count = -1;
2457                                 end = entry->end;
2458                                 goto done;
2459                         }
2460                 } else if (!user_wire ||
2461                            (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
2462                         entry->wired_count++;
2463                 }
2464                 /*
2465                  * Check the map for holes in the specified region.
2466                  * If VM_MAP_WIRE_HOLESOK was specified, skip this check.
2467                  */
2468         next_entry:
2469                 if (((flags & VM_MAP_WIRE_HOLESOK) == 0) &&
2470                     (entry->end < end && (entry->next == &map->header ||
2471                     entry->next->start > entry->end))) {
2472                         end = entry->end;
2473                         rv = KERN_INVALID_ADDRESS;
2474                         goto done;
2475                 }
2476                 entry = entry->next;
2477         }
2478         rv = KERN_SUCCESS;
2479 done:
2480         need_wakeup = FALSE;
2481         if (first_entry == NULL) {
2482                 result = vm_map_lookup_entry(map, start, &first_entry);
2483                 if (!result && (flags & VM_MAP_WIRE_HOLESOK))
2484                         first_entry = first_entry->next;
2485                 else
2486                         KASSERT(result, ("vm_map_wire: lookup failed"));
2487         }
2488         entry = first_entry;
2489         while (entry != &map->header && entry->start < end) {
2490                 if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0)
2491                         goto next_entry_done;
2492                 if (rv == KERN_SUCCESS) {
2493                         if (user_wire)
2494                                 entry->eflags |= MAP_ENTRY_USER_WIRED;
2495                 } else if (entry->wired_count == -1) {
2496                         /*
2497                          * Wiring failed on this entry.  Thus, unwiring is
2498                          * unnecessary.
2499                          */
2500                         entry->wired_count = 0;
2501                 } else {
2502                         if (!user_wire ||
2503                             (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
2504                                 entry->wired_count--;
2505                         if (entry->wired_count == 0) {
2506                                 /*
2507                                  * Retain the map lock.
2508                                  */
2509                                 vm_fault_unwire(map, entry->start, entry->end,
2510                                     entry->object.vm_object != NULL &&
2511                                     (entry->object.vm_object->type == OBJT_DEVICE ||
2512                                     entry->object.vm_object->type == OBJT_SG));
2513                         }
2514                 }
2515         next_entry_done:
2516                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
2517                         ("vm_map_wire: in-transition flag missing"));
2518                 entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION|MAP_ENTRY_WIRE_SKIPPED);
2519                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
2520                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
2521                         need_wakeup = TRUE;
2522                 }
2523                 vm_map_simplify_entry(map, entry);
2524                 entry = entry->next;
2525         }
2526         vm_map_unlock(map);
2527         if (need_wakeup)
2528                 vm_map_wakeup(map);
2529         return (rv);
2530 }
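
/*
 * A minimal usage sketch pairing the two routines above, with a
 * hypothetical caller; VM_MAP_WIRE_NOHOLES is assumed to be the
 * vm_map.h flag requesting that holes be rejected.  Not compiled.
 */
#if 0
static int
example_wire_then_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
        int rv;

        rv = vm_map_wire(map, start, end,
            VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
        if (rv != KERN_SUCCESS)
                return (rv);
        /* ... operate on the wired pages ... */
        return (vm_map_unwire(map, start, end,
            VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES));
}
#endif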
2531
2532 /*
2533  * vm_map_sync
2534  *
2535  * Push any dirty cached pages in the address range to their pager.
2536  * If syncio is TRUE, dirty pages are written synchronously.
2537  * If invalidate is TRUE, any cached pages are freed as well.
2538  *
2539  * If the size of the region from start to end is zero, we are
2540  * supposed to flush all modified pages within the region containing
2541  * start.  Unfortunately, a region can be split or coalesced with
2542  * neighboring regions, making it difficult to determine what the
2543  * original region was.  Therefore, we approximate this requirement by
2544  * flushing the current region containing start.
2545  *
2546  * Returns an error if any part of the specified range is not mapped.
2547  */
2548 int
2549 vm_map_sync(
2550         vm_map_t map,
2551         vm_offset_t start,
2552         vm_offset_t end,
2553         boolean_t syncio,
2554         boolean_t invalidate)
2555 {
2556         vm_map_entry_t current;
2557         vm_map_entry_t entry;
2558         vm_size_t size;
2559         vm_object_t object;
2560         vm_ooffset_t offset;
2561         unsigned int last_timestamp;
2562
2563         vm_map_lock_read(map);
2564         VM_MAP_RANGE_CHECK(map, start, end);
2565         if (!vm_map_lookup_entry(map, start, &entry)) {
2566                 vm_map_unlock_read(map);
2567                 return (KERN_INVALID_ADDRESS);
2568         } else if (start == end) {
2569                 start = entry->start;
2570                 end = entry->end;
2571         }
2572         /*
2573          * Make a first pass to check for user-wired memory and holes.
2574          */
2575         for (current = entry; current != &map->header && current->start < end;
2576             current = current->next) {
2577                 if (invalidate && (current->eflags & MAP_ENTRY_USER_WIRED)) {
2578                         vm_map_unlock_read(map);
2579                         return (KERN_INVALID_ARGUMENT);
2580                 }
2581                 if (end > current->end &&
2582                     (current->next == &map->header ||
2583                         current->end != current->next->start)) {
2584                         vm_map_unlock_read(map);
2585                         return (KERN_INVALID_ADDRESS);
2586                 }
2587         }
2588
2589         if (invalidate)
2590                 pmap_remove(map->pmap, start, end);
2591
2592         /*
2593          * Make a second pass, cleaning/uncaching pages from the indicated
2594          * objects as we go.
2595          */
2596         for (current = entry; current != &map->header && current->start < end;) {
2597                 offset = current->offset + (start - current->start);
2598                 size = (end <= current->end ? end : current->end) - start;
2599                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
2600                         vm_map_t smap;
2601                         vm_map_entry_t tentry;
2602                         vm_size_t tsize;
2603
2604                         smap = current->object.sub_map;
2605                         vm_map_lock_read(smap);
2606                         (void) vm_map_lookup_entry(smap, offset, &tentry);
2607                         tsize = tentry->end - offset;
2608                         if (tsize < size)
2609                                 size = tsize;
2610                         object = tentry->object.vm_object;
2611                         offset = tentry->offset + (offset - tentry->start);
2612                         vm_map_unlock_read(smap);
2613                 } else {
2614                         object = current->object.vm_object;
2615                 }
2616                 vm_object_reference(object);
2617                 last_timestamp = map->timestamp;
2618                 vm_map_unlock_read(map);
2619                 vm_object_sync(object, offset, size, syncio, invalidate);
2620                 start += size;
2621                 vm_object_deallocate(object);
2622                 vm_map_lock_read(map);
2623                 if (last_timestamp == map->timestamp ||
2624                     !vm_map_lookup_entry(map, start, &current))
2625                         current = current->next;
2626         }
2627
2628         vm_map_unlock_read(map);
2629         return (KERN_SUCCESS);
2630 }
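
/*
 * A minimal usage sketch with a hypothetical caller: synchronously
 * flush dirty pages in [start, end) to their pager without freeing
 * cached pages, much as an msync(2) with MS_SYNC would.  Not compiled.
 */
#if 0
static int
example_flush_range(vm_map_t map, vm_offset_t start, vm_offset_t end)
{

        /* syncio == TRUE, invalidate == FALSE. */
        return (vm_map_sync(map, start, end, TRUE, FALSE));
}
#endif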
2631
2632 /*
2633  *      vm_map_entry_unwire:    [ internal use only ]
2634  *
2635  *      Make the region specified by this entry pageable.
2636  *
2637  *      The map in question should be locked.
2638  *      [This is the reason for this routine's existence.]
2639  */
2640 static void
2641 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
2642 {
2643         vm_fault_unwire(map, entry->start, entry->end,
2644             entry->object.vm_object != NULL &&
2645             (entry->object.vm_object->type == OBJT_DEVICE ||
2646             entry->object.vm_object->type == OBJT_SG));
2647         entry->wired_count = 0;
2648 }
2649
2650 static void
2651 vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map)
2652 {
2653
2654         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0)
2655                 vm_object_deallocate(entry->object.vm_object);
2656         uma_zfree(system_map ? kmapentzone : mapentzone, entry);
2657 }
2658
2659 /*
2660  *      vm_map_entry_delete:    [ internal use only ]
2661  *
2662  *      Deallocate the given entry from the target map.
2663  */
2664 static void
2665 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
2666 {
2667         vm_object_t object;
2668         vm_pindex_t offidxstart, offidxend, count, size1;
2669         vm_ooffset_t size;
2670
2671         vm_map_entry_unlink(map, entry);
2672         object = entry->object.vm_object;
2673         size = entry->end - entry->start;
2674         map->size -= size;
2675
2676         if (entry->cred != NULL) {
2677                 swap_release_by_cred(size, entry->cred);
2678                 crfree(entry->cred);
2679         }
2680
2681         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
2682             (object != NULL)) {
2683                 KASSERT(entry->cred == NULL || object->cred == NULL ||
2684                     (entry->eflags & MAP_ENTRY_NEEDS_COPY),
2685                     ("OVERCOMMIT vm_map_entry_delete: both cred %p", entry));
2686                 count = OFF_TO_IDX(size);
2687                 offidxstart = OFF_TO_IDX(entry->offset);
2688                 offidxend = offidxstart + count;
2689                 VM_OBJECT_LOCK(object);
2690                 if (object->ref_count != 1 &&
2691                     ((object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING ||
2692                     object == kernel_object || object == kmem_object)) {
2693                         vm_object_collapse(object);
2694
2695                         /*
2696                          * The option OBJPR_NOTMAPPED can be passed here
2697                          * because vm_map_delete() already performed
2698                          * pmap_remove() on the only mapping to this range
2699                          * of pages. 
2700                          */
2701                         vm_object_page_remove(object, offidxstart, offidxend,
2702                             OBJPR_NOTMAPPED);
2703                         if (object->type == OBJT_SWAP)
2704                                 swap_pager_freespace(object, offidxstart, count);
2705                         if (offidxend >= object->size &&
2706                             offidxstart < object->size) {
2707                                 size1 = object->size;
2708                                 object->size = offidxstart;
2709                                 if (object->cred != NULL) {
2710                                         size1 -= object->size;
2711                                         KASSERT(object->charge >= ptoa(size1),
2712                                             ("vm_map_entry_delete: object->charge < 0"));
2713                                         swap_release_by_cred(ptoa(size1), object->cred);
2714                                         object->charge -= ptoa(size1);
2715                                 }
2716                         }
2717                 }
2718                 VM_OBJECT_UNLOCK(object);
2719         } else
2720                 entry->object.vm_object = NULL;
2721         if (map->system_map)
2722                 vm_map_entry_deallocate(entry, TRUE);
2723         else {
2724                 entry->next = curthread->td_map_def_user;
2725                 curthread->td_map_def_user = entry;
2726         }
2727 }
2728
2729 /*
2730  *      vm_map_delete:  [ internal use only ]
2731  *
2732  *      Deallocates the given address range from the target
2733  *      map.
2734  */
2735 int
2736 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
2737 {
2738         vm_map_entry_t entry;
2739         vm_map_entry_t first_entry;
2740
2741         VM_MAP_ASSERT_LOCKED(map);
2742
2743         /*
2744          * Find the start of the region, and clip it
2745          */
2746         if (!vm_map_lookup_entry(map, start, &first_entry))
2747                 entry = first_entry->next;
2748         else {
2749                 entry = first_entry;
2750                 vm_map_clip_start(map, entry, start);
2751         }
2752
2753         /*
2754          * Step through all entries in this region
2755          */
2756         while ((entry != &map->header) && (entry->start < end)) {
2757                 vm_map_entry_t next;
2758
2759                 /*
2760                  * Wait for wiring or unwiring of an entry to complete.
2761                  * Also wait for any system wirings to disappear on
2762                  * user maps.
2763                  */
2764                 if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 ||
2765                     (vm_map_pmap(map) != kernel_pmap &&
2766                     vm_map_entry_system_wired_count(entry) != 0)) {
2767                         unsigned int last_timestamp;
2768                         vm_offset_t saved_start;
2769                         vm_map_entry_t tmp_entry;
2770
2771                         saved_start = entry->start;
2772                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2773                         last_timestamp = map->timestamp;
2774                         (void) vm_map_unlock_and_wait(map, 0);
2775                         vm_map_lock(map);
2776                         if (last_timestamp + 1 != map->timestamp) {
2777                                 /*
2778                                  * Look again for the entry because the map was
2779                                  * modified while it was unlocked.
2780                                  * Specifically, the entry may have been
2781                                  * clipped, merged, or deleted.
2782                                  */
2783                                 if (!vm_map_lookup_entry(map, saved_start,
2784                                                          &tmp_entry))
2785                                         entry = tmp_entry->next;
2786                                 else {
2787                                         entry = tmp_entry;
2788                                         vm_map_clip_start(map, entry,
2789                                                           saved_start);
2790                                 }
2791                         }
2792                         continue;
2793                 }
2794                 vm_map_clip_end(map, entry, end);
2795
2796                 next = entry->next;
2797
2798                 /*
2799                  * Unwire before removing addresses from the pmap; otherwise,
2800                  * unwiring will put the entries back in the pmap.
2801                  */
2802                 if (entry->wired_count != 0) {
2803                         vm_map_entry_unwire(map, entry);
2804                 }
2805
2806                 pmap_remove(map->pmap, entry->start, entry->end);
2807
2808                 /*
2809                  * Delete the entry only after removing all pmap
2810                  * entries pointing to its pages.  (Otherwise, its
2811                  * page frames may be reallocated, and any modify bits
2812                  * will be set in the wrong object!)
2813                  */
2814                 vm_map_entry_delete(map, entry);
2815                 entry = next;
2816         }
2817         return (KERN_SUCCESS);
2818 }
2819
2820 /*
2821  *      vm_map_remove:
2822  *
2823  *      Remove the given address range from the target map.
2824  *      This is the exported form of vm_map_delete.
2825  */
2826 int
2827 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2828 {
2829         int result;
2830
2831         vm_map_lock(map);
2832         VM_MAP_RANGE_CHECK(map, start, end);
2833         result = vm_map_delete(map, start, end);
2834         vm_map_unlock(map);
2835         return (result);
2836 }
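
/*
 * Example (illustrative sketch, not compiled here): vm_map_remove() is
 * the self-locking wrapper around vm_map_delete(), so a caller only has
 * to page-align the bounds.  The helper name below is hypothetical.
 */
#if 0
static int
example_unmap(vm_map_t map, vm_offset_t addr, vm_size_t size)
{

	return (vm_map_remove(map, trunc_page(addr),
	    round_page(addr + size)));
}
#endif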
2837
2838 /*
2839  *      vm_map_check_protection:
2840  *
2841  *      Assert that the target map allows the specified privilege on the
2842  *      entire address region given.  The entire region must be allocated.
2843  *
2844  *      WARNING!  This code does not and should not check whether the
2845  *      contents of the region is accessible.  For example a smaller file
2846  *      might be mapped into a larger address space.
2847  *
2848  *      NOTE!  This code is also called by munmap().
2849  *
2850  *      The map must be locked.  A read lock is sufficient.
2851  */
2852 boolean_t
2853 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2854                         vm_prot_t protection)
2855 {
2856         vm_map_entry_t entry;
2857         vm_map_entry_t tmp_entry;
2858
2859         if (!vm_map_lookup_entry(map, start, &tmp_entry))
2860                 return (FALSE);
2861         entry = tmp_entry;
2862
2863         while (start < end) {
2864                 if (entry == &map->header)
2865                         return (FALSE);
2866                 /*
2867                  * No holes allowed!
2868                  */
2869                 if (start < entry->start)
2870                         return (FALSE);
2871                 /*
2872                  * Check protection associated with entry.
2873                  */
2874                 if ((entry->protection & protection) != protection)
2875                         return (FALSE);
2876                 /* go to next entry */
2877                 start = entry->end;
2878                 entry = entry->next;
2879         }
2880         return (TRUE);
2881 }
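
/*
 * Example (illustrative sketch, not compiled here) honoring the locking
 * contract above: the caller holds at least a read lock across the
 * check.  The helper name is hypothetical.
 */
#if 0
static boolean_t
example_range_readable(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	boolean_t ok;

	vm_map_lock_read(map);
	ok = vm_map_check_protection(map, start, end, VM_PROT_READ);
	vm_map_unlock_read(map);
	return (ok);
}
#endif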
2882
2883 /*
2884  *      vm_map_copy_entry:
2885  *
2886  *      Copies the contents of the source entry to the destination
2887  *      entry.  The entries *must* be aligned properly.
2888  */
2889 static void
2890 vm_map_copy_entry(
2891         vm_map_t src_map,
2892         vm_map_t dst_map,
2893         vm_map_entry_t src_entry,
2894         vm_map_entry_t dst_entry,
2895         vm_ooffset_t *fork_charge)
2896 {
2897         vm_object_t src_object;
2898         vm_offset_t size;
2899         struct ucred *cred;
2900         int charged;
2901
2902         VM_MAP_ASSERT_LOCKED(dst_map);
2903
2904         if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2905                 return;
2906
2907         if (src_entry->wired_count == 0) {
2908
2909                 /*
2910                  * If the source entry is marked needs_copy, it is already
2911                  * write-protected.
2912                  */
2913                 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2914                         pmap_protect(src_map->pmap,
2915                             src_entry->start,
2916                             src_entry->end,
2917                             src_entry->protection & ~VM_PROT_WRITE);
2918                 }
2919
2920                 /*
2921                  * Make a copy of the object.
2922                  */
2923                 size = src_entry->end - src_entry->start;
2924                 if ((src_object = src_entry->object.vm_object) != NULL) {
2925                         VM_OBJECT_LOCK(src_object);
2926                         charged = ENTRY_CHARGED(src_entry);
2927                         if ((src_object->handle == NULL) &&
2928                                 (src_object->type == OBJT_DEFAULT ||
2929                                  src_object->type == OBJT_SWAP)) {
2930                                 vm_object_collapse(src_object);
2931                                 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2932                                         vm_object_split(src_entry);
2933                                         src_object = src_entry->object.vm_object;
2934                                 }
2935                         }
2936                         vm_object_reference_locked(src_object);
2937                         vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2938                         if (src_entry->cred != NULL &&
2939                             !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
2940                                 KASSERT(src_object->cred == NULL,
2941                                     ("OVERCOMMIT: vm_map_copy_entry: cred %p",
2942                                      src_object));
2943                                 src_object->cred = src_entry->cred;
2944                                 src_object->charge = size;
2945                         }
2946                         VM_OBJECT_UNLOCK(src_object);
2947                         dst_entry->object.vm_object = src_object;
2948                         if (charged) {
2949                                 cred = curthread->td_ucred;
2950                                 crhold(cred);
2951                                 dst_entry->cred = cred;
2952                                 *fork_charge += size;
2953                                 if (!(src_entry->eflags &
2954                                       MAP_ENTRY_NEEDS_COPY)) {
2955                                         crhold(cred);
2956                                         src_entry->cred = cred;
2957                                         *fork_charge += size;
2958                                 }
2959                         }
2960                         src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2961                         dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2962                         dst_entry->offset = src_entry->offset;
2963                 } else {
2964                         dst_entry->object.vm_object = NULL;
2965                         dst_entry->offset = 0;
2966                         if (src_entry->cred != NULL) {
2967                                 dst_entry->cred = curthread->td_ucred;
2968                                 crhold(dst_entry->cred);
2969                                 *fork_charge += size;
2970                         }
2971                 }
2972
2973                 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2974                     dst_entry->end - dst_entry->start, src_entry->start);
2975         } else {
2976                 /*
2977                  * Of course, wired down pages can't be set copy-on-write.
2978                  * Cause wired pages to be copied into the new map by
2979                  * simulating faults (the new pages are pageable)
2980                  */
2981                 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry,
2982                     fork_charge);
2983         }
2984 }
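
/*
 * Worked example of the fork_charge accounting above (numbers are
 * illustrative): copying a charged 16 MB entry whose source is not yet
 * marked MAP_ENTRY_NEEDS_COPY charges the destination credential 16 MB
 * and the source credential another 16 MB, so *fork_charge grows by
 * 32 MB; if the source already had MAP_ENTRY_NEEDS_COPY set, only the
 * destination's 16 MB is added.
 */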
2985
2986 /*
2987  * vmspace_map_entry_forked:
2988  * Update the newly-forked vmspace each time a map entry is inherited
2989  * or copied.  The values for vm_dsize and vm_tsize are approximate
2990  * (and mostly-obsolete ideas in the face of mmap(2) et al.)
2991  */
2992 static void
2993 vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2,
2994     vm_map_entry_t entry)
2995 {
2996         vm_size_t entrysize;
2997         vm_offset_t newend;
2998
2999         entrysize = entry->end - entry->start;
3000         vm2->vm_map.size += entrysize;
3001         if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
3002                 vm2->vm_ssize += btoc(entrysize);
3003         } else if (entry->start >= (vm_offset_t)vm1->vm_daddr &&
3004             entry->start < (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)) {
3005                 newend = MIN(entry->end,
3006                     (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize));
3007                 vm2->vm_dsize += btoc(newend - entry->start);
3008         } else if (entry->start >= (vm_offset_t)vm1->vm_taddr &&
3009             entry->start < (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)) {
3010                 newend = MIN(entry->end,
3011                     (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize));
3012                 vm2->vm_tsize += btoc(newend - entry->start);
3013         }
3014 }
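
/*
 * Worked example of the arithmetic above, assuming 4 KB pages: a
 * 0x3000-byte (12 KB) entry lying wholly inside the old data segment
 * contributes vm2->vm_map.size += 0x3000 (bytes) and
 * vm2->vm_dsize += btoc(0x3000) == 3 (pages).  btoc() converts bytes
 * to pages and ctob() back, so vm_dsize/vm_tsize/vm_ssize are kept in
 * pages while map sizes stay in bytes.
 */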
3015
3016 /*
3017  * vmspace_fork:
3018  * Create a new process vmspace structure and vm_map
3019  * based on those of an existing process.  The new map
3020  * is based on the old map, according to the inheritance
3021  * values on the regions in that map.
3022  *
3023  * XXX It might be worth coalescing the entries added to the new vmspace.
3024  *
3025  * The source map must not be locked.
3026  */
3027 struct vmspace *
3028 vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
3029 {
3030         struct vmspace *vm2;
3031         vm_map_t old_map = &vm1->vm_map;
3032         vm_map_t new_map;
3033         vm_map_entry_t old_entry;
3034         vm_map_entry_t new_entry;
3035         vm_object_t object;
3036         int locked;
3037
3038         vm_map_lock(old_map);
3039         if (old_map->busy)
3040                 vm_map_wait_busy(old_map);
3041         vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
3042         if (vm2 == NULL)
3043                 goto unlock_and_return;
3044         vm2->vm_taddr = vm1->vm_taddr;
3045         vm2->vm_daddr = vm1->vm_daddr;
3046         vm2->vm_maxsaddr = vm1->vm_maxsaddr;
3047         new_map = &vm2->vm_map; /* XXX */
3048         locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
3049         KASSERT(locked, ("vmspace_fork: lock failed"));
3050         new_map->timestamp = 1;
3051
3052         old_entry = old_map->header.next;
3053
3054         while (old_entry != &old_map->header) {
3055                 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
3056                         panic("vmspace_fork: encountered a submap");
3057
3058                 switch (old_entry->inheritance) {
3059                 case VM_INHERIT_NONE:
3060                         break;
3061
3062                 case VM_INHERIT_SHARE:
3063                         /*
3064                          * Clone the entry, creating the shared object if necessary.
3065                          */
3066                         object = old_entry->object.vm_object;
3067                         if (object == NULL) {
3068                                 object = vm_object_allocate(OBJT_DEFAULT,
3069                                         atop(old_entry->end - old_entry->start));
3070                                 old_entry->object.vm_object = object;
3071                                 old_entry->offset = 0;
3072                                 if (old_entry->cred != NULL) {
3073                                         object->cred = old_entry->cred;
3074                                         object->charge = old_entry->end -
3075                                             old_entry->start;
3076                                         old_entry->cred = NULL;
3077                                 }
3078                         }
3079
3080                         /*
3081                          * Add the reference before calling vm_object_shadow
3082                          * to ensure that a shadow object is created.
3083                          */
3084                         vm_object_reference(object);
3085                         if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
3086                                 vm_object_shadow(&old_entry->object.vm_object,
3087                                     &old_entry->offset,
3088                                     old_entry->end - old_entry->start);
3089                                 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
3090                                 /* Transfer the second reference too. */
3091                                 vm_object_reference(
3092                                     old_entry->object.vm_object);
3093
3094                                 /*
3095                                  * As in vm_map_simplify_entry(), the
3096                                  * vnode lock will not be acquired in
3097                                  * this call to vm_object_deallocate().
3098                                  */
3099                                 vm_object_deallocate(object);
3100                                 object = old_entry->object.vm_object;
3101                         }
3102                         VM_OBJECT_LOCK(object);
3103                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
3104                         if (old_entry->cred != NULL) {
3105                                 KASSERT(object->cred == NULL, ("vmspace_fork both cred"));
3106                                 object->cred = old_entry->cred;
3107                                 object->charge = old_entry->end - old_entry->start;
3108                                 old_entry->cred = NULL;
3109                         }
3110                         VM_OBJECT_UNLOCK(object);
3111
3112                         /*
3113                          * Clone the entry, referencing the shared object.
3114                          */
3115                         new_entry = vm_map_entry_create(new_map);
3116                         *new_entry = *old_entry;
3117                         new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
3118                             MAP_ENTRY_IN_TRANSITION);
3119                         new_entry->wired_count = 0;
3120
3121                         /*
3122                          * Insert the entry into the new map -- we know we're
3123                          * inserting at the end of the new map.
3124                          */
3125                         vm_map_entry_link(new_map, new_map->header.prev,
3126                             new_entry);
3127                         vmspace_map_entry_forked(vm1, vm2, new_entry);
3128
3129                         /*
3130                          * Update the physical map
3131                          */
3132                         pmap_copy(new_map->pmap, old_map->pmap,
3133                             new_entry->start,
3134                             (old_entry->end - old_entry->start),
3135                             old_entry->start);
3136                         break;
3137
3138                 case VM_INHERIT_COPY:
3139                         /*
3140                          * Clone the entry and link into the map.
3141                          */
3142                         new_entry = vm_map_entry_create(new_map);
3143                         *new_entry = *old_entry;
3144                         new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
3145                             MAP_ENTRY_IN_TRANSITION);
3146                         new_entry->wired_count = 0;
3147                         new_entry->object.vm_object = NULL;
3148                         new_entry->cred = NULL;
3149                         vm_map_entry_link(new_map, new_map->header.prev,
3150                             new_entry);
3151                         vmspace_map_entry_forked(vm1, vm2, new_entry);
3152                         vm_map_copy_entry(old_map, new_map, old_entry,
3153                             new_entry, fork_charge);
3154                         break;
3155                 }
3156                 old_entry = old_entry->next;
3157         }
3158 unlock_and_return:
3159         vm_map_unlock(old_map);
3160         if (vm2 != NULL)
3161                 vm_map_unlock(new_map);
3162
3163         return (vm2);
3164 }
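
/*
 * Example (illustrative sketch, not compiled here): the usual caller
 * pattern is to fork the vmspace, then reserve the copy-on-write charge
 * against the caller's credential, freeing the new vmspace on failure.
 * vmspace_unshare() below follows exactly this pattern; "p1" here is a
 * hypothetical forking process.
 */
#if 0
	vm_ooffset_t fork_charge = 0;
	struct vmspace *vm2;

	vm2 = vmspace_fork(p1->p_vmspace, &fork_charge);
	if (vm2 == NULL)
		return (ENOMEM);
	if (!swap_reserve_by_cred(fork_charge, p1->p_ucred)) {
		vmspace_free(vm2);
		return (ENOMEM);
	}
#endif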
3165
3166 int
3167 vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
3168     vm_prot_t prot, vm_prot_t max, int cow)
3169 {
3170         vm_map_entry_t new_entry, prev_entry;
3171         vm_offset_t bot, top;
3172         vm_size_t init_ssize;
3173         int orient, rv;
3174         rlim_t vmemlim;
3175
3176         /*
3177          * The stack orientation is piggybacked with the cow argument.
3178          * Extract it into orient and mask the cow argument so that we
3179          * don't pass it around further.
3180          * NOTE: We explicitly allow bi-directional stacks.
3181          */
3182         orient = cow & (MAP_STACK_GROWS_DOWN|MAP_STACK_GROWS_UP);
3183         cow &= ~orient;
3184         KASSERT(orient != 0, ("No stack grow direction"));
3185
3186         if (addrbos < vm_map_min(map) ||
3187             addrbos > vm_map_max(map) ||
3188             addrbos + max_ssize < addrbos)
3189                 return (KERN_NO_SPACE);
3190
3191         init_ssize = (max_ssize < sgrowsiz) ? max_ssize : sgrowsiz;
3192
3193         PROC_LOCK(curthread->td_proc);
3194         vmemlim = lim_cur(curthread->td_proc, RLIMIT_VMEM);
3195         PROC_UNLOCK(curthread->td_proc);
3196
3197         vm_map_lock(map);
3198
3199         /* If addr is already mapped, no go */
3200         if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
3201                 vm_map_unlock(map);
3202                 return (KERN_NO_SPACE);
3203         }
3204
3205         /* If we would blow our VMEM resource limit, no go */
3206         if (map->size + init_ssize > vmemlim) {
3207                 vm_map_unlock(map);
3208                 return (KERN_NO_SPACE);
3209         }
3210
3211         /*
3212          * If we can't accommodate max_ssize in the current mapping, no go.
3213          * However, we need to be aware that subsequent user mappings might
3214          * map into the space we have reserved for stack, and currently this
3215          * space is not protected.
3216          *
3217          * Hopefully we will at least detect this condition when we try to
3218          * grow the stack.
3219          */
3220         if ((prev_entry->next != &map->header) &&
3221             (prev_entry->next->start < addrbos + max_ssize)) {
3222                 vm_map_unlock(map);
3223                 return (KERN_NO_SPACE);
3224         }
3225
3226         /*
3227          * We initially map a stack of only init_ssize.  We will grow as
3228          * needed later.  Depending on the orientation of the stack (i.e.
3229          * the grow direction) we either map at the top of the range, the
3230          * bottom of the range or in the middle.
3231          *
3232          * Note: we would normally expect prot and max to be VM_PROT_ALL,
3233          * and cow to be 0.  Possibly we should eliminate these as input
3234          * parameters, and just pass these values here in the insert call.
3235          */
3236         if (orient == MAP_STACK_GROWS_DOWN)
3237                 bot = addrbos + max_ssize - init_ssize;
3238         else if (orient == MAP_STACK_GROWS_UP)
3239                 bot = addrbos;
3240         else
3241                 bot = round_page(addrbos + max_ssize / 2 - init_ssize / 2);
3242         top = bot + init_ssize;
3243         rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
3244
3245         /* Now set the avail_ssize amount. */
3246         if (rv == KERN_SUCCESS) {
3247                 if (prev_entry != &map->header)
3248                         vm_map_clip_end(map, prev_entry, bot);
3249                 new_entry = prev_entry->next;
3250                 if (new_entry->end != top || new_entry->start != bot)
3251                         panic("Bad entry start/end for new stack entry");
3252
3253                 new_entry->avail_ssize = max_ssize - init_ssize;
3254                 if (orient & MAP_STACK_GROWS_DOWN)
3255                         new_entry->eflags |= MAP_ENTRY_GROWS_DOWN;
3256                 if (orient & MAP_STACK_GROWS_UP)
3257                         new_entry->eflags |= MAP_ENTRY_GROWS_UP;
3258         }
3259
3260         vm_map_unlock(map);
3261         return (rv);
3262 }
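
/*
 * Example (illustrative sketch, not compiled here): reserving max_ssize
 * bytes at addrbos for a downward-growing stack, with the initial pages
 * mapped at the top of the range; map, addrbos and max_ssize are
 * placeholders.
 */
#if 0
	rv = vm_map_stack(map, addrbos, max_ssize, VM_PROT_ALL, VM_PROT_ALL,
	    MAP_STACK_GROWS_DOWN);
	if (rv != KERN_SUCCESS)
		return (rv);		/* e.g. KERN_NO_SPACE */
#endif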
3263
3264 static int stack_guard_page = 0;
3265 TUNABLE_INT("security.bsd.stack_guard_page", &stack_guard_page);
3266 SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RW,
3267     &stack_guard_page, 0,
3268     "Insert stack guard page ahead of the growable segments.");
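
/*
 * Because the knob is registered both as a loader tunable and as a
 * CTLFLAG_RW sysctl, it can be set at boot or at run time, e.g.:
 *
 *	# /boot/loader.conf
 *	security.bsd.stack_guard_page=1
 *
 *	# on a running system
 *	sysctl security.bsd.stack_guard_page=1
 */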
3269
3270 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
3271  * desired address is already mapped, or if we successfully grow
3272  * the stack.  Also returns KERN_SUCCESS if addr is outside the
3273  * stack range (this is strange, but preserves compatibility with
3274  * the grow function in vm_machdep.c).
3275  */
3276 int
3277 vm_map_growstack(struct proc *p, vm_offset_t addr)
3278 {
3279         vm_map_entry_t next_entry, prev_entry;
3280         vm_map_entry_t new_entry, stack_entry;
3281         struct vmspace *vm = p->p_vmspace;
3282         vm_map_t map = &vm->vm_map;
3283         vm_offset_t end;
3284         size_t grow_amount, max_grow;
3285         rlim_t stacklim, vmemlim;
3286         int is_procstack, rv;
3287         struct ucred *cred;
3288 #ifdef notyet
3289         uint64_t limit;
3290 #endif
3291 #ifdef RACCT
3292         int error;
3293 #endif
3294
3295 Retry:
3296         PROC_LOCK(p);
3297         stacklim = lim_cur(p, RLIMIT_STACK);
3298         vmemlim = lim_cur(p, RLIMIT_VMEM);
3299         PROC_UNLOCK(p);
3300
3301         vm_map_lock_read(map);
3302
3303         /* If addr is already in the entry range, no need to grow. */
3304         if (vm_map_lookup_entry(map, addr, &prev_entry)) {
3305                 vm_map_unlock_read(map);
3306                 return (KERN_SUCCESS);
3307         }
3308
3309         next_entry = prev_entry->next;
3310         if (!(prev_entry->eflags & MAP_ENTRY_GROWS_UP)) {
3311                 /*
3312                  * This entry does not grow upwards. Since the address lies
3313                  * beyond this entry, the next entry (if one exists) has to
3314                  * be a downward growable entry. The entry list header is
3315                  * never a growable entry, so it suffices to check the flags.
3316                  */
3317                 if (!(next_entry->eflags & MAP_ENTRY_GROWS_DOWN)) {
3318                         vm_map_unlock_read(map);
3319                         return (KERN_SUCCESS);
3320                 }
3321                 stack_entry = next_entry;
3322         } else {
3323                 /*
3324                  * This entry grows upward. If the next entry does not at
3325                  * least grow downwards, this is the entry we need to grow;
3326                  * otherwise, we have two possible choices and we have to
3327                  * select one.
3328                  */
3329                 if (next_entry->eflags & MAP_ENTRY_GROWS_DOWN) {
3330                         /*
3331                          * We have two choices; grow the entry closest to
3332                          * the address to minimize the amount of growth.
3333                          */
3334                         if (addr - prev_entry->end <= next_entry->start - addr)
3335                                 stack_entry = prev_entry;
3336                         else
3337                                 stack_entry = next_entry;
3338                 } else
3339                         stack_entry = prev_entry;
3340         }
3341
3342         if (stack_entry == next_entry) {
3343                 KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_DOWN, ("foo"));
3344                 KASSERT(addr < stack_entry->start, ("foo"));
3345                 end = (prev_entry != &map->header) ? prev_entry->end :
3346                     stack_entry->start - stack_entry->avail_ssize;
3347                 grow_amount = roundup(stack_entry->start - addr, PAGE_SIZE);
3348                 max_grow = stack_entry->start - end;
3349         } else {
3350                 KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_UP, ("foo"));
3351                 KASSERT(addr >= stack_entry->end, ("foo"));
3352                 end = (next_entry != &map->header) ? next_entry->start :
3353                     stack_entry->end + stack_entry->avail_ssize;
3354                 grow_amount = roundup(addr + 1 - stack_entry->end, PAGE_SIZE);
3355                 max_grow = end - stack_entry->end;
3356         }
3357
3358         if (grow_amount > stack_entry->avail_ssize) {
3359                 vm_map_unlock_read(map);
3360                 return (KERN_NO_SPACE);
3361         }
3362
3363         /*
3364          * If there is no longer enough space between the entries, fail
3365          * and adjust the available space.  Note: this should only happen if the
3366          * user has mapped into the stack area after the stack was created,
3367          * and is probably an error.
3368          *
3369          * This also effectively destroys any guard page the user might have
3370          * intended by limiting the stack size.
3371          */
3372         if (grow_amount + (stack_guard_page ? PAGE_SIZE : 0) > max_grow) {
3373                 if (vm_map_lock_upgrade(map))
3374                         goto Retry;
3375
3376                 stack_entry->avail_ssize = max_grow;
3377
3378                 vm_map_unlock(map);
3379                 return (KERN_NO_SPACE);
3380         }
3381
3382         is_procstack = (addr >= (vm_offset_t)vm->vm_maxsaddr) ? 1 : 0;
3383
3384         /*
3385          * If this is the main process stack, see if we're over the stack
3386          * limit.
3387          */
3388         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
3389                 vm_map_unlock_read(map);
3390                 return (KERN_NO_SPACE);
3391         }
3392 #ifdef RACCT
3393         PROC_LOCK(p);
3394         if (is_procstack &&
3395             racct_set(p, RACCT_STACK, ctob(vm->vm_ssize) + grow_amount)) {
3396                 PROC_UNLOCK(p);
3397                 vm_map_unlock_read(map);
3398                 return (KERN_NO_SPACE);
3399         }
3400         PROC_UNLOCK(p);
3401 #endif
3402
3403         /* Round up the grow amount to a multiple of sgrowsiz. */
3404         grow_amount = roundup(grow_amount, sgrowsiz);
3405         if (grow_amount > stack_entry->avail_ssize)
3406                 grow_amount = stack_entry->avail_ssize;
3407         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
3408                 grow_amount = trunc_page((vm_size_t)stacklim) -
3409                     ctob(vm->vm_ssize);
3410         }
3411 #ifdef notyet
3412         PROC_LOCK(p);
3413         limit = racct_get_available(p, RACCT_STACK);
3414         PROC_UNLOCK(p);
3415         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
3416                 grow_amount = limit - ctob(vm->vm_ssize);
3417 #endif
3418
3419         /* If we would blow our VMEM resource limit, no go */
3420         if (map->size + grow_amount > vmemlim) {
3421                 vm_map_unlock_read(map);
3422                 rv = KERN_NO_SPACE;
3423                 goto out;
3424         }
3425 #ifdef RACCT
3426         PROC_LOCK(p);
3427         if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
3428                 PROC_UNLOCK(p);
3429                 vm_map_unlock_read(map);
3430                 rv = KERN_NO_SPACE;
3431                 goto out;
3432         }
3433         PROC_UNLOCK(p);
3434 #endif
3435
3436         if (vm_map_lock_upgrade(map))
3437                 goto Retry;
3438
3439         if (stack_entry == next_entry) {
3440                 /*
3441                  * Growing downward.
3442                  */
3443                 /* Get the preliminary new entry start value */
3444                 addr = stack_entry->start - grow_amount;
3445
3446                 /*
3447                  * If this puts us into the previous entry, cut back our
3448                  * growth to the available space. Also, see the note above.
3449                  */
3450                 if (addr < end) {
3451                         stack_entry->avail_ssize = max_grow;
3452                         addr = end;
3453                         if (stack_guard_page)
3454                                 addr += PAGE_SIZE;
3455                 }
3456
3457                 rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
3458                     p->p_sysent->sv_stackprot, VM_PROT_ALL, 0);
3459
3460                 /* Adjust the available stack space by the amount we grew. */
3461                 if (rv == KERN_SUCCESS) {
3462                         if (prev_entry != &map->header)
3463                                 vm_map_clip_end(map, prev_entry, addr);
3464                         new_entry = prev_entry->next;
3465                         KASSERT(new_entry == stack_entry->prev, ("foo"));
3466                         KASSERT(new_entry->end == stack_entry->start, ("foo"));
3467                         KASSERT(new_entry->start == addr, ("foo"));
3468                         grow_amount = new_entry->end - new_entry->start;
3469                         new_entry->avail_ssize = stack_entry->avail_ssize -
3470                             grow_amount;
3471                         stack_entry->eflags &= ~MAP_ENTRY_GROWS_DOWN;
3472                         new_entry->eflags |= MAP_ENTRY_GROWS_DOWN;
3473                 }
3474         } else {
3475                 /*
3476                  * Growing upward.
3477                  */
3478                 addr = stack_entry->end + grow_amount;
3479
3480                 /*
3481                  * If this puts us into the next entry, cut back our growth
3482                  * to the available space. Also, see the note above.
3483                  */
3484                 if (addr > end) {
3485                         stack_entry->avail_ssize = end - stack_entry->end;
3486                         addr = end;
3487                         if (stack_guard_page)
3488                                 addr -= PAGE_SIZE;
3489                 }
3490
3491                 grow_amount = addr - stack_entry->end;
3492                 cred = stack_entry->cred;
3493                 if (cred == NULL && stack_entry->object.vm_object != NULL)
3494                         cred = stack_entry->object.vm_object->cred;
3495                 if (cred != NULL && !swap_reserve_by_cred(grow_amount, cred))
3496                         rv = KERN_NO_SPACE;
3497                 /* Grow the underlying object if applicable. */
3498                 else if (stack_entry->object.vm_object == NULL ||
3499                          vm_object_coalesce(stack_entry->object.vm_object,
3500                          stack_entry->offset,
3501                          (vm_size_t)(stack_entry->end - stack_entry->start),
3502                          (vm_size_t)grow_amount, cred != NULL)) {
3503                         map->size += (addr - stack_entry->end);
3504                         /* Update the current entry. */
3505                         stack_entry->end = addr;
3506                         stack_entry->avail_ssize -= grow_amount;
3507                         vm_map_entry_resize_free(map, stack_entry);
3508                         rv = KERN_SUCCESS;
3509
3510                         if (next_entry != &map->header)
3511                                 vm_map_clip_start(map, next_entry, addr);
3512                 } else
3513                         rv = KERN_FAILURE;
3514         }
3515
3516         if (rv == KERN_SUCCESS && is_procstack)
3517                 vm->vm_ssize += btoc(grow_amount);
3518
3519         vm_map_unlock(map);
3520
3521         /*
3522          * Heed the MAP_WIREFUTURE flag if it was set for this process.
3523          */
3524         if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE)) {
3525                 vm_map_wire(map,
3526                     (stack_entry == next_entry) ? addr : addr - grow_amount,
3527                     (stack_entry == next_entry) ? stack_entry->start : addr,
3528                     (p->p_flag & P_SYSTEM)
3529                     ? VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES
3530                     : VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES);
3531         }
3532
3533 out:
3534 #ifdef RACCT
3535         if (rv != KERN_SUCCESS) {
3536                 PROC_LOCK(p);
3537                 error = racct_set(p, RACCT_VMEM, map->size);
3538                 KASSERT(error == 0, ("decreasing RACCT_VMEM failed"));
3539                 error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize));
3540                 KASSERT(error == 0, ("decreasing RACCT_STACK failed"));
3541                 PROC_UNLOCK(p);
3542         }
3543 #endif
3544
3545         return (rv);
3546 }
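
/*
 * Example (illustrative sketch, not compiled here): the
 * machine-dependent page-fault path gives the stack a chance to grow
 * before failing the fault; p and va are placeholders.
 */
#if 0
	if (vm_map_growstack(p, va) != KERN_SUCCESS)
		return (SIGSEGV);	/* could not grow or map the range */
	/* Otherwise retry the fault against the (possibly grown) map. */
#endif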
3547
3548 /*
3549  * Unshare the specified VM space for exec.  If other processes are
3550  * mapped to it, then create a new one.  The new vmspace is empty.
3551  */
3552 int
3553 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
3554 {
3555         struct vmspace *oldvmspace = p->p_vmspace;
3556         struct vmspace *newvmspace;
3557
3558         newvmspace = vmspace_alloc(minuser, maxuser);
3559         if (newvmspace == NULL)
3560                 return (ENOMEM);
3561         newvmspace->vm_swrss = oldvmspace->vm_swrss;
3562         /*
3563          * This code is written like this for prototype purposes.  The
3564          * goal is to avoid running down the vmspace here, but let the
3565          * other processes that are still using the vmspace finally
3566          * run it down.  Even though there is little or no chance of blocking
3567          * here, it is a good idea to keep this form for future mods.
3568          */
3569         PROC_VMSPACE_LOCK(p);
3570         p->p_vmspace = newvmspace;
3571         PROC_VMSPACE_UNLOCK(p);
3572         if (p == curthread->td_proc)
3573                 pmap_activate(curthread);
3574         vmspace_free(oldvmspace);
3575         return (0);
3576 }
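
/*
 * Example (illustrative sketch, not compiled here): an exec path hands
 * the process a fresh address space sized from the image's sysentvec;
 * the sv_minuser/sv_maxuser field names are assumed here.
 */
#if 0
	error = vmspace_exec(p, sv->sv_minuser, sv->sv_maxuser);
	if (error != 0)
		return (error);
	map = &p->p_vmspace->vm_map;	/* the fresh, empty map */
#endif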
3577
3578 /*
3579  * Unshare the specified VM space for forcing COW.  This
3580  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
3581  */
3582 int
3583 vmspace_unshare(struct proc *p)
3584 {
3585         struct vmspace *oldvmspace = p->p_vmspace;
3586         struct vmspace *newvmspace;
3587         vm_ooffset_t fork_charge;
3588
3589         if (oldvmspace->vm_refcnt == 1)
3590                 return (0);
3591         fork_charge = 0;
3592         newvmspace = vmspace_fork(oldvmspace, &fork_charge);
3593         if (newvmspace == NULL)
3594                 return (ENOMEM);
3595         if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) {
3596                 vmspace_free(newvmspace);
3597                 return (ENOMEM);
3598         }
3599         PROC_VMSPACE_LOCK(p);
3600         p->p_vmspace = newvmspace;
3601         PROC_VMSPACE_UNLOCK(p);
3602         if (p == curthread->td_proc)
3603                 pmap_activate(curthread);
3604         vmspace_free(oldvmspace);
3605         return (0);
3606 }
3607
3608 /*
3609  *      vm_map_lookup:
3610  *
3611  *      Finds the VM object, offset, and
3612  *      protection for a given virtual address in the
3613  *      specified map, assuming a page fault of the
3614  *      type specified.
3615  *
3616  *      Leaves the map in question locked for read; return
3617  *      values are guaranteed until a vm_map_lookup_done
3618  *      call is performed.  Note that the map argument
3619  *      is in/out; the returned map must be used in
3620  *      the call to vm_map_lookup_done.
3621  *
3622  *      A handle (out_entry) is returned for use in
3623  *      vm_map_lookup_done, to make that fast.
3624  *
3625  *      If a lookup is requested with "write protection"
3626  *      specified, the map may be changed to perform virtual
3627  *      copying operations, although the data referenced will
3628  *      remain the same.
3629  */
3630 int
3631 vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
3632               vm_offset_t vaddr,
3633               vm_prot_t fault_typea,
3634               vm_map_entry_t *out_entry,        /* OUT */
3635               vm_object_t *object,              /* OUT */
3636               vm_pindex_t *pindex,              /* OUT */
3637               vm_prot_t *out_prot,              /* OUT */
3638               boolean_t *wired)                 /* OUT */
3639 {
3640         vm_map_entry_t entry;
3641         vm_map_t map = *var_map;
3642         vm_prot_t prot;
3643         vm_prot_t fault_type = fault_typea;
3644         vm_object_t eobject;
3645         vm_size_t size;
3646         struct ucred *cred;
3647
3648 RetryLookup:;
3649
3650         vm_map_lock_read(map);
3651
3652         /*
3653          * Lookup the faulting address.
3654          */
3655         if (!vm_map_lookup_entry(map, vaddr, out_entry)) {
3656                 vm_map_unlock_read(map);
3657                 return (KERN_INVALID_ADDRESS);
3658         }
3659
3660         entry = *out_entry;
3661
3662         /*
3663          * Handle submaps.
3664          */
3665         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3666                 vm_map_t old_map = map;
3667
3668                 *var_map = map = entry->object.sub_map;
3669                 vm_map_unlock_read(old_map);
3670                 goto RetryLookup;
3671         }
3672
3673         /*
3674          * Check whether this task is allowed to have this page.
3675          */
3676         prot = entry->protection;
3677         fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
3678         if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) {
3679                 vm_map_unlock_read(map);
3680                 return (KERN_PROTECTION_FAILURE);
3681         }
3682         if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
3683             (entry->eflags & MAP_ENTRY_COW) &&
3684             (fault_type & VM_PROT_WRITE)) {
3685                 vm_map_unlock_read(map);
3686                 return (KERN_PROTECTION_FAILURE);
3687         }
3688
3689         /*
3690          * If this page is not pageable, we have to get it for all possible
3691          * accesses.
3692          */
3693         *wired = (entry->wired_count != 0);
3694         if (*wired)
3695                 fault_type = entry->protection;
3696         size = entry->end - entry->start;
3697         /*
3698          * If the entry was copy-on-write, we either shadow the object or demote the permissions:
3699          */
3700         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
3701                 /*
3702                  * If we want to write the page, we may as well handle that
3703                  * now since we've got the map locked.
3704                  *
3705                  * If we don't need to write the page, we just demote the
3706                  * permissions allowed.
3707                  */
3708                 if ((fault_type & VM_PROT_WRITE) != 0 ||
3709                     (fault_typea & VM_PROT_COPY) != 0) {
3710                         /*
3711                          * Make a new object, and place it in the object
3712                          * chain.  Note that no new references have appeared
3713                          * -- one just moved from the map to the new
3714                          * object.
3715                          */
3716                         if (vm_map_lock_upgrade(map))
3717                                 goto RetryLookup;
3718
3719                         if (entry->cred == NULL) {
3720                                 /*
3721                                  * The debugger owner is charged for
3722                                  * the memory.
3723                                  */
3724                                 cred = curthread->td_ucred;
3725                                 crhold(cred);
3726                                 if (!swap_reserve_by_cred(size, cred)) {
3727                                         crfree(cred);
3728                                         vm_map_unlock(map);
3729                                         return (KERN_RESOURCE_SHORTAGE);
3730                                 }
3731                                 entry->cred = cred;
3732                         }
3733                         vm_object_shadow(&entry->object.vm_object,
3734                             &entry->offset, size);
3735                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
3736                         eobject = entry->object.vm_object;
3737                         if (eobject->cred != NULL) {
3738                                 /*
3739                                  * The object was not shadowed.
3740                                  */
3741                                 swap_release_by_cred(size, entry->cred);
3742                                 crfree(entry->cred);
3743                                 entry->cred = NULL;
3744                         } else if (entry->cred != NULL) {
3745                                 VM_OBJECT_LOCK(eobject);
3746                                 eobject->cred = entry->cred;
3747                                 eobject->charge = size;
3748                                 VM_OBJECT_UNLOCK(eobject);
3749                                 entry->cred = NULL;
3750                         }
3751
3752                         vm_map_lock_downgrade(map);
3753                 } else {
3754                         /*
3755                          * We're attempting to read a copy-on-write page --
3756                          * don't allow writes.
3757                          */
3758                         prot &= ~VM_PROT_WRITE;
3759                 }
3760         }
3761
3762         /*
3763          * Create an object if necessary.
3764          */
3765         if (entry->object.vm_object == NULL &&
3766             !map->system_map) {
3767                 if (vm_map_lock_upgrade(map))
3768                         goto RetryLookup;
3769                 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
3770                     atop(size));
3771                 entry->offset = 0;
3772                 if (entry->cred != NULL) {
3773                         VM_OBJECT_LOCK(entry->object.vm_object);
3774                         entry->object.vm_object->cred = entry->cred;
3775                         entry->object.vm_object->charge = size;
3776                         VM_OBJECT_UNLOCK(entry->object.vm_object);
3777                         entry->cred = NULL;
3778                 }
3779                 vm_map_lock_downgrade(map);
3780         }
3781
3782         /*
3783          * Return the object/offset from this entry.  If the entry was
3784          * copy-on-write or empty, it has been fixed up.
3785          */
3786         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
3787         *object = entry->object.vm_object;
3788
3789         *out_prot = prot;
3790         return (KERN_SUCCESS);
3791 }
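
/*
 * Example (illustrative sketch, not compiled here): because the map
 * argument is in/out (a submap may be substituted during the lookup),
 * the map passed to vm_map_lookup_done() must be the one returned
 * through the pointer, not necessarily the one originally passed in.
 * The helper name is hypothetical.
 */
#if 0
static int
example_lookup(vm_map_t map, vm_offset_t va)
{
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;
	int rv;

	rv = vm_map_lookup(&map, va, VM_PROT_READ, &entry, &object,
	    &pindex, &prot, &wired);
	if (rv != KERN_SUCCESS)
		return (rv);
	/* ... use object and pindex; the map stays read-locked ... */
	vm_map_lookup_done(map, entry);
	return (KERN_SUCCESS);
}
#endif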
3792
3793 /*
3794  *      vm_map_lookup_locked:
3795  *
3796  *      Lookup the faulting address.  A version of vm_map_lookup that returns 
3797  *      KERN_FAILURE instead of blocking on map lock or memory allocation.
3798  */
3799 int
3800 vm_map_lookup_locked(vm_map_t *var_map,         /* IN/OUT */
3801                      vm_offset_t vaddr,
3802                      vm_prot_t fault_typea,
3803                      vm_map_entry_t *out_entry, /* OUT */
3804                      vm_object_t *object,       /* OUT */
3805                      vm_pindex_t *pindex,       /* OUT */
3806                      vm_prot_t *out_prot,       /* OUT */
3807                      boolean_t *wired)          /* OUT */
3808 {
3809         vm_map_entry_t entry;
3810         vm_map_t map = *var_map;
3811         vm_prot_t prot;
3812         vm_prot_t fault_type = fault_typea;
3813
3814         /*
3815          * Lookup the faulting address.
3816          */
3817         if (!vm_map_lookup_entry(map, vaddr, out_entry))
3818                 return (KERN_INVALID_ADDRESS);
3819
3820         entry = *out_entry;
3821
3822         /*
3823          * Fail if the entry refers to a submap.
3824          */
3825         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
3826                 return (KERN_FAILURE);
3827
3828         /*
3829          * Check whether this task is allowed to have this page.
3830          */
3831         prot = entry->protection;
3832         fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
3833         if ((fault_type & prot) != fault_type)
3834                 return (KERN_PROTECTION_FAILURE);
3835         if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
3836             (entry->eflags & MAP_ENTRY_COW) &&
3837             (fault_type & VM_PROT_WRITE))
3838                 return (KERN_PROTECTION_FAILURE);
3839
3840         /*
3841          * If this page is not pageable, we have to get it for all possible
3842          * accesses.
3843          */
3844         *wired = (entry->wired_count != 0);
3845         if (*wired)
3846                 fault_type = entry->protection;
3847
3848         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
3849                 /*
3850                  * Fail if the entry was copy-on-write for a write fault.
3851                  */
3852                 if (fault_type & VM_PROT_WRITE)
3853                         return (KERN_FAILURE);
3854                 /*
3855                  * We're attempting to read a copy-on-write page --
3856                  * don't allow writes.
3857                  */
3858                 prot &= ~VM_PROT_WRITE;
3859         }
3860
3861         /*
3862          * Fail if an object should be created.
3863          */
3864         if (entry->object.vm_object == NULL && !map->system_map)
3865                 return (KERN_FAILURE);
3866
3867         /*
3868          * Return the object/offset from this entry.  If the entry was
3869          * copy-on-write or empty, it has been fixed up.
3870          */
3871         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
3872         *object = entry->object.vm_object;
3873
3874         *out_prot = prot;
3875         return (KERN_SUCCESS);
3876 }
3877
3878 /*
3879  *      vm_map_lookup_done:
3880  *
3881  *      Releases locks acquired by a vm_map_lookup
3882  *      (according to the handle returned by that lookup).
3883  */
3884 void
3885 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
3886 {
3887         /*
3888          * Unlock the main-level map
3889          */
3890         vm_map_unlock_read(map);
3891 }
3892
3893 #include "opt_ddb.h"
3894 #ifdef DDB
3895 #include <sys/kernel.h>
3896
3897 #include <ddb/ddb.h>
3898
3899 /*
3900  *      vm_map_print:   [ debug ]
3901  */
3902 DB_SHOW_COMMAND(map, vm_map_print)
3903 {
3904         static int nlines;
3905         /* XXX convert args. */
3906         vm_map_t map = (vm_map_t)addr;
3907         boolean_t full = have_addr;
3908
3909         vm_map_entry_t entry;
3910
3911         db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3912             (void *)map,
3913             (void *)map->pmap, map->nentries, map->timestamp);
3914         nlines++;
3915
3916         if (!full && db_indent)
3917                 return;
3918
3919         db_indent += 2;
3920         for (entry = map->header.next; entry != &map->header;
3921             entry = entry->next) {
3922                 db_iprintf("map entry %p: start=%p, end=%p\n",
3923                     (void *)entry, (void *)entry->start, (void *)entry->end);
3924                 nlines++;
3925                 {
3926                         static char *inheritance_name[4] =
3927                         {"share", "copy", "none", "donate_copy"};
3928
3929                         db_iprintf(" prot=%x/%x/%s",
3930                             entry->protection,
3931                             entry->max_protection,
3932                             inheritance_name[(int)(unsigned char)entry->inheritance]);
3933                         if (entry->wired_count != 0)
3934                                 db_printf(", wired");
3935                 }
3936                 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3937                         db_printf(", share=%p, offset=0x%jx\n",
3938                             (void *)entry->object.sub_map,
3939                             (uintmax_t)entry->offset);
3940                         nlines++;
3941                         if ((entry->prev == &map->header) ||
3942                             (entry->prev->object.sub_map !=
3943                                 entry->object.sub_map)) {
3944                                 db_indent += 2;
3945                                 vm_map_print((db_expr_t)(intptr_t)
3946                                              entry->object.sub_map,
3947                                              full, 0, (char *)0);
3948                                 db_indent -= 2;
3949                         }
3950                 } else {
3951                         if (entry->cred != NULL)
3952                                 db_printf(", ruid %d", entry->cred->cr_ruid);
3953                         db_printf(", object=%p, offset=0x%jx",
3954                             (void *)entry->object.vm_object,
3955                             (uintmax_t)entry->offset);
3956                         if (entry->object.vm_object && entry->object.vm_object->cred)
3957                                 db_printf(", obj ruid %d charge %jx",
3958                                     entry->object.vm_object->cred->cr_ruid,
3959                                     (uintmax_t)entry->object.vm_object->charge);
3960                         if (entry->eflags & MAP_ENTRY_COW)
3961                                 db_printf(", copy (%s)",
3962                                     (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3963                         db_printf("\n");
3964                         nlines++;
3965
3966                         if ((entry->prev == &map->header) ||
3967                             (entry->prev->object.vm_object !=
3968                                 entry->object.vm_object)) {
3969                                 db_indent += 2;
3970                                 vm_object_print((db_expr_t)(intptr_t)
3971                                                 entry->object.vm_object,
3972                                                 full, 0, (char *)0);
3973                                 nlines += 4;
3974                                 db_indent -= 2;
3975                         }
3976                 }
3977         }
3978         db_indent -= 2;
3979         if (db_indent == 0)
3980                 nlines = 0;
3981 }
3982
3983
3984 DB_SHOW_COMMAND(procvm, procvm)
3985 {
3986         struct proc *p;
3987
3988         if (have_addr) {
3989                 p = (struct proc *) addr;
3990         } else {
3991                 p = curproc;
3992         }
3993
3994         db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3995             (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3996             (void *)vmspace_pmap(p->p_vmspace));
3997
3998         vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3999 }
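
/*
 * Example ddb(4) session (output abridged; addresses and counts are
 * hypothetical):
 *
 *	db> show procvm
 *	p = 0x..., vmspace = 0x..., map = 0x..., pmap = 0x...
 *	Task map 0x...: pmap=0x..., nentries=14, version=73
 *	  map entry 0x...: start=0x..., end=0x...
 *	   prot=5/7/copy, object=0x..., offset=0x0, copy (done)
 *
 * "show map <addr>" prints the same information for an arbitrary
 * struct vm_map; giving an address also selects the full output.
 */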
4000
4001 #endif /* DDB */