1 /*
2  * Copyright (c) 1991, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      from: @(#)vm_map.c      8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66
67 /*
68  *      Virtual memory mapping module.
69  */
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/ktr.h>
74 #include <sys/lock.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/vmmeter.h>
78 #include <sys/mman.h>
79 #include <sys/vnode.h>
80 #include <sys/resourcevar.h>
81
82 #include <vm/vm.h>
83 #include <vm/vm_param.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_pager.h>
89 #include <vm/vm_kern.h>
90 #include <vm/vm_extern.h>
91 #include <vm/swap_pager.h>
92 #include <vm/uma.h>
93
94 /*
95  *      Virtual memory maps provide for the mapping, protection,
96  *      and sharing of virtual memory objects.  In addition,
97  *      this module provides for an efficient virtual copy of
98  *      memory from one map to another.
99  *
100  *      Synchronization is required prior to most operations.
101  *
102  *      Maps consist of an ordered doubly-linked list of simple
103  *      entries; a splay tree of these same entries is used to speed up lookups.
104  *
105  *      Since portions of maps are specified by start/end addresses,
106  *      which may not align with existing map entries, all
107  *      routines merely "clip" entries to these start/end values.
108  *      [That is, an entry is split into two, bordering at a
109  *      start or end value.]  Note that these clippings may not
110  *      always be necessary (as the two resulting entries are then
111  *      not changed); however, the clipping is done for convenience.
112  *
113  *      As mentioned above, virtual copy operations are performed
114  *      by copying VM object references from one map to
115  *      another, and then marking both regions as copy-on-write.
116  */
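
/*
 * A minimal clipping sketch (not part of the original file; the addresses
 * are hypothetical).  Suppose an entry covers [0x1000, 0x5000) and an
 * operation targets [0x3000, 0x4000).  With the map locked, the clip
 * macros defined later in this file split the entry so that one entry
 * exactly covers the target range:
 *
 *	vm_map_clip_start(map, entry, 0x3000);
 *	vm_map_clip_end(map, entry, 0x4000);
 *
 * leaving entries for [0x1000, 0x3000), [0x3000, 0x4000), and
 * [0x4000, 0x5000), with "entry" now denoting the middle one.
 */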
117
118 /*
119  *      vm_map_startup:
120  *
121  *      Initialize the vm_map module.  Must be called before
122  *      any other vm_map routines.
123  *
124  *      Map and entry structures are allocated from the general
125  *      purpose memory pool with some exceptions:
126  *
127  *      - The kernel map and kmem submap are allocated statically.
128  *      - Kernel map entries are allocated out of a static pool.
129  *
130  *      These restrictions are necessary since malloc() uses the
131  *      maps and requires map entries.
132  */
133
134 static uma_zone_t mapentzone;
135 static uma_zone_t kmapentzone;
136 static uma_zone_t mapzone;
137 static uma_zone_t vmspace_zone;
138 static struct vm_object kmapentobj;
139 static void vmspace_zinit(void *mem, int size);
140 static void vmspace_zfini(void *mem, int size);
141 static void vm_map_zinit(void *mem, int size);
142 static void vm_map_zfini(void *mem, int size);
143 static void _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max);
144
145 #ifdef INVARIANTS
146 static void vm_map_zdtor(void *mem, int size, void *arg);
147 static void vmspace_zdtor(void *mem, int size, void *arg);
148 #endif
149
150 void
151 vm_map_startup(void)
152 {
153         mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
154 #ifdef INVARIANTS
155             vm_map_zdtor,
156 #else
157             NULL,
158 #endif
159             vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
160         uma_prealloc(mapzone, MAX_KMAP);
161         kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry), 
162             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
163             UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
164         uma_prealloc(kmapentzone, MAX_KMAPENT);
165         mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry), 
166             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
167         uma_prealloc(mapentzone, MAX_MAPENT);
168 }
169
170 static void
171 vmspace_zfini(void *mem, int size)
172 {
173         struct vmspace *vm;
174
175         vm = (struct vmspace *)mem;
176
177         vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map));
178 }
179
180 static void
181 vmspace_zinit(void *mem, int size)
182 {
183         struct vmspace *vm;
184
185         vm = (struct vmspace *)mem;
186
187         vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map));
188 }
189
190 static void
191 vm_map_zfini(void *mem, int size)
192 {
193         vm_map_t map;
194
195         map = (vm_map_t)mem;
196
197         lockdestroy(&map->lock);
198 }
199
200 static void
201 vm_map_zinit(void *mem, int size)
202 {
203         vm_map_t map;
204
205         map = (vm_map_t)mem;
206         map->nentries = 0;
207         map->size = 0;
208         map->infork = 0;
209         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
210 }
211
212 #ifdef INVARIANTS
213 static void
214 vmspace_zdtor(void *mem, int size, void *arg)
215 {
216         struct vmspace *vm;
217
218         vm = (struct vmspace *)mem;
219
220         vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
221 }
222 static void
223 vm_map_zdtor(void *mem, int size, void *arg)
224 {
225         vm_map_t map;
226
227         map = (vm_map_t)mem;
228         KASSERT(map->nentries == 0,
229             ("map %p nentries == %d on free.", 
230             map, map->nentries));
231         KASSERT(map->size == 0,
232             ("map %p size == %lu on free.",
233             map, (unsigned long)map->size));
234         KASSERT(map->infork == 0,
235             ("map %p infork == %d on free.",
236             map, map->infork));
237 }
238 #endif  /* INVARIANTS */
239
240 /*
241  * Allocate a vmspace structure, including a vm_map and pmap,
242  * and initialize those structures.  The refcnt is set to 1.
243  * The remaining fields must be initialized by the caller.
244  */
245 struct vmspace *
246 vmspace_alloc(min, max)
247         vm_offset_t min, max;
248 {
249         struct vmspace *vm;
250
251         GIANT_REQUIRED;
252         vm = uma_zalloc(vmspace_zone, M_WAITOK);
253         CTR1(KTR_VM, "vmspace_alloc: %p", vm);
254         _vm_map_init(&vm->vm_map, min, max);
255         pmap_pinit(vmspace_pmap(vm));
256         vm->vm_map.pmap = vmspace_pmap(vm);             /* XXX */
257         vm->vm_refcnt = 1;
258         vm->vm_shm = NULL;
259         vm->vm_freer = NULL;
260         return (vm);
261 }
262
263 void
264 vm_init2(void) 
265 {
266         uma_zone_set_obj(kmapentzone, &kmapentobj, lmin(cnt.v_page_count,
267             (VM_MAX_KERNEL_ADDRESS - KERNBASE) / PAGE_SIZE) / 8);
268         vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
269 #ifdef INVARIANTS
270             vmspace_zdtor,
271 #else
272             NULL,
273 #endif
274             vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
275         pmap_init2();
276         vm_object_init2();
277 }
278
279 static __inline void
280 vmspace_dofree(struct vmspace *vm)
281 {
282         CTR1(KTR_VM, "vmspace_free: %p", vm);
283         /*
284          * Lock the map, to wait out all other references to it.
285          * Delete all of the mappings and pages they hold, then call
286          * the pmap module to reclaim anything left.
287          */
288         vm_map_lock(&vm->vm_map);
289         (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
290             vm->vm_map.max_offset);
291         vm_map_unlock(&vm->vm_map);
292
293         pmap_release(vmspace_pmap(vm));
294         uma_zfree(vmspace_zone, vm);
295 }
296
297 void
298 vmspace_free(struct vmspace *vm)
299 {
300         GIANT_REQUIRED;
301
302         if (vm->vm_refcnt == 0)
303                 panic("vmspace_free: attempt to free already freed vmspace");
304
305         if (--vm->vm_refcnt == 0)
306                 vmspace_dofree(vm);
307 }
308
309 void
310 vmspace_exitfree(struct proc *p)
311 {
312         struct vmspace *vm;
313
314         GIANT_REQUIRED;
315         if (p == p->p_vmspace->vm_freer) {
316                 vm = p->p_vmspace;
317                 p->p_vmspace = NULL;
318                 vmspace_dofree(vm);
319         }
320 }
321
322 /*
323  * vmspace_swap_count() - count the approximate swap usage in pages for a
324  *                        vmspace.
325  *
326  *      Swap usage is determined by taking the proportional swap used by
327  *      VM objects backing the VM map.  To make up for fractional losses,
328  *      if the VM object has any swap use at all the associated map entries
329  *      count for at least 1 swap page.
330  */
331 int
332 vmspace_swap_count(struct vmspace *vmspace)
333 {
334         vm_map_t map = &vmspace->vm_map;
335         vm_map_entry_t cur;
336         int count = 0;
337
338         vm_map_lock_read(map);
339         for (cur = map->header.next; cur != &map->header; cur = cur->next) {
340                 vm_object_t object;
341
342                 if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
343                     (object = cur->object.vm_object) != NULL &&
344                     object->type == OBJT_SWAP
345                 ) {
346                         int n = (cur->end - cur->start) / PAGE_SIZE;
347
348                         if (object->un_pager.swp.swp_bcount) {
349                                 count += object->un_pager.swp.swp_bcount *
350                                     SWAP_META_PAGES * n / object->size + 1;
351                         }
352                 }
353         }
354         vm_map_unlock_read(map);
355         return (count);
356 }
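
/*
 * A worked example of the estimate above (hypothetical numbers, assuming
 * SWAP_META_PAGES is 16): for an entry mapping 64 pages of a 256-page
 * OBJT_SWAP object with swp_bcount == 2, the loop adds
 * 2 * 16 * 64 / 256 + 1 = 9 pages to the count.
 */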
357
358 void
359 _vm_map_lock(vm_map_t map, const char *file, int line)
360 {
361         int error;
362
363         if (map->system_map)
364                 GIANT_REQUIRED;
365         error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
366         KASSERT(error == 0, ("%s: failed to get lock", __func__));
367         map->timestamp++;
368 }
369
370 void
371 _vm_map_unlock(vm_map_t map, const char *file, int line)
372 {
373
374         lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
375 }
376
377 void
378 _vm_map_lock_read(vm_map_t map, const char *file, int line)
379 {
380         int error;
381
382         if (map->system_map)
383                 GIANT_REQUIRED;
384         error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
385         KASSERT(error == 0, ("%s: failed to get lock", __func__));
386 }
387
388 void
389 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
390 {
391
392         lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
393 }
394
395 int
396 _vm_map_trylock(vm_map_t map, const char *file, int line)
397 {
398         int error;
399
400         if (map->system_map)
401                 GIANT_REQUIRED;
402         error = lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, NULL, curthread);
403         return (error == 0);
404 }
405
406 int
407 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
408 {
409
410         KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
411                 ("%s: lock not held", __func__));
412         map->timestamp++;
413         return (0);
414 }
415
416 void
417 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
418 {
419
420         KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
421                 ("%s: lock not held", __func__));
422 }
423
424 /*
425  *      vm_map_unlock_and_wait:
426  */
427 int
428 vm_map_unlock_and_wait(vm_map_t map, boolean_t user_wait)
429 {
430         int retval;
431
432         mtx_lock(&Giant);
433         vm_map_unlock(map);
434         retval = tsleep(&map->root, PVM, "vmmapw", 0);
435         mtx_unlock(&Giant);
436         return (retval);
437 }
438
439 /*
440  *      vm_map_wakeup:
441  */
442 void
443 vm_map_wakeup(vm_map_t map)
444 {
445
446         /*
447          * Acquire and release Giant to prevent a wakeup() from being
448          * performed (and lost) between the vm_map_unlock() and the
449          * tsleep() in vm_map_unlock_and_wait().
450          */
451         mtx_lock(&Giant);
452         mtx_unlock(&Giant);
453         wakeup(&map->root);
454 }
455
456 long
457 vmspace_resident_count(struct vmspace *vmspace)
458 {
459         return pmap_resident_count(vmspace_pmap(vmspace));
460 }
461
462 /*
463  *      vm_map_create:
464  *
465  *      Creates and returns a new empty VM map with
466  *      the given physical map structure, and having
467  *      the given lower and upper address bounds.
468  */
469 vm_map_t
470 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
471 {
472         vm_map_t result;
473
474         result = uma_zalloc(mapzone, M_WAITOK);
475         CTR1(KTR_VM, "vm_map_create: %p", result);
476         _vm_map_init(result, min, max);
477         result->pmap = pmap;
478         return (result);
479 }
480
481 /*
482  * Initialize an existing vm_map structure
483  * such as that in the vmspace structure.
484  * The pmap is set elsewhere.
485  */
486 static void
487 _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
488 {
489
490         map->header.next = map->header.prev = &map->header;
491         map->needs_wakeup = FALSE;
492         map->system_map = 0;
493         map->min_offset = min;
494         map->max_offset = max;
495         map->first_free = &map->header;
496         map->root = NULL;
497         map->timestamp = 0;
498 }
499
500 void
501 vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
502 {
503         _vm_map_init(map, min, max);
504         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
505 }
506
507 /*
508  *      vm_map_entry_dispose:   [ internal use only ]
509  *
510  *      Inverse of vm_map_entry_create.
511  */
512 static void
513 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
514 {
515         uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
516 }
517
518 /*
519  *      vm_map_entry_create:    [ internal use only ]
520  *
521  *      Allocates a VM map entry for insertion.
522  *      No entry fields are filled in.
523  */
524 static vm_map_entry_t
525 vm_map_entry_create(vm_map_t map)
526 {
527         vm_map_entry_t new_entry;
528
529         if (map->system_map)
530                 new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
531         else
532                 new_entry = uma_zalloc(mapentzone, M_WAITOK);
533         if (new_entry == NULL)
534                 panic("vm_map_entry_create: kernel resources exhausted");
535         return (new_entry);
536 }
537
538 /*
539  *      vm_map_entry_set_behavior:
540  *
541  *      Set the expected access behavior, either normal, random, or
542  *      sequential.
543  */
544 static __inline void
545 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
546 {
547         entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
548             (behavior & MAP_ENTRY_BEHAV_MASK);
549 }
550
551 /*
552  *      vm_map_entry_splay:
553  *
554  *      Implements Sleator and Tarjan's top-down splay algorithm.  Returns
555  *      the vm_map_entry containing the given address.  If, however, that
556  *      address is not found in the vm_map, returns a vm_map_entry that is
557  *      adjacent to the address, coming before or after it.
558  */
559 static vm_map_entry_t
560 vm_map_entry_splay(vm_offset_t address, vm_map_entry_t root)
561 {
562         struct vm_map_entry dummy;
563         vm_map_entry_t lefttreemax, righttreemin, y;
564
565         if (root == NULL)
566                 return (root);
567         lefttreemax = righttreemin = &dummy;
568         for (;; root = y) {
569                 if (address < root->start) {
570                         if ((y = root->left) == NULL)
571                                 break;
572                         if (address < y->start) {
573                                 /* Rotate right. */
574                                 root->left = y->right;
575                                 y->right = root;
576                                 root = y;
577                                 if ((y = root->left) == NULL)
578                                         break;
579                         }
580                         /* Link into the new root's right tree. */
581                         righttreemin->left = root;
582                         righttreemin = root;
583                 } else if (address >= root->end) {
584                         if ((y = root->right) == NULL)
585                                 break;
586                         if (address >= y->end) {
587                                 /* Rotate left. */
588                                 root->right = y->left;
589                                 y->left = root;
590                                 root = y;
591                                 if ((y = root->right) == NULL)
592                                         break;
593                         }
594                         /* Link into the new root's left tree. */
595                         lefttreemax->right = root;
596                         lefttreemax = root;
597                 } else
598                         break;
599         }
600         /* Assemble the new root. */
601         lefttreemax->right = root->left;
602         righttreemin->left = root->right;
603         root->left = dummy.right;
604         root->right = dummy.left;
605         return (root);
606 }
607
608 /*
609  *      vm_map_entry_{un,}link:
610  *
611  *      Insert/remove entries from maps.
612  */
613 static void
614 vm_map_entry_link(vm_map_t map,
615                   vm_map_entry_t after_where,
616                   vm_map_entry_t entry)
617 {
618
619         CTR4(KTR_VM,
620             "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
621             map->nentries, entry, after_where);
622         map->nentries++;
623         entry->prev = after_where;
624         entry->next = after_where->next;
625         entry->next->prev = entry;
626         after_where->next = entry;
627
628         if (after_where != &map->header) {
629                 if (after_where != map->root)
630                         vm_map_entry_splay(after_where->start, map->root);
631                 entry->right = after_where->right;
632                 entry->left = after_where;
633                 after_where->right = NULL;
634         } else {
635                 entry->right = map->root;
636                 entry->left = NULL;
637         }
638         map->root = entry;
639 }
640
641 static void
642 vm_map_entry_unlink(vm_map_t map,
643                     vm_map_entry_t entry)
644 {
645         vm_map_entry_t next, prev, root;
646
647         if (entry != map->root)
648                 vm_map_entry_splay(entry->start, map->root);
649         if (entry->left == NULL)
650                 root = entry->right;
651         else {
652                 root = vm_map_entry_splay(entry->start, entry->left);
653                 root->right = entry->right;
654         }
655         map->root = root;
656
657         prev = entry->prev;
658         next = entry->next;
659         next->prev = prev;
660         prev->next = next;
661         map->nentries--;
662         CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
663             map->nentries, entry);
664 }
665
666 /*
667  *      vm_map_lookup_entry:    [ internal use only ]
668  *
669  *      Finds the map entry containing (or
670  *      immediately preceding) the specified address
671  *      in the given map; the entry is returned
672  *      in the "entry" parameter.  The boolean
673  *      result indicates whether the address is
674  *      actually contained in the map.
675  */
676 boolean_t
677 vm_map_lookup_entry(
678         vm_map_t map,
679         vm_offset_t address,
680         vm_map_entry_t *entry)  /* OUT */
681 {
682         vm_map_entry_t cur;
683
684         cur = vm_map_entry_splay(address, map->root);
685         if (cur == NULL)
686                 *entry = &map->header;
687         else {
688                 map->root = cur;
689
690                 if (address >= cur->start) {
691                         *entry = cur;
692                         if (cur->end > address)
693                                 return (TRUE);
694                 } else
695                         *entry = cur->prev;
696         }
697         return (FALSE);
698 }
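
/*
 * A minimal caller sketch (not from the original file; "addr" is a
 * hypothetical address).  The map must already be locked by the caller:
 *
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		... addr lies within [entry->start, entry->end) ...
 *	} else {
 *		... entry is the entry preceding addr, or &map->header
 *		    if no entry precedes it ...
 *	}
 */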
699
700 /*
701  *      vm_map_insert:
702  *
703  *      Inserts the given whole VM object into the target
704  *      map at the specified address range.  The object's
705  *      size should match that of the address range.
706  *
707  *      Requires that the map be locked, and leaves it so.
708  *
709  *      If object is non-NULL, ref count must be bumped by caller
710  *      prior to making call to account for the new entry.
711  */
712 int
713 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
714               vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
715               int cow)
716 {
717         vm_map_entry_t new_entry;
718         vm_map_entry_t prev_entry;
719         vm_map_entry_t temp_entry;
720         vm_eflags_t protoeflags;
721
722         /*
723          * Check that the start and end points are not bogus.
724          */
725         if ((start < map->min_offset) || (end > map->max_offset) ||
726             (start >= end))
727                 return (KERN_INVALID_ADDRESS);
728
729         /*
730          * Find the entry prior to the proposed starting address; if it's part
731          * of an existing entry, this range is bogus.
732          */
733         if (vm_map_lookup_entry(map, start, &temp_entry))
734                 return (KERN_NO_SPACE);
735
736         prev_entry = temp_entry;
737
738         /*
739          * Assert that the next entry doesn't overlap the end point.
740          */
741         if ((prev_entry->next != &map->header) &&
742             (prev_entry->next->start < end))
743                 return (KERN_NO_SPACE);
744
745         protoeflags = 0;
746
747         if (cow & MAP_COPY_ON_WRITE)
748                 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
749
750         if (cow & MAP_NOFAULT) {
751                 protoeflags |= MAP_ENTRY_NOFAULT;
752
753                 KASSERT(object == NULL,
754                         ("vm_map_insert: paradoxical MAP_NOFAULT request"));
755         }
756         if (cow & MAP_DISABLE_SYNCER)
757                 protoeflags |= MAP_ENTRY_NOSYNC;
758         if (cow & MAP_DISABLE_COREDUMP)
759                 protoeflags |= MAP_ENTRY_NOCOREDUMP;
760
761         if (object) {
762                 /*
763                  * When object is non-NULL, it could be shared with another
764                  * process.  We have to set or clear OBJ_ONEMAPPING 
765                  * appropriately.
766                  */
767                 vm_object_lock(object);
768                 if ((object->ref_count > 1) || (object->shadow_count != 0)) {
769                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
770                 }
771                 vm_object_unlock(object);
772         }
773         else if ((prev_entry != &map->header) &&
774                  (prev_entry->eflags == protoeflags) &&
775                  (prev_entry->end == start) &&
776                  (prev_entry->wired_count == 0) &&
777                  ((prev_entry->object.vm_object == NULL) ||
778                   vm_object_coalesce(prev_entry->object.vm_object,
779                                      OFF_TO_IDX(prev_entry->offset),
780                                      (vm_size_t)(prev_entry->end - prev_entry->start),
781                                      (vm_size_t)(end - prev_entry->end)))) {
782                 /*
783                  * We were able to extend the object.  Determine if we
784                  * can extend the previous map entry to include the 
785                  * new range as well.
786                  */
787                 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
788                     (prev_entry->protection == prot) &&
789                     (prev_entry->max_protection == max)) {
790                         map->size += (end - prev_entry->end);
791                         prev_entry->end = end;
792                         vm_map_simplify_entry(map, prev_entry);
793                         return (KERN_SUCCESS);
794                 }
795
796                 /*
797                  * If we can extend the object but cannot extend the
798                  * map entry, we have to create a new map entry.  We
799                  * must bump the ref count on the extended object to
800                  * account for it.  object may be NULL.
801                  */
802                 object = prev_entry->object.vm_object;
803                 offset = prev_entry->offset +
804                         (prev_entry->end - prev_entry->start);
805                 vm_object_reference(object);
806         }
807
808         /*
809          * NOTE: if conditionals fail, object can be NULL here.  This occurs
810          * in things like the buffer map where we manage kva but do not manage
811          * backing objects.
812          */
813
814         /*
815          * Create a new entry
816          */
817         new_entry = vm_map_entry_create(map);
818         new_entry->start = start;
819         new_entry->end = end;
820
821         new_entry->eflags = protoeflags;
822         new_entry->object.vm_object = object;
823         new_entry->offset = offset;
824         new_entry->avail_ssize = 0;
825
826         new_entry->inheritance = VM_INHERIT_DEFAULT;
827         new_entry->protection = prot;
828         new_entry->max_protection = max;
829         new_entry->wired_count = 0;
830
831         /*
832          * Insert the new entry into the list
833          */
834         vm_map_entry_link(map, prev_entry, new_entry);
835         map->size += new_entry->end - new_entry->start;
836
837         /*
838          * Update the free space hint
839          */
840         if ((map->first_free == prev_entry) &&
841             (prev_entry->end >= new_entry->start)) {
842                 map->first_free = new_entry;
843         }
844
845 #if 0
846         /*
847          * Temporarily removed to avoid MAP_STACK panic, due to
848          * MAP_STACK being a huge hack.  Will be added back in
849          * when MAP_STACK (and the user stack mapping) is fixed.
850          */
851         /*
852          * It may be possible to simplify the entry
853          */
854         vm_map_simplify_entry(map, new_entry);
855 #endif
856
857         if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
858                 mtx_lock(&Giant);
859                 pmap_object_init_pt(map->pmap, start,
860                                     object, OFF_TO_IDX(offset), end - start,
861                                     cow & MAP_PREFAULT_PARTIAL);
862                 mtx_unlock(&Giant);
863         }
864
865         return (KERN_SUCCESS);
866 }
867
868 /*
869  * Find sufficient space for `length' bytes in the given map, starting at
870  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
871  */
872 int
873 vm_map_findspace(
874         vm_map_t map,
875         vm_offset_t start,
876         vm_size_t length,
877         vm_offset_t *addr)
878 {
879         vm_map_entry_t entry, next;
880         vm_offset_t end;
881
882         if (start < map->min_offset)
883                 start = map->min_offset;
884         if (start > map->max_offset)
885                 return (1);
886
887         /*
888          * Look for the first possible address; if there's already something
889          * at this address, we have to start after it.
890          */
891         if (start == map->min_offset) {
892                 if ((entry = map->first_free) != &map->header)
893                         start = entry->end;
894         } else {
895                 vm_map_entry_t tmp;
896
897                 if (vm_map_lookup_entry(map, start, &tmp))
898                         start = tmp->end;
899                 entry = tmp;
900         }
901
902         /*
903          * Look through the rest of the map, trying to fit a new region in the
904          * gap between existing regions, or after the very last region.
905          */
906         for (;; start = (entry = next)->end) {
907                 /*
908                  * Find the end of the proposed new region.  Be sure we didn't
909                  * go beyond the end of the map, or wrap around the address;
910                  * if so, we lose.  Otherwise, if this is the last entry, or
911                  * if the proposed new region fits before the next entry, we
912                  * win.
913                  */
914                 end = start + length;
915                 if (end > map->max_offset || end < start)
916                         return (1);
917                 next = entry->next;
918                 if (next == &map->header || next->start >= end)
919                         break;
920         }
921         *addr = start;
922         if (map == kernel_map) {
923                 vm_offset_t ksize;
924                 if ((ksize = round_page(start + length)) > kernel_vm_end) {
925                         mtx_lock(&Giant);
926                         pmap_growkernel(ksize);
927                         mtx_unlock(&Giant);
928                 }
929         }
930         return (0);
931 }
932
933 /*
934  *      vm_map_find finds an unallocated region in the target address
935  *      map with the given length.  The search is defined to be
936  *      first-fit from the specified address; the region found is
937  *      returned in the same parameter.
938  *
939  *      If object is non-NULL, ref count must be bumped by caller
940  *      prior to making call to account for the new entry.
941  */
942 int
943 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
944             vm_offset_t *addr,  /* IN/OUT */
945             vm_size_t length, boolean_t find_space, vm_prot_t prot,
946             vm_prot_t max, int cow)
947 {
948         vm_offset_t start;
949         int result, s = 0;
950
951         start = *addr;
952
953         if (map == kmem_map)
954                 s = splvm();
955
956         vm_map_lock(map);
957         if (find_space) {
958                 if (vm_map_findspace(map, start, length, addr)) {
959                         vm_map_unlock(map);
960                         if (map == kmem_map)
961                                 splx(s);
962                         return (KERN_NO_SPACE);
963                 }
964                 start = *addr;
965         }
966         result = vm_map_insert(map, object, offset,
967                 start, start + length, prot, max, cow);
968         vm_map_unlock(map);
969
970         if (map == kmem_map)
971                 splx(s);
972
973         return (result);
974 }
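
/*
 * A minimal caller sketch (the names "some_map", "size", and "where" are
 * hypothetical; not taken from the original file).  This is the common
 * pattern of letting vm_map_find() pick an address anywhere in the map:
 *
 *	vm_offset_t where;
 *
 *	where = vm_map_min(some_map);
 *	if (vm_map_find(some_map, NULL, 0, &where, size, TRUE,
 *	    VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)
 *		... handle failure (e.g., KERN_NO_SPACE) ...
 *	... "where" now holds the start of the new mapping ...
 */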
975
976 /*
977  *      vm_map_simplify_entry:
978  *
979  *      Simplify the given map entry by merging with either neighbor.  This
980  *      routine also has the ability to merge with both neighbors.
981  *
982  *      The map must be locked.
983  *
984  *      This routine guarantees that the passed entry remains valid (though
985  *      possibly extended).  When merging, this routine may delete one or
986  *      both neighbors.
987  */
988 void
989 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
990 {
991         vm_map_entry_t next, prev;
992         vm_size_t prevsize, esize;
993
994         if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP))
995                 return;
996
997         prev = entry->prev;
998         if (prev != &map->header) {
999                 prevsize = prev->end - prev->start;
1000                 if ( (prev->end == entry->start) &&
1001                      (prev->object.vm_object == entry->object.vm_object) &&
1002                      (!prev->object.vm_object ||
1003                         (prev->offset + prevsize == entry->offset)) &&
1004                      (prev->eflags == entry->eflags) &&
1005                      (prev->protection == entry->protection) &&
1006                      (prev->max_protection == entry->max_protection) &&
1007                      (prev->inheritance == entry->inheritance) &&
1008                      (prev->wired_count == entry->wired_count)) {
1009                         if (map->first_free == prev)
1010                                 map->first_free = entry;
1011                         vm_map_entry_unlink(map, prev);
1012                         entry->start = prev->start;
1013                         entry->offset = prev->offset;
1014                         if (prev->object.vm_object)
1015                                 vm_object_deallocate(prev->object.vm_object);
1016                         vm_map_entry_dispose(map, prev);
1017                 }
1018         }
1019
1020         next = entry->next;
1021         if (next != &map->header) {
1022                 esize = entry->end - entry->start;
1023                 if ((entry->end == next->start) &&
1024                     (next->object.vm_object == entry->object.vm_object) &&
1025                      (!entry->object.vm_object ||
1026                         (entry->offset + esize == next->offset)) &&
1027                     (next->eflags == entry->eflags) &&
1028                     (next->protection == entry->protection) &&
1029                     (next->max_protection == entry->max_protection) &&
1030                     (next->inheritance == entry->inheritance) &&
1031                     (next->wired_count == entry->wired_count)) {
1032                         if (map->first_free == next)
1033                                 map->first_free = entry;
1034                         vm_map_entry_unlink(map, next);
1035                         entry->end = next->end;
1036                         if (next->object.vm_object)
1037                                 vm_object_deallocate(next->object.vm_object);
1038                         vm_map_entry_dispose(map, next);
1039                 }
1040         }
1041 }
1042 /*
1043  *      vm_map_clip_start:      [ internal use only ]
1044  *
1045  *      Asserts that the given entry begins at or after
1046  *      the specified address; if necessary,
1047  *      it splits the entry into two.
1048  */
1049 #define vm_map_clip_start(map, entry, startaddr) \
1050 { \
1051         if (startaddr > entry->start) \
1052                 _vm_map_clip_start(map, entry, startaddr); \
1053 }
1054
1055 /*
1056  *      This routine is called only when it is known that
1057  *      the entry must be split.
1058  */
1059 static void
1060 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
1061 {
1062         vm_map_entry_t new_entry;
1063
1064         /*
1065          * Split off the front portion -- note that we must insert the new
1066          * entry BEFORE this one, so that this entry has the specified
1067          * starting address.
1068          */
1069         vm_map_simplify_entry(map, entry);
1070
1071         /*
1072          * If there is no object backing this entry, we might as well create
1073          * one now.  If we defer it, an object can get created after the map
1074          * is clipped, and individual objects will be created for the split-up
1075          * map.  This is a bit of a hack, but is also about the best place to
1076          * put this improvement.
1077          */
1078         if (entry->object.vm_object == NULL && !map->system_map) {
1079                 vm_object_t object;
1080                 object = vm_object_allocate(OBJT_DEFAULT,
1081                                 atop(entry->end - entry->start));
1082                 entry->object.vm_object = object;
1083                 entry->offset = 0;
1084         }
1085
1086         new_entry = vm_map_entry_create(map);
1087         *new_entry = *entry;
1088
1089         new_entry->end = start;
1090         entry->offset += (start - entry->start);
1091         entry->start = start;
1092
1093         vm_map_entry_link(map, entry->prev, new_entry);
1094
1095         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1096                 vm_object_reference(new_entry->object.vm_object);
1097         }
1098 }
1099
1100 /*
1101  *      vm_map_clip_end:        [ internal use only ]
1102  *
1103  *      Asserts that the given entry ends at or before
1104  *      the specified address; if necessary,
1105  *      it splits the entry into two.
1106  */
1107 #define vm_map_clip_end(map, entry, endaddr) \
1108 { \
1109         if (endaddr < entry->end) \
1110                 _vm_map_clip_end(map, entry, endaddr); \
1111 }
1112
1113 /*
1114  *      This routine is called only when it is known that
1115  *      the entry must be split.
1116  */
1117 static void
1118 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1119 {
1120         vm_map_entry_t new_entry;
1121
1122         /*
1123          * If there is no object backing this entry, we might as well create
1124          * one now.  If we defer it, an object can get created after the map
1125          * is clipped, and individual objects will be created for the split-up
1126          * map.  This is a bit of a hack, but is also about the best place to
1127          * put this improvement.
1128          */
1129         if (entry->object.vm_object == NULL && !map->system_map) {
1130                 vm_object_t object;
1131                 object = vm_object_allocate(OBJT_DEFAULT,
1132                                 atop(entry->end - entry->start));
1133                 entry->object.vm_object = object;
1134                 entry->offset = 0;
1135         }
1136
1137         /*
1138          * Create a new entry and insert it AFTER the specified entry
1139          */
1140         new_entry = vm_map_entry_create(map);
1141         *new_entry = *entry;
1142
1143         new_entry->start = entry->end = end;
1144         new_entry->offset += (end - entry->start);
1145
1146         vm_map_entry_link(map, entry, new_entry);
1147
1148         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1149                 vm_object_reference(new_entry->object.vm_object);
1150         }
1151 }
1152
1153 /*
1154  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
1155  *
1156  *      Asserts that the starting and ending region
1157  *      addresses fall within the valid range of the map.
1158  */
1159 #define VM_MAP_RANGE_CHECK(map, start, end)             \
1160                 {                                       \
1161                 if (start < vm_map_min(map))            \
1162                         start = vm_map_min(map);        \
1163                 if (end > vm_map_max(map))              \
1164                         end = vm_map_max(map);          \
1165                 if (start > end)                        \
1166                         start = end;                    \
1167                 }
1168
1169 /*
1170  *      vm_map_submap:          [ kernel use only ]
1171  *
1172  *      Mark the given range as handled by a subordinate map.
1173  *
1174  *      This range must have been created with vm_map_find,
1175  *      and no other operations may have been performed on this
1176  *      range prior to calling vm_map_submap.
1177  *
1178  *      Only a limited number of operations can be performed
1179  *      within this range after calling vm_map_submap:
1180  *              vm_fault
1181  *      [Don't try vm_map_copy!]
1182  *
1183  *      To remove a submapping, one must first remove the
1184  *      range from the superior map, and then destroy the
1185  *      submap (if desired).  [Better yet, don't try it.]
1186  */
1187 int
1188 vm_map_submap(
1189         vm_map_t map,
1190         vm_offset_t start,
1191         vm_offset_t end,
1192         vm_map_t submap)
1193 {
1194         vm_map_entry_t entry;
1195         int result = KERN_INVALID_ARGUMENT;
1196
1197         vm_map_lock(map);
1198
1199         VM_MAP_RANGE_CHECK(map, start, end);
1200
1201         if (vm_map_lookup_entry(map, start, &entry)) {
1202                 vm_map_clip_start(map, entry, start);
1203         } else
1204                 entry = entry->next;
1205
1206         vm_map_clip_end(map, entry, end);
1207
1208         if ((entry->start == start) && (entry->end == end) &&
1209             ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1210             (entry->object.vm_object == NULL)) {
1211                 entry->object.sub_map = submap;
1212                 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1213                 result = KERN_SUCCESS;
1214         }
1215         vm_map_unlock(map);
1216
1217         return (result);
1218 }
1219
1220 /*
1221  *      vm_map_protect:
1222  *
1223  *      Sets the protection of the specified address
1224  *      region in the target map.  If "set_max" is
1225  *      specified, the maximum protection is to be set;
1226  *      otherwise, only the current protection is affected.
1227  */
1228 int
1229 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1230                vm_prot_t new_prot, boolean_t set_max)
1231 {
1232         vm_map_entry_t current;
1233         vm_map_entry_t entry;
1234
1235         vm_map_lock(map);
1236
1237         VM_MAP_RANGE_CHECK(map, start, end);
1238
1239         if (vm_map_lookup_entry(map, start, &entry)) {
1240                 vm_map_clip_start(map, entry, start);
1241         } else {
1242                 entry = entry->next;
1243         }
1244
1245         /*
1246          * Make a first pass to check for protection violations.
1247          */
1248         current = entry;
1249         while ((current != &map->header) && (current->start < end)) {
1250                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1251                         vm_map_unlock(map);
1252                         return (KERN_INVALID_ARGUMENT);
1253                 }
1254                 if ((new_prot & current->max_protection) != new_prot) {
1255                         vm_map_unlock(map);
1256                         return (KERN_PROTECTION_FAILURE);
1257                 }
1258                 current = current->next;
1259         }
1260
1261         /*
1262          * Go back and fix up protections. [Note that clipping is not
1263          * necessary the second time.]
1264          */
1265         current = entry;
1266         while ((current != &map->header) && (current->start < end)) {
1267                 vm_prot_t old_prot;
1268
1269                 vm_map_clip_end(map, current, end);
1270
1271                 old_prot = current->protection;
1272                 if (set_max)
1273                         current->protection =
1274                             (current->max_protection = new_prot) &
1275                             old_prot;
1276                 else
1277                         current->protection = new_prot;
1278
1279                 /*
1280                  * Update physical map if necessary. Worry about copy-on-write
1281                  * here -- CHECK THIS XXX
1282                  */
1283                 if (current->protection != old_prot) {
1284                         mtx_lock(&Giant);
1285 #define MASK(entry)     (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1286                                                         VM_PROT_ALL)
1287                         pmap_protect(map->pmap, current->start,
1288                             current->end,
1289                             current->protection & MASK(current));
1290 #undef  MASK
1291                         mtx_unlock(&Giant);
1292                 }
1293                 vm_map_simplify_entry(map, current);
1294                 current = current->next;
1295         }
1296         vm_map_unlock(map);
1297         return (KERN_SUCCESS);
1298 }
1299
1300 /*
1301  *      vm_map_madvise:
1302  *
1303  *      This routine traverses a process's map handling the madvise
1304  *      system call.  Advisories are classified as either those affecting
1305  *      the vm_map_entry structure, or those affecting the underlying
1306  *      objects.
1307  */
1308 int
1309 vm_map_madvise(
1310         vm_map_t map,
1311         vm_offset_t start, 
1312         vm_offset_t end,
1313         int behav)
1314 {
1315         vm_map_entry_t current, entry;
1316         int modify_map = 0;
1317
1318         /*
1319          * Some madvise calls directly modify the vm_map_entry, in which case
1320          * we need to use an exclusive lock on the map and we need to perform 
1321          * various clipping operations.  Otherwise we only need a read-lock
1322          * on the map.
1323          */
1324         switch(behav) {
1325         case MADV_NORMAL:
1326         case MADV_SEQUENTIAL:
1327         case MADV_RANDOM:
1328         case MADV_NOSYNC:
1329         case MADV_AUTOSYNC:
1330         case MADV_NOCORE:
1331         case MADV_CORE:
1332                 modify_map = 1;
1333                 vm_map_lock(map);
1334                 break;
1335         case MADV_WILLNEED:
1336         case MADV_DONTNEED:
1337         case MADV_FREE:
1338                 vm_map_lock_read(map);
1339                 break;
1340         default:
1341                 return (KERN_INVALID_ARGUMENT);
1342         }
1343
1344         /*
1345          * Locate starting entry and clip if necessary.
1346          */
1347         VM_MAP_RANGE_CHECK(map, start, end);
1348
1349         if (vm_map_lookup_entry(map, start, &entry)) {
1350                 if (modify_map)
1351                         vm_map_clip_start(map, entry, start);
1352         } else {
1353                 entry = entry->next;
1354         }
1355
1356         if (modify_map) {
1357                 /*
1358                  * madvise behaviors that are implemented in the vm_map_entry.
1359                  *
1360                  * We clip the vm_map_entry so that behavioral changes are
1361                  * limited to the specified address range.
1362                  */
1363                 for (current = entry;
1364                      (current != &map->header) && (current->start < end);
1365                      current = current->next
1366                 ) {
1367                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1368                                 continue;
1369
1370                         vm_map_clip_end(map, current, end);
1371
1372                         switch (behav) {
1373                         case MADV_NORMAL:
1374                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1375                                 break;
1376                         case MADV_SEQUENTIAL:
1377                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1378                                 break;
1379                         case MADV_RANDOM:
1380                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1381                                 break;
1382                         case MADV_NOSYNC:
1383                                 current->eflags |= MAP_ENTRY_NOSYNC;
1384                                 break;
1385                         case MADV_AUTOSYNC:
1386                                 current->eflags &= ~MAP_ENTRY_NOSYNC;
1387                                 break;
1388                         case MADV_NOCORE:
1389                                 current->eflags |= MAP_ENTRY_NOCOREDUMP;
1390                                 break;
1391                         case MADV_CORE:
1392                                 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1393                                 break;
1394                         default:
1395                                 break;
1396                         }
1397                         vm_map_simplify_entry(map, current);
1398                 }
1399                 vm_map_unlock(map);
1400         } else {
1401                 vm_pindex_t pindex;
1402                 int count;
1403
1404                 /*
1405                  * madvise behaviors that are implemented in the underlying
1406                  * vm_object.
1407                  *
1408                  * Since we don't clip the vm_map_entry, we have to clip
1409                  * the vm_object pindex and count.
1410                  */
1411                 for (current = entry;
1412                      (current != &map->header) && (current->start < end);
1413                      current = current->next
1414                 ) {
1415                         vm_offset_t useStart;
1416
1417                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1418                                 continue;
1419
1420                         pindex = OFF_TO_IDX(current->offset);
1421                         count = atop(current->end - current->start);
1422                         useStart = current->start;
1423
1424                         if (current->start < start) {
1425                                 pindex += atop(start - current->start);
1426                                 count -= atop(start - current->start);
1427                                 useStart = start;
1428                         }
1429                         if (current->end > end)
1430                                 count -= atop(current->end - end);
1431
1432                         if (count <= 0)
1433                                 continue;
1434
1435                         vm_object_madvise(current->object.vm_object,
1436                                           pindex, count, behav);
1437                         if (behav == MADV_WILLNEED) {
1438                                 mtx_lock(&Giant);
1439                                 pmap_object_init_pt(
1440                                     map->pmap, 
1441                                     useStart,
1442                                     current->object.vm_object,
1443                                     pindex, 
1444                                     (count << PAGE_SHIFT),
1445                                     MAP_PREFAULT_MADVISE
1446                                 );
1447                                 mtx_unlock(&Giant);
1448                         }
1449                 }
1450                 vm_map_unlock_read(map);
1451         }
1452         return (0);
1453 }       
1454
1455
1456 /*
1457  *      vm_map_inherit:
1458  *
1459  *      Sets the inheritance of the specified address
1460  *      range in the target map.  Inheritance
1461  *      affects how the map will be shared with
1462  *      child maps at the time of vm_map_fork.
1463  */
1464 int
1465 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1466                vm_inherit_t new_inheritance)
1467 {
1468         vm_map_entry_t entry;
1469         vm_map_entry_t temp_entry;
1470
1471         switch (new_inheritance) {
1472         case VM_INHERIT_NONE:
1473         case VM_INHERIT_COPY:
1474         case VM_INHERIT_SHARE:
1475                 break;
1476         default:
1477                 return (KERN_INVALID_ARGUMENT);
1478         }
1479         vm_map_lock(map);
1480         VM_MAP_RANGE_CHECK(map, start, end);
1481         if (vm_map_lookup_entry(map, start, &temp_entry)) {
1482                 entry = temp_entry;
1483                 vm_map_clip_start(map, entry, start);
1484         } else
1485                 entry = temp_entry->next;
1486         while ((entry != &map->header) && (entry->start < end)) {
1487                 vm_map_clip_end(map, entry, end);
1488                 entry->inheritance = new_inheritance;
1489                 vm_map_simplify_entry(map, entry);
1490                 entry = entry->next;
1491         }
1492         vm_map_unlock(map);
1493         return (KERN_SUCCESS);
1494 }
1495
1496 /*
1497  *      vm_map_unwire:
1498  *
1499  *      Implements both kernel and user unwiring.
1500  */
1501 int
1502 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
1503         boolean_t user_unwire)
1504 {
1505         vm_map_entry_t entry, first_entry, tmp_entry;
1506         vm_offset_t saved_start;
1507         unsigned int last_timestamp;
1508         int rv;
1509         boolean_t need_wakeup, result;
1510
1511         vm_map_lock(map);
1512         VM_MAP_RANGE_CHECK(map, start, end);
1513         if (!vm_map_lookup_entry(map, start, &first_entry)) {
1514                 vm_map_unlock(map);
1515                 return (KERN_INVALID_ADDRESS);
1516         }
1517         last_timestamp = map->timestamp;
1518         entry = first_entry;
1519         while (entry != &map->header && entry->start < end) {
1520                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1521                         /*
1522                          * We have not yet clipped the entry.
1523                          */
1524                         saved_start = (start >= entry->start) ? start :
1525                             entry->start;
1526                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1527                         if (vm_map_unlock_and_wait(map, user_unwire)) {
1528                                 /*
1529                                  * Allow interruption of user unwiring?
1530                                  */
1531                         }
1532                         vm_map_lock(map);
1533                         if (last_timestamp+1 != map->timestamp) {
1534                                 /*
1535                                  * Look again for the entry because the map was
1536                                  * modified while it was unlocked.
1537                                  * Specifically, the entry may have been
1538                                  * clipped, merged, or deleted.
1539                                  */
1540                                 if (!vm_map_lookup_entry(map, saved_start,
1541                                     &tmp_entry)) {
1542                                         if (saved_start == start) {
1543                                                 /*
1544                                                  * First_entry has been deleted.
1545                                                  */
1546                                                 vm_map_unlock(map);
1547                                                 return (KERN_INVALID_ADDRESS);
1548                                         }
1549                                         end = saved_start;
1550                                         rv = KERN_INVALID_ADDRESS;
1551                                         goto done;
1552                                 }
1553                                 if (entry == first_entry)
1554                                         first_entry = tmp_entry;
1555                                 else
1556                                         first_entry = NULL;
1557                                 entry = tmp_entry;
1558                         }
1559                         last_timestamp = map->timestamp;
1560                         continue;
1561                 }
1562                 vm_map_clip_start(map, entry, start);
1563                 vm_map_clip_end(map, entry, end);
1564                 /*
1565                  * Mark the entry in case the map lock is released.  (See
1566                  * above.)
1567                  */
1568                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1569                 /*
1570                  * Check the map for holes in the specified region.
1571                  */
1572                 if (entry->end < end && (entry->next == &map->header ||
1573                     entry->next->start > entry->end)) {
1574                         end = entry->end;
1575                         rv = KERN_INVALID_ADDRESS;
1576                         goto done;
1577                 }
1578                 /*
1579                  * Require that the entry is wired.
1580                  */
1581                 if (entry->wired_count == 0 || (user_unwire &&
1582                     (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)) {
1583                         end = entry->end;
1584                         rv = KERN_INVALID_ARGUMENT;
1585                         goto done;
1586                 }
1587                 entry = entry->next;
1588         }
1589         rv = KERN_SUCCESS;
1590 done:
1591         need_wakeup = FALSE;
1592         if (first_entry == NULL) {
1593                 result = vm_map_lookup_entry(map, start, &first_entry);
1594                 KASSERT(result, ("vm_map_unwire: lookup failed"));
1595         }
1596         entry = first_entry;
1597         while (entry != &map->header && entry->start < end) {
1598                 if (rv == KERN_SUCCESS) {
1599                         if (user_unwire)
1600                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1601                         entry->wired_count--;
1602                         if (entry->wired_count == 0) {
1603                                 /*
1604                                  * Retain the map lock.
1605                                  */
1606                                 vm_fault_unwire(map, entry->start, entry->end);
1607                         }
1608                 }
1609                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
1610                         ("vm_map_unwire: in-transition flag missing"));
1611                 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1612                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1613                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1614                         need_wakeup = TRUE;
1615                 }
1616                 vm_map_simplify_entry(map, entry);
1617                 entry = entry->next;
1618         }
1619         vm_map_unlock(map);
1620         if (need_wakeup)
1621                 vm_map_wakeup(map);
1622         return (rv);
1623 }
1624
1625 /*
1626  *      vm_map_wire:
1627  *
1628  *      Implements both kernel and user wiring.
1629  */
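/*
 * The structure mirrors vm_map_unwire() above: entries are clipped and
 * marked MAP_ENTRY_IN_TRANSITION, the map lock is dropped around
 * vm_fault_wire() so that faults may be taken, and a second pass after
 * "done" either sets MAP_ENTRY_USER_WIRED on success or rolls the wire
 * counts back on failure.  A wired_count of -1 serves as a temporary
 * sentinel for entries whose wiring attempt failed.
 */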
1630 int
1631 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
1632         boolean_t user_wire)
1633 {
1634         vm_map_entry_t entry, first_entry, tmp_entry;
1635         vm_offset_t saved_end, saved_start;
1636         unsigned int last_timestamp;
1637         int rv;
1638         boolean_t need_wakeup, result;
1639
1640         vm_map_lock(map);
1641         VM_MAP_RANGE_CHECK(map, start, end);
1642         if (!vm_map_lookup_entry(map, start, &first_entry)) {
1643                 vm_map_unlock(map);
1644                 return (KERN_INVALID_ADDRESS);
1645         }
1646         last_timestamp = map->timestamp;
1647         entry = first_entry;
1648         while (entry != &map->header && entry->start < end) {
1649                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1650                         /*
1651                          * We have not yet clipped the entry.
1652                          */
1653                         saved_start = (start >= entry->start) ? start :
1654                             entry->start;
1655                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1656                         if (vm_map_unlock_and_wait(map, user_wire)) {
1657                                 /*
1658                                  * Allow interruption of user wiring?
1659                                  */
1660                         }
1661                         vm_map_lock(map);
1662                         if (last_timestamp + 1 != map->timestamp) {
1663                                 /*
1664                                  * Look again for the entry because the map was
1665                                  * modified while it was unlocked.
1666                                  * Specifically, the entry may have been
1667                                  * clipped, merged, or deleted.
1668                                  */
1669                                 if (!vm_map_lookup_entry(map, saved_start,
1670                                     &tmp_entry)) {
1671                                         if (saved_start == start) {
1672                                                 /*
1673                                                  * first_entry has been deleted.
1674                                                  */
1675                                                 vm_map_unlock(map);
1676                                                 return (KERN_INVALID_ADDRESS);
1677                                         }
1678                                         end = saved_start;
1679                                         rv = KERN_INVALID_ADDRESS;
1680                                         goto done;
1681                                 }
1682                                 if (entry == first_entry)
1683                                         first_entry = tmp_entry;
1684                                 else
1685                                         first_entry = NULL;
1686                                 entry = tmp_entry;
1687                         }
1688                         last_timestamp = map->timestamp;
1689                         continue;
1690                 }
1691                 vm_map_clip_start(map, entry, start);
1692                 vm_map_clip_end(map, entry, end);
1693                 /*
1694                  * Mark the entry in case the map lock is released.  (See
1695                  * above.)
1696                  */
1697                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1698                 /*
1699                  * Wire the entry if it is not wired yet.
1700                  */
1701                 if (entry->wired_count == 0) {
1702                         entry->wired_count++;
1703                         saved_start = entry->start;
1704                         saved_end = entry->end;
1705                         /*
1706                          * Release the map lock, relying on the in-transition
1707                          * mark.
1708                          */
1709                         vm_map_unlock(map);
1710                         rv = vm_fault_wire(map, saved_start, saved_end,
1711                             user_wire);
1712                         vm_map_lock(map);
1713                         if (last_timestamp + 1 != map->timestamp) {
1714                                 /*
1715                                  * Look again for the entry because the map was
1716                                  * modified while it was unlocked.  The entry
1717                                  * may have been clipped, but NOT merged or
1718                                  * deleted.
1719                                  */
1720                                 result = vm_map_lookup_entry(map, saved_start,
1721                                     &tmp_entry);
1722                                 KASSERT(result, ("vm_map_wire: lookup failed"));
1723                                 if (entry == first_entry)
1724                                         first_entry = tmp_entry;
1725                                 else
1726                                         first_entry = NULL;
1727                                 entry = tmp_entry;
1728                                 while (entry->end < saved_end) {
1729                                         if (rv != KERN_SUCCESS) {
1730                                                 KASSERT(entry->wired_count == 1,
1731                                                     ("vm_map_wire: bad count"));
1732                                                 entry->wired_count = -1;
1733                                         }
1734                                         entry = entry->next;
1735                                 }
1736                         }
1737                         last_timestamp = map->timestamp;
1738                         if (rv != KERN_SUCCESS) {
1739                                 KASSERT(entry->wired_count == 1,
1740                                     ("vm_map_wire: bad count"));
1741                                 /*
1742                                  * Assign an out-of-range value to represent
1743                                  * the failure to wire this entry.
1744                                  */
1745                                 entry->wired_count = -1;
1746                                 end = entry->end;
1747                                 goto done;
1748                         }
1749                 } else if (!user_wire ||
1750                            (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
1751                         entry->wired_count++;
1752                 }
1753                 /*
1754                  * Check the map for holes in the specified region.
1755                  */
1756                 if (entry->end < end && (entry->next == &map->header ||
1757                     entry->next->start > entry->end)) {
1758                         end = entry->end;
1759                         rv = KERN_INVALID_ADDRESS;
1760                         goto done;
1761                 }
1762                 entry = entry->next;
1763         }
1764         rv = KERN_SUCCESS;
1765 done:
1766         need_wakeup = FALSE;
1767         if (first_entry == NULL) {
1768                 result = vm_map_lookup_entry(map, start, &first_entry);
1769                 KASSERT(result, ("vm_map_wire: lookup failed"));
1770         }
1771         entry = first_entry;
1772         while (entry != &map->header && entry->start < end) {
1773                 if (rv == KERN_SUCCESS) {
1774                         if (user_wire)
1775                                 entry->eflags |= MAP_ENTRY_USER_WIRED;
1776                 } else if (entry->wired_count == -1) {
1777                         /*
1778                          * Wiring failed on this entry.  Thus, unwiring is
1779                          * unnecessary.
1780                          */
1781                         entry->wired_count = 0;
1782                 } else {
1783                         if (!user_wire || (entry->wired_count == 1 &&
1784                             (entry->eflags & MAP_ENTRY_USER_WIRED) == 0))
1785                                 entry->wired_count--;
1786                         if (entry->wired_count == 0) {
1787                                 /*
1788                                  * Retain the map lock.
1789                                  */
1790                                 vm_fault_unwire(map, entry->start, entry->end);
1791                         }
1792                 }
1793                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
1794                         ("vm_map_wire: in-transition flag missing"));
1795                 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1796                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1797                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1798                         need_wakeup = TRUE;
1799                 }
1800                 vm_map_simplify_entry(map, entry);
1801                 entry = entry->next;
1802         }
1803         vm_map_unlock(map);
1804         if (need_wakeup)
1805                 vm_map_wakeup(map);
1806         return (rv);
1807 }
1808
1809 /*
1810  * vm_map_clean
1811  *
1812  * Push any dirty cached pages in the address range to their pager.
1813  * If syncio is TRUE, dirty pages are written synchronously.
1814  * If invalidate is TRUE, any cached pages are freed as well.
1815  *
1816  * Returns an error if any part of the specified range is not mapped.
1817  */
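/*
 * Two passes are made below: the first checks the range for submaps and
 * holes, the second walks the entries, chases the backing-object chain
 * down to the vnode object (anonymous objects are not written out), and
 * applies vm_object_page_clean() and, if invalidating,
 * vm_object_page_remove() to it.  The caller must hold Giant, as
 * asserted by GIANT_REQUIRED.
 */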
1818 int
1819 vm_map_clean(
1820         vm_map_t map,
1821         vm_offset_t start,
1822         vm_offset_t end,
1823         boolean_t syncio,
1824         boolean_t invalidate)
1825 {
1826         vm_map_entry_t current;
1827         vm_map_entry_t entry;
1828         vm_size_t size;
1829         vm_object_t object;
1830         vm_ooffset_t offset;
1831
1832         GIANT_REQUIRED;
1833
1834         vm_map_lock_read(map);
1835         VM_MAP_RANGE_CHECK(map, start, end);
1836         if (!vm_map_lookup_entry(map, start, &entry)) {
1837                 vm_map_unlock_read(map);
1838                 return (KERN_INVALID_ADDRESS);
1839         }
1840         /*
1841          * Make a first pass to check for holes.
1842          */
1843         for (current = entry; current->start < end; current = current->next) {
1844                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1845                         vm_map_unlock_read(map);
1846                         return (KERN_INVALID_ARGUMENT);
1847                 }
1848                 if (end > current->end &&
1849                     (current->next == &map->header ||
1850                         current->end != current->next->start)) {
1851                         vm_map_unlock_read(map);
1852                         return (KERN_INVALID_ADDRESS);
1853                 }
1854         }
1855
1856         if (invalidate)
1857                 pmap_remove(vm_map_pmap(map), start, end);
1858         /*
1859          * Make a second pass, cleaning/uncaching pages from the indicated
1860          * objects as we go.
1861          */
1862         for (current = entry; current->start < end; current = current->next) {
1863                 offset = current->offset + (start - current->start);
1864                 size = (end <= current->end ? end : current->end) - start;
1865                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1866                         vm_map_t smap;
1867                         vm_map_entry_t tentry;
1868                         vm_size_t tsize;
1869
1870                         smap = current->object.sub_map;
1871                         vm_map_lock_read(smap);
1872                         (void) vm_map_lookup_entry(smap, offset, &tentry);
1873                         tsize = tentry->end - offset;
1874                         if (tsize < size)
1875                                 size = tsize;
1876                         object = tentry->object.vm_object;
1877                         offset = tentry->offset + (offset - tentry->start);
1878                         vm_map_unlock_read(smap);
1879                 } else {
1880                         object = current->object.vm_object;
1881                 }
1882                 /*
1883                  * Note that there is absolutely no sense in writing out
1884                  * anonymous objects, so we track down the vnode object
1885                  * to write out.
1886                  * We invalidate (remove) all pages from the address space
1887                  * anyway, for semantic correctness.
1888                  *
1889                  * note: certain anonymous maps, such as MAP_NOSYNC maps,
1890                  * may start out with a NULL object.
1891                  */
1892                 while (object && object->backing_object) {
1893                         object = object->backing_object;
1894                         offset += object->backing_object_offset;
1895                         if (object->size < OFF_TO_IDX(offset + size))
1896                                 size = IDX_TO_OFF(object->size) - offset;
1897                 }
1898                 if (object && (object->type == OBJT_VNODE) && 
1899                     (current->protection & VM_PROT_WRITE)) {
1900                         /*
1901                          * Flush pages if writing is allowed, invalidate them
1902                          * if invalidation requested.  Pages undergoing I/O
1903                          * will be ignored by vm_object_page_remove().
1904                          *
1905                          * We cannot lock the vnode and then wait for paging
1906                          * to complete without deadlocking against vm_fault.
1907                          * Instead we simply call vm_object_page_remove() and
1908                          * allow it to block internally on a page-by-page 
1909                          * basis when it encounters pages undergoing async 
1910                          * I/O.
1911                          */
1912                         int flags;
1913
1914                         vm_object_reference(object);
1915                         vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread);
1916                         flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1917                         flags |= invalidate ? OBJPC_INVAL : 0;
1918                         vm_object_page_clean(object,
1919                             OFF_TO_IDX(offset),
1920                             OFF_TO_IDX(offset + size + PAGE_MASK),
1921                             flags);
1922                         if (invalidate) {
1923                                 /*vm_object_pip_wait(object, "objmcl");*/
1924                                 vm_object_page_remove(object,
1925                                     OFF_TO_IDX(offset),
1926                                     OFF_TO_IDX(offset + size + PAGE_MASK),
1927                                     FALSE);
1928                         }
1929                         VOP_UNLOCK(object->handle, 0, curthread);
1930                         vm_object_deallocate(object);
1931                 }
1932                 start += size;
1933         }
1934
1935         vm_map_unlock_read(map);
1936         return (KERN_SUCCESS);
1937 }
1938
1939 /*
1940  *      vm_map_entry_unwire:    [ internal use only ]
1941  *
1942  *      Make the region specified by this entry pageable.
1943  *
1944  *      The map in question should be locked.
1945  *      [This is the reason for this routine's existence.]
1946  */
1947 static void 
1948 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
1949 {
1950         vm_fault_unwire(map, entry->start, entry->end);
1951         entry->wired_count = 0;
1952 }
1953
1954 /*
1955  *      vm_map_entry_delete:    [ internal use only ]
1956  *
1957  *      Deallocate the given entry from the target map.
1958  */
1959 static void
1960 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
1961 {
1962         vm_map_entry_unlink(map, entry);
1963         map->size -= entry->end - entry->start;
1964
1965         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1966                 vm_object_deallocate(entry->object.vm_object);
1967         }
1968
1969         vm_map_entry_dispose(map, entry);
1970 }
1971
1972 /*
1973  *      vm_map_delete:  [ internal use only ]
1974  *
1975  *      Deallocates the given address range from the target
1976  *      map.
1977  */
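/*
 * The caller must hold the map lock; vm_map_remove() below is the
 * locked wrapper.  For each entry in the range: wait out any wiring or
 * unwiring in progress, unwire the entry, remove its pmap mappings,
 * release pages from kernel objects and, when possible, from
 * singly-mapped anonymous (default or swap) objects, and finally
 * unlink and dispose of the entry itself.
 */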
1978 int
1979 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
1980 {
1981         vm_object_t object;
1982         vm_map_entry_t entry;
1983         vm_map_entry_t first_entry;
1984
1985         /*
1986          * Find the start of the region, and clip it
1987          */
1988         if (!vm_map_lookup_entry(map, start, &first_entry))
1989                 entry = first_entry->next;
1990         else {
1991                 entry = first_entry;
1992                 vm_map_clip_start(map, entry, start);
1993         }
1994
1995         /*
1996          * Save the free space hint
1997          */
1998         if (entry == &map->header) {
1999                 map->first_free = &map->header;
2000         } else if (map->first_free->start >= start) {
2001                 map->first_free = entry->prev;
2002         }
2003
2004         /*
2005          * Step through all entries in this region
2006          */
2007         while ((entry != &map->header) && (entry->start < end)) {
2008                 vm_map_entry_t next;
2009                 vm_offset_t s, e;
2010                 vm_pindex_t offidxstart, offidxend, count;
2011
2012                 /*
2013                  * Wait for wiring or unwiring of an entry to complete.
2014                  */
2015                 if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0) {
2016                         unsigned int last_timestamp;
2017                         vm_offset_t saved_start;
2018                         vm_map_entry_t tmp_entry;
2019
2020                         saved_start = entry->start;
2021                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2022                         last_timestamp = map->timestamp;
2023                         (void) vm_map_unlock_and_wait(map, FALSE);
2024                         vm_map_lock(map);
2025                         if (last_timestamp + 1 != map->timestamp) {
2026                                 /*
2027                                  * Look again for the entry because the map was
2028                                  * modified while it was unlocked.
2029                                  * Specifically, the entry may have been
2030                                  * clipped, merged, or deleted.
2031                                  */
2032                                 if (!vm_map_lookup_entry(map, saved_start,
2033                                                          &tmp_entry))
2034                                         entry = tmp_entry->next;
2035                                 else {
2036                                         entry = tmp_entry;
2037                                         vm_map_clip_start(map, entry,
2038                                                           saved_start);
2039                                 }
2040                         }
2041                         continue;
2042                 }
2043                 vm_map_clip_end(map, entry, end);
2044
2045                 s = entry->start;
2046                 e = entry->end;
2047                 next = entry->next;
2048
2049                 offidxstart = OFF_TO_IDX(entry->offset);
2050                 count = OFF_TO_IDX(e - s);
2051                 object = entry->object.vm_object;
2052
2053                 /*
2054                  * Unwire before removing addresses from the pmap; otherwise,
2055                  * unwiring will put the entries back in the pmap.
2056                  */
2057                 if (entry->wired_count != 0) {
2058                         vm_map_entry_unwire(map, entry);
2059                 }
2060
2061                 offidxend = offidxstart + count;
2062
2063                 if ((object == kernel_object) || (object == kmem_object)) {
2064                         vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2065                 } else {
2066                         mtx_lock(&Giant);
2067                         pmap_remove(map->pmap, s, e);
2068                         if (object != NULL &&
2069                             object->ref_count != 1 &&
2070                             (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2071                             (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2072                                 vm_object_collapse(object);
2073                                 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2074                                 if (object->type == OBJT_SWAP) {
2075                                         swap_pager_freespace(object, offidxstart, count);
2076                                 }
2077                                 if (offidxend >= object->size &&
2078                                     offidxstart < object->size) {
2079                                         object->size = offidxstart;
2080                                 }
2081                         }
2082                         mtx_unlock(&Giant);
2083                 }
2084
2085                 /*
2086                  * Delete the entry (which may delete the object) only after
2087                  * removing all pmap entries pointing to its pages.
2088                  * (Otherwise, its page frames may be reallocated, and any
2089                  * modify bits will be set in the wrong object!)
2090                  */
2091                 vm_map_entry_delete(map, entry);
2092                 entry = next;
2093         }
2094         return (KERN_SUCCESS);
2095 }
2096
2097 /*
2098  *      vm_map_remove:
2099  *
2100  *      Remove the given address range from the target map.
2101  *      This is the exported form of vm_map_delete.
2102  */
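/*
 * Illustrative use (an assumption; neither caller appears in this file):
 * both the munmap(2) path and kmem_free() reduce to a call of the form
 *
 *      (void) vm_map_remove(map, trunc_page(addr),
 *          round_page(addr + size));
 *
 * The splvm()/splx() pair below covers kmem_map, which historically
 * could be manipulated from interrupt context.
 */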
2103 int
2104 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2105 {
2106         int result, s = 0;
2107
2108         if (map == kmem_map)
2109                 s = splvm();
2110
2111         vm_map_lock(map);
2112         VM_MAP_RANGE_CHECK(map, start, end);
2113         result = vm_map_delete(map, start, end);
2114         vm_map_unlock(map);
2115
2116         if (map == kmem_map)
2117                 splx(s);
2118
2119         return (result);
2120 }
2121
2122 /*
2123  *      vm_map_check_protection:
2124  *
2125  *      Assert that the target map allows the specified
2126  *      privilege on the entire address region given.
2127  *      The entire region must be allocated.
2128  */
2129 boolean_t
2130 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2131                         vm_prot_t protection)
2132 {
2133         vm_map_entry_t entry;
2134         vm_map_entry_t tmp_entry;
2135
2136         vm_map_lock_read(map);
2137         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2138                 vm_map_unlock_read(map);
2139                 return (FALSE);
2140         }
2141         entry = tmp_entry;
2142
2143         while (start < end) {
2144                 if (entry == &map->header) {
2145                         vm_map_unlock_read(map);
2146                         return (FALSE);
2147                 }
2148                 /*
2149                  * No holes allowed!
2150                  */
2151                 if (start < entry->start) {
2152                         vm_map_unlock_read(map);
2153                         return (FALSE);
2154                 }
2155                 /*
2156                  * Check protection associated with entry.
2157                  */
2158                 if ((entry->protection & protection) != protection) {
2159                         vm_map_unlock_read(map);
2160                         return (FALSE);
2161                 }
2162                 /* go to next entry */
2163                 start = entry->end;
2164                 entry = entry->next;
2165         }
2166         vm_map_unlock_read(map);
2167         return (TRUE);
2168 }
2169
2170 /*
2171  *      vm_map_copy_entry:
2172  *
2173  *      Copies the contents of the source entry to the destination
2174  *      entry.  The entries *must* be aligned properly.
2175  */
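/*
 * For an unwired source entry the copy is made lazily: the source range
 * is write-protected in the pmap, the object is shared, and both entries
 * are marked MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY so that the first
 * write fault performs the real copy.  Wired entries cannot be made
 * copy-on-write, so their pages are copied immediately by
 * vm_fault_copy_entry().
 */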
2176 static void
2177 vm_map_copy_entry(
2178         vm_map_t src_map,
2179         vm_map_t dst_map,
2180         vm_map_entry_t src_entry, 
2181         vm_map_entry_t dst_entry)
2182 {
2183         vm_object_t src_object;
2184
2185         if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2186                 return;
2187
2188         if (src_entry->wired_count == 0) {
2189
2190                 /*
2191                  * If the source entry is marked needs_copy, it is already
2192                  * write-protected.
2193                  */
2194                 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2195                         pmap_protect(src_map->pmap,
2196                             src_entry->start,
2197                             src_entry->end,
2198                             src_entry->protection & ~VM_PROT_WRITE);
2199                 }
2200
2201                 /*
2202                  * Make a copy of the object.
2203                  */
2204                 if ((src_object = src_entry->object.vm_object) != NULL) {
2205
2206                         if ((src_object->handle == NULL) &&
2207                                 (src_object->type == OBJT_DEFAULT ||
2208                                  src_object->type == OBJT_SWAP)) {
2209                                 vm_object_collapse(src_object);
2210                                 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2211                                         vm_object_split(src_entry);
2212                                         src_object = src_entry->object.vm_object;
2213                                 }
2214                         }
2215
2216                         vm_object_reference(src_object);
2217                         vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2218                         dst_entry->object.vm_object = src_object;
2219                         src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2220                         dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2221                         dst_entry->offset = src_entry->offset;
2222                 } else {
2223                         dst_entry->object.vm_object = NULL;
2224                         dst_entry->offset = 0;
2225                 }
2226
2227                 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2228                     dst_entry->end - dst_entry->start, src_entry->start);
2229         } else {
2230                 /*
2231                  * Of course, wired down pages can't be set copy-on-write.
2232                  * Cause wired pages to be copied into the new map by
2233                  * simulating faults (the new pages are pageable)
2234                  */
2235                 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2236         }
2237 }
2238
2239 /*
2240  * vmspace_fork:
2241  * Create a new process vmspace structure and vm_map
2242  * based on those of an existing process.  The new map
2243  * is based on the old map, according to the inheritance
2244  * values on the regions in that map.
2245  *
2246  * The source map must not be locked.
2247  */
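/*
 * Entries inherited VM_INHERIT_NONE are skipped, VM_INHERIT_SHARE
 * entries are cloned to reference the same (possibly freshly shadowed)
 * object, and VM_INHERIT_COPY entries are cloned and handed to
 * vm_map_copy_entry() for copy-on-write setup.  Wiring is never
 * inherited; every new entry starts with a wired_count of zero.
 */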
2248 struct vmspace *
2249 vmspace_fork(struct vmspace *vm1)
2250 {
2251         struct vmspace *vm2;
2252         vm_map_t old_map = &vm1->vm_map;
2253         vm_map_t new_map;
2254         vm_map_entry_t old_entry;
2255         vm_map_entry_t new_entry;
2256         vm_object_t object;
2257
2258         GIANT_REQUIRED;
2259
2260         vm_map_lock(old_map);
2261         old_map->infork = 1;
2262
2263         vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2264         bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2265             (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
2266         new_map = &vm2->vm_map; /* XXX */
2267         new_map->timestamp = 1;
2268
2269         old_entry = old_map->header.next;
2270
2271         while (old_entry != &old_map->header) {
2272                 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2273                         panic("vm_map_fork: encountered a submap");
2274
2275                 switch (old_entry->inheritance) {
2276                 case VM_INHERIT_NONE:
2277                         break;
2278
2279                 case VM_INHERIT_SHARE:
2280                         /*
2281                          * Clone the entry, creating the shared object if necessary.
2282                          */
2283                         object = old_entry->object.vm_object;
2284                         if (object == NULL) {
2285                                 object = vm_object_allocate(OBJT_DEFAULT,
2286                                         atop(old_entry->end - old_entry->start));
2287                                 old_entry->object.vm_object = object;
2288                                 old_entry->offset = (vm_offset_t) 0;
2289                         }
2290
2291                         /*
2292                          * Add the reference before calling vm_object_shadow
2293                          * to ensure that a shadow object is created.
2294                          */
2295                         vm_object_reference(object);
2296                         if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2297                                 vm_object_shadow(&old_entry->object.vm_object,
2298                                         &old_entry->offset,
2299                                         atop(old_entry->end - old_entry->start));
2300                                 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2301                                 /* Transfer the second reference too. */
2302                                 vm_object_reference(
2303                                     old_entry->object.vm_object);
2304                                 vm_object_deallocate(object);
2305                                 object = old_entry->object.vm_object;
2306                         }
2307                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
2308
2309                         /*
2310                          * Clone the entry, referencing the shared object.
2311                          */
2312                         new_entry = vm_map_entry_create(new_map);
2313                         *new_entry = *old_entry;
2314                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2315                         new_entry->wired_count = 0;
2316
2317                         /*
2318                          * Insert the entry into the new map -- we know we're
2319                          * inserting at the end of the new map.
2320                          */
2321                         vm_map_entry_link(new_map, new_map->header.prev,
2322                             new_entry);
2323
2324                         /*
2325                          * Update the physical map
2326                          */
2327                         pmap_copy(new_map->pmap, old_map->pmap,
2328                             new_entry->start,
2329                             (old_entry->end - old_entry->start),
2330                             old_entry->start);
2331                         break;
2332
2333                 case VM_INHERIT_COPY:
2334                         /*
2335                          * Clone the entry and link into the map.
2336                          */
2337                         new_entry = vm_map_entry_create(new_map);
2338                         *new_entry = *old_entry;
2339                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2340                         new_entry->wired_count = 0;
2341                         new_entry->object.vm_object = NULL;
2342                         vm_map_entry_link(new_map, new_map->header.prev,
2343                             new_entry);
2344                         vm_map_copy_entry(old_map, new_map, old_entry,
2345                             new_entry);
2346                         break;
2347                 }
2348                 old_entry = old_entry->next;
2349         }
2350
2351         new_map->size = old_map->size;
2352         old_map->infork = 0;
2353         vm_map_unlock(old_map);
2354
2355         return (vm2);
2356 }
2357
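/*
 *      vm_map_stack:
 *
 *      Map a grow-down stack of at most max_ssize bytes ending at
 *      addrbos + max_ssize.  Only an initial portion (sgrowsiz, or
 *      max_ssize if that is smaller) is mapped up front, at the top of
 *      the range; the remainder is recorded in avail_ssize and mapped
 *      later by vm_map_growstack().
 */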
2358 int
2359 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2360               vm_prot_t prot, vm_prot_t max, int cow)
2361 {
2362         vm_map_entry_t prev_entry;
2363         vm_map_entry_t new_stack_entry;
2364         vm_size_t      init_ssize;
2365         int            rv;
2366
2367         if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2368                 return (KERN_NO_SPACE);
2369
2370         if (max_ssize < sgrowsiz)
2371                 init_ssize = max_ssize;
2372         else
2373                 init_ssize = sgrowsiz;
2374
2375         vm_map_lock(map);
2376
2377         /* If addr is already mapped, no go */
2378         if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2379                 vm_map_unlock(map);
2380                 return (KERN_NO_SPACE);
2381         }
2382
2383         /* If we would blow our VMEM resource limit, no go */
2384         if (map->size + init_ssize >
2385             curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2386                 vm_map_unlock(map);
2387                 return (KERN_NO_SPACE);
2388         }
2389
2390         /* If we can't accommodate max_ssize in the current mapping,
2391          * no go.  However, we need to be aware that subsequent user
2392          * mappings might map into the space we have reserved for
2393          * stack, and currently this space is not protected.  
2394          * 
2395          * Hopefully we will at least detect this condition 
2396          * when we try to grow the stack.
2397          */
2398         if ((prev_entry->next != &map->header) &&
2399             (prev_entry->next->start < addrbos + max_ssize)) {
2400                 vm_map_unlock(map);
2401                 return (KERN_NO_SPACE);
2402         }
2403
2404         /* We initially map a stack of only init_ssize.  We will
2405          * grow it as needed later.  Since this is to be a grow-down
2406          * stack, we map at the top of the range.
2407          *
2408          * Note: we would normally expect prot and max to be
2409          * VM_PROT_ALL, and cow to be 0.  Possibly we should
2410          * eliminate these as input parameters, and just
2411          * pass these values here in the insert call.
2412          */
2413         rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2414                            addrbos + max_ssize, prot, max, cow);
2415
2416         /* Now set the avail_ssize amount */
2417         if (rv == KERN_SUCCESS){
2418                 if (prev_entry != &map->header)
2419                         vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2420                 new_stack_entry = prev_entry->next;
2421                 if (new_stack_entry->end   != addrbos + max_ssize ||
2422                     new_stack_entry->start != addrbos + max_ssize - init_ssize)
2423                         panic ("Bad entry start/end for new stack entry");
2424                 else 
2425                         new_stack_entry->avail_ssize = max_ssize - init_ssize;
2426         }
2427
2428         vm_map_unlock(map);
2429         return (rv);
2430 }
2431
2432 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2433  * desired address is already mapped, or if we successfully grow
2434  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2435  * stack range (this is strange, but preserves compatibility with
2436  * the grow function in vm_machdep.c).
2437  */
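/*
 * The growth itself is performed by inserting a new anonymous entry
 * immediately below the existing stack entry and deducting the grown
 * amount from avail_ssize.  The read lock is upgraded to a write lock
 * only after the checks pass; if the upgrade loses the lock, the whole
 * procedure restarts from "Retry".
 */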
2438 int
2439 vm_map_growstack (struct proc *p, vm_offset_t addr)
2440 {
2441         vm_map_entry_t prev_entry;
2442         vm_map_entry_t stack_entry;
2443         vm_map_entry_t new_stack_entry;
2444         struct vmspace *vm = p->p_vmspace;
2445         vm_map_t map = &vm->vm_map;
2446         vm_offset_t    end;
2447         int      grow_amount;
2448         int      rv;
2449         int      is_procstack;
2450
2451         GIANT_REQUIRED;
2452         
2453 Retry:
2454         vm_map_lock_read(map);
2455
2456         /* If addr is already in the entry range, no need to grow. */
2457         if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2458                 vm_map_unlock_read(map);
2459                 return (KERN_SUCCESS);
2460         }
2461
2462         if ((stack_entry = prev_entry->next) == &map->header) {
2463                 vm_map_unlock_read(map);
2464                 return (KERN_SUCCESS);
2465         } 
2466         if (prev_entry == &map->header) 
2467                 end = stack_entry->start - stack_entry->avail_ssize;
2468         else
2469                 end = prev_entry->end;
2470
2471         /* This next test mimics the old grow function in vm_machdep.c.
2472          * It really doesn't quite make sense, but we do it anyway
2473          * for compatibility.
2474          *
2475          * If the stack is not growable, return success.  This signals
2476          * the caller to proceed as it normally would with ordinary VM.
2477          */
2478         if (stack_entry->avail_ssize < 1 ||
2479             addr >= stack_entry->start ||
2480             addr <  stack_entry->start - stack_entry->avail_ssize) {
2481                 vm_map_unlock_read(map);
2482                 return (KERN_SUCCESS);
2483         } 
2484         
2485         /* Find the minimum grow amount */
2486         grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2487         if (grow_amount > stack_entry->avail_ssize) {
2488                 vm_map_unlock_read(map);
2489                 return (KERN_NO_SPACE);
2490         }
2491
2492         /* If there is no longer enough space between the entries,
2493          * fail, and adjust the available space.  Note: this
2494          * should only happen if the user has mapped into the
2495          * stack area after the stack was created, and is
2496          * probably an error.
2497          *
2498          * This also effectively destroys any guard page the user
2499          * might have intended by limiting the stack size.
2500          */
2501         if (grow_amount > stack_entry->start - end) {
2502                 if (vm_map_lock_upgrade(map))
2503                         goto Retry;
2504
2505                 stack_entry->avail_ssize = stack_entry->start - end;
2506
2507                 vm_map_unlock(map);
2508                 return (KERN_NO_SPACE);
2509         }
2510
2511         is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2512
2513         /* If this is the main process stack, see if we're over the 
2514          * stack limit.
2515          */
2516         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2517                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2518                 vm_map_unlock_read(map);
2519                 return (KERN_NO_SPACE);
2520         }
2521
2522         /* Round up the grow amount to a multiple of sgrowsiz. */
2523         grow_amount = roundup (grow_amount, sgrowsiz);
2524         if (grow_amount > stack_entry->avail_ssize) {
2525                 grow_amount = stack_entry->avail_ssize;
2526         }
2527         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2528                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2529                 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2530                               ctob(vm->vm_ssize);
2531         }
2532
2533         /* If we would blow our VMEM resource limit, no go */
2534         if (map->size + grow_amount >
2535             curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2536                 vm_map_unlock_read(map);
2537                 return (KERN_NO_SPACE);
2538         }
2539
2540         if (vm_map_lock_upgrade(map))
2541                 goto Retry;
2542
2543         /* Get the preliminary new entry start value */
2544         addr = stack_entry->start - grow_amount;
2545
2546         /* If this puts us into the previous entry, cut back our growth
2547          * to the available space.  Also, see the note above.
2548          */
2549         if (addr < end) {
2550                 stack_entry->avail_ssize = stack_entry->start - end;
2551                 addr = end;
2552         }
2553
2554         rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2555                            VM_PROT_ALL,
2556                            VM_PROT_ALL,
2557                            0);
2558
2559         /* Adjust the available stack space by the amount we grew. */
2560         if (rv == KERN_SUCCESS) {
2561                 if (prev_entry != &map->header)
2562                         vm_map_clip_end(map, prev_entry, addr);
2563                 new_stack_entry = prev_entry->next;
2564                 if (new_stack_entry->end   != stack_entry->start  ||
2565                     new_stack_entry->start != addr)
2566                         panic ("Bad stack grow start/end in new stack entry");
2567                 else {
2568                         new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2569                                                         (new_stack_entry->end -
2570                                                          new_stack_entry->start);
2571                         if (is_procstack)
2572                                 vm->vm_ssize += btoc(new_stack_entry->end -
2573                                                      new_stack_entry->start);
2574                 }
2575         }
2576
2577         vm_map_unlock(map);
2578         return (rv);
2579 }
2580
2581 /*
2582  * Unshare the specified VM space for exec.  If other processes are
2583  * mapped to it, then create a new one.  The new vmspace starts out empty.
2584  */
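/*
 * This is expected to be reached from the execve path (an assumption;
 * the caller is not in this file), which discards the old mappings and
 * builds the new image into the fresh, empty vmspace.
 */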
2585 void
2586 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
2587 {
2588         struct vmspace *oldvmspace = p->p_vmspace;
2589         struct vmspace *newvmspace;
2590
2591         GIANT_REQUIRED;
2592         newvmspace = vmspace_alloc(minuser, maxuser);
2593         bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2594             (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2595         /*
2596          * This code is written like this for prototype purposes.  The
2597          * goal is to avoid running down the vmspace here, but to let the
2598          * other processes that are still using the vmspace finally run
2599          * it down.  Even though there is little or no chance of blocking
2600          * here, it is a good idea to keep this form for future mods.
2601          */
2602         p->p_vmspace = newvmspace;
2603         pmap_pinit2(vmspace_pmap(newvmspace));
2604         vmspace_free(oldvmspace);
2605         if (p == curthread->td_proc)            /* XXXKSE ? */
2606                 pmap_activate(curthread);
2607 }
2608
2609 /*
2610  * Unshare the specified VM space for forcing COW.  This
2611  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2612  */
2613 void
2614 vmspace_unshare(struct proc *p)
2615 {
2616         struct vmspace *oldvmspace = p->p_vmspace;
2617         struct vmspace *newvmspace;
2618
2619         GIANT_REQUIRED;
2620         if (oldvmspace->vm_refcnt == 1)
2621                 return;
2622         newvmspace = vmspace_fork(oldvmspace);
2623         p->p_vmspace = newvmspace;
2624         pmap_pinit2(vmspace_pmap(newvmspace));
2625         vmspace_free(oldvmspace);
2626         if (p == curthread->td_proc)            /* XXXKSE ? */
2627                 pmap_activate(curthread);
2628 }
2629
2630 /*
2631  *      vm_map_lookup:
2632  *
2633  *      Finds the VM object, offset, and
2634  *      protection for a given virtual address in the
2635  *      specified map, assuming a page fault of the
2636  *      type specified.
2637  *
2638  *      Leaves the map in question locked for read; return
2639  *      values are guaranteed until a vm_map_lookup_done
2640  *      call is performed.  Note that the map argument
2641  *      is in/out; the returned map must be used in
2642  *      the call to vm_map_lookup_done.
2643  *
2644  *      A handle (out_entry) is returned for use in
2645  *      vm_map_lookup_done, to make that fast.
2646  *
2647  *      If a lookup is requested with "write protection"
2648  *      specified, the map may be changed to perform virtual
2649  *      copying operations, although the data referenced will
2650  *      remain the same.
2651  */
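/*
 * Illustrative use (an assumption; the fault handler is not in this
 * file): a page fault is typically resolved by bracketing the work with
 *
 *      result = vm_map_lookup(&map, vaddr, fault_type, &entry,
 *          &object, &pindex, &prot, &wired);
 *      ...
 *      vm_map_lookup_done(map, entry);
 *
 * using the possibly-updated map pointer for the matching "done" call.
 */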
2652 int
2653 vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
2654               vm_offset_t vaddr,
2655               vm_prot_t fault_typea,
2656               vm_map_entry_t *out_entry,        /* OUT */
2657               vm_object_t *object,              /* OUT */
2658               vm_pindex_t *pindex,              /* OUT */
2659               vm_prot_t *out_prot,              /* OUT */
2660               boolean_t *wired)                 /* OUT */
2661 {
2662         vm_map_entry_t entry;
2663         vm_map_t map = *var_map;
2664         vm_prot_t prot;
2665         vm_prot_t fault_type = fault_typea;
2666
2667 RetryLookup:;
2668         /*
2669          * Lookup the faulting address.
2670          */
2671
2672         vm_map_lock_read(map);
2673 #define RETURN(why) \
2674                 { \
2675                 vm_map_unlock_read(map); \
2676                 return (why); \
2677                 }
2678
2679         /*
2680          * If the map has an interesting hint, try it before calling the
2681          * full-blown lookup routine.
2682          */
2683         entry = map->root;
2684         *out_entry = entry;
2685         if (entry == NULL ||
2686             (vaddr < entry->start) || (vaddr >= entry->end)) {
2687                 /*
2688                  * Entry was either not a valid hint, or the vaddr was not
2689                  * contained in the entry, so do a full lookup.
2690                  */
2691                 if (!vm_map_lookup_entry(map, vaddr, out_entry))
2692                         RETURN(KERN_INVALID_ADDRESS);
2693
2694                 entry = *out_entry;
2695         }
2696         
2697         /*
2698          * Handle submaps.
2699          */
2700         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2701                 vm_map_t old_map = map;
2702
2703                 *var_map = map = entry->object.sub_map;
2704                 vm_map_unlock_read(old_map);
2705                 goto RetryLookup;
2706         }
2707
2708         /*
2709          * Check whether this task is allowed to have this page.
2710          * Note the special case for MAP_ENTRY_COW
2711          * pages with an override.  This is to implement a forced
2712          * COW for debuggers.
2713          */
2714         if (fault_type & VM_PROT_OVERRIDE_WRITE)
2715                 prot = entry->max_protection;
2716         else
2717                 prot = entry->protection;
2718         fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2719         if ((fault_type & prot) != fault_type) {
2720                         RETURN(KERN_PROTECTION_FAILURE);
2721         }
2722         if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2723             (entry->eflags & MAP_ENTRY_COW) &&
2724             (fault_type & VM_PROT_WRITE) &&
2725             (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2726                 RETURN(KERN_PROTECTION_FAILURE);
2727         }
2728
2729         /*
2730          * If this page is not pageable, we have to get it for all possible
2731          * accesses.
2732          */
2733         *wired = (entry->wired_count != 0);
2734         if (*wired)
2735                 prot = fault_type = entry->protection;
2736
2737         /*
2738          * If the entry was copy-on-write, we either shadow the object now or demote the permitted access.
2739          */
2740         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2741                 /*
2742                  * If we want to write the page, we may as well handle that
2743                  * now since we've got the map locked.
2744                  *
2745                  * If we don't need to write the page, we just demote the
2746                  * permissions allowed.
2747                  */
2748                 if (fault_type & VM_PROT_WRITE) {
2749                         /*
2750                          * Make a new object, and place it in the object
2751                          * chain.  Note that no new references have appeared
2752                          * -- one just moved from the map to the new
2753                          * object.
2754                          */
2755                         if (vm_map_lock_upgrade(map))
2756                                 goto RetryLookup;
2757
2758                         vm_object_shadow(
2759                             &entry->object.vm_object,
2760                             &entry->offset,
2761                             atop(entry->end - entry->start));
2762                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2763
2764                         vm_map_lock_downgrade(map);
2765                 } else {
2766                         /*
2767                          * We're attempting to read a copy-on-write page --
2768                          * don't allow writes.
2769                          */
2770                         prot &= ~VM_PROT_WRITE;
2771                 }
2772         }
2773
2774         /*
2775          * Create an object if necessary.
2776          */
2777         if (entry->object.vm_object == NULL &&
2778             !map->system_map) {
2779                 if (vm_map_lock_upgrade(map)) 
2780                         goto RetryLookup;
2781                 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2782                     atop(entry->end - entry->start));
2783                 entry->offset = 0;
2784                 vm_map_lock_downgrade(map);
2785         }
2786
2787         /*
2788          * Return the object/offset from this entry.  If the entry was
2789          * copy-on-write or empty, it has been fixed up.
2790          */
2791         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2792         *object = entry->object.vm_object;
2793
2794         /*
2795          * Return whether this is the only map sharing this data.
2796          */
2797         *out_prot = prot;
2798         return (KERN_SUCCESS);
2799
2800 #undef  RETURN
2801 }
2802
2803 /*
2804  *      vm_map_lookup_done:
2805  *
2806  *      Releases locks acquired by a vm_map_lookup
2807  *      (according to the handle returned by that lookup).
2808  */
2809 void
2810 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
2811 {
2812         /*
2813          * Unlock the main-level map
2814          */
2815         vm_map_unlock_read(map);
2816 }
2817
2818 #ifdef ENABLE_VFS_IOOPT
2819 /*
2820  * Experimental support for zero-copy I/O
2821  *
2822  * Implement uiomove with VM operations.  This handles (and collateral changes)
2823  * support every combination of source object modification, and COW type
2824  * operations.
2825  */
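/*
 * The loop below processes the user range in map-entry-sized chunks:
 * each chunk is looked up and clipped, the source object's pages are
 * validated when a page count was requested, and, where the destination
 * entry already maps the vnode, the entry is simply redirected at the
 * source object rather than copying the data.
 */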
2826 int
2827 vm_uiomove(
2828         vm_map_t mapa,
2829         vm_object_t srcobject,
2830         off_t cp,
2831         int cnta,
2832         vm_offset_t uaddra,
2833         int *npages)
2834 {
2835         vm_map_t map;
2836         vm_object_t first_object, oldobject, object;
2837         vm_map_entry_t entry;
2838         vm_prot_t prot;
2839         boolean_t wired;
2840         int tcnt, rv;
2841         vm_offset_t uaddr, start, end, tend;
2842         vm_pindex_t first_pindex, oindex;
2843         vm_size_t osize;
2844         off_t ooffset;
2845         int cnt;
2846
2847         GIANT_REQUIRED;
2848
2849         if (npages)
2850                 *npages = 0;
2851
2852         cnt = cnta;
2853         uaddr = uaddra;
2854
2855         while (cnt > 0) {
2856                 map = mapa;
2857
2858                 if ((vm_map_lookup(&map, uaddr,
2859                         VM_PROT_READ, &entry, &first_object,
2860                         &first_pindex, &prot, &wired)) != KERN_SUCCESS) {
2861                         return EFAULT;
2862                 }
2863
2864                 vm_map_clip_start(map, entry, uaddr);
2865
2866                 tcnt = cnt;
2867                 tend = uaddr + tcnt;
2868                 if (tend > entry->end) {
2869                         tcnt = entry->end - uaddr;
2870                         tend = entry->end;
2871                 }
2872
2873                 vm_map_clip_end(map, entry, tend);
2874
2875                 start = entry->start;
2876                 end = entry->end;
2877
2878                 osize = atop(tcnt);
2879
2880                 oindex = OFF_TO_IDX(cp);
2881                 if (npages) {
2882                         vm_size_t idx;
2883                         for (idx = 0; idx < osize; idx++) {
2884                                 vm_page_t m;
2885                                 if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
2886                                         vm_map_lookup_done(map, entry);
2887                                         return 0;
2888                                 }
2889                                 /*
2890                                  * Disallow PG_BUSY or partially valid pages; a page
2891                                  * with nonzero m->busy is fine if entirely valid.
2892                                  */
2893                                 if ((m->flags & PG_BUSY) ||
2894                                         ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
2895                                         vm_map_lookup_done(map, entry);
2896                                         return 0;
2897                                 }
2898                         }
2899                 }
2900
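     /*
      * Three cases follow:  (1) the entry already maps the source vnode
      * object directly, so only its offset and the hardware mappings need
      * to change;  (2) the entry's single-reference default/swap object of
      * exactly the transfer size can be re-pointed at the source object;
      * (3) otherwise the entry is replaced by a new copy-on-write mapping
      * of the source object (a "logical mmap").
      */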
2901 /*
2902  * If we are changing an existing map entry, just redirect
2903  * the object, and change mappings.
2904  */
2905                 if ((first_object->type == OBJT_VNODE) &&
2906                         ((oldobject = entry->object.vm_object) == first_object)) {
2907
2908                         if ((entry->offset != cp) || (oldobject != srcobject)) {
2909                                 /*
2910                                  * Remove old window into the file.
2911                                  */
2912                                 pmap_remove (map->pmap, uaddr, tend);
2913
2914                                 /*
2915                                  * Force copy-on-write for mmapped regions.
2916                                  */
2917                                 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2918
2919                                 /*
2920                                  * Point the object appropriately.
2921                                  */
2922                                 if (oldobject != srcobject) {
2923
2924                                         /*
2925                                          * Set the object optimization hint flag.
2926                                          */
2927                                         vm_object_set_flag(srcobject, OBJ_OPT);
2928                                         vm_object_reference(srcobject);
2929                                         entry->object.vm_object = srcobject;
2930
2931                                         if (oldobject) {
2932                                                 vm_object_deallocate(oldobject);
2933                                         }
2934                                 }
2935
2936                                 entry->offset = cp;
2937                                 map->timestamp++;
2938                         } else {
2939                                 pmap_remove (map->pmap, uaddr, tend);
2940                         }
2941
2942                 } else if ((first_object->ref_count == 1) &&
2943                         (first_object->size == osize) &&
2944                         ((first_object->type == OBJT_DEFAULT) ||
2945                                 (first_object->type == OBJT_SWAP)) ) {
2946
2947                         oldobject = first_object->backing_object;
2948
2949                         if ((first_object->backing_object_offset != cp) ||
2950                                 (oldobject != srcobject)) {
2951                                 /*
2952                                  * Remove old window into the file.
2953                                  */
2954                                 pmap_remove (map->pmap, uaddr, tend);
2955
2956                                 /*
2957                                  * Remove unneeded old pages
2958                                  */
2959                                 vm_object_page_remove(first_object, 0, 0, 0);
2960
2961                                 /*
2962                                  * Invalidate swap space
2963                                  */
2964                                 if (first_object->type == OBJT_SWAP) {
2965                                         swap_pager_freespace(first_object,
2966                                                 0,
2967                                                 first_object->size);
2968                                 }
2969
2970                                 /*
2971                                  * Force copy-on-write for mmapped regions.
2972                                  */
2973                                 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2974
2975                                 /*
2976                                  * Point the object appropriately
2977                                  */
2978                                 if (oldobject != srcobject) {
2979                                         /*
2980                                          * Set the object optimization hint flag
2981                                          */
2982                                         vm_object_set_flag(srcobject, OBJ_OPT);
2983                                         vm_object_reference(srcobject);
2984
2985                                         if (oldobject) {
2986                                                 TAILQ_REMOVE(&oldobject->shadow_head,
2987                                                         first_object, shadow_list);
2988                                                 oldobject->shadow_count--;
2989                                                 /* XXX bump generation? */
2990                                                 vm_object_deallocate(oldobject);
2991                                         }
2992
2993                                         TAILQ_INSERT_TAIL(&srcobject->shadow_head,
2994                                                 first_object, shadow_list);
2995                                         srcobject->shadow_count++;
2996                                         /* XXX bump generation? */
2997
2998                                         first_object->backing_object = srcobject;
2999                                 }
3000                                 first_object->backing_object_offset = cp;
3001                                 map->timestamp++;
3002                         } else {
3003                                 pmap_remove (map->pmap, uaddr, tend);
3004                         }
3005 /*
3006  * Otherwise, we have to do a logical mmap.
3007  */
3008                 } else {
3009
3010                         vm_object_set_flag(srcobject, OBJ_OPT);
3011                         vm_object_reference(srcobject);
3012
3013                         pmap_remove (map->pmap, uaddr, tend);
3014
3015                         vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3016                         vm_map_lock_upgrade(map);
3017
3018                         if (entry == &map->header) {
3019                                 map->first_free = &map->header;
3020                         } else if (map->first_free->start >= start) {
3021                                 map->first_free = entry->prev;
3022                         }
3023
3024                         vm_map_entry_delete(map, entry);
3025
3026                         object = srcobject;
3027                         ooffset = cp;
3028
3029                         rv = vm_map_insert(map, object, ooffset, start, tend,
3030                                 VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
3031
3032                         if (rv != KERN_SUCCESS)
3033                                 panic("vm_uiomove: could not insert new entry: %d", rv);
3034                 }
3035
3036 /*
3037  * Map the window directly if it is already in memory.
3038  */
3039                 pmap_object_init_pt(map->pmap, uaddr,
3040                         srcobject, oindex, tcnt, 0);
3041
3042                 map->timestamp++;
3043                 vm_map_unlock(map);
3044
3045                 cnt -= tcnt;
3046                 uaddr += tcnt;
3047                 cp += tcnt;
3048                 if (npages)
3049                         *npages += osize;
3050         }
3051         return 0;
3052 }
3053 #endif
3054
3055 #include "opt_ddb.h"
3056 #ifdef DDB
3057 #include <sys/kernel.h>
3058
3059 #include <ddb/ddb.h>
3060
3061 /*
3062  *      vm_map_print:   [ debug ]
3063  */
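     /*
      *      Invoked from the ddb prompt as "show map <addr>", where <addr>
      *      is the address of a struct vm_map (for example as printed by
      *      "show procvm" below).
      */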
3064 DB_SHOW_COMMAND(map, vm_map_print)
3065 {
3066         static int nlines;
3067         /* XXX convert args. */
3068         vm_map_t map = (vm_map_t)addr;
3069         boolean_t full = have_addr;
3070
3071         vm_map_entry_t entry;
3072
3073         db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3074             (void *)map,
3075             (void *)map->pmap, map->nentries, map->timestamp);
3076         nlines++;
3077
3078         if (!full && db_indent)
3079                 return;
3080
3081         db_indent += 2;
3082         for (entry = map->header.next; entry != &map->header;
3083             entry = entry->next) {
3084                 db_iprintf("map entry %p: start=%p, end=%p\n",
3085                     (void *)entry, (void *)entry->start, (void *)entry->end);
3086                 nlines++;
3087                 {
3088                         static char *inheritance_name[4] =
3089                         {"share", "copy", "none", "donate_copy"};
3090
3091                         db_iprintf(" prot=%x/%x/%s",
3092                             entry->protection,
3093                             entry->max_protection,
3094                             inheritance_name[(int)(unsigned char)entry->inheritance]);
3095                         if (entry->wired_count != 0)
3096                                 db_printf(", wired");
3097                 }
3098                 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3099                         /* XXX no %qd in kernel.  Truncate entry->offset. */
3100                         db_printf(", share=%p, offset=0x%lx\n",
3101                             (void *)entry->object.sub_map,
3102                             (long)entry->offset);
3103                         nlines++;
3104                         if ((entry->prev == &map->header) ||
3105                             (entry->prev->object.sub_map !=
3106                                 entry->object.sub_map)) {
3107                                 db_indent += 2;
3108                                 vm_map_print((db_expr_t)(intptr_t)
3109                                              entry->object.sub_map,
3110                                              full, 0, (char *)0);
3111                                 db_indent -= 2;
3112                         }
3113                 } else {
3114                         /* XXX no %qd in kernel.  Truncate entry->offset. */
3115                         db_printf(", object=%p, offset=0x%lx",
3116                             (void *)entry->object.vm_object,
3117                             (long)entry->offset);
3118                         if (entry->eflags & MAP_ENTRY_COW)
3119                                 db_printf(", copy (%s)",
3120                                     (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3121                         db_printf("\n");
3122                         nlines++;
3123
3124                         if ((entry->prev == &map->header) ||
3125                             (entry->prev->object.vm_object !=
3126                                 entry->object.vm_object)) {
3127                                 db_indent += 2;
3128                                 vm_object_print((db_expr_t)(intptr_t)
3129                                                 entry->object.vm_object,
3130                                                 full, 0, (char *)0);
3131                                 nlines += 4;
3132                                 db_indent -= 2;
3133                         }
3134                 }
3135         }
3136         db_indent -= 2;
3137         if (db_indent == 0)
3138                 nlines = 0;
3139 }
3140
3141
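     /*
      *      "show procvm [<addr>]":  print the vmspace, vm_map and pmap of
      *      the process at <addr> (curproc when no address is given), then
      *      dump its map via vm_map_print().
      */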
3142 DB_SHOW_COMMAND(procvm, procvm)
3143 {
3144         struct proc *p;
3145
3146         if (have_addr) {
3147                 p = (struct proc *) addr;
3148         } else {
3149                 p = curproc;
3150         }
3151
3152         db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3153             (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3154             (void *)vmspace_pmap(p->p_vmspace));
3155
3156         vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3157 }
3158
3159 #endif /* DDB */