1 /*
2  * Copyright (c) 1991, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      from: @(#)vm_map.c      8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66
67 /*
68  *      Virtual memory mapping module.
69  */
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/ktr.h>
74 #include <sys/lock.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/vmmeter.h>
78 #include <sys/mman.h>
79 #include <sys/vnode.h>
80 #include <sys/resourcevar.h>
81
82 #include <vm/vm.h>
83 #include <vm/vm_param.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_pager.h>
89 #include <vm/vm_kern.h>
90 #include <vm/vm_extern.h>
91 #include <vm/swap_pager.h>
92 #include <vm/uma.h>
93
94 /*
95  *      Virtual memory maps provide for the mapping, protection,
96  *      and sharing of virtual memory objects.  In addition,
97  *      this module provides for an efficient virtual copy of
98  *      memory from one map to another.
99  *
100  *      Synchronization is required prior to most operations.
101  *
102  *      Maps consist of an ordered doubly-linked list of simple
103  *      entries; a single hint is used to speed up lookups.
104  *
105  *      Since portions of maps are specified by start/end addresses,
106  *      which may not align with existing map entries, all
107  *      routines merely "clip" entries to these start/end values.
108  *      [That is, an entry is split into two, bordering at a
109  *      start or end value.]  Note that these clippings may not
110  *      always be necessary (as the two resulting entries are then
111  *      not changed); however, the clipping is done for convenience.
112  *
113  *      As mentioned above, virtual copy operations are performed
114  *      by copying VM object references from one map to
115  *      another, and then marking both regions as copy-on-write.
116  */
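
/*
 *      For example, if a map holds a single entry covering [0x2000, 0x6000)
 *      and an operation targets the range [0x3000, 0x5000), clipping splits
 *      that entry into [0x2000, 0x3000), [0x3000, 0x5000), and
 *      [0x5000, 0x6000); the operation then modifies only the middle entry,
 *      and the two outer entries are left untouched.
 */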
117
118 /*
119  *      vm_map_startup:
120  *
121  *      Initialize the vm_map module.  Must be called before
122  *      any other vm_map routines.
123  *
124  *      Map and entry structures are allocated from the general
125  *      purpose memory pool with some exceptions:
126  *
127  *      - The kernel map and kmem submap are allocated statically.
128  *      - Kernel map entries are allocated out of a static pool.
129  *
130  *      These restrictions are necessary since malloc() uses the
131  *      maps and requires map entries.
132  */
133
134 static uma_zone_t mapentzone;
135 static uma_zone_t kmapentzone;
136 static uma_zone_t mapzone;
137 static uma_zone_t vmspace_zone;
138 static struct vm_object kmapentobj;
139 static void vmspace_zinit(void *mem, int size);
140 static void vmspace_zfini(void *mem, int size);
141 static void vm_map_zinit(void *mem, int size);
142 static void vm_map_zfini(void *mem, int size);
143 static void _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max);
144
145 #ifdef INVARIANTS
146 static void vm_map_zdtor(void *mem, int size, void *arg);
147 static void vmspace_zdtor(void *mem, int size, void *arg);
148 #endif
149
150 void
151 vm_map_startup(void)
152 {
153         mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
154 #ifdef INVARIANTS
155             vm_map_zdtor,
156 #else
157             NULL,
158 #endif
159             vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
160         uma_prealloc(mapzone, MAX_KMAP);
161         kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry), 
162             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
163             UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
164         uma_prealloc(kmapentzone, MAX_KMAPENT);
165         mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry), 
166             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
167         uma_prealloc(mapentzone, MAX_MAPENT);
168 }
169
170 static void
171 vmspace_zfini(void *mem, int size)
172 {
173         struct vmspace *vm;
174
175         vm = (struct vmspace *)mem;
176
177         vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map));
178 }
179
180 static void
181 vmspace_zinit(void *mem, int size)
182 {
183         struct vmspace *vm;
184
185         vm = (struct vmspace *)mem;
186
187         vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map));
188 }
189
190 static void
191 vm_map_zfini(void *mem, int size)
192 {
193         vm_map_t map;
194
195         map = (vm_map_t)mem;
196
197         lockdestroy(&map->lock);
198 }
199
200 static void
201 vm_map_zinit(void *mem, int size)
202 {
203         vm_map_t map;
204
205         map = (vm_map_t)mem;
206         map->nentries = 0;
207         map->size = 0;
208         map->infork = 0;
209         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
210 }
211
212 #ifdef INVARIANTS
213 static void
214 vmspace_zdtor(void *mem, int size, void *arg)
215 {
216         struct vmspace *vm;
217
218         vm = (struct vmspace *)mem;
219
220         vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
221 }
222 static void
223 vm_map_zdtor(void *mem, int size, void *arg)
224 {
225         vm_map_t map;
226
227         map = (vm_map_t)mem;
228         KASSERT(map->nentries == 0,
229             ("map %p nentries == %d on free.", 
230             map, map->nentries));
231         KASSERT(map->size == 0,
232             ("map %p size == %lu on free.",
233             map, (unsigned long)map->size));
234         KASSERT(map->infork == 0,
235             ("map %p infork == %d on free.",
236             map, map->infork));
237 }
238 #endif  /* INVARIANTS */
239
240 /*
241  * Allocate a vmspace structure, including a vm_map and pmap,
242  * and initialize those structures.  The refcnt is set to 1.
243  * The remaining fields must be initialized by the caller.
244  */
245 struct vmspace *
246 vmspace_alloc(min, max)
247         vm_offset_t min, max;
248 {
249         struct vmspace *vm;
250
251         GIANT_REQUIRED;
252         vm = uma_zalloc(vmspace_zone, M_WAITOK);
253         CTR1(KTR_VM, "vmspace_alloc: %p", vm);
254         _vm_map_init(&vm->vm_map, min, max);
255         pmap_pinit(vmspace_pmap(vm));
256         vm->vm_map.pmap = vmspace_pmap(vm);             /* XXX */
257         vm->vm_refcnt = 1;
258         vm->vm_shm = NULL;
259         vm->vm_freer = NULL;
260         return (vm);
261 }
262
263 void
264 vm_init2(void) 
265 {
266         uma_zone_set_obj(kmapentzone, &kmapentobj, lmin(cnt.v_page_count,
267             (VM_MAX_KERNEL_ADDRESS - KERNBASE) / PAGE_SIZE) / 8);
268         vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
269 #ifdef INVARIANTS
270             vmspace_zdtor,
271 #else
272             NULL,
273 #endif
274             vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
275         pmap_init2();
276         vm_object_init2();
277 }
278
279 static __inline void
280 vmspace_dofree(struct vmspace *vm)
281 {
282         CTR1(KTR_VM, "vmspace_free: %p", vm);
283         /*
284          * Lock the map, to wait out all other references to it.
285          * Delete all of the mappings and pages they hold, then call
286          * the pmap module to reclaim anything left.
287          */
288         vm_map_lock(&vm->vm_map);
289         (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
290             vm->vm_map.max_offset);
291         vm_map_unlock(&vm->vm_map);
292
293         pmap_release(vmspace_pmap(vm));
294         uma_zfree(vmspace_zone, vm);
295 }
296
297 void
298 vmspace_free(struct vmspace *vm)
299 {
300         GIANT_REQUIRED;
301
302         if (vm->vm_refcnt == 0)
303                 panic("vmspace_free: attempt to free already freed vmspace");
304
305         if (--vm->vm_refcnt == 0)
306                 vmspace_dofree(vm);
307 }
308
309 void
310 vmspace_exitfree(struct proc *p)
311 {
312         struct vmspace *vm;
313
314         GIANT_REQUIRED;
315         if (p == p->p_vmspace->vm_freer) {
316                 vm = p->p_vmspace;
317                 p->p_vmspace = NULL;
318                 vmspace_dofree(vm);
319         }
320 }
321
322 /*
323  * vmspace_swap_count() - count the approximate swap usage in pages for a
324  *                        vmspace.
325  *
326  *      Swap usage is determined by taking the proportional swap used by
327  *      VM objects backing the VM map.  To make up for fractional losses,
328  *      if the VM object has any swap use at all, the associated map entries
329  *      count for at least 1 swap page.
330  */
331 int
332 vmspace_swap_count(struct vmspace *vmspace)
333 {
334         vm_map_t map = &vmspace->vm_map;
335         vm_map_entry_t cur;
336         int count = 0;
337
338         vm_map_lock_read(map);
339         for (cur = map->header.next; cur != &map->header; cur = cur->next) {
340                 vm_object_t object;
341
342                 if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
343                     (object = cur->object.vm_object) != NULL &&
344                     object->type == OBJT_SWAP
345                 ) {
346                         int n = (cur->end - cur->start) / PAGE_SIZE;
347
348                         if (object->un_pager.swp.swp_bcount) {
349                                 count += object->un_pager.swp.swp_bcount *
350                                     SWAP_META_PAGES * n / object->size + 1;
351                         }
352                 }
353         }
354         vm_map_unlock_read(map);
355         return (count);
356 }
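
/*
 *      A worked example of the estimate above (a sketch only; SWAP_META_PAGES
 *      is assumed to be 16, its usual value): a 1MB entry with 4KB pages
 *      gives n = 256.  If the backing object is 1024 pages long and has
 *      swp_bcount = 4, the entry contributes
 *      4 * 16 * 256 / 1024 + 1 = 17 pages to the count.
 */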
357
358 void
359 _vm_map_lock(vm_map_t map, const char *file, int line)
360 {
361         int error;
362
363         if (map->system_map)
364                 GIANT_REQUIRED;
365         error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
366         KASSERT(error == 0, ("%s: failed to get lock", __func__));
367         map->timestamp++;
368 }
369
370 void
371 _vm_map_unlock(vm_map_t map, const char *file, int line)
372 {
373
374         lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
375 }
376
377 void
378 _vm_map_lock_read(vm_map_t map, const char *file, int line)
379 {
380         int error;
381
382         if (map->system_map)
383                 GIANT_REQUIRED;
384         error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
385         KASSERT(error == 0, ("%s: failed to get lock", __func__));
386 }
387
388 void
389 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
390 {
391
392         lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
393 }
394
395 int
396 _vm_map_trylock(vm_map_t map, const char *file, int line)
397 {
398         int error;
399
400         if (map->system_map)
401                 GIANT_REQUIRED;
402         error = lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, NULL, curthread);
403         return (error == 0);
404 }
405
406 int
407 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
408 {
409
410         KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
411                 ("%s: lock not held", __func__));
412         map->timestamp++;
413         return (0);
414 }
415
416 void
417 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
418 {
419
420         KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
421                 ("%s: lock not held", __func__));
422 }
423
424 /*
425  *      vm_map_unlock_and_wait:
426  */
427 int
428 vm_map_unlock_and_wait(vm_map_t map, boolean_t user_wait)
429 {
430         int retval;
431
432         mtx_lock(&Giant);
433         vm_map_unlock(map);
434         retval = tsleep(&map->root, PVM, "vmmapw", 0);
435         mtx_unlock(&Giant);
436         return (retval);
437 }
438
439 /*
440  *      vm_map_wakeup:
441  */
442 void
443 vm_map_wakeup(vm_map_t map)
444 {
445
446         /*
447          * Acquire and release Giant to prevent a wakeup() from being
448          * performed (and lost) between the vm_map_unlock() and the
449          * tsleep() in vm_map_unlock_and_wait().
450          */
451         mtx_lock(&Giant);
452         mtx_unlock(&Giant);
453         wakeup(&map->root);
454 }
455
456 long
457 vmspace_resident_count(struct vmspace *vmspace)
458 {
459         return pmap_resident_count(vmspace_pmap(vmspace));
460 }
461
462 /*
463  *      vm_map_create:
464  *
465  *      Creates and returns a new empty VM map with
466  *      the given physical map structure, and having
467  *      the given lower and upper address bounds.
468  */
469 vm_map_t
470 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
471 {
472         vm_map_t result;
473
474         result = uma_zalloc(mapzone, M_WAITOK);
475         CTR1(KTR_VM, "vm_map_create: %p", result);
476         _vm_map_init(result, min, max);
477         result->pmap = pmap;
478         return (result);
479 }
480
481 /*
482  * Initialize an existing vm_map structure
483  * such as that in the vmspace structure.
484  * The pmap is set elsewhere.
485  */
486 static void
487 _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
488 {
489
490         map->header.next = map->header.prev = &map->header;
491         map->needs_wakeup = FALSE;
492         map->system_map = 0;
493         map->min_offset = min;
494         map->max_offset = max;
495         map->first_free = &map->header;
496         map->root = NULL;
497         map->timestamp = 0;
498 }
499
500 void
501 vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
502 {
503         _vm_map_init(map, min, max);
504         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
505 }
506
507 /*
508  *      vm_map_entry_dispose:   [ internal use only ]
509  *
510  *      Inverse of vm_map_entry_create.
511  */
512 static void
513 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
514 {
515         uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
516 }
517
518 /*
519  *      vm_map_entry_create:    [ internal use only ]
520  *
521  *      Allocates a VM map entry for insertion.
522  *      No entry fields are filled in.
523  */
524 static vm_map_entry_t
525 vm_map_entry_create(vm_map_t map)
526 {
527         vm_map_entry_t new_entry;
528
529         if (map->system_map)
530                 new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
531         else
532                 new_entry = uma_zalloc(mapentzone, M_WAITOK);
533         if (new_entry == NULL)
534                 panic("vm_map_entry_create: kernel resources exhausted");
535         return (new_entry);
536 }
537
538 /*
539  *      vm_map_entry_set_behavior:
540  *
541  *      Set the expected access behavior, either normal, random, or
542  *      sequential.
543  */
544 static __inline void
545 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
546 {
547         entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
548             (behavior & MAP_ENTRY_BEHAV_MASK);
549 }
550
551 /*
552  *      vm_map_entry_splay:
553  *
554  *      Implements Sleator and Tarjan's top-down splay algorithm.  Returns
555  *      the vm_map_entry containing the given address.  If, however, that
556  *      address is not found in the vm_map, returns a vm_map_entry that is
557  *      adjacent to the address, coming before or after it.
558  */
559 static vm_map_entry_t
560 vm_map_entry_splay(vm_offset_t address, vm_map_entry_t root)
561 {
562         struct vm_map_entry dummy;
563         vm_map_entry_t lefttreemax, righttreemin, y;
564
565         if (root == NULL)
566                 return (root);
567         lefttreemax = righttreemin = &dummy;
568         for (;; root = y) {
569                 if (address < root->start) {
570                         if ((y = root->left) == NULL)
571                                 break;
572                         if (address < y->start) {
573                                 /* Rotate right. */
574                                 root->left = y->right;
575                                 y->right = root;
576                                 root = y;
577                                 if ((y = root->left) == NULL)
578                                         break;
579                         }
580                         /* Link into the new root's right tree. */
581                         righttreemin->left = root;
582                         righttreemin = root;
583                 } else if (address >= root->end) {
584                         if ((y = root->right) == NULL)
585                                 break;
586                         if (address >= y->end) {
587                                 /* Rotate left. */
588                                 root->right = y->left;
589                                 y->left = root;
590                                 root = y;
591                                 if ((y = root->right) == NULL)
592                                         break;
593                         }
594                         /* Link into the new root's left tree. */
595                         lefttreemax->right = root;
596                         lefttreemax = root;
597                 } else
598                         break;
599         }
600         /* Assemble the new root. */
601         lefttreemax->right = root->left;
602         righttreemin->left = root->right;
603         root->left = dummy.right;
604         root->right = dummy.left;
605         return (root);
606 }
607
608 /*
609  *      vm_map_entry_{un,}link:
610  *
611  *      Insert/remove entries from maps.
612  */
613 static void
614 vm_map_entry_link(vm_map_t map,
615                   vm_map_entry_t after_where,
616                   vm_map_entry_t entry)
617 {
618
619         CTR4(KTR_VM,
620             "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
621             map->nentries, entry, after_where);
622         map->nentries++;
623         entry->prev = after_where;
624         entry->next = after_where->next;
625         entry->next->prev = entry;
626         after_where->next = entry;
627
628         if (after_where != &map->header) {
629                 if (after_where != map->root)
630                         vm_map_entry_splay(after_where->start, map->root);
631                 entry->right = after_where->right;
632                 entry->left = after_where;
633                 after_where->right = NULL;
634         } else {
635                 entry->right = map->root;
636                 entry->left = NULL;
637         }
638         map->root = entry;
639 }
640
641 static void
642 vm_map_entry_unlink(vm_map_t map,
643                     vm_map_entry_t entry)
644 {
645         vm_map_entry_t next, prev, root;
646
647         if (entry != map->root)
648                 vm_map_entry_splay(entry->start, map->root);
649         if (entry->left == NULL)
650                 root = entry->right;
651         else {
652                 root = vm_map_entry_splay(entry->start, entry->left);
653                 root->right = entry->right;
654         }
655         map->root = root;
656
657         prev = entry->prev;
658         next = entry->next;
659         next->prev = prev;
660         prev->next = next;
661         map->nentries--;
662         CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
663             map->nentries, entry);
664 }
665
666 /*
667  *      vm_map_lookup_entry:    [ internal use only ]
668  *
669  *      Finds the map entry containing (or
670  *      immediately preceding) the specified address
671  *      in the given map; the entry is returned
672  *      in the "entry" parameter.  The boolean
673  *      result indicates whether the address is
674  *      actually contained in the map.
675  */
676 boolean_t
677 vm_map_lookup_entry(
678         vm_map_t map,
679         vm_offset_t address,
680         vm_map_entry_t *entry)  /* OUT */
681 {
682         vm_map_entry_t cur;
683
684         cur = vm_map_entry_splay(address, map->root);
685         if (cur == NULL)
686                 *entry = &map->header;
687         else {
688                 map->root = cur;
689
690                 if (address >= cur->start) {
691                         *entry = cur;
692                         if (cur->end > address)
693                                 return (TRUE);
694                 } else
695                         *entry = cur->prev;
696         }
697         return (FALSE);
698 }
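
/*
 *      For example, looking up an address that falls in a gap between two
 *      entries returns FALSE and sets *entry to the entry immediately
 *      preceding the gap; if the address precedes every entry (or the map
 *      is empty), *entry is set to &map->header.
 */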
699
700 /*
701  *      vm_map_insert:
702  *
703  *      Inserts the given whole VM object into the target
704  *      map at the specified address range.  The object's
705  *      size should match that of the address range.
706  *
707  *      Requires that the map be locked, and leaves it so.
708  *
709  *      If object is non-NULL, ref count must be bumped by caller
710  *      prior to making call to account for the new entry.
711  */
712 int
713 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
714               vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
715               int cow)
716 {
717         vm_map_entry_t new_entry;
718         vm_map_entry_t prev_entry;
719         vm_map_entry_t temp_entry;
720         vm_eflags_t protoeflags;
721
722         /*
723          * Check that the start and end points are not bogus.
724          */
725         if ((start < map->min_offset) || (end > map->max_offset) ||
726             (start >= end))
727                 return (KERN_INVALID_ADDRESS);
728
729         /*
730          * Find the entry prior to the proposed starting address; if it's part
731          * of an existing entry, this range is bogus.
732          */
733         if (vm_map_lookup_entry(map, start, &temp_entry))
734                 return (KERN_NO_SPACE);
735
736         prev_entry = temp_entry;
737
738         /*
739          * Assert that the next entry doesn't overlap the end point.
740          */
741         if ((prev_entry->next != &map->header) &&
742             (prev_entry->next->start < end))
743                 return (KERN_NO_SPACE);
744
745         protoeflags = 0;
746
747         if (cow & MAP_COPY_ON_WRITE)
748                 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
749
750         if (cow & MAP_NOFAULT) {
751                 protoeflags |= MAP_ENTRY_NOFAULT;
752
753                 KASSERT(object == NULL,
754                         ("vm_map_insert: paradoxical MAP_NOFAULT request"));
755         }
756         if (cow & MAP_DISABLE_SYNCER)
757                 protoeflags |= MAP_ENTRY_NOSYNC;
758         if (cow & MAP_DISABLE_COREDUMP)
759                 protoeflags |= MAP_ENTRY_NOCOREDUMP;
760
761         if (object) {
762                 /*
763                  * When object is non-NULL, it could be shared with another
764                  * process.  We have to set or clear OBJ_ONEMAPPING 
765                  * appropriately.
766                  */
767                 mtx_lock(&Giant);
768                 if ((object->ref_count > 1) || (object->shadow_count != 0)) {
769                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
770                 }
771                 mtx_unlock(&Giant);
772         }
773         else if ((prev_entry != &map->header) &&
774                  (prev_entry->eflags == protoeflags) &&
775                  (prev_entry->end == start) &&
776                  (prev_entry->wired_count == 0) &&
777                  ((prev_entry->object.vm_object == NULL) ||
778                   vm_object_coalesce(prev_entry->object.vm_object,
779                                      OFF_TO_IDX(prev_entry->offset),
780                                      (vm_size_t)(prev_entry->end - prev_entry->start),
781                                      (vm_size_t)(end - prev_entry->end)))) {
782                 /*
783                  * We were able to extend the object.  Determine if we
784                  * can extend the previous map entry to include the 
785                  * new range as well.
786                  */
787                 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
788                     (prev_entry->protection == prot) &&
789                     (prev_entry->max_protection == max)) {
790                         map->size += (end - prev_entry->end);
791                         prev_entry->end = end;
792                         vm_map_simplify_entry(map, prev_entry);
793                         return (KERN_SUCCESS);
794                 }
795
796                 /*
797                  * If we can extend the object but cannot extend the
798                  * map entry, we have to create a new map entry.  We
799                  * must bump the ref count on the extended object to
800                  * account for it.  object may be NULL.
801                  */
802                 object = prev_entry->object.vm_object;
803                 offset = prev_entry->offset +
804                         (prev_entry->end - prev_entry->start);
805                 vm_object_reference(object);
806         }
807
808         /*
809          * NOTE: if conditionals fail, object can be NULL here.  This occurs
810          * in things like the buffer map where we manage kva but do not manage
811          * backing objects.
812          */
813
814         /*
815          * Create a new entry
816          */
817         new_entry = vm_map_entry_create(map);
818         new_entry->start = start;
819         new_entry->end = end;
820
821         new_entry->eflags = protoeflags;
822         new_entry->object.vm_object = object;
823         new_entry->offset = offset;
824         new_entry->avail_ssize = 0;
825
826         new_entry->inheritance = VM_INHERIT_DEFAULT;
827         new_entry->protection = prot;
828         new_entry->max_protection = max;
829         new_entry->wired_count = 0;
830
831         /*
832          * Insert the new entry into the list
833          */
834         vm_map_entry_link(map, prev_entry, new_entry);
835         map->size += new_entry->end - new_entry->start;
836
837         /*
838          * Update the free space hint
839          */
840         if ((map->first_free == prev_entry) &&
841             (prev_entry->end >= new_entry->start)) {
842                 map->first_free = new_entry;
843         }
844
845 #if 0
846         /*
847          * Temporarily removed to avoid MAP_STACK panic, due to
848          * MAP_STACK being a huge hack.  Will be added back in
849          * when MAP_STACK (and the user stack mapping) is fixed.
850          */
851         /*
852          * It may be possible to simplify the entry
853          */
854         vm_map_simplify_entry(map, new_entry);
855 #endif
856
857         if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
858                 mtx_lock(&Giant);
859                 pmap_object_init_pt(map->pmap, start,
860                                     object, OFF_TO_IDX(offset), end - start,
861                                     cow & MAP_PREFAULT_PARTIAL);
862                 mtx_unlock(&Giant);
863         }
864
865         return (KERN_SUCCESS);
866 }
867
868 /*
869  * Find sufficient space for `length' bytes in the given map, starting at
870  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
871  */
872 int
873 vm_map_findspace(
874         vm_map_t map,
875         vm_offset_t start,
876         vm_size_t length,
877         vm_offset_t *addr)
878 {
879         vm_map_entry_t entry, next;
880         vm_offset_t end;
881
882         if (start < map->min_offset)
883                 start = map->min_offset;
884         if (start > map->max_offset)
885                 return (1);
886
887         /*
888          * Look for the first possible address; if there's already something
889          * at this address, we have to start after it.
890          */
891         if (start == map->min_offset) {
892                 if ((entry = map->first_free) != &map->header)
893                         start = entry->end;
894         } else {
895                 vm_map_entry_t tmp;
896
897                 if (vm_map_lookup_entry(map, start, &tmp))
898                         start = tmp->end;
899                 entry = tmp;
900         }
901
902         /*
903          * Look through the rest of the map, trying to fit a new region in the
904          * gap between existing regions, or after the very last region.
905          */
906         for (;; start = (entry = next)->end) {
907                 /*
908                  * Find the end of the proposed new region.  Be sure we didn't
909                  * go beyond the end of the map, or wrap around the address;
910                  * if so, we lose.  Otherwise, if this is the last entry, or
911                  * if the proposed new region fits before the next entry, we
912                  * win.
913                  */
914                 end = start + length;
915                 if (end > map->max_offset || end < start)
916                         return (1);
917                 next = entry->next;
918                 if (next == &map->header || next->start >= end)
919                         break;
920         }
921         *addr = start;
922         if (map == kernel_map) {
923                 vm_offset_t ksize;
924                 if ((ksize = round_page(start + length)) > kernel_vm_end) {
925                         mtx_lock(&Giant);
926                         pmap_growkernel(ksize);
927                         mtx_unlock(&Giant);
928                 }
929         }
930         return (0);
931 }
932
933 /*
934  *      vm_map_find finds an unallocated region in the target address
935  *      map with the given length.  The search is defined to be
936  *      first-fit from the specified address; the region found is
937  *      returned in the same parameter.
938  *
939  *      If object is non-NULL, ref count must be bumped by caller
940  *      prior to making call to account for the new entry.
941  */
942 int
943 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
944             vm_offset_t *addr,  /* IN/OUT */
945             vm_size_t length, boolean_t find_space, vm_prot_t prot,
946             vm_prot_t max, int cow)
947 {
948         vm_offset_t start;
949         int result, s = 0;
950
951         start = *addr;
952
953         if (map == kmem_map)
954                 s = splvm();
955
956         vm_map_lock(map);
957         if (find_space) {
958                 if (vm_map_findspace(map, start, length, addr)) {
959                         vm_map_unlock(map);
960                         if (map == kmem_map)
961                                 splx(s);
962                         return (KERN_NO_SPACE);
963                 }
964                 start = *addr;
965         }
966         result = vm_map_insert(map, object, offset,
967                 start, start + length, prot, max, cow);
968         vm_map_unlock(map);
969
970         if (map == kmem_map)
971                 splx(s);
972
973         return (result);
974 }
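
/*
 *      A minimal usage sketch (hypothetical caller, not taken from the
 *      tree; "map", "object", and "size" are assumed to be in scope): map
 *      the object at any free address, letting vm_map_findspace() pick the
 *      location.  As noted above, the caller must already hold a reference
 *      on the object to account for the new entry.
 *
 *              vm_offset_t addr = vm_map_min(map);
 *
 *              if (vm_map_find(map, object, 0, &addr, size, TRUE,
 *                  VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)
 *                      vm_object_deallocate(object);
 *
 *      On success, "addr" holds the start of the new mapping.
 */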
975
976 /*
977  *      vm_map_simplify_entry:
978  *
979  *      Simplify the given map entry by merging with either neighbor.  This
980  *      routine also has the ability to merge with both neighbors.
981  *
982  *      The map must be locked.
983  *
984  *      This routine guarantees that the passed entry remains valid (though
985  *      possibly extended).  When merging, this routine may delete one or
986  *      both neighbors.
987  */
988 void
989 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
990 {
991         vm_map_entry_t next, prev;
992         vm_size_t prevsize, esize;
993
994         if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP))
995                 return;
996
997         prev = entry->prev;
998         if (prev != &map->header) {
999                 prevsize = prev->end - prev->start;
1000                 if ( (prev->end == entry->start) &&
1001                      (prev->object.vm_object == entry->object.vm_object) &&
1002                      (!prev->object.vm_object ||
1003                         (prev->offset + prevsize == entry->offset)) &&
1004                      (prev->eflags == entry->eflags) &&
1005                      (prev->protection == entry->protection) &&
1006                      (prev->max_protection == entry->max_protection) &&
1007                      (prev->inheritance == entry->inheritance) &&
1008                      (prev->wired_count == entry->wired_count)) {
1009                         if (map->first_free == prev)
1010                                 map->first_free = entry;
1011                         vm_map_entry_unlink(map, prev);
1012                         entry->start = prev->start;
1013                         entry->offset = prev->offset;
1014                         if (prev->object.vm_object)
1015                                 vm_object_deallocate(prev->object.vm_object);
1016                         vm_map_entry_dispose(map, prev);
1017                 }
1018         }
1019
1020         next = entry->next;
1021         if (next != &map->header) {
1022                 esize = entry->end - entry->start;
1023                 if ((entry->end == next->start) &&
1024                     (next->object.vm_object == entry->object.vm_object) &&
1025                      (!entry->object.vm_object ||
1026                         (entry->offset + esize == next->offset)) &&
1027                     (next->eflags == entry->eflags) &&
1028                     (next->protection == entry->protection) &&
1029                     (next->max_protection == entry->max_protection) &&
1030                     (next->inheritance == entry->inheritance) &&
1031                     (next->wired_count == entry->wired_count)) {
1032                         if (map->first_free == next)
1033                                 map->first_free = entry;
1034                         vm_map_entry_unlink(map, next);
1035                         entry->end = next->end;
1036                         if (next->object.vm_object)
1037                                 vm_object_deallocate(next->object.vm_object);
1038                         vm_map_entry_dispose(map, next);
1039                 }
1040         }
1041 }
1042 /*
1043  *      vm_map_clip_start:      [ internal use only ]
1044  *
1045  *      Asserts that the given entry begins at or after
1046  *      the specified address; if necessary,
1047  *      it splits the entry into two.
1048  */
1049 #define vm_map_clip_start(map, entry, startaddr) \
1050 { \
1051         if (startaddr > entry->start) \
1052                 _vm_map_clip_start(map, entry, startaddr); \
1053 }
1054
1055 /*
1056  *      This routine is called only when it is known that
1057  *      the entry must be split.
1058  */
1059 static void
1060 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
1061 {
1062         vm_map_entry_t new_entry;
1063
1064         /*
1065          * Split off the front portion -- note that we must insert the new
1066          * entry BEFORE this one, so that this entry has the specified
1067          * starting address.
1068          */
1069         vm_map_simplify_entry(map, entry);
1070
1071         /*
1072          * If there is no object backing this entry, we might as well create
1073          * one now.  If we defer it, an object can get created after the map
1074          * is clipped, and individual objects will be created for the split-up
1075          * map.  This is a bit of a hack, but is also about the best place to
1076          * put this improvement.
1077          */
1078         if (entry->object.vm_object == NULL && !map->system_map) {
1079                 vm_object_t object;
1080                 object = vm_object_allocate(OBJT_DEFAULT,
1081                                 atop(entry->end - entry->start));
1082                 entry->object.vm_object = object;
1083                 entry->offset = 0;
1084         }
1085
1086         new_entry = vm_map_entry_create(map);
1087         *new_entry = *entry;
1088
1089         new_entry->end = start;
1090         entry->offset += (start - entry->start);
1091         entry->start = start;
1092
1093         vm_map_entry_link(map, entry->prev, new_entry);
1094
1095         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1096                 vm_object_reference(new_entry->object.vm_object);
1097         }
1098 }
1099
1100 /*
1101  *      vm_map_clip_end:        [ internal use only ]
1102  *
1103  *      Asserts that the given entry ends at or before
1104  *      the specified address; if necessary,
1105  *      it splits the entry into two.
1106  */
1107 #define vm_map_clip_end(map, entry, endaddr) \
1108 { \
1109         if (endaddr < entry->end) \
1110                 _vm_map_clip_end(map, entry, endaddr); \
1111 }
1112
1113 /*
1114  *      This routine is called only when it is known that
1115  *      the entry must be split.
1116  */
1117 static void
1118 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1119 {
1120         vm_map_entry_t new_entry;
1121
1122         /*
1123          * If there is no object backing this entry, we might as well create
1124          * one now.  If we defer it, an object can get created after the map
1125          * is clipped, and individual objects will be created for the split-up
1126          * map.  This is a bit of a hack, but is also about the best place to
1127          * put this improvement.
1128          */
1129         if (entry->object.vm_object == NULL && !map->system_map) {
1130                 vm_object_t object;
1131                 object = vm_object_allocate(OBJT_DEFAULT,
1132                                 atop(entry->end - entry->start));
1133                 entry->object.vm_object = object;
1134                 entry->offset = 0;
1135         }
1136
1137         /*
1138          * Create a new entry and insert it AFTER the specified entry
1139          */
1140         new_entry = vm_map_entry_create(map);
1141         *new_entry = *entry;
1142
1143         new_entry->start = entry->end = end;
1144         new_entry->offset += (end - entry->start);
1145
1146         vm_map_entry_link(map, entry, new_entry);
1147
1148         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1149                 vm_object_reference(new_entry->object.vm_object);
1150         }
1151 }
1152
1153 /*
1154  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
1155  *
1156  *      Asserts that the starting and ending region
1157  *      addresses fall within the valid range of the map.
1158  */
1159 #define VM_MAP_RANGE_CHECK(map, start, end)             \
1160                 {                                       \
1161                 if (start < vm_map_min(map))            \
1162                         start = vm_map_min(map);        \
1163                 if (end > vm_map_max(map))              \
1164                         end = vm_map_max(map);          \
1165                 if (start > end)                        \
1166                         start = end;                    \
1167                 }
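
/*
 *      For example, with vm_map_min(map) == 0x1000 and vm_map_max(map) ==
 *      0x9000, a request for [0x0, 0xA000) is clamped to [0x1000, 0x9000);
 *      a request whose clamped start exceeds its clamped end collapses to
 *      an empty range.
 */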
1168
1169 /*
1170  *      vm_map_submap:          [ kernel use only ]
1171  *
1172  *      Mark the given range as handled by a subordinate map.
1173  *
1174  *      This range must have been created with vm_map_find,
1175  *      and no other operations may have been performed on this
1176  *      range prior to calling vm_map_submap.
1177  *
1178  *      Only a limited number of operations can be performed
1179  *      within this range after calling vm_map_submap:
1180  *              vm_fault
1181  *      [Don't try vm_map_copy!]
1182  *
1183  *      To remove a submapping, one must first remove the
1184  *      range from the superior map, and then destroy the
1185  *      submap (if desired).  [Better yet, don't try it.]
1186  */
1187 int
1188 vm_map_submap(
1189         vm_map_t map,
1190         vm_offset_t start,
1191         vm_offset_t end,
1192         vm_map_t submap)
1193 {
1194         vm_map_entry_t entry;
1195         int result = KERN_INVALID_ARGUMENT;
1196
1197         vm_map_lock(map);
1198
1199         VM_MAP_RANGE_CHECK(map, start, end);
1200
1201         if (vm_map_lookup_entry(map, start, &entry)) {
1202                 vm_map_clip_start(map, entry, start);
1203         } else
1204                 entry = entry->next;
1205
1206         vm_map_clip_end(map, entry, end);
1207
1208         if ((entry->start == start) && (entry->end == end) &&
1209             ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1210             (entry->object.vm_object == NULL)) {
1211                 entry->object.sub_map = submap;
1212                 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1213                 result = KERN_SUCCESS;
1214         }
1215         vm_map_unlock(map);
1216
1217         return (result);
1218 }
1219
1220 /*
1221  *      vm_map_protect:
1222  *
1223  *      Sets the protection of the specified address
1224  *      region in the target map.  If "set_max" is
1225  *      specified, the maximum protection is to be set;
1226  *      otherwise, only the current protection is affected.
1227  */
1228 int
1229 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1230                vm_prot_t new_prot, boolean_t set_max)
1231 {
1232         vm_map_entry_t current;
1233         vm_map_entry_t entry;
1234
1235         vm_map_lock(map);
1236
1237         VM_MAP_RANGE_CHECK(map, start, end);
1238
1239         if (vm_map_lookup_entry(map, start, &entry)) {
1240                 vm_map_clip_start(map, entry, start);
1241         } else {
1242                 entry = entry->next;
1243         }
1244
1245         /*
1246          * Make a first pass to check for protection violations.
1247          */
1248         current = entry;
1249         while ((current != &map->header) && (current->start < end)) {
1250                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1251                         vm_map_unlock(map);
1252                         return (KERN_INVALID_ARGUMENT);
1253                 }
1254                 if ((new_prot & current->max_protection) != new_prot) {
1255                         vm_map_unlock(map);
1256                         return (KERN_PROTECTION_FAILURE);
1257                 }
1258                 current = current->next;
1259         }
1260
1261         /*
1262          * Go back and fix up protections. [Note that clipping is not
1263          * necessary the second time.]
1264          */
1265         current = entry;
1266         while ((current != &map->header) && (current->start < end)) {
1267                 vm_prot_t old_prot;
1268
1269                 vm_map_clip_end(map, current, end);
1270
1271                 old_prot = current->protection;
1272                 if (set_max)
1273                         current->protection =
1274                             (current->max_protection = new_prot) &
1275                             old_prot;
1276                 else
1277                         current->protection = new_prot;
1278
1279                 /*
1280                  * Update physical map if necessary. Worry about copy-on-write
1281                  * here -- CHECK THIS XXX
1282                  */
1283                 if (current->protection != old_prot) {
1284                         mtx_lock(&Giant);
1285 #define MASK(entry)     (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1286                                                         VM_PROT_ALL)
1287                         pmap_protect(map->pmap, current->start,
1288                             current->end,
1289                             current->protection & MASK(current));
1290 #undef  MASK
1291                         mtx_unlock(&Giant);
1292                 }
1293                 vm_map_simplify_entry(map, current);
1294                 current = current->next;
1295         }
1296         vm_map_unlock(map);
1297         return (KERN_SUCCESS);
1298 }
1299
1300 /*
1301  *      vm_map_madvise:
1302  *
1303  *      This routine traverses a process's map handling the madvise
1304  *      system call.  Advisories are classified as either those affecting
1305  *      the vm_map_entry structure, or those affecting the underlying
1306  *      objects.
1307  */
1308 int
1309 vm_map_madvise(
1310         vm_map_t map,
1311         vm_offset_t start, 
1312         vm_offset_t end,
1313         int behav)
1314 {
1315         vm_map_entry_t current, entry;
1316         int modify_map = 0;
1317
1318         /*
1319          * Some madvise calls directly modify the vm_map_entry, in which case
1320          * we need to use an exclusive lock on the map and we need to perform 
1321          * various clipping operations.  Otherwise we only need a read-lock
1322          * on the map.
1323          */
1324         switch(behav) {
1325         case MADV_NORMAL:
1326         case MADV_SEQUENTIAL:
1327         case MADV_RANDOM:
1328         case MADV_NOSYNC:
1329         case MADV_AUTOSYNC:
1330         case MADV_NOCORE:
1331         case MADV_CORE:
1332                 modify_map = 1;
1333                 vm_map_lock(map);
1334                 break;
1335         case MADV_WILLNEED:
1336         case MADV_DONTNEED:
1337         case MADV_FREE:
1338                 vm_map_lock_read(map);
1339                 break;
1340         default:
1341                 return (KERN_INVALID_ARGUMENT);
1342         }
1343
1344         /*
1345          * Locate starting entry and clip if necessary.
1346          */
1347         VM_MAP_RANGE_CHECK(map, start, end);
1348
1349         if (vm_map_lookup_entry(map, start, &entry)) {
1350                 if (modify_map)
1351                         vm_map_clip_start(map, entry, start);
1352         } else {
1353                 entry = entry->next;
1354         }
1355
1356         if (modify_map) {
1357                 /*
1358                  * madvise behaviors that are implemented in the vm_map_entry.
1359                  *
1360                  * We clip the vm_map_entry so that behavioral changes are
1361                  * limited to the specified address range.
1362                  */
1363                 for (current = entry;
1364                      (current != &map->header) && (current->start < end);
1365                      current = current->next
1366                 ) {
1367                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1368                                 continue;
1369
1370                         vm_map_clip_end(map, current, end);
1371
1372                         switch (behav) {
1373                         case MADV_NORMAL:
1374                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1375                                 break;
1376                         case MADV_SEQUENTIAL:
1377                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1378                                 break;
1379                         case MADV_RANDOM:
1380                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1381                                 break;
1382                         case MADV_NOSYNC:
1383                                 current->eflags |= MAP_ENTRY_NOSYNC;
1384                                 break;
1385                         case MADV_AUTOSYNC:
1386                                 current->eflags &= ~MAP_ENTRY_NOSYNC;
1387                                 break;
1388                         case MADV_NOCORE:
1389                                 current->eflags |= MAP_ENTRY_NOCOREDUMP;
1390                                 break;
1391                         case MADV_CORE:
1392                                 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1393                                 break;
1394                         default:
1395                                 break;
1396                         }
1397                         vm_map_simplify_entry(map, current);
1398                 }
1399                 vm_map_unlock(map);
1400         } else {
1401                 vm_pindex_t pindex;
1402                 int count;
1403
1404                 /*
1405                  * madvise behaviors that are implemented in the underlying
1406                  * vm_object.
1407                  *
1408                  * Since we don't clip the vm_map_entry, we have to clip
1409                  * the vm_object pindex and count.
1410                  */
1411                 for (current = entry;
1412                      (current != &map->header) && (current->start < end);
1413                      current = current->next
1414                 ) {
1415                         vm_offset_t useStart;
1416
1417                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1418                                 continue;
1419
1420                         pindex = OFF_TO_IDX(current->offset);
1421                         count = atop(current->end - current->start);
1422                         useStart = current->start;
1423
1424                         if (current->start < start) {
1425                                 pindex += atop(start - current->start);
1426                                 count -= atop(start - current->start);
1427                                 useStart = start;
1428                         }
1429                         if (current->end > end)
1430                                 count -= atop(current->end - end);
1431
1432                         if (count <= 0)
1433                                 continue;
1434
1435                         vm_object_madvise(current->object.vm_object,
1436                                           pindex, count, behav);
1437                         if (behav == MADV_WILLNEED) {
1438                                 mtx_lock(&Giant);
1439                                 pmap_object_init_pt(
1440                                     map->pmap, 
1441                                     useStart,
1442                                     current->object.vm_object,
1443                                     pindex, 
1444                                     (count << PAGE_SHIFT),
1445                                     MAP_PREFAULT_MADVISE
1446                                 );
1447                                 mtx_unlock(&Giant);
1448                         }
1449                 }
1450                 vm_map_unlock_read(map);
1451         }
1452         return (0);
1453 }       
1454
1455
1456 /*
1457  *      vm_map_inherit:
1458  *
1459  *      Sets the inheritance of the specified address
1460  *      range in the target map.  Inheritance
1461  *      affects how the map will be shared with
1462  *      child maps at the time of vm_map_fork.
1463  */
1464 int
1465 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1466                vm_inherit_t new_inheritance)
1467 {
1468         vm_map_entry_t entry;
1469         vm_map_entry_t temp_entry;
1470
1471         switch (new_inheritance) {
1472         case VM_INHERIT_NONE:
1473         case VM_INHERIT_COPY:
1474         case VM_INHERIT_SHARE:
1475                 break;
1476         default:
1477                 return (KERN_INVALID_ARGUMENT);
1478         }
1479         vm_map_lock(map);
1480         VM_MAP_RANGE_CHECK(map, start, end);
1481         if (vm_map_lookup_entry(map, start, &temp_entry)) {
1482                 entry = temp_entry;
1483                 vm_map_clip_start(map, entry, start);
1484         } else
1485                 entry = temp_entry->next;
1486         while ((entry != &map->header) && (entry->start < end)) {
1487                 vm_map_clip_end(map, entry, end);
1488                 entry->inheritance = new_inheritance;
1489                 vm_map_simplify_entry(map, entry);
1490                 entry = entry->next;
1491         }
1492         vm_map_unlock(map);
1493         return (KERN_SUCCESS);
1494 }
1495
1496 /*
1497  *      vm_map_unwire:
1498  *
1499  *      Implements both kernel and user unwiring.
1500  */
1501 int
1502 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
1503         boolean_t user_unwire)
1504 {
1505         vm_map_entry_t entry, first_entry, tmp_entry;
1506         vm_offset_t saved_start;
1507         unsigned int last_timestamp;
1508         int rv;
1509         boolean_t need_wakeup, result;
1510
1511         vm_map_lock(map);
1512         VM_MAP_RANGE_CHECK(map, start, end);
1513         if (!vm_map_lookup_entry(map, start, &first_entry)) {
1514                 vm_map_unlock(map);
1515                 return (KERN_INVALID_ADDRESS);
1516         }
1517         last_timestamp = map->timestamp;
1518         entry = first_entry;
1519         while (entry != &map->header && entry->start < end) {
1520                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1521                         /*
1522                          * We have not yet clipped the entry.
1523                          */
1524                         saved_start = (start >= entry->start) ? start :
1525                             entry->start;
1526                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1527                         if (vm_map_unlock_and_wait(map, user_unwire)) {
1528                                 /*
1529                                  * Allow interruption of user unwiring?
1530                                  */
1531                         }
1532                         vm_map_lock(map);
1533                         if (last_timestamp+1 != map->timestamp) {
1534                                 /*
1535                                  * Look again for the entry because the map was
1536                                  * modified while it was unlocked.
1537                                  * Specifically, the entry may have been
1538                                  * clipped, merged, or deleted.
1539                                  */
1540                                 if (!vm_map_lookup_entry(map, saved_start,
1541                                     &tmp_entry)) {
1542                                         if (saved_start == start) {
1543                                                 /*
1544                                                  * first_entry has been deleted.
1545                                                  */
1546                                                 vm_map_unlock(map);
1547                                                 return (KERN_INVALID_ADDRESS);
1548                                         }
1549                                         end = saved_start;
1550                                         rv = KERN_INVALID_ADDRESS;
1551                                         goto done;
1552                                 }
1553                                 if (entry == first_entry)
1554                                         first_entry = tmp_entry;
1555                                 else
1556                                         first_entry = NULL;
1557                                 entry = tmp_entry;
1558                         }
1559                         last_timestamp = map->timestamp;
1560                         continue;
1561                 }
1562                 vm_map_clip_start(map, entry, start);
1563                 vm_map_clip_end(map, entry, end);
1564                 /*
1565                  * Mark the entry in case the map lock is released.  (See
1566                  * above.)
1567                  */
1568                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1569                 /*
1570                  * Check the map for holes in the specified region.
1571                  */
1572                 if (entry->end < end && (entry->next == &map->header ||
1573                     entry->next->start > entry->end)) {
1574                         end = entry->end;
1575                         rv = KERN_INVALID_ADDRESS;
1576                         goto done;
1577                 }
1578                 /*
1579                  * Require that the entry is wired.
1580                  */
1581                 if (entry->wired_count == 0 || (user_unwire &&
1582                     (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)) {
1583                         end = entry->end;
1584                         rv = KERN_INVALID_ARGUMENT;
1585                         goto done;
1586                 }
1587                 entry = entry->next;
1588         }
1589         rv = KERN_SUCCESS;
1590 done:
1591         need_wakeup = FALSE;
1592         if (first_entry == NULL) {
1593                 result = vm_map_lookup_entry(map, start, &first_entry);
1594                 KASSERT(result, ("vm_map_unwire: lookup failed"));
1595         }
1596         entry = first_entry;
1597         while (entry != &map->header && entry->start < end) {
1598                 if (rv == KERN_SUCCESS) {
1599                         if (user_unwire)
1600                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1601                         entry->wired_count--;
1602                         if (entry->wired_count == 0) {
1603                                 /*
1604                                  * Retain the map lock.
1605                                  */
1606                                 vm_fault_unwire(map, entry->start, entry->end);
1607                         }
1608                 }
1609                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
1610                         ("vm_map_unwire: in-transition flag missing"));
1611                 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1612                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1613                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1614                         need_wakeup = TRUE;
1615                 }
1616                 vm_map_simplify_entry(map, entry);
1617                 entry = entry->next;
1618         }
1619         vm_map_unlock(map);
1620         if (need_wakeup)
1621                 vm_map_wakeup(map);
1622         return (rv);
1623 }
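
/*
 * Usage sketch (editorial addition, not from the original source): user
 * unwiring, e.g. a munlock(2)-style path, passes user_unwire == TRUE, which
 * requires the entries to be user-wired and clears MAP_ENTRY_USER_WIRED;
 * kernel callers pass FALSE.  With "map", "addr" and "len" assumed from the
 * caller:
 *
 *      rv = vm_map_unwire(map, trunc_page(addr),
 *          round_page(addr + len), TRUE);
 */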
1624
1625 /*
1626  *      vm_map_wire:
1627  *
1628  *      Implements both kernel and user wiring.
1629  */
1630 int
1631 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
1632         boolean_t user_wire)
1633 {
1634         vm_map_entry_t entry, first_entry, tmp_entry;
1635         vm_offset_t saved_end, saved_start;
1636         unsigned int last_timestamp;
1637         int rv;
1638         boolean_t need_wakeup, result;
1639
1640         vm_map_lock(map);
1641         VM_MAP_RANGE_CHECK(map, start, end);
1642         if (!vm_map_lookup_entry(map, start, &first_entry)) {
1643                 vm_map_unlock(map);
1644                 return (KERN_INVALID_ADDRESS);
1645         }
1646         last_timestamp = map->timestamp;
1647         entry = first_entry;
1648         while (entry != &map->header && entry->start < end) {
1649                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1650                         /*
1651                          * We have not yet clipped the entry.
1652                          */
1653                         saved_start = (start >= entry->start) ? start :
1654                             entry->start;
1655                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1656                         if (vm_map_unlock_and_wait(map, user_wire)) {
1657                                 /*
1658                                  * Allow interruption of user wiring?
1659                                  */
1660                         }
1661                         vm_map_lock(map);
1662                         if (last_timestamp + 1 != map->timestamp) {
1663                                 /*
1664                                  * Look again for the entry because the map was
1665                                  * modified while it was unlocked.
1666                                  * Specifically, the entry may have been
1667                                  * clipped, merged, or deleted.
1668                                  */
1669                                 if (!vm_map_lookup_entry(map, saved_start,
1670                                     &tmp_entry)) {
1671                                         if (saved_start == start) {
1672                                                 /*
1673                                                  * first_entry has been deleted.
1674                                                  */
1675                                                 vm_map_unlock(map);
1676                                                 return (KERN_INVALID_ADDRESS);
1677                                         }
1678                                         end = saved_start;
1679                                         rv = KERN_INVALID_ADDRESS;
1680                                         goto done;
1681                                 }
1682                                 if (entry == first_entry)
1683                                         first_entry = tmp_entry;
1684                                 else
1685                                         first_entry = NULL;
1686                                 entry = tmp_entry;
1687                         }
1688                         last_timestamp = map->timestamp;
1689                         continue;
1690                 }
1691                 vm_map_clip_start(map, entry, start);
1692                 vm_map_clip_end(map, entry, end);
1693                 /*
1694                  * Mark the entry in case the map lock is released.  (See
1695                  * above.)
1696                  */
1697                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1698                 /*
1699                  * If the entry is not already wired, wire it in now.
1700                  */
1701                 if (entry->wired_count == 0) {
1702                         entry->wired_count++;
1703                         saved_start = entry->start;
1704                         saved_end = entry->end;
1705                         /*
1706                          * Release the map lock, relying on the in-transition
1707                          * mark.
1708                          */
1709                         vm_map_unlock(map);
1710                         if (user_wire)
1711                                 rv = vm_fault_user_wire(map, saved_start,
1712                                     saved_end);
1713                         else
1714                                 rv = vm_fault_wire(map, saved_start, saved_end);
1715                         vm_map_lock(map);
1716                         if (last_timestamp + 1 != map->timestamp) {
1717                                 /*
1718                                  * Look again for the entry because the map was
1719                                  * modified while it was unlocked.  The entry
1720                                  * may have been clipped, but NOT merged or
1721                                  * deleted.
1722                                  */
1723                                 result = vm_map_lookup_entry(map, saved_start,
1724                                     &tmp_entry);
1725                                 KASSERT(result, ("vm_map_wire: lookup failed"));
1726                                 if (entry == first_entry)
1727                                         first_entry = tmp_entry;
1728                                 else
1729                                         first_entry = NULL;
1730                                 entry = tmp_entry;
1731                                 while (entry->end < saved_end) {
1732                                         if (rv != KERN_SUCCESS) {
1733                                                 KASSERT(entry->wired_count == 1,
1734                                                     ("vm_map_wire: bad count"));
1735                                                 entry->wired_count = -1;
1736                                         }
1737                                         entry = entry->next;
1738                                 }
1739                         }
1740                         last_timestamp = map->timestamp;
1741                         if (rv != KERN_SUCCESS) {
1742                                 KASSERT(entry->wired_count == 1,
1743                                     ("vm_map_wire: bad count"));
1744                                 /*
1745                                  * Assign an out-of-range value to represent
1746                                  * the failure to wire this entry.
1747                                  */
1748                                 entry->wired_count = -1;
1749                                 end = entry->end;
1750                                 goto done;
1751                         }
1752                 } else if (!user_wire ||
1753                            (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
1754                         entry->wired_count++;
1755                 }
1756                 /*
1757                  * Check the map for holes in the specified region.
1758                  */
1759                 if (entry->end < end && (entry->next == &map->header ||
1760                     entry->next->start > entry->end)) {
1761                         end = entry->end;
1762                         rv = KERN_INVALID_ADDRESS;
1763                         goto done;
1764                 }
1765                 entry = entry->next;
1766         }
1767         rv = KERN_SUCCESS;
1768 done:
1769         need_wakeup = FALSE;
1770         if (first_entry == NULL) {
1771                 result = vm_map_lookup_entry(map, start, &first_entry);
1772                 KASSERT(result, ("vm_map_wire: lookup failed"));
1773         }
1774         entry = first_entry;
1775         while (entry != &map->header && entry->start < end) {
1776                 if (rv == KERN_SUCCESS) {
1777                         if (user_wire)
1778                                 entry->eflags |= MAP_ENTRY_USER_WIRED;
1779                 } else if (entry->wired_count == -1) {
1780                         /*
1781                          * Wiring failed on this entry.  Thus, unwiring is
1782                          * unnecessary.
1783                          */
1784                         entry->wired_count = 0;
1785                 } else {
1786                         if (!user_wire || (entry->wired_count == 1 &&
1787                             (entry->eflags & MAP_ENTRY_USER_WIRED) == 0))
1788                                 entry->wired_count--;
1789                         if (entry->wired_count == 0) {
1790                                 /*
1791                                  * Retain the map lock.
1792                                  */
1793                                 vm_fault_unwire(map, entry->start, entry->end);
1794                         }
1795                 }
1796                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
1797                         ("vm_map_wire: in-transition flag missing"));
1798                 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1799                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1800                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1801                         need_wakeup = TRUE;
1802                 }
1803                 vm_map_simplify_entry(map, entry);
1804                 entry = entry->next;
1805         }
1806         vm_map_unlock(map);
1807         if (need_wakeup)
1808                 vm_map_wakeup(map);
1809         return (rv);
1810 }
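
/*
 * Usage sketch (editorial addition, not from the original source): an
 * mlock(2)-style caller would pass user_wire == TRUE so the entries are
 * marked MAP_ENTRY_USER_WIRED; kernel wiring passes FALSE.  With "map",
 * "addr" and "len" assumed from the caller:
 *
 *      rv = vm_map_wire(map, trunc_page(addr),
 *          round_page(addr + len), TRUE);
 */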
1811
1812 /*
1813  * vm_map_clean
1814  *
1815  * Push any dirty cached pages in the address range to their pager.
1816  * If syncio is TRUE, dirty pages are written synchronously.
1817  * If invalidate is TRUE, any cached pages are freed as well.
1818  *
1819  * Returns an error if any part of the specified range is not mapped.
1820  */
1821 int
1822 vm_map_clean(
1823         vm_map_t map,
1824         vm_offset_t start,
1825         vm_offset_t end,
1826         boolean_t syncio,
1827         boolean_t invalidate)
1828 {
1829         vm_map_entry_t current;
1830         vm_map_entry_t entry;
1831         vm_size_t size;
1832         vm_object_t object;
1833         vm_ooffset_t offset;
1834
1835         GIANT_REQUIRED;
1836
1837         vm_map_lock_read(map);
1838         VM_MAP_RANGE_CHECK(map, start, end);
1839         if (!vm_map_lookup_entry(map, start, &entry)) {
1840                 vm_map_unlock_read(map);
1841                 return (KERN_INVALID_ADDRESS);
1842         }
1843         /*
1844          * Make a first pass to check for holes.
1845          */
1846         for (current = entry; current->start < end; current = current->next) {
1847                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1848                         vm_map_unlock_read(map);
1849                         return (KERN_INVALID_ARGUMENT);
1850                 }
1851                 if (end > current->end &&
1852                     (current->next == &map->header ||
1853                         current->end != current->next->start)) {
1854                         vm_map_unlock_read(map);
1855                         return (KERN_INVALID_ADDRESS);
1856                 }
1857         }
1858
1859         if (invalidate)
1860                 pmap_remove(vm_map_pmap(map), start, end);
1861         /*
1862          * Make a second pass, cleaning/uncaching pages from the indicated
1863          * objects as we go.
1864          */
1865         for (current = entry; current->start < end; current = current->next) {
1866                 offset = current->offset + (start - current->start);
1867                 size = (end <= current->end ? end : current->end) - start;
1868                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1869                         vm_map_t smap;
1870                         vm_map_entry_t tentry;
1871                         vm_size_t tsize;
1872
1873                         smap = current->object.sub_map;
1874                         vm_map_lock_read(smap);
1875                         (void) vm_map_lookup_entry(smap, offset, &tentry);
1876                         tsize = tentry->end - offset;
1877                         if (tsize < size)
1878                                 size = tsize;
1879                         object = tentry->object.vm_object;
1880                         offset = tentry->offset + (offset - tentry->start);
1881                         vm_map_unlock_read(smap);
1882                 } else {
1883                         object = current->object.vm_object;
1884                 }
1885                 /*
1886                  * Note that there is absolutely no sense in writing out
1887                  * anonymous objects, so we track down the vnode object
1888                  * to write out.
1889                  * We invalidate (remove) all pages from the address space
1890                  * anyway, for semantic correctness.
1891                  *
1892                  * note: certain anonymous maps, such as MAP_NOSYNC maps,
1893                  * may start out with a NULL object.
1894                  */
1895                 while (object && object->backing_object) {
1896                         object = object->backing_object;
1897                         offset += object->backing_object_offset;
1898                         if (object->size < OFF_TO_IDX(offset + size))
1899                                 size = IDX_TO_OFF(object->size) - offset;
1900                 }
1901                 if (object && (object->type == OBJT_VNODE) && 
1902                     (current->protection & VM_PROT_WRITE)) {
1903                         /*
1904                          * Flush pages if writing is allowed, invalidate them
1905                          * if invalidation requested.  Pages undergoing I/O
1906                          * will be ignored by vm_object_page_remove().
1907                          *
1908                          * We cannot lock the vnode and then wait for paging
1909                          * to complete without deadlocking against vm_fault.
1910                          * Instead we simply call vm_object_page_remove() and
1911                          * allow it to block internally on a page-by-page 
1912                          * basis when it encounters pages undergoing async 
1913                          * I/O.
1914                          */
1915                         int flags;
1916
1917                         vm_object_reference(object);
1918                         vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread);
1919                         flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1920                         flags |= invalidate ? OBJPC_INVAL : 0;
1921                         vm_object_page_clean(object,
1922                             OFF_TO_IDX(offset),
1923                             OFF_TO_IDX(offset + size + PAGE_MASK),
1924                             flags);
1925                         if (invalidate) {
1926                                 /*vm_object_pip_wait(object, "objmcl");*/
1927                                 vm_object_page_remove(object,
1928                                     OFF_TO_IDX(offset),
1929                                     OFF_TO_IDX(offset + size + PAGE_MASK),
1930                                     FALSE);
1931                         }
1932                         VOP_UNLOCK(object->handle, 0, curthread);
1933                         vm_object_deallocate(object);
1934                 }
1935                 start += size;
1936         }
1937
1938         vm_map_unlock_read(map);
1939         return (KERN_SUCCESS);
1940 }
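
/*
 * Usage sketch (editorial addition, not from the original source): an
 * msync(2)-style caller holding Giant (see GIANT_REQUIRED above), with
 * "map", "addr" and "len" assumed from the caller, could flush a range
 * synchronously and drop the cached pages with:
 *
 *      rv = vm_map_clean(map, trunc_page(addr),
 *          round_page(addr + len), TRUE, TRUE);
 */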
1941
1942 /*
1943  *      vm_map_entry_unwire:    [ internal use only ]
1944  *
1945  *      Make the region specified by this entry pageable.
1946  *
1947  *      The map in question should be locked.
1948  *      [This is the reason for this routine's existence.]
1949  */
1950 static void 
1951 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
1952 {
1953         vm_fault_unwire(map, entry->start, entry->end);
1954         entry->wired_count = 0;
1955 }
1956
1957 /*
1958  *      vm_map_entry_delete:    [ internal use only ]
1959  *
1960  *      Deallocate the given entry from the target map.
1961  */
1962 static void
1963 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
1964 {
1965         vm_map_entry_unlink(map, entry);
1966         map->size -= entry->end - entry->start;
1967
1968         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1969                 vm_object_deallocate(entry->object.vm_object);
1970         }
1971
1972         vm_map_entry_dispose(map, entry);
1973 }
1974
1975 /*
1976  *      vm_map_delete:  [ internal use only ]
1977  *
1978  *      Deallocates the given address range from the target
1979  *      map.
1980  */
1981 int
1982 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
1983 {
1984         vm_object_t object;
1985         vm_map_entry_t entry;
1986         vm_map_entry_t first_entry;
1987
1988         /*
1989          * Find the start of the region, and clip it
1990          */
1991         if (!vm_map_lookup_entry(map, start, &first_entry))
1992                 entry = first_entry->next;
1993         else {
1994                 entry = first_entry;
1995                 vm_map_clip_start(map, entry, start);
1996         }
1997
1998         /*
1999          * Save the free space hint
2000          */
2001         if (entry == &map->header) {
2002                 map->first_free = &map->header;
2003         } else if (map->first_free->start >= start) {
2004                 map->first_free = entry->prev;
2005         }
2006
2007         /*
2008          * Step through all entries in this region
2009          */
2010         while ((entry != &map->header) && (entry->start < end)) {
2011                 vm_map_entry_t next;
2012                 vm_offset_t s, e;
2013                 vm_pindex_t offidxstart, offidxend, count;
2014
2015                 /*
2016                  * Wait for wiring or unwiring of an entry to complete.
2017                  */
2018                 if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0) {
2019                         unsigned int last_timestamp;
2020                         vm_offset_t saved_start;
2021                         vm_map_entry_t tmp_entry;
2022
2023                         saved_start = entry->start;
2024                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2025                         last_timestamp = map->timestamp;
2026                         (void) vm_map_unlock_and_wait(map, FALSE);
2027                         vm_map_lock(map);
2028                         if (last_timestamp + 1 != map->timestamp) {
2029                                 /*
2030                                  * Look again for the entry because the map was
2031                                  * modified while it was unlocked.
2032                                  * Specifically, the entry may have been
2033                                  * clipped, merged, or deleted.
2034                                  */
2035                                 if (!vm_map_lookup_entry(map, saved_start,
2036                                                          &tmp_entry))
2037                                         entry = tmp_entry->next;
2038                                 else {
2039                                         entry = tmp_entry;
2040                                         vm_map_clip_start(map, entry,
2041                                                           saved_start);
2042                                 }
2043                         }
2044                         continue;
2045                 }
2046                 vm_map_clip_end(map, entry, end);
2047
2048                 s = entry->start;
2049                 e = entry->end;
2050                 next = entry->next;
2051
2052                 offidxstart = OFF_TO_IDX(entry->offset);
2053                 count = OFF_TO_IDX(e - s);
2054                 object = entry->object.vm_object;
2055
2056                 /*
2057                  * Unwire before removing addresses from the pmap; otherwise,
2058                  * unwiring will put the entries back in the pmap.
2059                  */
2060                 if (entry->wired_count != 0) {
2061                         vm_map_entry_unwire(map, entry);
2062                 }
2063
2064                 offidxend = offidxstart + count;
2065
2066                 if ((object == kernel_object) || (object == kmem_object)) {
2067                         vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2068                 } else {
2069                         mtx_lock(&Giant);
2070                         pmap_remove(map->pmap, s, e);
2071                         if (object != NULL &&
2072                             object->ref_count != 1 &&
2073                             (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2074                             (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2075                                 vm_object_collapse(object);
2076                                 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2077                                 if (object->type == OBJT_SWAP) {
2078                                         swap_pager_freespace(object, offidxstart, count);
2079                                 }
2080                                 if (offidxend >= object->size &&
2081                                     offidxstart < object->size) {
2082                                         object->size = offidxstart;
2083                                 }
2084                         }
2085                         mtx_unlock(&Giant);
2086                 }
2087
2088                 /*
2089                  * Delete the entry (which may delete the object) only after
2090                  * removing all pmap entries pointing to its pages.
2091                  * (Otherwise, its page frames may be reallocated, and any
2092                  * modify bits will be set in the wrong object!)
2093                  */
2094                 vm_map_entry_delete(map, entry);
2095                 entry = next;
2096         }
2097         return (KERN_SUCCESS);
2098 }
2099
2100 /*
2101  *      vm_map_remove:
2102  *
2103  *      Remove the given address range from the target map.
2104  *      This is the exported form of vm_map_delete.
2105  */
2106 int
2107 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2108 {
2109         int result, s = 0;
2110
2111         if (map == kmem_map)
2112                 s = splvm();
2113
2114         vm_map_lock(map);
2115         VM_MAP_RANGE_CHECK(map, start, end);
2116         result = vm_map_delete(map, start, end);
2117         vm_map_unlock(map);
2118
2119         if (map == kmem_map)
2120                 splx(s);
2121
2122         return (result);
2123 }
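
/*
 * Usage sketch (editorial addition, not from the original source): a
 * munmap(2)-style caller, with "map", "addr" and "len" assumed from the
 * caller, would tear down a range through this exported interface:
 *
 *      (void) vm_map_remove(map, trunc_page(addr),
 *          round_page(addr + len));
 */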
2124
2125 /*
2126  *      vm_map_check_protection:
2127  *
2128  *      Assert that the target map allows the specified
2129  *      privilege on the entire address region given.
2130  *      The entire region must be allocated.
2131  */
2132 boolean_t
2133 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2134                         vm_prot_t protection)
2135 {
2136         vm_map_entry_t entry;
2137         vm_map_entry_t tmp_entry;
2138
2139         vm_map_lock_read(map);
2140         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2141                 vm_map_unlock_read(map);
2142                 return (FALSE);
2143         }
2144         entry = tmp_entry;
2145
2146         while (start < end) {
2147                 if (entry == &map->header) {
2148                         vm_map_unlock_read(map);
2149                         return (FALSE);
2150                 }
2151                 /*
2152                  * No holes allowed!
2153                  */
2154                 if (start < entry->start) {
2155                         vm_map_unlock_read(map);
2156                         return (FALSE);
2157                 }
2158                 /*
2159                  * Check protection associated with entry.
2160                  */
2161                 if ((entry->protection & protection) != protection) {
2162                         vm_map_unlock_read(map);
2163                         return (FALSE);
2164                 }
2165                 /* go to next entry */
2166                 start = entry->end;
2167                 entry = entry->next;
2168         }
2169         vm_map_unlock_read(map);
2170         return (TRUE);
2171 }
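
/*
 * Usage sketch (editorial addition, not from the original source): a caller
 * can verify that an entire range is mapped with at least read access before
 * operating on it; "map", "addr" and "len" are assumed from the caller:
 *
 *      if (!vm_map_check_protection(map, trunc_page(addr),
 *          round_page(addr + len), VM_PROT_READ))
 *              return (EFAULT);
 */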
2172
2173 /*
2174  *      vm_map_copy_entry:
2175  *
2176  *      Copies the contents of the source entry to the destination
2177  *      entry.  The entries *must* be aligned properly.
2178  */
2179 static void
2180 vm_map_copy_entry(
2181         vm_map_t src_map,
2182         vm_map_t dst_map,
2183         vm_map_entry_t src_entry, 
2184         vm_map_entry_t dst_entry)
2185 {
2186         vm_object_t src_object;
2187
2188         if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2189                 return;
2190
2191         if (src_entry->wired_count == 0) {
2192
2193                 /*
2194                  * If the source entry is marked needs_copy, it is already
2195                  * write-protected.
2196                  */
2197                 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2198                         pmap_protect(src_map->pmap,
2199                             src_entry->start,
2200                             src_entry->end,
2201                             src_entry->protection & ~VM_PROT_WRITE);
2202                 }
2203
2204                 /*
2205                  * Make a copy of the object.
2206                  */
2207                 if ((src_object = src_entry->object.vm_object) != NULL) {
2208
2209                         if ((src_object->handle == NULL) &&
2210                                 (src_object->type == OBJT_DEFAULT ||
2211                                  src_object->type == OBJT_SWAP)) {
2212                                 vm_object_collapse(src_object);
2213                                 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2214                                         vm_object_split(src_entry);
2215                                         src_object = src_entry->object.vm_object;
2216                                 }
2217                         }
2218
2219                         vm_object_reference(src_object);
2220                         vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2221                         dst_entry->object.vm_object = src_object;
2222                         src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2223                         dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2224                         dst_entry->offset = src_entry->offset;
2225                 } else {
2226                         dst_entry->object.vm_object = NULL;
2227                         dst_entry->offset = 0;
2228                 }
2229
2230                 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2231                     dst_entry->end - dst_entry->start, src_entry->start);
2232         } else {
2233                 /*
2234                  * Of course, wired down pages can't be set copy-on-write.
2235                  * Cause wired pages to be copied into the new map by
2236                  * simulating faults (the new pages are pageable)
2237                  */
2238                 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2239         }
2240 }
2241
2242 /*
2243  * vmspace_fork:
2244  * Create a new process vmspace structure and vm_map
2245  * based on those of an existing process.  The new map
2246  * is based on the old map, according to the inheritance
2247  * values on the regions in that map.
2248  *
2249  * The source map must not be locked.
2250  */
2251 struct vmspace *
2252 vmspace_fork(struct vmspace *vm1)
2253 {
2254         struct vmspace *vm2;
2255         vm_map_t old_map = &vm1->vm_map;
2256         vm_map_t new_map;
2257         vm_map_entry_t old_entry;
2258         vm_map_entry_t new_entry;
2259         vm_object_t object;
2260
2261         GIANT_REQUIRED;
2262
2263         vm_map_lock(old_map);
2264         old_map->infork = 1;
2265
2266         vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2267         bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2268             (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
2269         new_map = &vm2->vm_map; /* XXX */
2270         new_map->timestamp = 1;
2271
2272         old_entry = old_map->header.next;
2273
2274         while (old_entry != &old_map->header) {
2275                 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2276                         panic("vm_map_fork: encountered a submap");
2277
2278                 switch (old_entry->inheritance) {
2279                 case VM_INHERIT_NONE:
2280                         break;
2281
2282                 case VM_INHERIT_SHARE:
2283                         /*
2284                          * Clone the entry, creating the shared object if necessary.
2285                          */
2286                         object = old_entry->object.vm_object;
2287                         if (object == NULL) {
2288                                 object = vm_object_allocate(OBJT_DEFAULT,
2289                                         atop(old_entry->end - old_entry->start));
2290                                 old_entry->object.vm_object = object;
2291                                 old_entry->offset = (vm_offset_t) 0;
2292                         }
2293
2294                         /*
2295                          * Add the reference before calling vm_object_shadow
2296                          * to ensure that a shadow object is created.
2297                          */
2298                         vm_object_reference(object);
2299                         if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2300                                 vm_object_shadow(&old_entry->object.vm_object,
2301                                         &old_entry->offset,
2302                                         atop(old_entry->end - old_entry->start));
2303                                 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2304                                 /* Transfer the second reference too. */
2305                                 vm_object_reference(
2306                                     old_entry->object.vm_object);
2307                                 vm_object_deallocate(object);
2308                                 object = old_entry->object.vm_object;
2309                         }
2310                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
2311
2312                         /*
2313                          * Clone the entry, referencing the shared object.
2314                          */
2315                         new_entry = vm_map_entry_create(new_map);
2316                         *new_entry = *old_entry;
2317                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2318                         new_entry->wired_count = 0;
2319
2320                         /*
2321                          * Insert the entry into the new map -- we know we're
2322                          * inserting at the end of the new map.
2323                          */
2324                         vm_map_entry_link(new_map, new_map->header.prev,
2325                             new_entry);
2326
2327                         /*
2328                          * Update the physical map
2329                          */
2330                         pmap_copy(new_map->pmap, old_map->pmap,
2331                             new_entry->start,
2332                             (old_entry->end - old_entry->start),
2333                             old_entry->start);
2334                         break;
2335
2336                 case VM_INHERIT_COPY:
2337                         /*
2338                          * Clone the entry and link into the map.
2339                          */
2340                         new_entry = vm_map_entry_create(new_map);
2341                         *new_entry = *old_entry;
2342                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2343                         new_entry->wired_count = 0;
2344                         new_entry->object.vm_object = NULL;
2345                         vm_map_entry_link(new_map, new_map->header.prev,
2346                             new_entry);
2347                         vm_map_copy_entry(old_map, new_map, old_entry,
2348                             new_entry);
2349                         break;
2350                 }
2351                 old_entry = old_entry->next;
2352         }
2353
2354         new_map->size = old_map->size;
2355         old_map->infork = 0;
2356         vm_map_unlock(old_map);
2357
2358         return (vm2);
2359 }
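
/*
 * Usage sketch (editorial addition, not from the original source): the fork
 * path is the expected caller; with parent "p1" and child "p2" assumed, a
 * hypothetical use is:
 *
 *      p2->p_vmspace = vmspace_fork(p1->p_vmspace);
 *
 * Giant must be held (GIANT_REQUIRED above) and the source map must not be
 * locked by the caller.
 */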
2360
2361 int
2362 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2363               vm_prot_t prot, vm_prot_t max, int cow)
2364 {
2365         vm_map_entry_t prev_entry;
2366         vm_map_entry_t new_stack_entry;
2367         vm_size_t      init_ssize;
2368         int            rv;
2369
2370         if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2371                 return (KERN_NO_SPACE);
2372
2373         if (max_ssize < sgrowsiz)
2374                 init_ssize = max_ssize;
2375         else
2376                 init_ssize = sgrowsiz;
2377
2378         vm_map_lock(map);
2379
2380         /* If addr is already mapped, no go */
2381         if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2382                 vm_map_unlock(map);
2383                 return (KERN_NO_SPACE);
2384         }
2385
2386         /* If we would blow our VMEM resource limit, no go */
2387         if (map->size + init_ssize >
2388             curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2389                 vm_map_unlock(map);
2390                 return (KERN_NO_SPACE);
2391         }
2392
2393         /* If we can't accommodate max_ssize in the current mapping,
2394          * no go.  However, we need to be aware that subsequent user
2395          * mappings might map into the space we have reserved for
2396          * the stack, and currently this space is not protected.
2397          *
2398          * Hopefully we will at least detect this condition
2399          * when we try to grow the stack.
2400          */
2401         if ((prev_entry->next != &map->header) &&
2402             (prev_entry->next->start < addrbos + max_ssize)) {
2403                 vm_map_unlock(map);
2404                 return (KERN_NO_SPACE);
2405         }
2406
2407         /* We initially map a stack of only init_ssize.  We will
2408          * grow it as needed later.  Since this is to be a
2409          * grow-down stack, we map at the top of the range.
2410          *
2411          * Note: we would normally expect prot and max to be
2412          * VM_PROT_ALL, and cow to be 0.  Possibly we should
2413          * eliminate these as input parameters, and just
2414          * pass these values here in the insert call.
2415          */
2416         rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2417                            addrbos + max_ssize, prot, max, cow);
2418
2419         /* Now set the avail_ssize amount */
2420         if (rv == KERN_SUCCESS) {
2421                 if (prev_entry != &map->header)
2422                         vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2423                 new_stack_entry = prev_entry->next;
2424                 if (new_stack_entry->end   != addrbos + max_ssize ||
2425                     new_stack_entry->start != addrbos + max_ssize - init_ssize)
2426                         panic ("Bad entry start/end for new stack entry");
2427                 else 
2428                         new_stack_entry->avail_ssize = max_ssize - init_ssize;
2429         }
2430
2431         vm_map_unlock(map);
2432         return (rv);
2433 }
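
/*
 * Usage sketch (editorial addition, not from the original source): exec-time
 * stack setup is the typical caller.  With "map", a bottom-of-stack address
 * "bos", and a maximum stack size "maxsize" assumed from the caller, a
 * grow-down stack could be reserved with:
 *
 *      rv = vm_map_stack(map, bos, maxsize, VM_PROT_ALL, VM_PROT_ALL, 0);
 */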
2434
2435 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2436  * desired address is already mapped, or if we successfully grow
2437  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2438  * stack range (this is strange, but preserves compatibility with
2439  * the grow function in vm_machdep.c).
2440  */
2441 int
2442 vm_map_growstack (struct proc *p, vm_offset_t addr)
2443 {
2444         vm_map_entry_t prev_entry;
2445         vm_map_entry_t stack_entry;
2446         vm_map_entry_t new_stack_entry;
2447         struct vmspace *vm = p->p_vmspace;
2448         vm_map_t map = &vm->vm_map;
2449         vm_offset_t    end;
2450         int      grow_amount;
2451         int      rv;
2452         int      is_procstack;
2453
2454         GIANT_REQUIRED;
2455         
2456 Retry:
2457         vm_map_lock_read(map);
2458
2459         /* If addr is already in the entry range, no need to grow. */
2460         if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2461                 vm_map_unlock_read(map);
2462                 return (KERN_SUCCESS);
2463         }
2464
2465         if ((stack_entry = prev_entry->next) == &map->header) {
2466                 vm_map_unlock_read(map);
2467                 return (KERN_SUCCESS);
2468         } 
2469         if (prev_entry == &map->header) 
2470                 end = stack_entry->start - stack_entry->avail_ssize;
2471         else
2472                 end = prev_entry->end;
2473
2474         /* This next test mimics the old grow function in vm_machdep.c.
2475          * It really doesn't quite make sense, but we do it anyway
2476          * for compatibility.
2477          *
2478          * If the stack is not growable, return success.  This signals
2479          * the caller to proceed as it normally would with ordinary VM.
2480          */
2481         if (stack_entry->avail_ssize < 1 ||
2482             addr >= stack_entry->start ||
2483             addr <  stack_entry->start - stack_entry->avail_ssize) {
2484                 vm_map_unlock_read(map);
2485                 return (KERN_SUCCESS);
2486         } 
2487         
2488         /* Find the minimum grow amount */
2489         grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2490         if (grow_amount > stack_entry->avail_ssize) {
2491                 vm_map_unlock_read(map);
2492                 return (KERN_NO_SPACE);
2493         }
2494
2495         /* If there is no longer enough space between the entries,
2496          * fail, and adjust the available space.  Note: this
2497          * should only happen if the user has mapped into the
2498          * stack area after the stack was created, and is
2499          * probably an error.
2500          *
2501          * This also effectively destroys any guard page the user
2502          * might have intended by limiting the stack size.
2503          */
2504         if (grow_amount > stack_entry->start - end) {
2505                 if (vm_map_lock_upgrade(map))
2506                         goto Retry;
2507
2508                 stack_entry->avail_ssize = stack_entry->start - end;
2509
2510                 vm_map_unlock(map);
2511                 return (KERN_NO_SPACE);
2512         }
2513
2514         is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2515
2516         /* If this is the main process stack, see if we're over the 
2517          * stack limit.
2518          */
2519         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2520                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2521                 vm_map_unlock_read(map);
2522                 return (KERN_NO_SPACE);
2523         }
2524
2525         /* Round up the grow amount to a multiple of sgrowsiz */
2526         grow_amount = roundup (grow_amount, sgrowsiz);
2527         if (grow_amount > stack_entry->avail_ssize) {
2528                 grow_amount = stack_entry->avail_ssize;
2529         }
2530         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2531                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2532                 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2533                               ctob(vm->vm_ssize);
2534         }
2535
2536         /* If we would blow our VMEM resource limit, no go */
2537         if (map->size + grow_amount >
2538             curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2539                 vm_map_unlock_read(map);
2540                 return (KERN_NO_SPACE);
2541         }
2542
2543         if (vm_map_lock_upgrade(map))
2544                 goto Retry;
2545
2546         /* Get the preliminary new entry start value */
2547         addr = stack_entry->start - grow_amount;
2548
2549         /* If this puts us into the previous entry, cut back our growth
2550          * to the available space.  Also, see the note above.
2551          */
2552         if (addr < end) {
2553                 stack_entry->avail_ssize = stack_entry->start - end;
2554                 addr = end;
2555         }
2556
2557         rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2558                            VM_PROT_ALL,
2559                            VM_PROT_ALL,
2560                            0);
2561
2562         /* Adjust the available stack space by the amount we grew. */
2563         if (rv == KERN_SUCCESS) {
2564                 if (prev_entry != &map->header)
2565                         vm_map_clip_end(map, prev_entry, addr);
2566                 new_stack_entry = prev_entry->next;
2567                 if (new_stack_entry->end   != stack_entry->start  ||
2568                     new_stack_entry->start != addr)
2569                         panic ("Bad stack grow start/end in new stack entry");
2570                 else {
2571                         new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2572                                                         (new_stack_entry->end -
2573                                                          new_stack_entry->start);
2574                         if (is_procstack)
2575                                 vm->vm_ssize += btoc(new_stack_entry->end -
2576                                                      new_stack_entry->start);
2577                 }
2578         }
2579
2580         vm_map_unlock(map);
2581         return (rv);
2582 }
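
/*
 * Usage sketch (editorial addition, not from the original source): the page
 * fault path would typically try to grow the stack before treating a fault
 * below it as fatal; with "p" and the faulting address "va" assumed from the
 * caller:
 *
 *      if (vm_map_growstack(p, va) != KERN_SUCCESS)
 *              (deliver the fault to the process)
 */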
2583
2584 /*
2585  * Unshare the specified VM space for exec.  If other processes are
2586  * mapped to it, then create a new one.  The new vmspace starts out empty.
2587  */
2588 void
2589 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
2590 {
2591         struct vmspace *oldvmspace = p->p_vmspace;
2592         struct vmspace *newvmspace;
2593
2594         GIANT_REQUIRED;
2595         newvmspace = vmspace_alloc(minuser, maxuser);
2596         bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2597             (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2598         /*
2599          * This code is written like this for prototype purposes.  The
2600          * goal is to avoid running down the vmspace here, but let the
2601          * other processes that are still using the vmspace finally
2602          * run it down.  Even though there is little or no chance of blocking
2603          * here, it is a good idea to keep this form for future mods.
2604          */
2605         p->p_vmspace = newvmspace;
2606         pmap_pinit2(vmspace_pmap(newvmspace));
2607         vmspace_free(oldvmspace);
2608         if (p == curthread->td_proc)            /* XXXKSE ? */
2609                 pmap_activate(curthread);
2610 }
2611
2612 /*
2613  * Unshare the specified VM space for forcing COW.  This
2614  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2615  */
2616 void
2617 vmspace_unshare(struct proc *p)
2618 {
2619         struct vmspace *oldvmspace = p->p_vmspace;
2620         struct vmspace *newvmspace;
2621
2622         GIANT_REQUIRED;
2623         if (oldvmspace->vm_refcnt == 1)
2624                 return;
2625         newvmspace = vmspace_fork(oldvmspace);
2626         p->p_vmspace = newvmspace;
2627         pmap_pinit2(vmspace_pmap(newvmspace));
2628         vmspace_free(oldvmspace);
2629         if (p == curthread->td_proc)            /* XXXKSE ? */
2630                 pmap_activate(curthread);
2631 }
2632
2633 /*
2634  *      vm_map_lookup:
2635  *
2636  *      Finds the VM object, offset, and
2637  *      protection for a given virtual address in the
2638  *      specified map, assuming a page fault of the
2639  *      type specified.
2640  *
2641  *      Leaves the map in question locked for read; return
2642  *      values are guaranteed until a vm_map_lookup_done
2643  *      call is performed.  Note that the map argument
2644  *      is in/out; the returned map must be used in
2645  *      the call to vm_map_lookup_done.
2646  *
2647  *      A handle (out_entry) is returned for use in
2648  *      vm_map_lookup_done, to make that fast.
2649  *
2650  *      If a lookup is requested with "write protection"
2651  *      specified, the map may be changed to perform virtual
2652  *      copying operations, although the data referenced will
2653  *      remain the same.
2654  */
2655 int
2656 vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
2657               vm_offset_t vaddr,
2658               vm_prot_t fault_typea,
2659               vm_map_entry_t *out_entry,        /* OUT */
2660               vm_object_t *object,              /* OUT */
2661               vm_pindex_t *pindex,              /* OUT */
2662               vm_prot_t *out_prot,              /* OUT */
2663               boolean_t *wired)                 /* OUT */
2664 {
2665         vm_map_entry_t entry;
2666         vm_map_t map = *var_map;
2667         vm_prot_t prot;
2668         vm_prot_t fault_type = fault_typea;
2669
2670 RetryLookup:;
2671         /*
2672          * Lookup the faulting address.
2673          */
2674
2675         vm_map_lock_read(map);
2676 #define RETURN(why) \
2677                 { \
2678                 vm_map_unlock_read(map); \
2679                 return (why); \
2680                 }
2681
2682         /*
2683          * If the map has an interesting hint, try it before calling the
2684          * full-blown lookup routine.
2685          */
2686         entry = map->root;
2687         *out_entry = entry;
2688         if (entry == NULL ||
2689             (vaddr < entry->start) || (vaddr >= entry->end)) {
2690                 /*
2691                  * Entry was either not a valid hint, or the vaddr was not
2692                  * contained in the entry, so do a full lookup.
2693                  */
2694                 if (!vm_map_lookup_entry(map, vaddr, out_entry))
2695                         RETURN(KERN_INVALID_ADDRESS);
2696
2697                 entry = *out_entry;
2698         }
2699         
2700         /*
2701          * Handle submaps.
2702          */
2703         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2704                 vm_map_t old_map = map;
2705
2706                 *var_map = map = entry->object.sub_map;
2707                 vm_map_unlock_read(old_map);
2708                 goto RetryLookup;
2709         }
2710
2711         /*
2712          * Check whether this task is allowed to have this page.
2713          * Note the special case for MAP_ENTRY_COW
2714          * pages with an override.  This is to implement a forced
2715          * COW for debuggers.
2716          */
2717         if (fault_type & VM_PROT_OVERRIDE_WRITE)
2718                 prot = entry->max_protection;
2719         else
2720                 prot = entry->protection;
2721         fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2722         if ((fault_type & prot) != fault_type) {
2723                         RETURN(KERN_PROTECTION_FAILURE);
2724         }
2725         if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2726             (entry->eflags & MAP_ENTRY_COW) &&
2727             (fault_type & VM_PROT_WRITE) &&
2728             (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2729                 RETURN(KERN_PROTECTION_FAILURE);
2730         }
2731
2732         /*
2733          * If this page is not pageable, we have to get it for all possible
2734          * accesses.
2735          */
2736         *wired = (entry->wired_count != 0);
2737         if (*wired)
2738                 prot = fault_type = entry->protection;
2739
2740         /*
2741          * If the entry was copy-on-write, we either copy it now or demote access.
2742          */
2743         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2744                 /*
2745                  * If we want to write the page, we may as well handle that
2746                  * now since we've got the map locked.
2747                  *
2748                  * If we don't need to write the page, we just demote the
2749                  * permissions allowed.
2750                  */
2751                 if (fault_type & VM_PROT_WRITE) {
2752                         /*
2753                          * Make a new object, and place it in the object
2754                          * chain.  Note that no new references have appeared
2755                          * -- one just moved from the map to the new
2756                          * object.
2757                          */
2758                         if (vm_map_lock_upgrade(map))
2759                                 goto RetryLookup;
2760
2761                         vm_object_shadow(
2762                             &entry->object.vm_object,
2763                             &entry->offset,
2764                             atop(entry->end - entry->start));
2765                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2766
2767                         vm_map_lock_downgrade(map);
2768                 } else {
2769                         /*
2770                          * We're attempting to read a copy-on-write page --
2771                          * don't allow writes.
2772                          */
2773                         prot &= ~VM_PROT_WRITE;
2774                 }
2775         }
2776
2777         /*
2778          * Create an object if necessary.
2779          */
2780         if (entry->object.vm_object == NULL &&
2781             !map->system_map) {
2782                 if (vm_map_lock_upgrade(map)) 
2783                         goto RetryLookup;
2784                 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2785                     atop(entry->end - entry->start));
2786                 entry->offset = 0;
2787                 vm_map_lock_downgrade(map);
2788         }
2789
2790         /*
2791          * Return the object/offset from this entry.  If the entry was
2792          * copy-on-write or empty, it has been fixed up.
2793          */
2794         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2795         *object = entry->object.vm_object;
2796
2797         /*
2798          * Return whether this is the only map sharing this data.
2799          */
2800         *out_prot = prot;
2801         return (KERN_SUCCESS);
2802
2803 #undef  RETURN
2804 }
2805
2806 /*
2807  *      vm_map_lookup_done:
2808  *
2809  *      Releases locks acquired by a vm_map_lookup
2810  *      (according to the handle returned by that lookup).
2811  */
2812 void
2813 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
2814 {
2815         /*
2816          * Unlock the main-level map
2817          */
2818         vm_map_unlock_read(map);
2819 }
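
/*
 * Usage sketch (editorial addition, not from the original source):
 * vm_map_lookup() and vm_map_lookup_done() bracket a fault-style lookup.
 * With "map" and "va" assumed from the caller:
 *
 *      rv = vm_map_lookup(&map, va, VM_PROT_READ, &entry, &object,
 *          &pindex, &prot, &wired);
 *      if (rv == KERN_SUCCESS) {
 *              (use object and pindex; the map stays read-locked)
 *              vm_map_lookup_done(map, entry);
 *      }
 */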
2820
2821 #ifdef ENABLE_VFS_IOOPT
2822 /*
2823  * Experimental support for zero-copy I/O
2824  *
2825  * Implement uiomove with VM operations.  This (and its collateral changes)
2826  * supports every combination of source object modification and COW-type
2827  * operations.
2828  */
2829 int
2830 vm_uiomove(
2831         vm_map_t mapa,
2832         vm_object_t srcobject,
2833         off_t cp,
2834         int cnta,
2835         vm_offset_t uaddra,
2836         int *npages)
2837 {
2838         vm_map_t map;
2839         vm_object_t first_object, oldobject, object;
2840         vm_map_entry_t entry;
2841         vm_prot_t prot;
2842         boolean_t wired;
2843         int tcnt, rv;
2844         vm_offset_t uaddr, start, end, tend;
2845         vm_pindex_t first_pindex, oindex;
2846         vm_size_t osize;
2847         off_t ooffset;
2848         int cnt;
2849
2850         GIANT_REQUIRED;
2851
2852         if (npages)
2853                 *npages = 0;
2854
2855         cnt = cnta;
2856         uaddr = uaddra;
2857
2858         while (cnt > 0) {
2859                 map = mapa;
2860
2861                 if ((vm_map_lookup(&map, uaddr,
2862                         VM_PROT_READ, &entry, &first_object,
2863                         &first_pindex, &prot, &wired)) != KERN_SUCCESS) {
2864                         return EFAULT;
2865                 }
2866
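                     /*
                      * Clip the entry so that it covers only the part of
                      * the user range handled on this pass; at most one
                      * map entry is processed per iteration.
                      */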
2867                 vm_map_clip_start(map, entry, uaddr);
2868
2869                 tcnt = cnt;
2870                 tend = uaddr + tcnt;
2871                 if (tend > entry->end) {
2872                         tcnt = entry->end - uaddr;
2873                         tend = entry->end;
2874                 }
2875
2876                 vm_map_clip_end(map, entry, tend);
2877
2878                 start = entry->start;
2879                 end = entry->end;
2880
2881                 osize = atop(tcnt);
2882
2883                 oindex = OFF_TO_IDX(cp);
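                     /*
                      * When the caller wants a page count, only proceed if
                      * every source page is resident, fully valid, and not
                      * PG_BUSY; otherwise return 0 so the caller may fall
                      * back to an ordinary copy.
                      */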
2884                 if (npages) {
2885                         vm_size_t idx;
2886                         for (idx = 0; idx < osize; idx++) {
2887                                 vm_page_t m;
2888                                 if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
2889                                         vm_map_lookup_done(map, entry);
2890                                         return 0;
2891                                 }
2892                                 /*
2893                                  * Disallow PG_BUSY or partially valid pages; accept
2894                                  * soft-busy (m->busy) pages that are entirely valid.
2895                                  */
2896                                 if ((m->flags & PG_BUSY) ||
2897                                         ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
2898                                         vm_map_lookup_done(map, entry);
2899                                         return 0;
2900                                 }
2901                         }
2902                 }
2903
2904 /*
2905  * If we are changing an existing map entry, just redirect
2906  * the object, and change mappings.
2907  */
2908                 if ((first_object->type == OBJT_VNODE) &&
2909                         ((oldobject = entry->object.vm_object) == first_object)) {
2910
2911                         if ((entry->offset != cp) || (oldobject != srcobject)) {
2912                                 /*
2913                                  * Remove the old window into the file.
2914                                  */
2915                                 pmap_remove(map->pmap, uaddr, tend);
2916
2917                                 /*
2918                                  * Force copy-on-write for mmapped regions.
2919                                  */
2920                                 vm_object_pmap_copy_1(srcobject, oindex, oindex + osize);
2921
2922                                 /*
2923                                  * Point the object appropriately.
2924                                  */
2925                                 if (oldobject != srcobject) {
2926
2927                                         /*
2928                                          * Set the object optimization hint flag.
2929                                          */
2930                                         vm_object_set_flag(srcobject, OBJ_OPT);
2931                                         vm_object_reference(srcobject);
2932                                         entry->object.vm_object = srcobject;
2933
2934                                         if (oldobject) {
2935                                                 vm_object_deallocate(oldobject);
2936                                         }
2937                                 }
2938
2939                                 entry->offset = cp;
2940                                 map->timestamp++;
2941                         } else {
2942                                 pmap_remove(map->pmap, uaddr, tend);
2943                         }
2944
2945                 } else if ((first_object->ref_count == 1) &&
2946                         (first_object->size == osize) &&
2947                         ((first_object->type == OBJT_DEFAULT) ||
2948                                 (first_object->type == OBJT_SWAP)) ) {
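                             /*
                              * The entry is backed by a small, unshared
                              * anonymous (default or swap) object: splice the
                              * source object in underneath it as its backing
                              * object.
                              */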
2949
2950                         oldobject = first_object->backing_object;
2951
2952                         if ((first_object->backing_object_offset != cp) ||
2953                                 (oldobject != srcobject)) {
2954                                 /*
2955                                  * Remove the old window into the file.
2956                                  */
2957                                 pmap_remove(map->pmap, uaddr, tend);
2958
2959                                 /*
2960                                  * Remove unneeded old pages
2961                                  */
2962                                 vm_object_page_remove(first_object, 0, 0, 0);
2963
2964                                 /*
2965                                  * Invalidate swap space
2966                                  */
2967                                 if (first_object->type == OBJT_SWAP) {
2968                                         swap_pager_freespace(first_object,
2969                                                 0,
2970                                                 first_object->size);
2971                                 }
2972
2973                                 /*
2974                                  * Force copy-on-write for mmapped regions.
2975                                  */
2976                                 vm_object_pmap_copy_1(srcobject, oindex, oindex + osize);
2977
2978                                 /*
2979                                  * Point the object appropriately
2980                                  */
2981                                 if (oldobject != srcobject) {
2982                                         /*
2983                                          * Set the object optimization hint flag
2984                                          */
2985                                         vm_object_set_flag(srcobject, OBJ_OPT);
2986                                         vm_object_reference(srcobject);
2987
2988                                         if (oldobject) {
2989                                                 TAILQ_REMOVE(&oldobject->shadow_head,
2990                                                         first_object, shadow_list);
2991                                                 oldobject->shadow_count--;
2992                                                 /* XXX bump generation? */
2993                                                 vm_object_deallocate(oldobject);
2994                                         }
2995
2996                                         TAILQ_INSERT_TAIL(&srcobject->shadow_head,
2997                                                 first_object, shadow_list);
2998                                         srcobject->shadow_count++;
2999                                         /* XXX bump generation? */
3000
3001                                         first_object->backing_object = srcobject;
3002                                 }
3003                                 first_object->backing_object_offset = cp;
3004                                 map->timestamp++;
3005                         } else {
3006                                 pmap_remove(map->pmap, uaddr, tend);
3007                         }
3008 /*
3009  * Otherwise, we have to do a logical mmap.
3010  */
3011                 } else {
3012
3013                         vm_object_set_flag(srcobject, OBJ_OPT);
3014                         vm_object_reference(srcobject);
3015
3016                         pmap_remove(map->pmap, uaddr, tend);
3017
3018                         vm_object_pmap_copy_1(srcobject, oindex, oindex + osize);
3019                         vm_map_lock_upgrade(map);
3020
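                             /*
                              * The entry is about to be deleted, so back the
                              * free-space hint off if it points at or beyond
                              * the entry's start.
                              */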
3021                         if (entry == &map->header) {
3022                                 map->first_free = &map->header;
3023                         } else if (map->first_free->start >= start) {
3024                                 map->first_free = entry->prev;
3025                         }
3026
3027                         vm_map_entry_delete(map, entry);
3028
3029                         object = srcobject;
3030                         ooffset = cp;
3031
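                             /*
                              * Replace the deleted entry with a copy-on-write
                              * mapping of the source object.
                              */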
3032                         rv = vm_map_insert(map, object, ooffset, start, tend,
3033                                 VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
3034
3035                         if (rv != KERN_SUCCESS)
3036                                 panic("vm_uiomove: could not insert new entry: %d", rv);
3037                 }
3038
3039 /*
3040  * Map the window directly, if it is already in memory
3041  */
3042                 pmap_object_init_pt(map->pmap, uaddr,
3043                         srcobject, oindex, tcnt, 0);
3044
3045                 map->timestamp++;
3046                 vm_map_unlock(map);
3047
3048                 cnt -= tcnt;
3049                 uaddr += tcnt;
3050                 cp += tcnt;
3051                 if (npages)
3052                         *npages += osize;
3053         }
3054         return 0;
3055 }
3056 #endif
3057
3058 #include "opt_ddb.h"
3059 #ifdef DDB
3060 #include <sys/kernel.h>
3061
3062 #include <ddb/ddb.h>
3063
3064 /*
3065  *      vm_map_print:   [ debug ]
3066  */
3067 DB_SHOW_COMMAND(map, vm_map_print)
3068 {
3069         static int nlines;
3070         /* XXX convert args. */
3071         vm_map_t map = (vm_map_t)addr;
3072         boolean_t full = have_addr;
3073
3074         vm_map_entry_t entry;
3075
3076         db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3077             (void *)map,
3078             (void *)map->pmap, map->nentries, map->timestamp);
3079         nlines++;
3080
3081         if (!full && db_indent)
3082                 return;
3083
3084         db_indent += 2;
3085         for (entry = map->header.next; entry != &map->header;
3086             entry = entry->next) {
3087                 db_iprintf("map entry %p: start=%p, end=%p\n",
3088                     (void *)entry, (void *)entry->start, (void *)entry->end);
3089                 nlines++;
3090                 {
3091                         static char *inheritance_name[4] =
3092                         {"share", "copy", "none", "donate_copy"};
3093
3094                         db_iprintf(" prot=%x/%x/%s",
3095                             entry->protection,
3096                             entry->max_protection,
3097                             inheritance_name[(int)(unsigned char)entry->inheritance]);
3098                         if (entry->wired_count != 0)
3099                                 db_printf(", wired");
3100                 }
3101                 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3102                         /* XXX no %qd in kernel.  Truncate entry->offset. */
3103                         db_printf(", share=%p, offset=0x%lx\n",
3104                             (void *)entry->object.sub_map,
3105                             (long)entry->offset);
3106                         nlines++;
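                             /*
                              * Recurse into the submap only for the first of
                              * a run of entries that share the same submap.
                              */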
3107                         if ((entry->prev == &map->header) ||
3108                             (entry->prev->object.sub_map !=
3109                                 entry->object.sub_map)) {
3110                                 db_indent += 2;
3111                                 vm_map_print((db_expr_t)(intptr_t)
3112                                              entry->object.sub_map,
3113                                              full, 0, (char *)0);
3114                                 db_indent -= 2;
3115                         }
3116                 } else {
3117                         /* XXX no %qd in kernel.  Truncate entry->offset. */
3118                         db_printf(", object=%p, offset=0x%lx",
3119                             (void *)entry->object.vm_object,
3120                             (long)entry->offset);
3121                         if (entry->eflags & MAP_ENTRY_COW)
3122                                 db_printf(", copy (%s)",
3123                                     (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3124                         db_printf("\n");
3125                         nlines++;
3126
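                             /*
                              * Likewise, dump a backing VM object only once
                              * for consecutive entries that reference it.
                              */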
3127                         if ((entry->prev == &map->header) ||
3128                             (entry->prev->object.vm_object !=
3129                                 entry->object.vm_object)) {
3130                                 db_indent += 2;
3131                                 vm_object_print((db_expr_t)(intptr_t)
3132                                                 entry->object.vm_object,
3133                                                 full, 0, (char *)0);
3134                                 nlines += 4;
3135                                 db_indent -= 2;
3136                         }
3137                 }
3138         }
3139         db_indent -= 2;
3140         if (db_indent == 0)
3141                 nlines = 0;
3142 }
3143
3144
3145 DB_SHOW_COMMAND(procvm, procvm)
3146 {
3147         struct proc *p;
3148
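             /*
              * Use the process given as the address argument, or default to
              * the current process.
              */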
3149         if (have_addr) {
3150                 p = (struct proc *) addr;
3151         } else {
3152                 p = curproc;
3153         }
3154
3155         db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3156             (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3157             (void *)vmspace_pmap(p->p_vmspace));
3158
3159         vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3160 }
3161
3162 #endif /* DDB */