]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/vm/vm_map.c
Pass the caller's file name and line number to the vm_map locking functions.
[FreeBSD/FreeBSD.git] / sys / vm / vm_map.c
1 /*
2  * Copyright (c) 1991, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      from: @(#)vm_map.c      8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66
67 /*
68  *      Virtual memory mapping module.
69  */
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/ktr.h>
74 #include <sys/lock.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/vmmeter.h>
78 #include <sys/mman.h>
79 #include <sys/vnode.h>
80 #include <sys/resourcevar.h>
81
82 #include <vm/vm.h>
83 #include <vm/vm_param.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_pager.h>
89 #include <vm/vm_kern.h>
90 #include <vm/vm_extern.h>
91 #include <vm/swap_pager.h>
92 #include <vm/uma.h>
93
94 /*
95  *      Virtual memory maps provide for the mapping, protection,
96  *      and sharing of virtual memory objects.  In addition,
97  *      this module provides for an efficient virtual copy of
98  *      memory from one map to another.
99  *
100  *      Synchronization is required prior to most operations.
101  *
102  *      Maps consist of an ordered doubly-linked list of simple
103  *      entries; a single hint is used to speed up lookups.
104  *
105  *      Since portions of maps are specified by start/end addresses,
106  *      which may not align with existing map entries, all
107  *      routines merely "clip" entries to these start/end values.
108  *      [That is, an entry is split into two, bordering at a
109  *      start or end value.]  Note that these clippings may not
110  *      always be necessary (as the two resulting entries are then
111  *      not changed); however, the clipping is done for convenience.
112  *
113  *      As mentioned above, virtual copy operations are performed
114  *      by copying VM object references from one map to
115  *      another, and then marking both regions as copy-on-write.
116  */
117
118 /*
119  *      vm_map_startup:
120  *
121  *      Initialize the vm_map module.  Must be called before
122  *      any other vm_map routines.
123  *
124  *      Map and entry structures are allocated from the general
125  *      purpose memory pool with some exceptions:
126  *
127  *      - The kernel map and kmem submap are allocated statically.
128  *      - Kernel map entries are allocated out of a static pool.
129  *
130  *      These restrictions are necessary since malloc() uses the
131  *      maps and requires map entries.
132  */
133
134 static uma_zone_t mapentzone;
135 static uma_zone_t kmapentzone;
136 static uma_zone_t mapzone;
137 static uma_zone_t vmspace_zone;
138 static struct vm_object kmapentobj;
139 static void vmspace_zinit(void *mem, int size);
140 static void vmspace_zfini(void *mem, int size);
141 static void vm_map_zinit(void *mem, int size);
142 static void vm_map_zfini(void *mem, int size);
143 static void _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max);
144
145 #ifdef INVARIANTS
146 static void vm_map_zdtor(void *mem, int size, void *arg);
147 static void vmspace_zdtor(void *mem, int size, void *arg);
148 #endif
149
150 void
151 vm_map_startup(void)
152 {
153         mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
154 #ifdef INVARIANTS
155             vm_map_zdtor,
156 #else
157             NULL,
158 #endif
159             vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
160         uma_prealloc(mapzone, MAX_KMAP);
161         kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry), 
162             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
163         uma_prealloc(kmapentzone, MAX_KMAPENT);
164         mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry), 
165             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
166         uma_prealloc(mapentzone, MAX_MAPENT);
167 }
168
169 static void
170 vmspace_zfini(void *mem, int size)
171 {
172         struct vmspace *vm;
173
174         vm = (struct vmspace *)mem;
175
176         vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map));
177 }
178
179 static void
180 vmspace_zinit(void *mem, int size)
181 {
182         struct vmspace *vm;
183
184         vm = (struct vmspace *)mem;
185
186         vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map));
187 }
188
189 static void
190 vm_map_zfini(void *mem, int size)
191 {
192         vm_map_t map;
193
194         GIANT_REQUIRED;
195         map = (vm_map_t)mem;
196
197         lockdestroy(&map->lock);
198 }
199
200 static void
201 vm_map_zinit(void *mem, int size)
202 {
203         vm_map_t map;
204
205         GIANT_REQUIRED;
206
207         map = (vm_map_t)mem;
208         map->nentries = 0;
209         map->size = 0;
210         map->infork = 0;
211         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
212 }
213
214 #ifdef INVARIANTS
215 static void
216 vmspace_zdtor(void *mem, int size, void *arg)
217 {
218         struct vmspace *vm;
219
220         vm = (struct vmspace *)mem;
221
222         vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
223 }
224 static void
225 vm_map_zdtor(void *mem, int size, void *arg)
226 {
227         vm_map_t map;
228
229         map = (vm_map_t)mem;
230         KASSERT(map->nentries == 0,
231             ("map %p nentries == %d on free.", 
232             map, map->nentries));
233         KASSERT(map->size == 0,
234             ("map %p size == %lu on free.",
235             map, (unsigned long)map->size));
236         KASSERT(map->infork == 0,
237             ("map %p infork == %d on free.",
238             map, map->infork));
239 }
240 #endif  /* INVARIANTS */
241
242 /*
243  * Allocate a vmspace structure, including a vm_map and pmap,
244  * and initialize those structures.  The refcnt is set to 1.
245  * The remaining fields must be initialized by the caller.
246  */
247 struct vmspace *
248 vmspace_alloc(min, max)
249         vm_offset_t min, max;
250 {
251         struct vmspace *vm;
252
253         GIANT_REQUIRED;
254         vm = uma_zalloc(vmspace_zone, M_WAITOK);
255         CTR1(KTR_VM, "vmspace_alloc: %p", vm);
256         _vm_map_init(&vm->vm_map, min, max);
257         pmap_pinit(vmspace_pmap(vm));
258         vm->vm_map.pmap = vmspace_pmap(vm);             /* XXX */
259         vm->vm_refcnt = 1;
260         vm->vm_shm = NULL;
261         vm->vm_freer = NULL;
262         return (vm);
263 }
264
265 void
266 vm_init2(void) 
267 {
268         uma_zone_set_obj(kmapentzone, &kmapentobj, cnt.v_page_count / 4);
269         vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
270 #ifdef INVARIANTS
271             vmspace_zdtor,
272 #else
273             NULL,
274 #endif
275             vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
276         pmap_init2();
277         vm_object_init2();
278 }
279
280 static __inline void
281 vmspace_dofree(struct vmspace *vm)
282 {
283         CTR1(KTR_VM, "vmspace_free: %p", vm);
284         /*
285          * Lock the map, to wait out all other references to it.
286          * Delete all of the mappings and pages they hold, then call
287          * the pmap module to reclaim anything left.
288          */
289         vm_map_lock(&vm->vm_map);
290         (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
291             vm->vm_map.max_offset);
292         vm_map_unlock(&vm->vm_map);
293
294         pmap_release(vmspace_pmap(vm));
295         uma_zfree(vmspace_zone, vm);
296 }
297
298 void
299 vmspace_free(struct vmspace *vm)
300 {
301         GIANT_REQUIRED;
302
303         if (vm->vm_refcnt == 0)
304                 panic("vmspace_free: attempt to free already freed vmspace");
305
306         if (--vm->vm_refcnt == 0)
307                 vmspace_dofree(vm);
308 }
309
310 void
311 vmspace_exitfree(struct proc *p)
312 {
313         struct vmspace *vm;
314
315         GIANT_REQUIRED;
316         if (p == p->p_vmspace->vm_freer) {
317                 vm = p->p_vmspace;
318                 p->p_vmspace = NULL;
319                 vmspace_dofree(vm);
320         }
321 }
322
323 /*
324  * vmspace_swap_count() - count the approximate swap useage in pages for a
325  *                        vmspace.
326  *
327  *      Swap useage is determined by taking the proportional swap used by
328  *      VM objects backing the VM map.  To make up for fractional losses,
329  *      if the VM object has any swap use at all the associated map entries
330  *      count for at least 1 swap page.
331  */
332 int
333 vmspace_swap_count(struct vmspace *vmspace)
334 {
335         vm_map_t map = &vmspace->vm_map;
336         vm_map_entry_t cur;
337         int count = 0;
338
339         vm_map_lock_read(map);
340         for (cur = map->header.next; cur != &map->header; cur = cur->next) {
341                 vm_object_t object;
342
343                 if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
344                     (object = cur->object.vm_object) != NULL &&
345                     object->type == OBJT_SWAP
346                 ) {
347                         int n = (cur->end - cur->start) / PAGE_SIZE;
348
349                         if (object->un_pager.swp.swp_bcount) {
350                                 count += object->un_pager.swp.swp_bcount *
351                                     SWAP_META_PAGES * n / object->size + 1;
352                         }
353                 }
354         }
355         vm_map_unlock_read(map);
356         return (count);
357 }
358
359 u_char   
360 vm_map_entry_behavior(struct vm_map_entry *entry)
361 {                  
362         return entry->eflags & MAP_ENTRY_BEHAV_MASK;
363 }
364
365 void
366 vm_map_entry_set_behavior(struct vm_map_entry *entry, u_char behavior)
367 {              
368         entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
369                 (behavior & MAP_ENTRY_BEHAV_MASK);
370 }                       
371
372 void
373 _vm_map_lock(vm_map_t map, const char *file, int line)
374 {
375         vm_map_printf("locking map LK_EXCLUSIVE: %p\n", map);
376         if (lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread) != 0)
377                 panic("vm_map_lock: failed to get lock");
378         map->timestamp++;
379 }
380
381 void
382 _vm_map_unlock(vm_map_t map, const char *file, int line)
383 {
384         vm_map_printf("locking map LK_RELEASE: %p\n", map);
385         lockmgr(&(map)->lock, LK_RELEASE, NULL, curthread);
386 }
387
388 void
389 _vm_map_lock_read(vm_map_t map, const char *file, int line)
390 {
391         vm_map_printf("locking map LK_SHARED: %p\n", map);
392         lockmgr(&(map)->lock, LK_SHARED, NULL, curthread);
393 }
394
395 void
396 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
397 {
398         vm_map_printf("locking map LK_RELEASE: %p\n", map);
399         lockmgr(&(map)->lock, LK_RELEASE, NULL, curthread);
400 }
401
402 int
403 _vm_map_trylock(vm_map_t map, const char *file, int line)
404 {
405
406         return (lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, NULL,
407                     curthread) == 0);
408 }
409
410 static __inline__ int
411 __vm_map_lock_upgrade(vm_map_t map, struct thread *td) {
412         int error;
413
414         vm_map_printf("locking map LK_EXCLUPGRADE: %p\n", map); 
415         error = lockmgr(&map->lock, LK_EXCLUPGRADE, NULL, td);
416         if (error == 0)
417                 map->timestamp++;
418         return error;
419 }
420
421 int
422 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
423 {
424     return (__vm_map_lock_upgrade(map, curthread));
425 }
426
427 void
428 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
429 {
430         vm_map_printf("locking map LK_DOWNGRADE: %p\n", map);
431         lockmgr(&map->lock, LK_DOWNGRADE, NULL, curthread);
432 }
433
434 void
435 _vm_map_set_recursive(vm_map_t map, const char *file, int line)
436 {
437         mtx_lock((map)->lock.lk_interlock);
438         map->lock.lk_flags |= LK_CANRECURSE;
439         mtx_unlock((map)->lock.lk_interlock);
440 }
441
442 void
443 _vm_map_clear_recursive(vm_map_t map, const char *file, int line)
444 {
445         mtx_lock((map)->lock.lk_interlock);
446         map->lock.lk_flags &= ~LK_CANRECURSE;
447         mtx_unlock((map)->lock.lk_interlock);
448 }
449
450 struct pmap *
451 vmspace_pmap(struct vmspace *vmspace)
452 {
453         return &vmspace->vm_pmap;
454 }
455
/*
 * Return the resident count reported by pmap_resident_count() for the
 * vmspace's pmap.
 */
long
vmspace_resident_count(struct vmspace *vmspace)
{
	struct pmap *pm;

	pm = vmspace_pmap(vmspace);
	return (pmap_resident_count(pm));
}
461
462 /*
463  *      vm_map_create:
464  *
465  *      Creates and returns a new empty VM map with
466  *      the given physical map structure, and having
467  *      the given lower and upper address bounds.
468  */
469 vm_map_t
470 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
471 {
472         vm_map_t result;
473
474         GIANT_REQUIRED;
475
476         result = uma_zalloc(mapzone, M_WAITOK);
477         CTR1(KTR_VM, "vm_map_create: %p", result);
478         _vm_map_init(result, min, max);
479         result->pmap = pmap;
480         return (result);
481 }
482
483 /*
484  * Initialize an existing vm_map structure
485  * such as that in the vmspace structure.
486  * The pmap is set elsewhere.
487  */
488 static void
489 _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
490 {
491
492         map->header.next = map->header.prev = &map->header;
493         map->system_map = 0;
494         map->min_offset = min;
495         map->max_offset = max;
496         map->first_free = &map->header;
497         map->hint = &map->header;
498         map->timestamp = 0;
499 }
500
501 void
502 vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
503 {
504         _vm_map_init(map, min, max);
505         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
506 }
507
508 /*
509  *      vm_map_entry_dispose:   [ internal use only ]
510  *
511  *      Inverse of vm_map_entry_create.
512  */
513 static void
514 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
515 {
516         uma_zfree((map->system_map || !mapentzone)
517             ? kmapentzone : mapentzone, entry);
518 }
519
520 /*
521  *      vm_map_entry_create:    [ internal use only ]
522  *
523  *      Allocates a VM map entry for insertion.
524  *      No entry fields are filled in.
525  */
526 static vm_map_entry_t
527 vm_map_entry_create(vm_map_t map)
528 {
529         vm_map_entry_t new_entry;
530
531         new_entry = uma_zalloc((map->system_map || !mapentzone) ? 
532                 kmapentzone : mapentzone, M_WAITOK);
533         if (new_entry == NULL)
534             panic("vm_map_entry_create: kernel resources exhausted");
535         return (new_entry);
536 }
537
538 /*
539  *      vm_map_entry_{un,}link:
540  *
541  *      Insert/remove entries from maps.
542  */
543 static __inline void
544 vm_map_entry_link(vm_map_t map,
545                   vm_map_entry_t after_where,
546                   vm_map_entry_t entry)
547 {
548
549         CTR4(KTR_VM,
550             "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
551             map->nentries, entry, after_where);
552         map->nentries++;
553         entry->prev = after_where;
554         entry->next = after_where->next;
555         entry->next->prev = entry;
556         after_where->next = entry;
557 }
558
559 static __inline void
560 vm_map_entry_unlink(vm_map_t map,
561                     vm_map_entry_t entry)
562 {
563         vm_map_entry_t prev = entry->prev;
564         vm_map_entry_t next = entry->next;
565
566         next->prev = prev;
567         prev->next = next;
568         map->nentries--;
569         CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
570             map->nentries, entry);
571 }
572
/*
 *	SAVE_HINT:
 *
 *	Saves the specified entry as the hint for
 *	future lookups.
 *
 *	Expands to a single parenthesized expression with no trailing
 *	semicolon, so "SAVE_HINT(map, e);" is exactly one statement and
 *	is safe in an unbraced if/else.
 */
#define	SAVE_HINT(map, value) \
		((map)->hint = (value))
581
582 /*
583  *      vm_map_lookup_entry:    [ internal use only ]
584  *
585  *      Finds the map entry containing (or
586  *      immediately preceding) the specified address
587  *      in the given map; the entry is returned
588  *      in the "entry" parameter.  The boolean
589  *      result indicates whether the address is
590  *      actually contained in the map.
591  */
592 boolean_t
593 vm_map_lookup_entry(
594         vm_map_t map,
595         vm_offset_t address,
596         vm_map_entry_t *entry)  /* OUT */
597 {
598         vm_map_entry_t cur;
599         vm_map_entry_t last;
600
601         GIANT_REQUIRED;
602         /*
603          * Start looking either from the head of the list, or from the hint.
604          */
605         cur = map->hint;
606
607         if (cur == &map->header)
608                 cur = cur->next;
609
610         if (address >= cur->start) {
611                 /*
612                  * Go from hint to end of list.
613                  *
614                  * But first, make a quick check to see if we are already looking
615                  * at the entry we want (which is usually the case). Note also
616                  * that we don't need to save the hint here... it is the same
617                  * hint (unless we are at the header, in which case the hint
618                  * didn't buy us anything anyway).
619                  */
620                 last = &map->header;
621                 if ((cur != last) && (cur->end > address)) {
622                         *entry = cur;
623                         return (TRUE);
624                 }
625         } else {
626                 /*
627                  * Go from start to hint, *inclusively*
628                  */
629                 last = cur->next;
630                 cur = map->header.next;
631         }
632
633         /*
634          * Search linearly
635          */
636         while (cur != last) {
637                 if (cur->end > address) {
638                         if (address >= cur->start) {
639                                 /*
640                                  * Save this lookup for future hints, and
641                                  * return
642                                  */
643                                 *entry = cur;
644                                 SAVE_HINT(map, cur);
645                                 return (TRUE);
646                         }
647                         break;
648                 }
649                 cur = cur->next;
650         }
651         *entry = cur->prev;
652         SAVE_HINT(map, *entry);
653         return (FALSE);
654 }
655
656 /*
657  *      vm_map_insert:
658  *
659  *      Inserts the given whole VM object into the target
660  *      map at the specified address range.  The object's
661  *      size should match that of the address range.
662  *
663  *      Requires that the map be locked, and leaves it so.
664  *
665  *      If object is non-NULL, ref count must be bumped by caller
666  *      prior to making call to account for the new entry.
667  */
668 int
669 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
670               vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
671               int cow)
672 {
673         vm_map_entry_t new_entry;
674         vm_map_entry_t prev_entry;
675         vm_map_entry_t temp_entry;
676         vm_eflags_t protoeflags;
677
678         GIANT_REQUIRED;
679
680         /*
681          * Check that the start and end points are not bogus.
682          */
683         if ((start < map->min_offset) || (end > map->max_offset) ||
684             (start >= end))
685                 return (KERN_INVALID_ADDRESS);
686
687         /*
688          * Find the entry prior to the proposed starting address; if it's part
689          * of an existing entry, this range is bogus.
690          */
691         if (vm_map_lookup_entry(map, start, &temp_entry))
692                 return (KERN_NO_SPACE);
693
694         prev_entry = temp_entry;
695
696         /*
697          * Assert that the next entry doesn't overlap the end point.
698          */
699         if ((prev_entry->next != &map->header) &&
700             (prev_entry->next->start < end))
701                 return (KERN_NO_SPACE);
702
703         protoeflags = 0;
704
705         if (cow & MAP_COPY_ON_WRITE)
706                 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
707
708         if (cow & MAP_NOFAULT) {
709                 protoeflags |= MAP_ENTRY_NOFAULT;
710
711                 KASSERT(object == NULL,
712                         ("vm_map_insert: paradoxical MAP_NOFAULT request"));
713         }
714         if (cow & MAP_DISABLE_SYNCER)
715                 protoeflags |= MAP_ENTRY_NOSYNC;
716         if (cow & MAP_DISABLE_COREDUMP)
717                 protoeflags |= MAP_ENTRY_NOCOREDUMP;
718
719         if (object) {
720                 /*
721                  * When object is non-NULL, it could be shared with another
722                  * process.  We have to set or clear OBJ_ONEMAPPING 
723                  * appropriately.
724                  */
725                 if ((object->ref_count > 1) || (object->shadow_count != 0)) {
726                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
727                 }
728         }
729         else if ((prev_entry != &map->header) &&
730                  (prev_entry->eflags == protoeflags) &&
731                  (prev_entry->end == start) &&
732                  (prev_entry->wired_count == 0) &&
733                  ((prev_entry->object.vm_object == NULL) ||
734                   vm_object_coalesce(prev_entry->object.vm_object,
735                                      OFF_TO_IDX(prev_entry->offset),
736                                      (vm_size_t)(prev_entry->end - prev_entry->start),
737                                      (vm_size_t)(end - prev_entry->end)))) {
738                 /*
739                  * We were able to extend the object.  Determine if we
740                  * can extend the previous map entry to include the 
741                  * new range as well.
742                  */
743                 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
744                     (prev_entry->protection == prot) &&
745                     (prev_entry->max_protection == max)) {
746                         map->size += (end - prev_entry->end);
747                         prev_entry->end = end;
748                         vm_map_simplify_entry(map, prev_entry);
749                         return (KERN_SUCCESS);
750                 }
751
752                 /*
753                  * If we can extend the object but cannot extend the
754                  * map entry, we have to create a new map entry.  We
755                  * must bump the ref count on the extended object to
756                  * account for it.  object may be NULL.
757                  */
758                 object = prev_entry->object.vm_object;
759                 offset = prev_entry->offset +
760                         (prev_entry->end - prev_entry->start);
761                 vm_object_reference(object);
762         }
763
764         /*
765          * NOTE: if conditionals fail, object can be NULL here.  This occurs
766          * in things like the buffer map where we manage kva but do not manage
767          * backing objects.
768          */
769
770         /*
771          * Create a new entry
772          */
773         new_entry = vm_map_entry_create(map);
774         new_entry->start = start;
775         new_entry->end = end;
776
777         new_entry->eflags = protoeflags;
778         new_entry->object.vm_object = object;
779         new_entry->offset = offset;
780         new_entry->avail_ssize = 0;
781
782         new_entry->inheritance = VM_INHERIT_DEFAULT;
783         new_entry->protection = prot;
784         new_entry->max_protection = max;
785         new_entry->wired_count = 0;
786
787         /*
788          * Insert the new entry into the list
789          */
790         vm_map_entry_link(map, prev_entry, new_entry);
791         map->size += new_entry->end - new_entry->start;
792
793         /*
794          * Update the free space hint
795          */
796         if ((map->first_free == prev_entry) &&
797             (prev_entry->end >= new_entry->start)) {
798                 map->first_free = new_entry;
799         }
800
801 #if 0
802         /*
803          * Temporarily removed to avoid MAP_STACK panic, due to
804          * MAP_STACK being a huge hack.  Will be added back in
805          * when MAP_STACK (and the user stack mapping) is fixed.
806          */
807         /*
808          * It may be possible to simplify the entry
809          */
810         vm_map_simplify_entry(map, new_entry);
811 #endif
812
813         if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
814                 pmap_object_init_pt(map->pmap, start,
815                                     object, OFF_TO_IDX(offset), end - start,
816                                     cow & MAP_PREFAULT_PARTIAL);
817         }
818
819         return (KERN_SUCCESS);
820 }
821
822 /*
823  * Find sufficient space for `length' bytes in the given map, starting at
824  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
825  */
826 int
827 vm_map_findspace(
828         vm_map_t map,
829         vm_offset_t start,
830         vm_size_t length,
831         vm_offset_t *addr)
832 {
833         vm_map_entry_t entry, next;
834         vm_offset_t end;
835
836         GIANT_REQUIRED;
837         if (start < map->min_offset)
838                 start = map->min_offset;
839         if (start > map->max_offset)
840                 return (1);
841
842         /*
843          * Look for the first possible address; if there's already something
844          * at this address, we have to start after it.
845          */
846         if (start == map->min_offset) {
847                 if ((entry = map->first_free) != &map->header)
848                         start = entry->end;
849         } else {
850                 vm_map_entry_t tmp;
851
852                 if (vm_map_lookup_entry(map, start, &tmp))
853                         start = tmp->end;
854                 entry = tmp;
855         }
856
857         /*
858          * Look through the rest of the map, trying to fit a new region in the
859          * gap between existing regions, or after the very last region.
860          */
861         for (;; start = (entry = next)->end) {
862                 /*
863                  * Find the end of the proposed new region.  Be sure we didn't
864                  * go beyond the end of the map, or wrap around the address;
865                  * if so, we lose.  Otherwise, if this is the last entry, or
866                  * if the proposed new region fits before the next entry, we
867                  * win.
868                  */
869                 end = start + length;
870                 if (end > map->max_offset || end < start)
871                         return (1);
872                 next = entry->next;
873                 if (next == &map->header || next->start >= end)
874                         break;
875         }
876         SAVE_HINT(map, entry);
877         *addr = start;
878         if (map == kernel_map) {
879                 vm_offset_t ksize;
880                 if ((ksize = round_page(start + length)) > kernel_vm_end) {
881                         pmap_growkernel(ksize);
882                 }
883         }
884         return (0);
885 }
886
887 /*
888  *      vm_map_find finds an unallocated region in the target address
889  *      map with the given length.  The search is defined to be
890  *      first-fit from the specified address; the region found is
891  *      returned in the same parameter.
892  *
893  *      If object is non-NULL, ref count must be bumped by caller
894  *      prior to making call to account for the new entry.
895  */
896 int
897 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
898             vm_offset_t *addr,  /* IN: address to start from; OUT: start address used */
899             vm_size_t length, boolean_t find_space, vm_prot_t prot,
900             vm_prot_t max, int cow)
901 {
902         vm_offset_t start;
903         int result, s = 0;      /* s keeps splvm()'s return value; used for kmem_map only */
904
905         GIANT_REQUIRED;
906
907         start = *addr;          /* caller-supplied address */
908
909         if (map == kmem_map)
910                 s = splvm();    /* paired with splx(s) on both return paths below */
911
912         vm_map_lock(map);
913         if (find_space) {
914                 if (vm_map_findspace(map, start, length, addr)) {       /* nonzero: no fit found */
915                         vm_map_unlock(map);
916                         if (map == kmem_map)
917                                 splx(s);
918                         return (KERN_NO_SPACE);
919                 }
920                 start = *addr;  /* vm_map_findspace stored the start it chose in *addr */
921         }
922         result = vm_map_insert(map, object, offset,
923                 start, start + length, prot, max, cow);        /* range is [start, start + length) */
924         vm_map_unlock(map);
925
926         if (map == kmem_map)
927                 splx(s);
928
929         return (result);        /* vm_map_insert's status, passed through unchanged */
930 }
931
932 /*
933  *      vm_map_simplify_entry:
934  *
935  *      Simplify the given map entry by merging with either neighbor.  This
936  *      routine also has the ability to merge with both neighbors.
937  *
938  *      The map must be locked.
939  *
940  *      This routine guarantees that the passed entry remains valid (though
941  *      possibly extended).  When merging, this routine may delete one or
942  *      both neighbors.
943  */
944 void
945 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
946 {       /* Tries prev first, then next; entry itself is never the one disposed of. */
947         vm_map_entry_t next, prev;
948         vm_size_t prevsize, esize;      /* byte lengths of prev and of entry */
949
950         GIANT_REQUIRED;
951
952         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
953                 return;                 /* submap entries are never merged */
954
955         prev = entry->prev;
956         if (prev != &map->header) {     /* the header is not a real entry to merge with */
957                 prevsize = prev->end - prev->start;
958                 if ( (prev->end == entry->start) &&    /* address-adjacent */
959                      (prev->object.vm_object == entry->object.vm_object) &&
960                      (!prev->object.vm_object ||       /* offsets only matter with an object */
961                         (prev->offset + prevsize == entry->offset)) &&
962                      (prev->eflags == entry->eflags) &&        /* all attributes must match */
963                      (prev->protection == entry->protection) &&
964                      (prev->max_protection == entry->max_protection) &&
965                      (prev->inheritance == entry->inheritance) &&
966                      (prev->wired_count == entry->wired_count)) {
967                         if (map->first_free == prev)    /* retarget cached pointers off prev */
968                                 map->first_free = entry;
969                         if (map->hint == prev)
970                                 map->hint = entry;
971                         vm_map_entry_unlink(map, prev);
972                         entry->start = prev->start;     /* entry absorbs prev's range ... */
973                         entry->offset = prev->offset;   /* ... and takes over its object offset */
974                         if (prev->object.vm_object)
975                                 vm_object_deallocate(prev->object.vm_object);  /* presumably drops prev's reference */
976                         vm_map_entry_dispose(map, prev);
977                 }
978         }
979
980         next = entry->next;
981         if (next != &map->header) {     /* same test as above, mirrored for the successor */
982                 esize = entry->end - entry->start;     /* taken after any merge with prev */
983                 if ((entry->end == next->start) &&
984                     (next->object.vm_object == entry->object.vm_object) &&
985                      (!entry->object.vm_object ||
986                         (entry->offset + esize == next->offset)) &&
987                     (next->eflags == entry->eflags) &&
988                     (next->protection == entry->protection) &&
989                     (next->max_protection == entry->max_protection) &&
990                     (next->inheritance == entry->inheritance) &&
991                     (next->wired_count == entry->wired_count)) {
992                         if (map->first_free == next)
993                                 map->first_free = entry;
994                         if (map->hint == next)
995                                 map->hint = entry;
996                         vm_map_entry_unlink(map, next);
997                         entry->end = next->end;         /* entry's start and offset stay as they are */
998                         if (next->object.vm_object)
999                                 vm_object_deallocate(next->object.vm_object);
1000                         vm_map_entry_dispose(map, next);
1001                 }
1002         }
1003 }
1004 /*
1005  *      vm_map_clip_start:      [ internal use only ]
1006  *
1007  *      Asserts that the given entry begins at or after
1008  *      the specified address; if necessary,
1009  *      it splits the entry into two.
1010  */
1011 #define vm_map_clip_start(map, entry, startaddr) \
1012 { \
1013         if (startaddr > entry->start) \
1014                 _vm_map_clip_start(map, entry, startaddr); \
1015 }
1016
1017 /*
1018  *      This routine is called only when it is known that
1019  *      the entry must be split.
1020  */
1021 static void
1022 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
1023 {       /* On return entry begins at start; a new entry holds the part in front of it. */
1024         vm_map_entry_t new_entry;
1025
1026         /*
1027          * Split off the front portion -- note that we must insert the new
1028          * entry BEFORE this one, so that this entry has the specified
1029          * starting address.
1030          */
1031         vm_map_simplify_entry(map, entry);      /* merge with neighbours first; entry may grow */
1032
1033         /*
1034          * If there is no object backing this entry, we might as well create
1035          * one now.  If we defer it, an object can get created after the map
1036          * is clipped, and individual objects will be created for the split-up
1037          * map.  This is a bit of a hack, but is also about the best place to
1038          * put this improvement.
1039          */
1040         if (entry->object.vm_object == NULL && !map->system_map) {
1041                 vm_object_t object;
1042                 object = vm_object_allocate(OBJT_DEFAULT,
1043                                 atop(entry->end - entry->start));       /* sized for the whole, unsplit entry */
1044                 entry->object.vm_object = object;
1045                 entry->offset = 0;
1046         }
1047
1048         new_entry = vm_map_entry_create(map);
1049         *new_entry = *entry;                    /* begin as a full copy of entry */
1050
1051         new_entry->end = start;                 /* new_entry keeps [old start, start) */
1052         entry->offset += (start - entry->start);        /* skip the bytes handed to new_entry */
1053         entry->start = start;
1054
1055         vm_map_entry_link(map, entry->prev, new_entry);        /* linked after entry->prev, i.e. ahead of entry */
1056
1057         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1058                 vm_object_reference(new_entry->object.vm_object);       /* two entries now share the object */
1059         }
1060 }
1061
1062 /*
1063  *      vm_map_clip_end:        [ internal use only ]
1064  *
1065  *      Asserts that the given entry ends at or before
1066  *      the specified address; if necessary,
1067  *      it splits the entry into two.
1068  */
1069 #define vm_map_clip_end(map, entry, endaddr) \
1070 { \
1071         if (endaddr < entry->end) \
1072                 _vm_map_clip_end(map, entry, endaddr); \
1073 }
1074
1075 /*
1076  *      This routine is called only when it is known that
1077  *      the entry must be split.
1078  */
1079 static void
1080 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1081 {       /* On return entry stops at end; a new entry holds the part behind it. */
1082         vm_map_entry_t new_entry;
1083
1084         /*
1085          * If there is no object backing this entry, we might as well create
1086          * one now.  If we defer it, an object can get created after the map
1087          * is clipped, and individual objects will be created for the split-up
1088          * map.  This is a bit of a hack, but is also about the best place to
1089          * put this improvement.
1090          */
1091         if (entry->object.vm_object == NULL && !map->system_map) {
1092                 vm_object_t object;
1093                 object = vm_object_allocate(OBJT_DEFAULT,
1094                                 atop(entry->end - entry->start));       /* sized for the whole, unsplit entry */
1095                 entry->object.vm_object = object;
1096                 entry->offset = 0;
1097         }
1098
1099         /*
1100          * Create a new entry and insert it AFTER the specified entry
1101          */
1102         new_entry = vm_map_entry_create(map);
1103         *new_entry = *entry;                    /* begin as a full copy of entry */
1104
1105         new_entry->start = entry->end = end;    /* entry: [start, end); new_entry: [end, old end) */
1106         new_entry->offset += (end - entry->start);      /* skip the bytes entry keeps */
1107
1108         vm_map_entry_link(map, entry, new_entry);
1109
1110         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1111                 vm_object_reference(new_entry->object.vm_object);       /* two entries now share the object */
1112         }
1113 }
1114
1115 /*
1116  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
1117  *
1118  *      Asserts that the starting and ending region
1119  *      addresses fall within the valid range of the map.
1120  */
1121 #define VM_MAP_RANGE_CHECK(map, start, end)             \
1122                 {                                       \
1123                 if (start < vm_map_min(map))            \
1124                         start = vm_map_min(map);        \
1125                 if (end > vm_map_max(map))              \
1126                         end = vm_map_max(map);          \
1127                 if (start > end)                        \
1128                         start = end;                    \
1129                 }
1130
1131 /*
1132  *      vm_map_submap:          [ kernel use only ]
1133  *
1134  *      Mark the given range as handled by a subordinate map.
1135  *
1136  *      This range must have been created with vm_map_find,
1137  *      and no other operations may have been performed on this
1138  *      range prior to calling vm_map_submap.
1139  *
1140  *      Only a limited number of operations can be performed
1141  *      within this range after calling vm_map_submap:
1142  *              vm_fault
1143  *      [Don't try vm_map_copy!]
1144  *
1145  *      To remove a submapping, one must first remove the
1146  *      range from the superior map, and then destroy the
1147  *      submap (if desired).  [Better yet, don't try it.]
1148  */
1149 int
1150 vm_map_submap(
1151         vm_map_t map,
1152         vm_offset_t start,
1153         vm_offset_t end,
1154         vm_map_t submap)
1155 {
1156         vm_map_entry_t entry;
1157         int result = KERN_INVALID_ARGUMENT;     /* returned unless the entry qualifies below */
1158
1159         GIANT_REQUIRED;
1160
1161         vm_map_lock(map);
1162
1163         VM_MAP_RANGE_CHECK(map, start, end);    /* clamps start and end to the map's bounds */
1164
1165         if (vm_map_lookup_entry(map, start, &entry)) {
1166                 vm_map_clip_start(map, entry, start);   /* lookup hit: make entry begin at start */
1167         } else
1168                 entry = entry->next;            /* lookup miss: presumably entry precedes start, so step forward */
1169
1170         vm_map_clip_end(map, entry, end);       /* trim the tail so entry stops at end */
1171
1172         if ((entry->start == start) && (entry->end == end) &&  /* exactly one entry spans the range */
1173             ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1174             (entry->object.vm_object == NULL)) {        /* and nothing backs it yet */
1175                 entry->object.sub_map = submap;
1176                 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1177                 result = KERN_SUCCESS;
1178         }
1179         vm_map_unlock(map);
1180
1181         return (result);
1182 }
1183
1184 /*
1185  *      vm_map_protect:
1186  *
1187  *      Sets the protection of the specified address
1188  *      region in the target map.  If "set_max" is
1189  *      specified, the maximum protection is to be set;
1190  *      otherwise, only the current protection is affected.
1191  */
1192 int
1193 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1194                vm_prot_t new_prot, boolean_t set_max)
1195 {
1196         vm_map_entry_t current;         /* cursor for both passes */
1197         vm_map_entry_t entry;           /* first entry of the range */
1198
1199         GIANT_REQUIRED;
1200         vm_map_lock(map);
1201
1202         VM_MAP_RANGE_CHECK(map, start, end);    /* clamps start and end to the map's bounds */
1203
1204         if (vm_map_lookup_entry(map, start, &entry)) {
1205                 vm_map_clip_start(map, entry, start);   /* lookup hit: make entry begin at start */
1206         } else {
1207                 entry = entry->next;            /* lookup miss: presumably entry precedes start, so step forward */
1208         }
1209
1210         /*
1211          * Make a first pass to check for protection violations.
1212          */
1213         current = entry;
1214         while ((current != &map->header) && (current->start < end)) {
1215                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1216                         vm_map_unlock(map);
1217                         return (KERN_INVALID_ARGUMENT);         /* submap entries cannot be re-protected here */
1218                 }
1219                 if ((new_prot & current->max_protection) != new_prot) {        /* new_prot asks for a bit max_protection lacks */
1220                         vm_map_unlock(map);
1221                         return (KERN_PROTECTION_FAILURE);
1222                 }
1223                 current = current->next;
1224         }
1225
1226         /*
1227          * Go back and fix up protections. [Note that clipping is not
1228          * necessary the second time.]
1229          */
1230         current = entry;
1231         while ((current != &map->header) && (current->start < end)) {
1232                 vm_prot_t old_prot;
1233
1234                 vm_map_clip_end(map, current, end);     /* keep the change inside [start, end) */
1235
1236                 old_prot = current->protection;
1237                 if (set_max)
1238                         current->protection =
1239                             (current->max_protection = new_prot) &     /* install the new maximum ... */
1240                             old_prot;                                   /* ... and mask the current protection by it */
1241                 else
1242                         current->protection = new_prot;
1243
1244                 /*
1245                  * Update physical map if necessary. Worry about copy-on-write
1246                  * here -- CHECK THIS XXX
1247                  */
1248                 if (current->protection != old_prot) {         /* MASK below strips VM_PROT_WRITE for COW entries */
1249 #define MASK(entry)     (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1250                                                         VM_PROT_ALL)
1251                         pmap_protect(map->pmap, current->start,
1252                             current->end,
1253                             current->protection & MASK(current));
1254 #undef  MASK
1255                 }
1256                 vm_map_simplify_entry(map, current);    /* may merge neighbours into current; current stays valid */
1257                 current = current->next;
1258         }
1259         vm_map_unlock(map);
1260         return (KERN_SUCCESS);
1261 }
1262
1263 /*
1264  *      vm_map_madvise:
1265  *
1266  *      This routine traverses a process's map handling the madvise
1267  *      system call.  Advisories are classified as either those affecting
1268  *      the vm_map_entry structure, or those affecting the underlying
1269  *      objects.
1270  */
1271 int
1272 vm_map_madvise(
1273         vm_map_t map,
1274         vm_offset_t start,
1275         vm_offset_t end,
1276         int behav)
1277 {
1278         vm_map_entry_t current, entry;
1279         int modify_map = 0;     /* 1 when behav edits map entries, i.e. the write lock is held */
1280
1281         GIANT_REQUIRED;
1282
1283         /*
1284          * Some madvise calls directly modify the vm_map_entry, in which case
1285          * we need to use an exclusive lock on the map and we need to perform
1286          * various clipping operations.  Otherwise we only need a read-lock
1287          * on the map.
1288          */
1289         switch(behav) {
1290         case MADV_NORMAL:
1291         case MADV_SEQUENTIAL:
1292         case MADV_RANDOM:
1293         case MADV_NOSYNC:
1294         case MADV_AUTOSYNC:
1295         case MADV_NOCORE:
1296         case MADV_CORE:
1297                 modify_map = 1;
1298                 vm_map_lock(map);
1299                 break;
1300         case MADV_WILLNEED:
1301         case MADV_DONTNEED:
1302         case MADV_FREE:
1303                 vm_map_lock_read(map);
1304                 break;
1305         default:
1306                 return (KERN_INVALID_ARGUMENT);         /* unknown behav; no lock has been taken yet */
1307         }
1308
1309         /*
1310          * Locate starting entry and clip if necessary.
1311          */
1312         VM_MAP_RANGE_CHECK(map, start, end);
1313
1314         if (vm_map_lookup_entry(map, start, &entry)) {
1315                 if (modify_map)
1316                         vm_map_clip_start(map, entry, start);   /* clipping happens only under the write lock */
1317         } else {
1318                 entry = entry->next;
1319         }
1320
1321         if (modify_map) {
1322                 /*
1323                  * madvise behaviors that are implemented in the vm_map_entry.
1324                  *
1325                  * We clip the vm_map_entry so that behavioral changes are
1326                  * limited to the specified address range.
1327                  */
1328                 for (current = entry;
1329                      (current != &map->header) && (current->start < end);
1330                      current = current->next
1331                 ) {
1332                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1333                                 continue;       /* submap entries are left untouched */
1334
1335                         vm_map_clip_end(map, current, end);
1336
1337                         switch (behav) {
1338                         case MADV_NORMAL:
1339                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1340                                 break;
1341                         case MADV_SEQUENTIAL:
1342                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1343                                 break;
1344                         case MADV_RANDOM:
1345                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1346                                 break;
1347                         case MADV_NOSYNC:
1348                                 current->eflags |= MAP_ENTRY_NOSYNC;
1349                                 break;
1350                         case MADV_AUTOSYNC:
1351                                 current->eflags &= ~MAP_ENTRY_NOSYNC;  /* undoes MADV_NOSYNC */
1352                                 break;
1353                         case MADV_NOCORE:
1354                                 current->eflags |= MAP_ENTRY_NOCOREDUMP;
1355                                 break;
1356                         case MADV_CORE:
1357                                 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;      /* undoes MADV_NOCORE */
1358                                 break;
1359                         default:
1360                                 break;          /* not reached: the first switch admits only the cases above */
1361                         }
1362                         vm_map_simplify_entry(map, current);    /* re-merge entries that now match their neighbours */
1363                 }
1364                 vm_map_unlock(map);
1365         } else {
1366                 vm_pindex_t pindex;     /* first object page index to advise */
1367                 int count;              /* number of pages to advise */
1368
1369                 /*
1370                  * madvise behaviors that are implemented in the underlying
1371                  * vm_object.
1372                  *
1373                  * Since we don't clip the vm_map_entry, we have to clip
1374                  * the vm_object pindex and count.
1375                  */
1376                 for (current = entry;
1377                      (current != &map->header) && (current->start < end);
1378                      current = current->next
1379                 ) {
1380                         vm_offset_t useStart;   /* address matching pindex, after clipping to start */
1381
1382                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1383                                 continue;
1384
1385                         pindex = OFF_TO_IDX(current->offset);
1386                         count = atop(current->end - current->start);
1387                         useStart = current->start;
1388
1389                         if (current->start < start) {           /* entry begins before the range: drop the front */
1390                                 pindex += atop(start - current->start);
1391                                 count -= atop(start - current->start);
1392                                 useStart = start;
1393                         }
1394                         if (current->end > end)                 /* entry runs past the range: drop the tail */
1395                                 count -= atop(current->end - end);
1396
1397                         if (count <= 0)
1398                                 continue;       /* nothing of this entry is left after clipping */
1399
1400                         vm_object_madvise(current->object.vm_object,
1401                                           pindex, count, behav);
1402                         if (behav == MADV_WILLNEED) {
1403                                 pmap_object_init_pt(
1404                                     map->pmap,
1405                                     useStart,
1406                                     current->object.vm_object,
1407                                     pindex,
1408                                     (count << PAGE_SHIFT),      /* count came from atop(); shifted back to bytes */
1409                                     MAP_PREFAULT_MADVISE
1410                                 );
1411                         }
1412                 }
1413                 vm_map_unlock_read(map);
1414         }
1415         return (0);     /* every path that took a lock returns 0 */
1416 }
1417
1418
1419 /*
1420  *      vm_map_inherit:
1421  *
1422  *      Sets the inheritance of the specified address
1423  *      range in the target map.  Inheritance
1424  *      affects how the map will be shared with
1425  *      child maps at the time of vm_map_fork.
1426  */
1427 int
1428 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1429                vm_inherit_t new_inheritance)
1430 {
1431         vm_map_entry_t entry;
1432         vm_map_entry_t temp_entry;      /* raw result of the lookup */
1433
1434         GIANT_REQUIRED;
1435
1436         switch (new_inheritance) {      /* validate before taking the map lock */
1437         case VM_INHERIT_NONE:
1438         case VM_INHERIT_COPY:
1439         case VM_INHERIT_SHARE:
1440                 break;
1441         default:
1442                 return (KERN_INVALID_ARGUMENT);
1443         }
1444
1445         vm_map_lock(map);
1446
1447         VM_MAP_RANGE_CHECK(map, start, end);    /* clamps start and end to the map's bounds */
1448
1449         if (vm_map_lookup_entry(map, start, &temp_entry)) {
1450                 entry = temp_entry;
1451                 vm_map_clip_start(map, entry, start);   /* lookup hit: make entry begin at start */
1452         } else
1453                 entry = temp_entry->next;       /* lookup miss: presumably temp_entry precedes start */
1454
1455         while ((entry != &map->header) && (entry->start < end)) {
1456                 vm_map_clip_end(map, entry, end);       /* keep the change inside [start, end) */
1457
1458                 entry->inheritance = new_inheritance;
1459
1460                 vm_map_simplify_entry(map, entry);      /* re-merge entries that now match their neighbours */
1461
1462                 entry = entry->next;
1463         }
1464
1465         vm_map_unlock(map);
1466         return (KERN_SUCCESS);          /* also returned when no entry overlaps the range */
1467 }
1468
1469 /*
1470  * Implement the semantics of mlock
1471  */
1472 int
1473 vm_map_user_pageable(
1474         vm_map_t map,
1475         vm_offset_t start,
1476         vm_offset_t end,
1477         boolean_t new_pageable)         /* TRUE: unwire the range; FALSE: wire it */
1478 {
1479         vm_map_entry_t entry;
1480         vm_map_entry_t start_entry;
1481         vm_offset_t estart;             /* copies of entry->start / entry->end, taken ... */
1482         vm_offset_t eend;               /* ... before the exclusive lock is given up */
1483         int rv;
1484
1485         vm_map_lock(map);
1486         VM_MAP_RANGE_CHECK(map, start, end);
1487
1488         if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1489                 vm_map_unlock(map);
1490                 return (KERN_INVALID_ADDRESS);          /* the lookup of start failed */
1491         }
1492
1493         if (new_pageable) {
1494
1495                 entry = start_entry;
1496                 vm_map_clip_start(map, entry, start);
1497
1498                 /*
1499                  * Now decrement the wiring count for each region. If a region
1500                  * becomes completely unwired, unwire its physical pages and
1501                  * mappings.
1502                  */
1503                 while ((entry != &map->header) && (entry->start < end)) {
1504                         if (entry->eflags & MAP_ENTRY_USER_WIRED) {    /* entries without the flag are skipped */
1505                                 vm_map_clip_end(map, entry, end);
1506                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1507                                 entry->wired_count--;
1508                                 if (entry->wired_count == 0)
1509                                         vm_fault_unwire(map, entry->start, entry->end);
1510                         }
1511                         vm_map_simplify_entry(map,entry);
1512                         entry = entry->next;
1513                 }
1514         } else {
1515
1516                 entry = start_entry;
1517
1518                 while ((entry != &map->header) && (entry->start < end)) {
1519
1520                         if (entry->eflags & MAP_ENTRY_USER_WIRED) {    /* already user-wired: nothing to do */
1521                                 entry = entry->next;
1522                                 continue;
1523                         }
1524
1525                         if (entry->wired_count != 0) {          /* already wired, but not user-wired: count and flag only */
1526                                 entry->wired_count++;
1527                                 entry->eflags |= MAP_ENTRY_USER_WIRED;
1528                                 entry = entry->next;
1529                                 continue;
1530                         }
1531
1532                         /* Here on entry being newly wired */
1533
1534                         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1535                                 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1536                                 if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
1537
1538                                         vm_object_shadow(&entry->object.vm_object,
1539                                             &entry->offset,
1540                                             atop(entry->end - entry->start));
1541                                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1542
1543                                 } else if (entry->object.vm_object == NULL &&
1544                                            !map->system_map) {
1545
1546                                         entry->object.vm_object =
1547                                             vm_object_allocate(OBJT_DEFAULT,
1548                                                 atop(entry->end - entry->start));
1549                                         entry->offset = (vm_offset_t) 0;
1550
1551                                 }
1552                         }
1553
1554                         vm_map_clip_start(map, entry, start);   /* objects are set up above, before the entry is split */
1555                         vm_map_clip_end(map, entry, end);
1556
1557                         entry->wired_count++;
1558                         entry->eflags |= MAP_ENTRY_USER_WIRED;
1559                         estart = entry->start;
1560                         eend = entry->end;
1561
1562                         /* First we need to allow map modifications */
1563                         vm_map_set_recursive(map);
1564                         vm_map_lock_downgrade(map);
1565                         map->timestamp++;
1566
1567                         rv = vm_fault_user_wire(map, entry->start, entry->end);
1568                         if (rv) {
1569
1570                                 entry->wired_count--;           /* take back the count and flag set above */
1571                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1572
1573                                 vm_map_clear_recursive(map);
1574                                 vm_map_unlock(map);
1575
1576                                 /*
1577                                  * At this point, the map is unlocked, and
1578                                  * entry might no longer be valid.  Use copy
1579                                  * of entry start value obtained while entry
1580                                  * was valid.
1581                                  */
1582                                 (void) vm_map_user_pageable(map, start, estart,         /* unwires [start, estart) */
1583                                                             TRUE);
1584                                 return rv;
1585                         }
1586
1587                         vm_map_clear_recursive(map);
1588                         if (vm_map_lock_upgrade(map)) {         /* nonzero: apparently the upgrade failed; relock and re-find entry */
1589                                 vm_map_lock(map);
1590                                 if (vm_map_lookup_entry(map, estart, &entry)
1591                                     == FALSE) {
1592                                         vm_map_unlock(map);
1593                                         /*
1594                                          * vm_fault_user_wire succeeded, thus
1595                                          * the area between start and eend
1596                                          * is wired and has to be unwired
1597                                          * here as part of the cleanup.
1598                                          */
1599                                         (void) vm_map_user_pageable(map,
1600                                                                     start,
1601                                                                     eend,
1602                                                                     TRUE);
1603                                         return (KERN_INVALID_ADDRESS);
1604                                 }
1605                         }
1606                         vm_map_simplify_entry(map,entry);       /* no advance here: entry is now USER_WIRED, so the loop-top check steps on */
1607                 }
1608         }
1609         map->timestamp++;
1610         vm_map_unlock(map);
1611         return KERN_SUCCESS;
1612 }
1613
1614 /*
1615  *      vm_map_pageable:
1616  *
1617  *      Sets the pageability of the specified address
1618  *      range in the target map.  Regions specified
1619  *      as not pageable require locked-down physical
1620  *      memory and physical page maps.
1621  *
1622  *      The map must not be locked, but a reference
1623  *      must remain to the map throughout the call.
1624  */
1625 int
1626 vm_map_pageable(
1627         vm_map_t map,
1628         vm_offset_t start,
1629         vm_offset_t end,
1630         boolean_t new_pageable)
1631 {
1632         vm_map_entry_t entry;
1633         vm_map_entry_t start_entry;
1634         vm_offset_t failed = 0;
1635         int rv;
1636
1637         GIANT_REQUIRED;
1638
1639         vm_map_lock(map);
1640
1641         VM_MAP_RANGE_CHECK(map, start, end);
1642
1643         /*
1644          * Only one pageability change may take place at one time, since
1645          * vm_fault assumes it will be called only once for each
1646          * wiring/unwiring.  Therefore, we have to make sure we're actually
1647          * changing the pageability for the entire region.  We do so before
1648          * making any changes.
1649          */
1650         if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1651                 vm_map_unlock(map);
1652                 return (KERN_INVALID_ADDRESS);
1653         }
1654         entry = start_entry;
1655
1656         /*
1657          * Actions are rather different for wiring and unwiring, so we have
1658          * two separate cases.
1659          */
1660         if (new_pageable) {
1661                 vm_map_clip_start(map, entry, start);
1662
1663                 /*
1664                  * Unwiring.  First ensure that the range to be unwired is
1665                  * really wired down and that there are no holes.
1666                  */
1667                 while ((entry != &map->header) && (entry->start < end)) {
1668                         if (entry->wired_count == 0 ||
1669                             (entry->end < end &&
1670                                 (entry->next == &map->header ||
1671                                     entry->next->start > entry->end))) {
1672                                 vm_map_unlock(map);
1673                                 return (KERN_INVALID_ARGUMENT);
1674                         }
1675                         entry = entry->next;
1676                 }
1677
1678                 /*
1679                  * Now decrement the wiring count for each region. If a region
1680                  * becomes completely unwired, unwire its physical pages and
1681                  * mappings.
1682                  */
1683                 entry = start_entry;
1684                 while ((entry != &map->header) && (entry->start < end)) {
1685                         vm_map_clip_end(map, entry, end);
1686
1687                         entry->wired_count--;
1688                         if (entry->wired_count == 0)
1689                                 vm_fault_unwire(map, entry->start, entry->end);
1690
1691                         vm_map_simplify_entry(map, entry);
1692
1693                         entry = entry->next;
1694                 }
1695         } else {
1696                 /*
1697                  * Wiring.  We must do this in two passes:
1698                  *
1699                  * 1.  Holding the write lock, we create any shadow or zero-fill
1700                  * objects that need to be created. Then we clip each map
1701                  * entry to the region to be wired and increment its wiring
1702                  * count.  We create objects before clipping the map entries
1703                  * to avoid object proliferation.
1704                  *
1705                  * 2.  We downgrade to a read lock, and call vm_fault_wire to
1706                  * fault in the pages for any newly wired area (wired_count is
1707                  * 1).
1708                  *
1709                  * Downgrading to a read lock for vm_fault_wire avoids a possible
1710                  * deadlock with another process that may have faulted on one
1711                  * of the pages to be wired (it would mark the page busy,
1712                  * blocking us, then in turn block on the map lock that we
1713                  * hold).  Because of problems in the recursive lock package,
1714                  * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
1715                  * any actions that require the write lock must be done
1716                  * beforehand.  Because we keep the read lock on the map, the
1717                  * copy-on-write status of the entries we modify here cannot
1718                  * change.
1719                  */
1720
1721                 /*
1722                  * Pass 1.
1723                  */
1724                 while ((entry != &map->header) && (entry->start < end)) {
1725                         if (entry->wired_count == 0) {
1726
1727                                 /*
1728                                  * Perform actions of vm_map_lookup that need
1729                                  * the write lock on the map: create a shadow
1730                                  * object for a copy-on-write region, or an
1731                                  * object for a zero-fill region.
1732                                  *
1733                                  * We don't have to do this for entries that
1734                                  * point to sub maps, because we won't
1735                                  * hold the lock on the sub map.
1736                                  */
1737                                 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1738                                         int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1739                                         if (copyflag &&
1740                                             ((entry->protection & VM_PROT_WRITE) != 0)) {
1741
1742                                                 vm_object_shadow(&entry->object.vm_object,
1743                                                     &entry->offset,
1744                                                     atop(entry->end - entry->start));
1745                                                 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1746                                         } else if (entry->object.vm_object == NULL &&
1747                                                    !map->system_map) {
1748                                                 entry->object.vm_object =
1749                                                     vm_object_allocate(OBJT_DEFAULT,
1750                                                         atop(entry->end - entry->start));
1751                                                 entry->offset = (vm_offset_t) 0;
1752                                         }
1753                                 }
1754                         }
1755                         vm_map_clip_start(map, entry, start);
1756                         vm_map_clip_end(map, entry, end);
1757                         entry->wired_count++;
1758
1759                         /*
1760                          * Check for holes
1761                          */
1762                         if (entry->end < end &&
1763                             (entry->next == &map->header ||
1764                                 entry->next->start > entry->end)) {
1765                                 /*
1766                                  * Found one.  Object creation actions do not
1767                                  * need to be undone, but the wired counts
1768                                  * need to be restored.
1769                                  */
1770                                 while (entry != &map->header && entry->end > start) {
1771                                         entry->wired_count--;
1772                                         entry = entry->prev;
1773                                 }
1774                                 vm_map_unlock(map);
1775                                 return (KERN_INVALID_ARGUMENT);
1776                         }
1777                         entry = entry->next;
1778                 }
1779
1780                 /*
1781                  * Pass 2.
1782                  */
1783
1784                 /*
1785                  * HACK HACK HACK HACK
1786                  *
1787                  * If we are wiring in the kernel map or a submap of it,
1788                  * unlock the map to avoid deadlocks.  We trust that the
1789                  * kernel is well-behaved, and therefore will not do
1790                  * anything destructive to this region of the map while
1791                  * we have it unlocked.  We cannot trust user processes
1792                  * to do the same.
1793                  *
1794                  * HACK HACK HACK HACK
1795                  */
1796                 if (vm_map_pmap(map) == kernel_pmap) {
1797                         vm_map_unlock(map);     /* trust me ... */
1798                 } else {
1799                         vm_map_lock_downgrade(map);
1800                 }
1801
1802                 rv = 0;
1803                 entry = start_entry;
1804                 while (entry != &map->header && entry->start < end) {
1805                         /*
1806                          * If vm_fault_wire fails for any page we need to undo
1807                          * what has been done.  We decrement the wiring count
1808                          * for those pages which have not yet been wired (now)
1809                          * and unwire those that have (later).
1810                          *
1811                          * XXX this violates the locking protocol on the map,
1812                          * needs to be fixed.
1813                          */
1814                         if (rv)
1815                                 entry->wired_count--;
1816                         else if (entry->wired_count == 1) {
1817                                 rv = vm_fault_wire(map, entry->start, entry->end);
1818                                 if (rv) {
1819                                         failed = entry->start;
1820                                         entry->wired_count--;
1821                                 }
1822                         }
1823                         entry = entry->next;
1824                 }
1825
1826                 if (vm_map_pmap(map) == kernel_pmap) {
1827                         vm_map_lock(map);
1828                 }
1829                 if (rv) {
1830                         vm_map_unlock(map);
1831                         (void) vm_map_pageable(map, start, failed, TRUE);
1832                         return (rv);
1833                 }
1834                 /*
1835                  * An exclusive lock on the map is needed in order to call
1836                  * vm_map_simplify_entry().  If the current lock on the map
1837                  * is only a shared lock, an upgrade is needed.
1838                  */
1839                 if (vm_map_pmap(map) != kernel_pmap &&
1840                     vm_map_lock_upgrade(map)) {
1841                         vm_map_lock(map);
1842                         if (vm_map_lookup_entry(map, start, &start_entry) ==
1843                             FALSE) {
1844                                 vm_map_unlock(map);
1845                                 return KERN_SUCCESS;
1846                         }
1847                 }
1848                 vm_map_simplify_entry(map, start_entry);
1849         }
1850
1851         vm_map_unlock(map);
1852
1853         return (KERN_SUCCESS);
1854 }
1855
1856 /*
1857  * vm_map_clean
1858  *
1859  * Push any dirty cached pages in the address range to their pager.
1860  * If syncio is TRUE, dirty pages are written synchronously.
1861  * If invalidate is TRUE, any cached pages are freed as well.
1862  *
1863  * Returns an error if any part of the specified range is not mapped.
1864  */
1865 int
1866 vm_map_clean(
1867         vm_map_t map,
1868         vm_offset_t start,
1869         vm_offset_t end,
1870         boolean_t syncio,
1871         boolean_t invalidate)
1872 {
1873         vm_map_entry_t current;
1874         vm_map_entry_t entry;
1875         vm_size_t size;
1876         vm_object_t object;
1877         vm_ooffset_t offset;
1878
1879         GIANT_REQUIRED;
1880
1881         vm_map_lock_read(map);
1882         VM_MAP_RANGE_CHECK(map, start, end);
1883         if (!vm_map_lookup_entry(map, start, &entry)) {
1884                 vm_map_unlock_read(map);
1885                 return (KERN_INVALID_ADDRESS);
1886         }
1887         /*
1888          * Make a first pass to check for holes.
1889          */
1890         for (current = entry; current->start < end; current = current->next) {
1891                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1892                         vm_map_unlock_read(map);
1893                         return (KERN_INVALID_ARGUMENT);
1894                 }
1895                 if (end > current->end &&
1896                     (current->next == &map->header ||
1897                         current->end != current->next->start)) {
1898                         vm_map_unlock_read(map);
1899                         return (KERN_INVALID_ADDRESS);
1900                 }
1901         }
1902
1903         if (invalidate)
1904                 pmap_remove(vm_map_pmap(map), start, end);
1905         /*
1906          * Make a second pass, cleaning/uncaching pages from the indicated
1907          * objects as we go.
1908          */
1909         for (current = entry; current->start < end; current = current->next) {
1910                 offset = current->offset + (start - current->start);
1911                 size = (end <= current->end ? end : current->end) - start;
1912                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1913                         vm_map_t smap;
1914                         vm_map_entry_t tentry;
1915                         vm_size_t tsize;
1916
1917                         smap = current->object.sub_map;
1918                         vm_map_lock_read(smap);
1919                         (void) vm_map_lookup_entry(smap, offset, &tentry);
1920                         tsize = tentry->end - offset;
1921                         if (tsize < size)
1922                                 size = tsize;
1923                         object = tentry->object.vm_object;
1924                         offset = tentry->offset + (offset - tentry->start);
1925                         vm_map_unlock_read(smap);
1926                 } else {
1927                         object = current->object.vm_object;
1928                 }
1929                 /*
1930                  * Note that there is absolutely no sense in writing out
1931                  * anonymous objects, so we track down the vnode object
1932                  * to write out.
1933                  * We invalidate (remove) all pages from the address space
1934                  * anyway, for semantic correctness.
1935                  *
1936                  * note: certain anonymous maps, such as MAP_NOSYNC maps,
1937                  * may start out with a NULL object.
1938                  */
1939                 while (object && object->backing_object) {
1940                         object = object->backing_object;
1941                         offset += object->backing_object_offset;
1942                         if (object->size < OFF_TO_IDX(offset + size))
1943                                 size = IDX_TO_OFF(object->size) - offset;
1944                 }
1945                 if (object && (object->type == OBJT_VNODE) && 
1946                     (current->protection & VM_PROT_WRITE)) {
1947                         /*
1948                          * Flush pages if writing is allowed, invalidate them
1949                          * if invalidation requested.  Pages undergoing I/O
1950                          * will be ignored by vm_object_page_remove().
1951                          *
1952                          * We cannot lock the vnode and then wait for paging
1953                          * to complete without deadlocking against vm_fault.
1954                          * Instead we simply call vm_object_page_remove() and
1955                          * allow it to block internally on a page-by-page 
1956                          * basis when it encounters pages undergoing async 
1957                          * I/O.
1958                          */
1959                         int flags;
1960
1961                         vm_object_reference(object);
1962                         vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread);
1963                         flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1964                         flags |= invalidate ? OBJPC_INVAL : 0;
1965                         vm_object_page_clean(object,
1966                             OFF_TO_IDX(offset),
1967                             OFF_TO_IDX(offset + size + PAGE_MASK),
1968                             flags);
1969                         if (invalidate) {
1970                                 /*vm_object_pip_wait(object, "objmcl");*/
1971                                 vm_object_page_remove(object,
1972                                     OFF_TO_IDX(offset),
1973                                     OFF_TO_IDX(offset + size + PAGE_MASK),
1974                                     FALSE);
1975                         }
1976                         VOP_UNLOCK(object->handle, 0, curthread);
1977                         vm_object_deallocate(object);
1978                 }
1979                 start += size;
1980         }
1981
1982         vm_map_unlock_read(map);
1983         return (KERN_SUCCESS);
1984 }
1985
1986 /*
1987  *      vm_map_entry_unwire:    [ internal use only ]
1988  *
1989  *      Make the region specified by this entry pageable.
1990  *
1991  *      The map in question should be locked.
1992  *      [This is the reason for this routine's existence.]
1993  */
1994 static void 
1995 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
1996 {
1997         vm_fault_unwire(map, entry->start, entry->end);
1998         entry->wired_count = 0;
1999 }
2000
2001 /*
2002  *      vm_map_entry_delete:    [ internal use only ]
2003  *
2004  *      Deallocate the given entry from the target map.
2005  */
2006 static void
2007 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
2008 {
2009         vm_map_entry_unlink(map, entry);
2010         map->size -= entry->end - entry->start;
2011
2012         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
2013                 vm_object_deallocate(entry->object.vm_object);
2014         }
2015
2016         vm_map_entry_dispose(map, entry);
2017 }
2018
2019 /*
2020  *      vm_map_delete:  [ internal use only ]
2021  *
2022  *      Deallocates the given address range from the target
2023  *      map.
2024  */
2025 int
2026 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
2027 {
2028         vm_object_t object;
2029         vm_map_entry_t entry;
2030         vm_map_entry_t first_entry;
2031
2032         GIANT_REQUIRED;
2033
2034         /*
2035          * Find the start of the region, and clip it
2036          */
2037         if (!vm_map_lookup_entry(map, start, &first_entry))
2038                 entry = first_entry->next;
2039         else {
2040                 entry = first_entry;
2041                 vm_map_clip_start(map, entry, start);
2042                 /*
2043                  * Fix the lookup hint now, rather than each time though the
2044                  * loop.
2045                  */
2046                 SAVE_HINT(map, entry->prev);
2047         }
2048
2049         /*
2050          * Save the free space hint
2051          */
2052         if (entry == &map->header) {
2053                 map->first_free = &map->header;
2054         } else if (map->first_free->start >= start) {
2055                 map->first_free = entry->prev;
2056         }
2057
2058         /*
2059          * Step through all entries in this region
2060          */
2061         while ((entry != &map->header) && (entry->start < end)) {
2062                 vm_map_entry_t next;
2063                 vm_offset_t s, e;
2064                 vm_pindex_t offidxstart, offidxend, count;
2065
2066                 vm_map_clip_end(map, entry, end);
2067
2068                 s = entry->start;
2069                 e = entry->end;
2070                 next = entry->next;
2071
2072                 offidxstart = OFF_TO_IDX(entry->offset);
2073                 count = OFF_TO_IDX(e - s);
2074                 object = entry->object.vm_object;
2075
2076                 /*
2077                  * Unwire before removing addresses from the pmap; otherwise,
2078                  * unwiring will put the entries back in the pmap.
2079                  */
2080                 if (entry->wired_count != 0) {
2081                         vm_map_entry_unwire(map, entry);
2082                 }
2083
2084                 offidxend = offidxstart + count;
2085
2086                 if ((object == kernel_object) || (object == kmem_object)) {
2087                         vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2088                 } else {
2089                         pmap_remove(map->pmap, s, e);
2090                         if (object != NULL &&
2091                             object->ref_count != 1 &&
2092                             (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2093                             (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2094                                 vm_object_collapse(object);
2095                                 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2096                                 if (object->type == OBJT_SWAP) {
2097                                         swap_pager_freespace(object, offidxstart, count);
2098                                 }
2099                                 if (offidxend >= object->size &&
2100                                     offidxstart < object->size) {
2101                                         object->size = offidxstart;
2102                                 }
2103                         }
2104                 }
2105
2106                 /*
2107                  * Delete the entry (which may delete the object) only after
2108                  * removing all pmap entries pointing to its pages.
2109                  * (Otherwise, its page frames may be reallocated, and any
2110                  * modify bits will be set in the wrong object!)
2111                  */
2112                 vm_map_entry_delete(map, entry);
2113                 entry = next;
2114         }
2115         return (KERN_SUCCESS);
2116 }
2117
2118 /*
2119  *      vm_map_remove:
2120  *
2121  *      Remove the given address range from the target map.
2122  *      This is the exported form of vm_map_delete.
2123  */
2124 int
2125 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2126 {
2127         int result, s = 0;
2128
2129         GIANT_REQUIRED;
2130
2131         if (map == kmem_map)
2132                 s = splvm();
2133
2134         vm_map_lock(map);
2135         VM_MAP_RANGE_CHECK(map, start, end);
2136         result = vm_map_delete(map, start, end);
2137         vm_map_unlock(map);
2138
2139         if (map == kmem_map)
2140                 splx(s);
2141
2142         return (result);
2143 }
2144
2145 /*
2146  *      vm_map_check_protection:
2147  *
2148  *      Assert that the target map allows the specified
2149  *      privilege on the entire address region given.
2150  *      The entire region must be allocated.
2151  */
2152 boolean_t
2153 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2154                         vm_prot_t protection)
2155 {
2156         vm_map_entry_t entry;
2157         vm_map_entry_t tmp_entry;
2158
2159         GIANT_REQUIRED;
2160
2161         vm_map_lock_read(map);
2162         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2163                 vm_map_unlock_read(map);
2164                 return (FALSE);
2165         }
2166         entry = tmp_entry;
2167
2168         while (start < end) {
2169                 if (entry == &map->header) {
2170                         vm_map_unlock_read(map);
2171                         return (FALSE);
2172                 }
2173                 /*
2174                  * No holes allowed!
2175                  */
2176                 if (start < entry->start) {
2177                         vm_map_unlock_read(map);
2178                         return (FALSE);
2179                 }
2180                 /*
2181                  * Check protection associated with entry.
2182                  */
2183                 if ((entry->protection & protection) != protection) {
2184                         vm_map_unlock_read(map);
2185                         return (FALSE);
2186                 }
2187                 /* go to next entry */
2188                 start = entry->end;
2189                 entry = entry->next;
2190         }
2191         vm_map_unlock_read(map);
2192         return (TRUE);
2193 }
2194
2195 /*
2196  * Split the pages in a map entry into a new object.  This affords
2197  * easier removal of unused pages, and keeps object inheritance from
2198  * being a negative impact on memory usage.
2199  */
2200 static void
2201 vm_map_split(vm_map_entry_t entry)
2202 {
2203         vm_page_t m;
2204         vm_object_t orig_object, new_object, source;
2205         vm_offset_t s, e;
2206         vm_pindex_t offidxstart, offidxend, idx;
2207         vm_size_t size;
2208         vm_ooffset_t offset;
2209
2210         GIANT_REQUIRED;
2211
2212         orig_object = entry->object.vm_object;
2213         if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
2214                 return;
2215         if (orig_object->ref_count <= 1)
2216                 return;
2217
2218         offset = entry->offset;
2219         s = entry->start;
2220         e = entry->end;
2221
2222         offidxstart = OFF_TO_IDX(offset);
2223         offidxend = offidxstart + OFF_TO_IDX(e - s);
2224         size = offidxend - offidxstart;
2225
2226         new_object = vm_pager_allocate(orig_object->type,
2227                 NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
2228         if (new_object == NULL)
2229                 return;
2230
2231         source = orig_object->backing_object;
2232         if (source != NULL) {
2233                 vm_object_reference(source);    /* Referenced by new_object */
2234                 TAILQ_INSERT_TAIL(&source->shadow_head,
2235                                   new_object, shadow_list);
2236                 vm_object_clear_flag(source, OBJ_ONEMAPPING);
2237                 new_object->backing_object_offset = 
2238                         orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
2239                 new_object->backing_object = source;
2240                 source->shadow_count++;
2241                 source->generation++;
2242         }
2243
2244         for (idx = 0; idx < size; idx++) {
2245                 vm_page_t m;
2246
2247         retry:
2248                 m = vm_page_lookup(orig_object, offidxstart + idx);
2249                 if (m == NULL)
2250                         continue;
2251
2252                 /*
2253                  * We must wait for pending I/O to complete before we can
2254                  * rename the page.
2255                  *
2256                  * We do not have to VM_PROT_NONE the page as mappings should
2257                  * not be changed by this operation.
2258                  */
2259                 if (vm_page_sleep_busy(m, TRUE, "spltwt"))
2260                         goto retry;
2261                         
2262                 vm_page_busy(m);
2263                 vm_page_rename(m, new_object, idx);
2264                 /* page automatically made dirty by rename and cache handled */
2265                 vm_page_busy(m);
2266         }
2267
2268         if (orig_object->type == OBJT_SWAP) {
2269                 vm_object_pip_add(orig_object, 1);
2270                 /*
2271                  * copy orig_object pages into new_object
2272                  * and destroy unneeded pages in
2273                  * shadow object.
2274                  */
2275                 swap_pager_copy(orig_object, new_object, offidxstart, 0);
2276                 vm_object_pip_wakeup(orig_object);
2277         }
2278
2279         for (idx = 0; idx < size; idx++) {
2280                 m = vm_page_lookup(new_object, idx);
2281                 if (m) {
2282                         vm_page_wakeup(m);
2283                 }
2284         }
2285
2286         entry->object.vm_object = new_object;
2287         entry->offset = 0LL;
2288         vm_object_deallocate(orig_object);
2289 }
2290
2291 /*
2292  *      vm_map_copy_entry:
2293  *
2294  *      Copies the contents of the source entry to the destination
2295  *      entry.  The entries *must* be aligned properly.
2296  */
2297 static void
2298 vm_map_copy_entry(
2299         vm_map_t src_map,
2300         vm_map_t dst_map,
2301         vm_map_entry_t src_entry, 
2302         vm_map_entry_t dst_entry)
2303 {
2304         vm_object_t src_object;
2305
2306         if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2307                 return;
2308
2309         if (src_entry->wired_count == 0) {
2310
2311                 /*
2312                  * If the source entry is marked needs_copy, it is already
2313                  * write-protected.
2314                  */
2315                 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2316                         pmap_protect(src_map->pmap,
2317                             src_entry->start,
2318                             src_entry->end,
2319                             src_entry->protection & ~VM_PROT_WRITE);
2320                 }
2321
2322                 /*
2323                  * Make a copy of the object.
2324                  */
2325                 if ((src_object = src_entry->object.vm_object) != NULL) {
2326
2327                         if ((src_object->handle == NULL) &&
2328                                 (src_object->type == OBJT_DEFAULT ||
2329                                  src_object->type == OBJT_SWAP)) {
2330                                 vm_object_collapse(src_object);
2331                                 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2332                                         vm_map_split(src_entry);
2333                                         src_object = src_entry->object.vm_object;
2334                                 }
2335                         }
2336
2337                         vm_object_reference(src_object);
2338                         vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2339                         dst_entry->object.vm_object = src_object;
2340                         src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2341                         dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2342                         dst_entry->offset = src_entry->offset;
2343                 } else {
2344                         dst_entry->object.vm_object = NULL;
2345                         dst_entry->offset = 0;
2346                 }
2347
2348                 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2349                     dst_entry->end - dst_entry->start, src_entry->start);
2350         } else {
2351                 /*
2352                  * Of course, wired down pages can't be set copy-on-write.
2353                  * Cause wired pages to be copied into the new map by
2354                  * simulating faults (the new pages are pageable)
2355                  */
2356                 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2357         }
2358 }
2359
2360 /*
2361  * vmspace_fork:
2362  * Create a new process vmspace structure and vm_map
2363  * based on those of an existing process.  The new map
2364  * is based on the old map, according to the inheritance
2365  * values on the regions in that map.
2366  *
2367  * The source map must not be locked.
2368  */
2369 struct vmspace *
2370 vmspace_fork(struct vmspace *vm1)
2371 {
2372         struct vmspace *vm2;
2373         vm_map_t old_map = &vm1->vm_map;
2374         vm_map_t new_map;
2375         vm_map_entry_t old_entry;
2376         vm_map_entry_t new_entry;
2377         vm_object_t object;
2378
2379         GIANT_REQUIRED;
2380
2381         vm_map_lock(old_map);
2382         old_map->infork = 1;
2383
2384         vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2385         bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2386             (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
2387         new_map = &vm2->vm_map; /* XXX */
2388         new_map->timestamp = 1;
2389
2390         old_entry = old_map->header.next;
2391
2392         while (old_entry != &old_map->header) {
2393                 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2394                         panic("vm_map_fork: encountered a submap");
2395
2396                 switch (old_entry->inheritance) {
2397                 case VM_INHERIT_NONE:
2398                         break;
2399
2400                 case VM_INHERIT_SHARE:
2401                         /*
2402                          * Clone the entry, creating the shared object if necessary.
2403                          */
2404                         object = old_entry->object.vm_object;
2405                         if (object == NULL) {
2406                                 object = vm_object_allocate(OBJT_DEFAULT,
2407                                         atop(old_entry->end - old_entry->start));
2408                                 old_entry->object.vm_object = object;
2409                                 old_entry->offset = (vm_offset_t) 0;
2410                         }
2411
2412                         /*
2413                          * Add the reference before calling vm_object_shadow
2414                          * to insure that a shadow object is created.
2415                          */
2416                         vm_object_reference(object);
2417                         if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2418                                 vm_object_shadow(&old_entry->object.vm_object,
2419                                         &old_entry->offset,
2420                                         atop(old_entry->end - old_entry->start));
2421                                 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2422                                 /* Transfer the second reference too. */
2423                                 vm_object_reference(
2424                                     old_entry->object.vm_object);
2425                                 vm_object_deallocate(object);
2426                                 object = old_entry->object.vm_object;
2427                         }
2428                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
2429
2430                         /*
2431                          * Clone the entry, referencing the shared object.
2432                          */
2433                         new_entry = vm_map_entry_create(new_map);
2434                         *new_entry = *old_entry;
2435                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2436                         new_entry->wired_count = 0;
2437
2438                         /*
2439                          * Insert the entry into the new map -- we know we're
2440                          * inserting at the end of the new map.
2441                          */
2442                         vm_map_entry_link(new_map, new_map->header.prev,
2443                             new_entry);
2444
2445                         /*
2446                          * Update the physical map
2447                          */
2448                         pmap_copy(new_map->pmap, old_map->pmap,
2449                             new_entry->start,
2450                             (old_entry->end - old_entry->start),
2451                             old_entry->start);
2452                         break;
2453
2454                 case VM_INHERIT_COPY:
2455                         /*
2456                          * Clone the entry and link into the map.
2457                          */
2458                         new_entry = vm_map_entry_create(new_map);
2459                         *new_entry = *old_entry;
2460                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2461                         new_entry->wired_count = 0;
2462                         new_entry->object.vm_object = NULL;
2463                         vm_map_entry_link(new_map, new_map->header.prev,
2464                             new_entry);
2465                         vm_map_copy_entry(old_map, new_map, old_entry,
2466                             new_entry);
2467                         break;
2468                 }
2469                 old_entry = old_entry->next;
2470         }
2471
2472         new_map->size = old_map->size;
2473         old_map->infork = 0;
2474         vm_map_unlock(old_map);
2475
2476         return (vm2);
2477 }
2478
2479 int
2480 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2481               vm_prot_t prot, vm_prot_t max, int cow)
2482 {
2483         vm_map_entry_t prev_entry;
2484         vm_map_entry_t new_stack_entry;
2485         vm_size_t      init_ssize;
2486         int            rv;
2487
2488         GIANT_REQUIRED;
2489
2490         if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2491                 return (KERN_NO_SPACE);
2492
2493         if (max_ssize < sgrowsiz)
2494                 init_ssize = max_ssize;
2495         else
2496                 init_ssize = sgrowsiz;
2497
2498         vm_map_lock(map);
2499
2500         /* If addr is already mapped, no go */
2501         if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2502                 vm_map_unlock(map);
2503                 return (KERN_NO_SPACE);
2504         }
2505
2506         /* If we can't accomodate max_ssize in the current mapping,
2507          * no go.  However, we need to be aware that subsequent user
2508          * mappings might map into the space we have reserved for
2509          * stack, and currently this space is not protected.  
2510          * 
2511          * Hopefully we will at least detect this condition 
2512          * when we try to grow the stack.
2513          */
2514         if ((prev_entry->next != &map->header) &&
2515             (prev_entry->next->start < addrbos + max_ssize)) {
2516                 vm_map_unlock(map);
2517                 return (KERN_NO_SPACE);
2518         }
2519
2520         /* We initially map a stack of only init_ssize.  We will
2521          * grow as needed later.  Since this is to be a grow 
2522          * down stack, we map at the top of the range.
2523          *
2524          * Note: we would normally expect prot and max to be
2525          * VM_PROT_ALL, and cow to be 0.  Possibly we should
2526          * eliminate these as input parameters, and just
2527          * pass these values here in the insert call.
2528          */
2529         rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2530                            addrbos + max_ssize, prot, max, cow);
2531
2532         /* Now set the avail_ssize amount */
2533         if (rv == KERN_SUCCESS){
2534                 if (prev_entry != &map->header)
2535                         vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2536                 new_stack_entry = prev_entry->next;
2537                 if (new_stack_entry->end   != addrbos + max_ssize ||
2538                     new_stack_entry->start != addrbos + max_ssize - init_ssize)
2539                         panic ("Bad entry start/end for new stack entry");
2540                 else 
2541                         new_stack_entry->avail_ssize = max_ssize - init_ssize;
2542         }
2543
2544         vm_map_unlock(map);
2545         return (rv);
2546 }
2547
2548 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2549  * desired address is already mapped, or if we successfully grow
2550  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2551  * stack range (this is strange, but preserves compatibility with
2552  * the grow function in vm_machdep.c).
2553  */
2554 int
2555 vm_map_growstack (struct proc *p, vm_offset_t addr)
2556 {
2557         vm_map_entry_t prev_entry;
2558         vm_map_entry_t stack_entry;
2559         vm_map_entry_t new_stack_entry;
2560         struct vmspace *vm = p->p_vmspace;
2561         vm_map_t map = &vm->vm_map;
2562         vm_offset_t    end;
2563         int      grow_amount;
2564         int      rv;
2565         int      is_procstack;
2566
2567         GIANT_REQUIRED;
2568         
2569 Retry:
2570         vm_map_lock_read(map);
2571
2572         /* If addr is already in the entry range, no need to grow.*/
2573         if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2574                 vm_map_unlock_read(map);
2575                 return (KERN_SUCCESS);
2576         }
2577
2578         if ((stack_entry = prev_entry->next) == &map->header) {
2579                 vm_map_unlock_read(map);
2580                 return (KERN_SUCCESS);
2581         } 
2582         if (prev_entry == &map->header) 
2583                 end = stack_entry->start - stack_entry->avail_ssize;
2584         else
2585                 end = prev_entry->end;
2586
2587         /* This next test mimics the old grow function in vm_machdep.c.
2588          * It really doesn't quite make sense, but we do it anyway
2589          * for compatibility.
2590          *
2591          * If not growable stack, return success.  This signals the
2592          * caller to proceed as he would normally with normal vm.
2593          */
2594         if (stack_entry->avail_ssize < 1 ||
2595             addr >= stack_entry->start ||
2596             addr <  stack_entry->start - stack_entry->avail_ssize) {
2597                 vm_map_unlock_read(map);
2598                 return (KERN_SUCCESS);
2599         } 
2600         
2601         /* Find the minimum grow amount */
2602         grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2603         if (grow_amount > stack_entry->avail_ssize) {
2604                 vm_map_unlock_read(map);
2605                 return (KERN_NO_SPACE);
2606         }
2607
2608         /* If there is no longer enough space between the entries
2609          * nogo, and adjust the available space.  Note: this 
2610          * should only happen if the user has mapped into the
2611          * stack area after the stack was created, and is
2612          * probably an error.
2613          *
2614          * This also effectively destroys any guard page the user
2615          * might have intended by limiting the stack size.
2616          */
2617         if (grow_amount > stack_entry->start - end) {
2618                 if (vm_map_lock_upgrade(map))
2619                         goto Retry;
2620
2621                 stack_entry->avail_ssize = stack_entry->start - end;
2622
2623                 vm_map_unlock(map);
2624                 return (KERN_NO_SPACE);
2625         }
2626
2627         is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2628
2629         /* If this is the main process stack, see if we're over the 
2630          * stack limit.
2631          */
2632         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2633                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2634                 vm_map_unlock_read(map);
2635                 return (KERN_NO_SPACE);
2636         }
2637
2638         /* Round up the grow amount modulo SGROWSIZ */
2639         grow_amount = roundup (grow_amount, sgrowsiz);
2640         if (grow_amount > stack_entry->avail_ssize) {
2641                 grow_amount = stack_entry->avail_ssize;
2642         }
2643         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2644                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2645                 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2646                               ctob(vm->vm_ssize);
2647         }
2648
2649         if (vm_map_lock_upgrade(map))
2650                 goto Retry;
2651
2652         /* Get the preliminary new entry start value */
2653         addr = stack_entry->start - grow_amount;
2654
2655         /* If this puts us into the previous entry, cut back our growth
2656          * to the available space.  Also, see the note above.
2657          */
2658         if (addr < end) {
2659                 stack_entry->avail_ssize = stack_entry->start - end;
2660                 addr = end;
2661         }
2662
2663         rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2664                            VM_PROT_ALL,
2665                            VM_PROT_ALL,
2666                            0);
2667
2668         /* Adjust the available stack space by the amount we grew. */
2669         if (rv == KERN_SUCCESS) {
2670                 if (prev_entry != &map->header)
2671                         vm_map_clip_end(map, prev_entry, addr);
2672                 new_stack_entry = prev_entry->next;
2673                 if (new_stack_entry->end   != stack_entry->start  ||
2674                     new_stack_entry->start != addr)
2675                         panic ("Bad stack grow start/end in new stack entry");
2676                 else {
2677                         new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2678                                                         (new_stack_entry->end -
2679                                                          new_stack_entry->start);
2680                         if (is_procstack)
2681                                 vm->vm_ssize += btoc(new_stack_entry->end -
2682                                                      new_stack_entry->start);
2683                 }
2684         }
2685
2686         vm_map_unlock(map);
2687         return (rv);
2688 }
2689
2690 /*
2691  * Unshare the specified VM space for exec.  If other processes are
2692  * mapped to it, then create a new one.  The new vmspace is null.
2693  */
2694 void
2695 vmspace_exec(struct proc *p) 
2696 {
2697         struct vmspace *oldvmspace = p->p_vmspace;
2698         struct vmspace *newvmspace;
2699         vm_map_t map = &p->p_vmspace->vm_map;
2700
2701         GIANT_REQUIRED;
2702         newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
2703         bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2704             (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2705         /*
2706          * This code is written like this for prototype purposes.  The
2707          * goal is to avoid running down the vmspace here, but let the
2708          * other process's that are still using the vmspace to finally
2709          * run it down.  Even though there is little or no chance of blocking
2710          * here, it is a good idea to keep this form for future mods.
2711          */
2712         p->p_vmspace = newvmspace;
2713         pmap_pinit2(vmspace_pmap(newvmspace));
2714         vmspace_free(oldvmspace);
2715         if (p == curthread->td_proc)            /* XXXKSE ? */
2716                 pmap_activate(curthread);
2717 }
2718
2719 /*
2720  * Unshare the specified VM space for forcing COW.  This
2721  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2722  */
2723 void
2724 vmspace_unshare(struct proc *p)
2725 {
2726         struct vmspace *oldvmspace = p->p_vmspace;
2727         struct vmspace *newvmspace;
2728
2729         GIANT_REQUIRED;
2730         if (oldvmspace->vm_refcnt == 1)
2731                 return;
2732         newvmspace = vmspace_fork(oldvmspace);
2733         p->p_vmspace = newvmspace;
2734         pmap_pinit2(vmspace_pmap(newvmspace));
2735         vmspace_free(oldvmspace);
2736         if (p == curthread->td_proc)            /* XXXKSE ? */
2737                 pmap_activate(curthread);
2738 }
2739
2740 /*
2741  *      vm_map_lookup:
2742  *
2743  *      Finds the VM object, offset, and
2744  *      protection for a given virtual address in the
2745  *      specified map, assuming a page fault of the
2746  *      type specified.
2747  *
2748  *      Leaves the map in question locked for read; return
2749  *      values are guaranteed until a vm_map_lookup_done
2750  *      call is performed.  Note that the map argument
2751  *      is in/out; the returned map must be used in
2752  *      the call to vm_map_lookup_done.
2753  *
2754  *      A handle (out_entry) is returned for use in
2755  *      vm_map_lookup_done, to make that fast.
2756  *
2757  *      If a lookup is requested with "write protection"
2758  *      specified, the map may be changed to perform virtual
2759  *      copying operations, although the data referenced will
2760  *      remain the same.
2761  */
2762 int
2763 vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
2764               vm_offset_t vaddr,
2765               vm_prot_t fault_typea,
2766               vm_map_entry_t *out_entry,        /* OUT */
2767               vm_object_t *object,              /* OUT */
2768               vm_pindex_t *pindex,              /* OUT */
2769               vm_prot_t *out_prot,              /* OUT */
2770               boolean_t *wired)                 /* OUT */
2771 {
2772         vm_map_entry_t entry;
2773         vm_map_t map = *var_map;
2774         vm_prot_t prot;
2775         vm_prot_t fault_type = fault_typea;
2776
2777         GIANT_REQUIRED;
2778 RetryLookup:;
2779         /*
2780          * Lookup the faulting address.
2781          */
2782
2783         vm_map_lock_read(map);
2784 #define RETURN(why) \
2785                 { \
2786                 vm_map_unlock_read(map); \
2787                 return (why); \
2788                 }
2789
2790         /*
2791          * If the map has an interesting hint, try it before calling full
2792          * blown lookup routine.
2793          */
2794         entry = map->hint;
2795         *out_entry = entry;
2796         if ((entry == &map->header) ||
2797             (vaddr < entry->start) || (vaddr >= entry->end)) {
2798                 vm_map_entry_t tmp_entry;
2799
2800                 /*
2801                  * Entry was either not a valid hint, or the vaddr was not
2802                  * contained in the entry, so do a full lookup.
2803                  */
2804                 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
2805                         RETURN(KERN_INVALID_ADDRESS);
2806
2807                 entry = tmp_entry;
2808                 *out_entry = entry;
2809         }
2810         
2811         /*
2812          * Handle submaps.
2813          */
2814         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2815                 vm_map_t old_map = map;
2816
2817                 *var_map = map = entry->object.sub_map;
2818                 vm_map_unlock_read(old_map);
2819                 goto RetryLookup;
2820         }
2821
2822         /*
2823          * Check whether this task is allowed to have this page.
2824          * Note the special case for MAP_ENTRY_COW
2825          * pages with an override.  This is to implement a forced
2826          * COW for debuggers.
2827          */
2828         if (fault_type & VM_PROT_OVERRIDE_WRITE)
2829                 prot = entry->max_protection;
2830         else
2831                 prot = entry->protection;
2832         fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2833         if ((fault_type & prot) != fault_type) {
2834                         RETURN(KERN_PROTECTION_FAILURE);
2835         }
2836         if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2837             (entry->eflags & MAP_ENTRY_COW) &&
2838             (fault_type & VM_PROT_WRITE) &&
2839             (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2840                 RETURN(KERN_PROTECTION_FAILURE);
2841         }
2842
2843         /*
2844          * If this page is not pageable, we have to get it for all possible
2845          * accesses.
2846          */
2847         *wired = (entry->wired_count != 0);
2848         if (*wired)
2849                 prot = fault_type = entry->protection;
2850
2851         /*
2852          * If the entry was copy-on-write, we either ...
2853          */
2854         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2855                 /*
2856                  * If we want to write the page, we may as well handle that
2857                  * now since we've got the map locked.
2858                  *
2859                  * If we don't need to write the page, we just demote the
2860                  * permissions allowed.
2861                  */
2862                 if (fault_type & VM_PROT_WRITE) {
2863                         /*
2864                          * Make a new object, and place it in the object
2865                          * chain.  Note that no new references have appeared
2866                          * -- one just moved from the map to the new
2867                          * object.
2868                          */
2869                         if (vm_map_lock_upgrade(map))
2870                                 goto RetryLookup;
2871                         vm_object_shadow(
2872                             &entry->object.vm_object,
2873                             &entry->offset,
2874                             atop(entry->end - entry->start));
2875                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2876                         vm_map_lock_downgrade(map);
2877                 } else {
2878                         /*
2879                          * We're attempting to read a copy-on-write page --
2880                          * don't allow writes.
2881                          */
2882                         prot &= ~VM_PROT_WRITE;
2883                 }
2884         }
2885
2886         /*
2887          * Create an object if necessary.
2888          */
2889         if (entry->object.vm_object == NULL &&
2890             !map->system_map) {
2891                 if (vm_map_lock_upgrade(map)) 
2892                         goto RetryLookup;
2893                 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2894                     atop(entry->end - entry->start));
2895                 entry->offset = 0;
2896                 vm_map_lock_downgrade(map);
2897         }
2898
2899         /*
2900          * Return the object/offset from this entry.  If the entry was
2901          * copy-on-write or empty, it has been fixed up.
2902          */
2903         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2904         *object = entry->object.vm_object;
2905
2906         /*
2907          * Return whether this is the only map sharing this data.
2908          */
2909         *out_prot = prot;
2910         return (KERN_SUCCESS);
2911
2912 #undef  RETURN
2913 }
2914
2915 /*
2916  *      vm_map_lookup_done:
2917  *
2918  *      Releases locks acquired by a vm_map_lookup
2919  *      (according to the handle returned by that lookup).
2920  */
2921 void
2922 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
2923 {
2924         /*
2925          * Unlock the main-level map
2926          */
2927         GIANT_REQUIRED;
2928         vm_map_unlock_read(map);
2929 }
2930
2931 /*
2932  * Implement uiomove with VM operations.  This handles (and collateral changes)
2933  * support every combination of source object modification, and COW type
2934  * operations.
2935  */
2936 int
2937 vm_uiomove(
2938         vm_map_t mapa,
2939         vm_object_t srcobject,
2940         off_t cp,
2941         int cnta,
2942         vm_offset_t uaddra,
2943         int *npages)
2944 {
2945         vm_map_t map;
2946         vm_object_t first_object, oldobject, object;
2947         vm_map_entry_t entry;
2948         vm_prot_t prot;
2949         boolean_t wired;
2950         int tcnt, rv;
2951         vm_offset_t uaddr, start, end, tend;
2952         vm_pindex_t first_pindex, osize, oindex;
2953         off_t ooffset;
2954         int cnt;
2955
2956         GIANT_REQUIRED;
2957
2958         if (npages)
2959                 *npages = 0;
2960
2961         cnt = cnta;
2962         uaddr = uaddra;
2963
2964         while (cnt > 0) {
2965                 map = mapa;
2966
2967                 if ((vm_map_lookup(&map, uaddr,
2968                         VM_PROT_READ, &entry, &first_object,
2969                         &first_pindex, &prot, &wired)) != KERN_SUCCESS) {
2970                         return EFAULT;
2971                 }
2972
2973                 vm_map_clip_start(map, entry, uaddr);
2974
2975                 tcnt = cnt;
2976                 tend = uaddr + tcnt;
2977                 if (tend > entry->end) {
2978                         tcnt = entry->end - uaddr;
2979                         tend = entry->end;
2980                 }
2981
2982                 vm_map_clip_end(map, entry, tend);
2983
2984                 start = entry->start;
2985                 end = entry->end;
2986
2987                 osize = atop(tcnt);
2988
2989                 oindex = OFF_TO_IDX(cp);
2990                 if (npages) {
2991                         vm_pindex_t idx;
2992                         for (idx = 0; idx < osize; idx++) {
2993                                 vm_page_t m;
2994                                 if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
2995                                         vm_map_lookup_done(map, entry);
2996                                         return 0;
2997                                 }
2998                                 /*
2999                                  * disallow busy or invalid pages, but allow
3000                                  * m->busy pages if they are entirely valid.
3001                                  */
3002                                 if ((m->flags & PG_BUSY) ||
3003                                         ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
3004                                         vm_map_lookup_done(map, entry);
3005                                         return 0;
3006                                 }
3007                         }
3008                 }
3009
3010 /*
3011  * If we are changing an existing map entry, just redirect
3012  * the object, and change mappings.
3013  */
3014                 if ((first_object->type == OBJT_VNODE) &&
3015                         ((oldobject = entry->object.vm_object) == first_object)) {
3016
3017                         if ((entry->offset != cp) || (oldobject != srcobject)) {
3018                                 /*
3019                                 * Remove old window into the file
3020                                 */
3021                                 pmap_remove (map->pmap, uaddr, tend);
3022
3023                                 /*
3024                                 * Force copy on write for mmaped regions
3025                                 */
3026                                 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3027
3028                                 /*
3029                                 * Point the object appropriately
3030                                 */
3031                                 if (oldobject != srcobject) {
3032
3033                                 /*
3034                                 * Set the object optimization hint flag
3035                                 */
3036                                         vm_object_set_flag(srcobject, OBJ_OPT);
3037                                         vm_object_reference(srcobject);
3038                                         entry->object.vm_object = srcobject;
3039
3040                                         if (oldobject) {
3041                                                 vm_object_deallocate(oldobject);
3042                                         }
3043                                 }
3044
3045                                 entry->offset = cp;
3046                                 map->timestamp++;
3047                         } else {
3048                                 pmap_remove (map->pmap, uaddr, tend);
3049                         }
3050
3051                 } else if ((first_object->ref_count == 1) &&
3052                         (first_object->size == osize) &&
3053                         ((first_object->type == OBJT_DEFAULT) ||
3054                                 (first_object->type == OBJT_SWAP)) ) {
3055
3056                         oldobject = first_object->backing_object;
3057
3058                         if ((first_object->backing_object_offset != cp) ||
3059                                 (oldobject != srcobject)) {
3060                                 /*
3061                                 * Remove old window into the file
3062                                 */
3063                                 pmap_remove (map->pmap, uaddr, tend);
3064
3065                                 /*
3066                                  * Remove unneeded old pages
3067                                  */
3068                                 vm_object_page_remove(first_object, 0, 0, 0);
3069
3070                                 /*
3071                                  * Invalidate swap space
3072                                  */
3073                                 if (first_object->type == OBJT_SWAP) {
3074                                         swap_pager_freespace(first_object,
3075                                                 0,
3076                                                 first_object->size);
3077                                 }
3078
3079                                 /*
3080                                  * Force copy on write for mmaped regions
3081                                  */
3082                                 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3083
3084                                 /*
3085                                  * Point the object appropriately
3086                                  */
3087                                 if (oldobject != srcobject) {
3088                                         /*
3089                                          * Set the object optimization hint flag
3090                                          */
3091                                         vm_object_set_flag(srcobject, OBJ_OPT);
3092                                         vm_object_reference(srcobject);
3093
3094                                         if (oldobject) {
3095                                                 TAILQ_REMOVE(&oldobject->shadow_head,
3096                                                         first_object, shadow_list);
3097                                                 oldobject->shadow_count--;
3098                                                 /* XXX bump generation? */
3099                                                 vm_object_deallocate(oldobject);
3100                                         }
3101
3102                                         TAILQ_INSERT_TAIL(&srcobject->shadow_head,
3103                                                 first_object, shadow_list);
3104                                         srcobject->shadow_count++;
3105                                         /* XXX bump generation? */
3106
3107                                         first_object->backing_object = srcobject;
3108                                 }
3109                                 first_object->backing_object_offset = cp;
3110                                 map->timestamp++;
3111                         } else {
3112                                 pmap_remove (map->pmap, uaddr, tend);
3113                         }
3114 /*
3115  * Otherwise, we have to do a logical mmap.
3116  */
3117                 } else {
3118
3119                         vm_object_set_flag(srcobject, OBJ_OPT);
3120                         vm_object_reference(srcobject);
3121
3122                         pmap_remove (map->pmap, uaddr, tend);
3123
3124                         vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3125                         vm_map_lock_upgrade(map);
3126
3127                         if (entry == &map->header) {
3128                                 map->first_free = &map->header;
3129                         } else if (map->first_free->start >= start) {
3130                                 map->first_free = entry->prev;
3131                         }
3132
3133                         SAVE_HINT(map, entry->prev);
3134                         vm_map_entry_delete(map, entry);
3135
3136                         object = srcobject;
3137                         ooffset = cp;
3138
3139                         rv = vm_map_insert(map, object, ooffset, start, tend,
3140                                 VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
3141
3142                         if (rv != KERN_SUCCESS)
3143                                 panic("vm_uiomove: could not insert new entry: %d", rv);
3144                 }
3145
3146 /*
3147  * Map the window directly, if it is already in memory
3148  */
3149                 pmap_object_init_pt(map->pmap, uaddr,
3150                         srcobject, oindex, tcnt, 0);
3151
3152                 map->timestamp++;
3153                 vm_map_unlock(map);
3154
3155                 cnt -= tcnt;
3156                 uaddr += tcnt;
3157                 cp += tcnt;
3158                 if (npages)
3159                         *npages += osize;
3160         }
3161         return 0;
3162 }
3163
3164 /*
3165  * Performs the copy_on_write operations necessary to allow the virtual copies
3166  * into user space to work.  This has to be called for write(2) system calls
3167  * from other processes, file unlinking, and file size shrinkage.
3168  */
3169 void
3170 vm_freeze_copyopts(vm_object_t object, vm_pindex_t froma, vm_pindex_t toa)
3171 {
3172         int rv;
3173         vm_object_t robject;
3174         vm_pindex_t idx;
3175
3176         GIANT_REQUIRED;
3177         if ((object == NULL) ||
3178                 ((object->flags & OBJ_OPT) == 0))
3179                 return;
3180
3181         if (object->shadow_count > object->ref_count)
3182                 panic("vm_freeze_copyopts: sc > rc");
3183
3184         while ((robject = TAILQ_FIRST(&object->shadow_head)) != NULL) {
3185                 vm_pindex_t bo_pindex;
3186                 vm_page_t m_in, m_out;
3187
3188                 bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
3189
3190                 vm_object_reference(robject);
3191
3192                 vm_object_pip_wait(robject, "objfrz");
3193
3194                 if (robject->ref_count == 1) {
3195                         vm_object_deallocate(robject);
3196                         continue;
3197                 }
3198
3199                 vm_object_pip_add(robject, 1);
3200
3201                 for (idx = 0; idx < robject->size; idx++) {
3202
3203                         m_out = vm_page_grab(robject, idx,
3204                                                 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3205
3206                         if (m_out->valid == 0) {
3207                                 m_in = vm_page_grab(object, bo_pindex + idx,
3208                                                 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3209                                 if (m_in->valid == 0) {
3210                                         rv = vm_pager_get_pages(object, &m_in, 1, 0);
3211                                         if (rv != VM_PAGER_OK) {
3212                                                 printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex);
3213                                                 continue;
3214                                         }
3215                                         vm_page_deactivate(m_in);
3216                                 }
3217
3218                                 vm_page_protect(m_in, VM_PROT_NONE);
3219                                 pmap_copy_page(m_in, m_out);
3220                                 m_out->valid = m_in->valid;
3221                                 vm_page_dirty(m_out);
3222                                 vm_page_activate(m_out);
3223                                 vm_page_wakeup(m_in);
3224                         }
3225                         vm_page_wakeup(m_out);
3226                 }
3227
3228                 object->shadow_count--;
3229                 object->ref_count--;
3230                 TAILQ_REMOVE(&object->shadow_head, robject, shadow_list);
3231                 robject->backing_object = NULL;
3232                 robject->backing_object_offset = 0;
3233
3234                 vm_object_pip_wakeup(robject);
3235                 vm_object_deallocate(robject);
3236         }
3237
3238         vm_object_clear_flag(object, OBJ_OPT);
3239 }
3240
3241 #include "opt_ddb.h"
3242 #ifdef DDB
3243 #include <sys/kernel.h>
3244
3245 #include <ddb/ddb.h>
3246
3247 /*
3248  *      vm_map_print:   [ debug ]
3249  */
3250 DB_SHOW_COMMAND(map, vm_map_print)
3251 {
3252         static int nlines;
3253         /* XXX convert args. */
3254         vm_map_t map = (vm_map_t)addr;
3255         boolean_t full = have_addr;
3256
3257         vm_map_entry_t entry;
3258
3259         db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3260             (void *)map,
3261             (void *)map->pmap, map->nentries, map->timestamp);
3262         nlines++;
3263
3264         if (!full && db_indent)
3265                 return;
3266
3267         db_indent += 2;
3268         for (entry = map->header.next; entry != &map->header;
3269             entry = entry->next) {
3270                 db_iprintf("map entry %p: start=%p, end=%p\n",
3271                     (void *)entry, (void *)entry->start, (void *)entry->end);
3272                 nlines++;
3273                 {
3274                         static char *inheritance_name[4] =
3275                         {"share", "copy", "none", "donate_copy"};
3276
3277                         db_iprintf(" prot=%x/%x/%s",
3278                             entry->protection,
3279                             entry->max_protection,
3280                             inheritance_name[(int)(unsigned char)entry->inheritance]);
3281                         if (entry->wired_count != 0)
3282                                 db_printf(", wired");
3283                 }
3284                 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3285                         /* XXX no %qd in kernel.  Truncate entry->offset. */
3286                         db_printf(", share=%p, offset=0x%lx\n",
3287                             (void *)entry->object.sub_map,
3288                             (long)entry->offset);
3289                         nlines++;
3290                         if ((entry->prev == &map->header) ||
3291                             (entry->prev->object.sub_map !=
3292                                 entry->object.sub_map)) {
3293                                 db_indent += 2;
3294                                 vm_map_print((db_expr_t)(intptr_t)
3295                                              entry->object.sub_map,
3296                                              full, 0, (char *)0);
3297                                 db_indent -= 2;
3298                         }
3299                 } else {
3300                         /* XXX no %qd in kernel.  Truncate entry->offset. */
3301                         db_printf(", object=%p, offset=0x%lx",
3302                             (void *)entry->object.vm_object,
3303                             (long)entry->offset);
3304                         if (entry->eflags & MAP_ENTRY_COW)
3305                                 db_printf(", copy (%s)",
3306                                     (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3307                         db_printf("\n");
3308                         nlines++;
3309
3310                         if ((entry->prev == &map->header) ||
3311                             (entry->prev->object.vm_object !=
3312                                 entry->object.vm_object)) {
3313                                 db_indent += 2;
3314                                 vm_object_print((db_expr_t)(intptr_t)
3315                                                 entry->object.vm_object,
3316                                                 full, 0, (char *)0);
3317                                 nlines += 4;
3318                                 db_indent -= 2;
3319                         }
3320                 }
3321         }
3322         db_indent -= 2;
3323         if (db_indent == 0)
3324                 nlines = 0;
3325 }
3326
3327
3328 DB_SHOW_COMMAND(procvm, procvm)
3329 {
3330         struct proc *p;
3331
3332         if (have_addr) {
3333                 p = (struct proc *) addr;
3334         } else {
3335                 p = curproc;
3336         }
3337
3338         db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3339             (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3340             (void *)vmspace_pmap(p->p_vmspace));
3341
3342         vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3343 }
3344
3345 #endif /* DDB */