]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/vm/vm_map.c
Add uma_zone_set_max() to add enforced limits to non vm obj backed zones.
[FreeBSD/FreeBSD.git] / sys / vm / vm_map.c
1 /*
2  * Copyright (c) 1991, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      from: @(#)vm_map.c      8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66
67 /*
68  *      Virtual memory mapping module.
69  */
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/ktr.h>
74 #include <sys/lock.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/vmmeter.h>
78 #include <sys/mman.h>
79 #include <sys/vnode.h>
80 #include <sys/resourcevar.h>
81
82 #include <vm/vm.h>
83 #include <vm/vm_param.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_pager.h>
89 #include <vm/vm_kern.h>
90 #include <vm/vm_extern.h>
91 #include <vm/swap_pager.h>
92 #include <vm/uma.h>
93
94 /*
95  *      Virtual memory maps provide for the mapping, protection,
96  *      and sharing of virtual memory objects.  In addition,
97  *      this module provides for an efficient virtual copy of
98  *      memory from one map to another.
99  *
100  *      Synchronization is required prior to most operations.
101  *
102  *      Maps consist of an ordered doubly-linked list of simple
103  *      entries; a single hint is used to speed up lookups.
104  *
105  *      Since portions of maps are specified by start/end addresses,
106  *      which may not align with existing map entries, all
107  *      routines merely "clip" entries to these start/end values.
108  *      [That is, an entry is split into two, bordering at a
109  *      start or end value.]  Note that these clippings may not
110  *      always be necessary (as the two resulting entries are then
111  *      not changed); however, the clipping is done for convenience.
112  *
113  *      As mentioned above, virtual copy operations are performed
114  *      by copying VM object references from one map to
115  *      another, and then marking both regions as copy-on-write.
116  */
117
118 /*
119  *      vm_map_startup:
120  *
121  *      Initialize the vm_map module.  Must be called before
122  *      any other vm_map routines.
123  *
124  *      Map and entry structures are allocated from the general
125  *      purpose memory pool with some exceptions:
126  *
127  *      - The kernel map and kmem submap are allocated statically.
128  *      - Kernel map entries are allocated out of a static pool.
129  *
130  *      These restrictions are necessary since malloc() uses the
131  *      maps and requires map entries.
132  */
133
134 static uma_zone_t mapentzone;
135 static uma_zone_t kmapentzone;
136 static uma_zone_t mapzone;
137 static uma_zone_t vmspace_zone;
138 static struct vm_object kmapentobj;
139 static void vmspace_zinit(void *mem, int size);
140 static void vmspace_zfini(void *mem, int size);
141 static void vm_map_zinit(void *mem, int size);
142 static void vm_map_zfini(void *mem, int size);
143 static void _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max);
144
145 #ifdef INVARIANTS
146 static void vm_map_zdtor(void *mem, int size, void *arg);
147 static void vmspace_zdtor(void *mem, int size, void *arg);
148 #endif
149
150 void
151 vm_map_startup(void)
152 {
153         mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
154 #ifdef INVARIANTS
155             vm_map_zdtor,
156 #else
157             NULL,
158 #endif
159             vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
160         uma_prealloc(mapzone, MAX_KMAP);
161         kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry), 
162             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
163         uma_prealloc(kmapentzone, MAX_KMAPENT);
164         mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry), 
165             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
166         uma_prealloc(mapentzone, MAX_MAPENT);
167 }
168
169 static void
170 vmspace_zfini(void *mem, int size)
171 {
172         struct vmspace *vm;
173
174         vm = (struct vmspace *)mem;
175
176         vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map));
177 }
178
179 static void
180 vmspace_zinit(void *mem, int size)
181 {
182         struct vmspace *vm;
183
184         vm = (struct vmspace *)mem;
185
186         vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map));
187 }
188
189 static void
190 vm_map_zfini(void *mem, int size)
191 {
192         vm_map_t map;
193
194         GIANT_REQUIRED;
195         map = (vm_map_t)mem;
196
197         lockdestroy(&map->lock);
198 }
199
200 static void
201 vm_map_zinit(void *mem, int size)
202 {
203         vm_map_t map;
204
205         GIANT_REQUIRED;
206
207         map = (vm_map_t)mem;
208         map->nentries = 0;
209         map->size = 0;
210         map->infork = 0;
211         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
212 }
213
#ifdef INVARIANTS
/*
 * Zone destructor for vm_map items: asserts that the map being
 * freed has no entries, no size and is not marked infork.
 */
static void
vm_map_zdtor(void *mem, int size, void *arg)
{
        vm_map_t map = mem;

        KASSERT(map->nentries == 0,
            ("map %p nentries == %d on free.",
            map, map->nentries));
        KASSERT(map->size == 0,
            ("map %p size == %lu on free.",
            map, (unsigned long)map->size));
        KASSERT(map->infork == 0,
            ("map %p infork == %d on free.",
            map, map->infork));
}

/*
 * Zone destructor for vmspace items: applies the vm_map checks to
 * the embedded map.
 */
static void
vmspace_zdtor(void *mem, int size, void *arg)
{
        struct vmspace *vmsp = mem;

        vm_map_zdtor(&vmsp->vm_map, sizeof(vmsp->vm_map), arg);
}
#endif  /* INVARIANTS */
241
242 /*
243  * Allocate a vmspace structure, including a vm_map and pmap,
244  * and initialize those structures.  The refcnt is set to 1.
245  * The remaining fields must be initialized by the caller.
246  */
247 struct vmspace *
248 vmspace_alloc(min, max)
249         vm_offset_t min, max;
250 {
251         struct vmspace *vm;
252
253         GIANT_REQUIRED;
254         vm = uma_zalloc(vmspace_zone, M_WAITOK);
255         CTR1(KTR_VM, "vmspace_alloc: %p", vm);
256         _vm_map_init(&vm->vm_map, min, max);
257         pmap_pinit(vmspace_pmap(vm));
258         vm->vm_map.pmap = vmspace_pmap(vm);             /* XXX */
259         vm->vm_refcnt = 1;
260         vm->vm_shm = NULL;
261         vm->vm_freer = NULL;
262         return (vm);
263 }
264
265 void
266 vm_init2(void) 
267 {
268         uma_zone_set_obj(kmapentzone, &kmapentobj, cnt.v_page_count / 4);
269         vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
270 #ifdef INVARIANTS
271             vmspace_zdtor,
272 #else
273             NULL,
274 #endif
275             vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
276         pmap_init2();
277         vm_object_init2();
278 }
279
280 static __inline void
281 vmspace_dofree(struct vmspace *vm)
282 {
283         CTR1(KTR_VM, "vmspace_free: %p", vm);
284         /*
285          * Lock the map, to wait out all other references to it.
286          * Delete all of the mappings and pages they hold, then call
287          * the pmap module to reclaim anything left.
288          */
289         vm_map_lock(&vm->vm_map);
290         (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
291             vm->vm_map.max_offset);
292         vm_map_unlock(&vm->vm_map);
293
294         pmap_release(vmspace_pmap(vm));
295         uma_zfree(vmspace_zone, vm);
296 }
297
298 void
299 vmspace_free(struct vmspace *vm)
300 {
301         GIANT_REQUIRED;
302
303         if (vm->vm_refcnt == 0)
304                 panic("vmspace_free: attempt to free already freed vmspace");
305
306         if (--vm->vm_refcnt == 0)
307                 vmspace_dofree(vm);
308 }
309
310 void
311 vmspace_exitfree(struct proc *p)
312 {
313         GIANT_REQUIRED;
314
315         if (p == p->p_vmspace->vm_freer)
316                 vmspace_dofree(p->p_vmspace);
317 }
318
319 /*
320  * vmspace_swap_count() - count the approximate swap useage in pages for a
321  *                        vmspace.
322  *
323  *      Swap useage is determined by taking the proportional swap used by
324  *      VM objects backing the VM map.  To make up for fractional losses,
325  *      if the VM object has any swap use at all the associated map entries
326  *      count for at least 1 swap page.
327  */
328 int
329 vmspace_swap_count(struct vmspace *vmspace)
330 {
331         vm_map_t map = &vmspace->vm_map;
332         vm_map_entry_t cur;
333         int count = 0;
334
335         for (cur = map->header.next; cur != &map->header; cur = cur->next) {
336                 vm_object_t object;
337
338                 if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
339                     (object = cur->object.vm_object) != NULL &&
340                     object->type == OBJT_SWAP
341                 ) {
342                         int n = (cur->end - cur->start) / PAGE_SIZE;
343
344                         if (object->un_pager.swp.swp_bcount) {
345                                 count += object->un_pager.swp.swp_bcount *
346                                     SWAP_META_PAGES * n / object->size + 1;
347                         }
348                 }
349         }
350         return (count);
351 }
352
353 u_char   
354 vm_map_entry_behavior(struct vm_map_entry *entry)
355 {                  
356         return entry->eflags & MAP_ENTRY_BEHAV_MASK;
357 }
358
359 void
360 vm_map_entry_set_behavior(struct vm_map_entry *entry, u_char behavior)
361 {              
362         entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
363                 (behavior & MAP_ENTRY_BEHAV_MASK);
364 }                       
365
366 void
367 vm_map_lock(vm_map_t map)
368 {
369         vm_map_printf("locking map LK_EXCLUSIVE: %p\n", map);
370         if (lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread) != 0)
371                 panic("vm_map_lock: failed to get lock");
372         map->timestamp++;
373 }
374
375 void
376 vm_map_unlock(vm_map_t map)
377 {
378         vm_map_printf("locking map LK_RELEASE: %p\n", map);
379         lockmgr(&(map)->lock, LK_RELEASE, NULL, curthread);
380 }
381
382 void
383 vm_map_lock_read(vm_map_t map)
384 {
385         vm_map_printf("locking map LK_SHARED: %p\n", map);
386         lockmgr(&(map)->lock, LK_SHARED, NULL, curthread);
387 }
388
389 void
390 vm_map_unlock_read(vm_map_t map)
391 {
392         vm_map_printf("locking map LK_RELEASE: %p\n", map);
393         lockmgr(&(map)->lock, LK_RELEASE, NULL, curthread);
394 }
395
396 static __inline__ int
397 _vm_map_lock_upgrade(vm_map_t map, struct thread *td) {
398         int error;
399
400         vm_map_printf("locking map LK_EXCLUPGRADE: %p\n", map); 
401         error = lockmgr(&map->lock, LK_EXCLUPGRADE, NULL, td);
402         if (error == 0)
403                 map->timestamp++;
404         return error;
405 }
406
407 int
408 vm_map_lock_upgrade(vm_map_t map)
409 {
410     return (_vm_map_lock_upgrade(map, curthread));
411 }
412
413 void
414 vm_map_lock_downgrade(vm_map_t map)
415 {
416         vm_map_printf("locking map LK_DOWNGRADE: %p\n", map);
417         lockmgr(&map->lock, LK_DOWNGRADE, NULL, curthread);
418 }
419
420 void
421 vm_map_set_recursive(vm_map_t map)
422 {
423         mtx_lock((map)->lock.lk_interlock);
424         map->lock.lk_flags |= LK_CANRECURSE;
425         mtx_unlock((map)->lock.lk_interlock);
426 }
427
428 void
429 vm_map_clear_recursive(vm_map_t map)
430 {
431         mtx_lock((map)->lock.lk_interlock);
432         map->lock.lk_flags &= ~LK_CANRECURSE;
433         mtx_unlock((map)->lock.lk_interlock);
434 }
435
436 vm_offset_t
437 vm_map_min(vm_map_t map)
438 {
439         return (map->min_offset);
440 }
441
442 vm_offset_t
443 vm_map_max(vm_map_t map)
444 {
445         return (map->max_offset);
446 }
447
448 struct pmap *
449 vm_map_pmap(vm_map_t map)
450 {
451         return (map->pmap);
452 }
453
454 struct pmap *
455 vmspace_pmap(struct vmspace *vmspace)
456 {
457         return &vmspace->vm_pmap;
458 }
459
/*
 * Return pmap_resident_count() for the vmspace's pmap.
 */
long
vmspace_resident_count(struct vmspace *vmspace)
{
        struct pmap *pm = vmspace_pmap(vmspace);

        return (pmap_resident_count(pm));
}
465
466 /*
467  *      vm_map_create:
468  *
469  *      Creates and returns a new empty VM map with
470  *      the given physical map structure, and having
471  *      the given lower and upper address bounds.
472  */
473 vm_map_t
474 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
475 {
476         vm_map_t result;
477
478         GIANT_REQUIRED;
479
480         result = uma_zalloc(mapzone, M_WAITOK);
481         CTR1(KTR_VM, "vm_map_create: %p", result);
482         _vm_map_init(result, min, max);
483         result->pmap = pmap;
484         return (result);
485 }
486
487 /*
488  * Initialize an existing vm_map structure
489  * such as that in the vmspace structure.
490  * The pmap is set elsewhere.
491  */
492 static void
493 _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
494 {
495         GIANT_REQUIRED;
496
497         map->header.next = map->header.prev = &map->header;
498         map->system_map = 0;
499         map->min_offset = min;
500         map->max_offset = max;
501         map->first_free = &map->header;
502         map->hint = &map->header;
503         map->timestamp = 0;
504 }
505
506 void
507 vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
508 {
509         _vm_map_init(map, min, max);
510         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
511 }
512
513 /*
514  *      vm_map_entry_dispose:   [ internal use only ]
515  *
516  *      Inverse of vm_map_entry_create.
517  */
518 static void
519 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
520 {
521         uma_zfree((map->system_map || !mapentzone)
522             ? kmapentzone : mapentzone, entry);
523 }
524
525 /*
526  *      vm_map_entry_create:    [ internal use only ]
527  *
528  *      Allocates a VM map entry for insertion.
529  *      No entry fields are filled in.
530  */
531 static vm_map_entry_t
532 vm_map_entry_create(vm_map_t map)
533 {
534         vm_map_entry_t new_entry;
535
536         new_entry = uma_zalloc((map->system_map || !mapentzone) ? 
537                 kmapentzone : mapentzone, M_WAITOK);
538         if (new_entry == NULL)
539             panic("vm_map_entry_create: kernel resources exhausted");
540         return (new_entry);
541 }
542
543 /*
544  *      vm_map_entry_{un,}link:
545  *
546  *      Insert/remove entries from maps.
547  */
548 static __inline void
549 vm_map_entry_link(vm_map_t map,
550                   vm_map_entry_t after_where,
551                   vm_map_entry_t entry)
552 {
553
554         CTR4(KTR_VM,
555             "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
556             map->nentries, entry, after_where);
557         map->nentries++;
558         entry->prev = after_where;
559         entry->next = after_where->next;
560         entry->next->prev = entry;
561         after_where->next = entry;
562 }
563
564 static __inline void
565 vm_map_entry_unlink(vm_map_t map,
566                     vm_map_entry_t entry)
567 {
568         vm_map_entry_t prev = entry->prev;
569         vm_map_entry_t next = entry->next;
570
571         next->prev = prev;
572         prev->next = next;
573         map->nentries--;
574         CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
575             map->nentries, entry);
576 }
577
/*
 *      SAVE_HINT:
 *
 *      Saves the specified entry as the hint for
 *      future lookups.
 *
 *      Expands to exactly one statement with no trailing
 *      semicolon; the caller's own ';' completes it, so the
 *      macro is safe as the body of an unbraced if/else.
 */
#define SAVE_HINT(map, value) \
        do { \
                (map)->hint = (value); \
        } while (0)
586
587 /*
588  *      vm_map_lookup_entry:    [ internal use only ]
589  *
590  *      Finds the map entry containing (or
591  *      immediately preceding) the specified address
592  *      in the given map; the entry is returned
593  *      in the "entry" parameter.  The boolean
594  *      result indicates whether the address is
595  *      actually contained in the map.
596  */
597 boolean_t
598 vm_map_lookup_entry(
599         vm_map_t map,
600         vm_offset_t address,
601         vm_map_entry_t *entry)  /* OUT */
602 {
603         vm_map_entry_t cur;
604         vm_map_entry_t last;
605
606         GIANT_REQUIRED;
607         /*
608          * Start looking either from the head of the list, or from the hint.
609          */
610         cur = map->hint;
611
612         if (cur == &map->header)
613                 cur = cur->next;
614
615         if (address >= cur->start) {
616                 /*
617                  * Go from hint to end of list.
618                  *
619                  * But first, make a quick check to see if we are already looking
620                  * at the entry we want (which is usually the case). Note also
621                  * that we don't need to save the hint here... it is the same
622                  * hint (unless we are at the header, in which case the hint
623                  * didn't buy us anything anyway).
624                  */
625                 last = &map->header;
626                 if ((cur != last) && (cur->end > address)) {
627                         *entry = cur;
628                         return (TRUE);
629                 }
630         } else {
631                 /*
632                  * Go from start to hint, *inclusively*
633                  */
634                 last = cur->next;
635                 cur = map->header.next;
636         }
637
638         /*
639          * Search linearly
640          */
641         while (cur != last) {
642                 if (cur->end > address) {
643                         if (address >= cur->start) {
644                                 /*
645                                  * Save this lookup for future hints, and
646                                  * return
647                                  */
648                                 *entry = cur;
649                                 SAVE_HINT(map, cur);
650                                 return (TRUE);
651                         }
652                         break;
653                 }
654                 cur = cur->next;
655         }
656         *entry = cur->prev;
657         SAVE_HINT(map, *entry);
658         return (FALSE);
659 }
660
661 /*
662  *      vm_map_insert:
663  *
664  *      Inserts the given whole VM object into the target
665  *      map at the specified address range.  The object's
666  *      size should match that of the address range.
667  *
668  *      Requires that the map be locked, and leaves it so.
669  *
670  *      If object is non-NULL, ref count must be bumped by caller
671  *      prior to making call to account for the new entry.
672  */
673 int
674 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
675               vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
676               int cow)
677 {
678         vm_map_entry_t new_entry;
679         vm_map_entry_t prev_entry;
680         vm_map_entry_t temp_entry;
681         vm_eflags_t protoeflags;
682
683         GIANT_REQUIRED;
684
685         /*
686          * Check that the start and end points are not bogus.
687          */
688         if ((start < map->min_offset) || (end > map->max_offset) ||
689             (start >= end))
690                 return (KERN_INVALID_ADDRESS);
691
692         /*
693          * Find the entry prior to the proposed starting address; if it's part
694          * of an existing entry, this range is bogus.
695          */
696         if (vm_map_lookup_entry(map, start, &temp_entry))
697                 return (KERN_NO_SPACE);
698
699         prev_entry = temp_entry;
700
701         /*
702          * Assert that the next entry doesn't overlap the end point.
703          */
704         if ((prev_entry->next != &map->header) &&
705             (prev_entry->next->start < end))
706                 return (KERN_NO_SPACE);
707
708         protoeflags = 0;
709
710         if (cow & MAP_COPY_ON_WRITE)
711                 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
712
713         if (cow & MAP_NOFAULT) {
714                 protoeflags |= MAP_ENTRY_NOFAULT;
715
716                 KASSERT(object == NULL,
717                         ("vm_map_insert: paradoxical MAP_NOFAULT request"));
718         }
719         if (cow & MAP_DISABLE_SYNCER)
720                 protoeflags |= MAP_ENTRY_NOSYNC;
721         if (cow & MAP_DISABLE_COREDUMP)
722                 protoeflags |= MAP_ENTRY_NOCOREDUMP;
723
724         if (object) {
725                 /*
726                  * When object is non-NULL, it could be shared with another
727                  * process.  We have to set or clear OBJ_ONEMAPPING 
728                  * appropriately.
729                  */
730                 if ((object->ref_count > 1) || (object->shadow_count != 0)) {
731                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
732                 }
733         }
734         else if ((prev_entry != &map->header) &&
735                  (prev_entry->eflags == protoeflags) &&
736                  (prev_entry->end == start) &&
737                  (prev_entry->wired_count == 0) &&
738                  ((prev_entry->object.vm_object == NULL) ||
739                   vm_object_coalesce(prev_entry->object.vm_object,
740                                      OFF_TO_IDX(prev_entry->offset),
741                                      (vm_size_t)(prev_entry->end - prev_entry->start),
742                                      (vm_size_t)(end - prev_entry->end)))) {
743                 /*
744                  * We were able to extend the object.  Determine if we
745                  * can extend the previous map entry to include the 
746                  * new range as well.
747                  */
748                 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
749                     (prev_entry->protection == prot) &&
750                     (prev_entry->max_protection == max)) {
751                         map->size += (end - prev_entry->end);
752                         prev_entry->end = end;
753                         vm_map_simplify_entry(map, prev_entry);
754                         return (KERN_SUCCESS);
755                 }
756
757                 /*
758                  * If we can extend the object but cannot extend the
759                  * map entry, we have to create a new map entry.  We
760                  * must bump the ref count on the extended object to
761                  * account for it.  object may be NULL.
762                  */
763                 object = prev_entry->object.vm_object;
764                 offset = prev_entry->offset +
765                         (prev_entry->end - prev_entry->start);
766                 vm_object_reference(object);
767         }
768
769         /*
770          * NOTE: if conditionals fail, object can be NULL here.  This occurs
771          * in things like the buffer map where we manage kva but do not manage
772          * backing objects.
773          */
774
775         /*
776          * Create a new entry
777          */
778         new_entry = vm_map_entry_create(map);
779         new_entry->start = start;
780         new_entry->end = end;
781
782         new_entry->eflags = protoeflags;
783         new_entry->object.vm_object = object;
784         new_entry->offset = offset;
785         new_entry->avail_ssize = 0;
786
787         new_entry->inheritance = VM_INHERIT_DEFAULT;
788         new_entry->protection = prot;
789         new_entry->max_protection = max;
790         new_entry->wired_count = 0;
791
792         /*
793          * Insert the new entry into the list
794          */
795         vm_map_entry_link(map, prev_entry, new_entry);
796         map->size += new_entry->end - new_entry->start;
797
798         /*
799          * Update the free space hint
800          */
801         if ((map->first_free == prev_entry) &&
802             (prev_entry->end >= new_entry->start)) {
803                 map->first_free = new_entry;
804         }
805
806 #if 0
807         /*
808          * Temporarily removed to avoid MAP_STACK panic, due to
809          * MAP_STACK being a huge hack.  Will be added back in
810          * when MAP_STACK (and the user stack mapping) is fixed.
811          */
812         /*
813          * It may be possible to simplify the entry
814          */
815         vm_map_simplify_entry(map, new_entry);
816 #endif
817
818         if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
819                 pmap_object_init_pt(map->pmap, start,
820                                     object, OFF_TO_IDX(offset), end - start,
821                                     cow & MAP_PREFAULT_PARTIAL);
822         }
823
824         return (KERN_SUCCESS);
825 }
826
827 /*
828  * Find sufficient space for `length' bytes in the given map, starting at
829  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
830  */
831 int
832 vm_map_findspace(
833         vm_map_t map,
834         vm_offset_t start,
835         vm_size_t length,
836         vm_offset_t *addr)
837 {
838         vm_map_entry_t entry, next;
839         vm_offset_t end;
840
841         GIANT_REQUIRED;
842         if (start < map->min_offset)
843                 start = map->min_offset;
844         if (start > map->max_offset)
845                 return (1);
846
847         /*
848          * Look for the first possible address; if there's already something
849          * at this address, we have to start after it.
850          */
851         if (start == map->min_offset) {
852                 if ((entry = map->first_free) != &map->header)
853                         start = entry->end;
854         } else {
855                 vm_map_entry_t tmp;
856
857                 if (vm_map_lookup_entry(map, start, &tmp))
858                         start = tmp->end;
859                 entry = tmp;
860         }
861
862         /*
863          * Look through the rest of the map, trying to fit a new region in the
864          * gap between existing regions, or after the very last region.
865          */
866         for (;; start = (entry = next)->end) {
867                 /*
868                  * Find the end of the proposed new region.  Be sure we didn't
869                  * go beyond the end of the map, or wrap around the address;
870                  * if so, we lose.  Otherwise, if this is the last entry, or
871                  * if the proposed new region fits before the next entry, we
872                  * win.
873                  */
874                 end = start + length;
875                 if (end > map->max_offset || end < start)
876                         return (1);
877                 next = entry->next;
878                 if (next == &map->header || next->start >= end)
879                         break;
880         }
881         SAVE_HINT(map, entry);
882         *addr = start;
883         if (map == kernel_map) {
884                 vm_offset_t ksize;
885                 if ((ksize = round_page(start + length)) > kernel_vm_end) {
886                         pmap_growkernel(ksize);
887                 }
888         }
889         return (0);
890 }
891
892 /*
893  *      vm_map_find finds an unallocated region in the target address
894  *      map with the given length.  The search is defined to be
895  *      first-fit from the specified address; the region found is
896  *      returned in the same parameter.
897  *
898  *      If object is non-NULL, ref count must be bumped by caller
899  *      prior to making call to account for the new entry.
     *
     *      If find_space is FALSE no search is performed and the range
     *      [*addr, *addr + length) is passed to vm_map_insert unchanged;
     *      *addr is not modified in that case.
     *
     *      Returns KERN_NO_SPACE when vm_map_findspace reports failure,
     *      otherwise whatever vm_map_insert returns.  The map is locked
     *      across the search and the insert.  When the map is kmem_map
     *      the whole operation is additionally bracketed by
     *      splvm()/splx().
900  */
901 int
902 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
903             vm_offset_t *addr,  /* IN/OUT */
904             vm_size_t length, boolean_t find_space, vm_prot_t prot,
905             vm_prot_t max, int cow)
906 {
907         vm_offset_t start;
908         int result, s = 0;
909
910         GIANT_REQUIRED;
911
912         start = *addr;
913
            /* s keeps splvm()'s return value for the matching splx() below. */
914         if (map == kmem_map)
915                 s = splvm();
916
917         vm_map_lock(map);
918         if (find_space) {
919                 if (vm_map_findspace(map, start, length, addr)) {
                            /* Undo the lock and spl in reverse order. */
920                         vm_map_unlock(map);
921                         if (map == kmem_map)
922                                 splx(s);
923                         return (KERN_NO_SPACE);
924                 }
                    /* vm_map_findspace stored the chosen address in *addr. */
925                 start = *addr;
926         }
927         result = vm_map_insert(map, object, offset,
928                 start, start + length, prot, max, cow);
929         vm_map_unlock(map);
930
931         if (map == kmem_map)
932                 splx(s);
933
934         return (result);
935 }
936
937 /*
938  *      vm_map_simplify_entry:
939  *
940  *      Simplify the given map entry by merging with either neighbor.  This
941  *      routine also has the ability to merge with both neighbors.
942  *
943  *      The map must be locked.
944  *
945  *      This routine guarantees that the passed entry remains valid (though
946  *      possibly extended).  When merging, this routine may delete one or
947  *      both neighbors.
     *
     *      A neighbor is merged only when it is address-adjacent to the
     *      entry, refers to the same object (with contiguous offsets when
     *      the object is non-NULL), and has identical eflags, protection,
     *      max_protection, inheritance and wired_count.  Sub-map entries
     *      are never merged.
948  */
949 void
950 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
951 {
952         vm_map_entry_t next, prev;
953         vm_size_t prevsize, esize;
954
955         GIANT_REQUIRED;
956
957         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
958                 return;
959
            /* Try to absorb the preceding entry. */
960         prev = entry->prev;
961         if (prev != &map->header) {
962                 prevsize = prev->end - prev->start;
963                 if ( (prev->end == entry->start) &&
964                      (prev->object.vm_object == entry->object.vm_object) &&
965                      (!prev->object.vm_object ||
966                         (prev->offset + prevsize == entry->offset)) &&
967                      (prev->eflags == entry->eflags) &&
968                      (prev->protection == entry->protection) &&
969                      (prev->max_protection == entry->max_protection) &&
970                      (prev->inheritance == entry->inheritance) &&
971                      (prev->wired_count == entry->wired_count)) {
                            /*
                             * Redirect cached pointers away from prev before
                             * it is unlinked and disposed of.
                             */
972                         if (map->first_free == prev)
973                                 map->first_free = entry;
974                         if (map->hint == prev)
975                                 map->hint = entry;
976                         vm_map_entry_unlink(map, prev);
977                         entry->start = prev->start;
978                         entry->offset = prev->offset;
                            /* Two entries became one: drop prev's object ref. */
979                         if (prev->object.vm_object)
980                                 vm_object_deallocate(prev->object.vm_object);
981                         vm_map_entry_dispose(map, prev);
982                 }
983         }
984
            /* Try to absorb the following entry. */
985         next = entry->next;
986         if (next != &map->header) {
987                 esize = entry->end - entry->start;
988                 if ((entry->end == next->start) &&
989                     (next->object.vm_object == entry->object.vm_object) &&
990                      (!entry->object.vm_object ||
991                         (entry->offset + esize == next->offset)) &&
992                     (next->eflags == entry->eflags) &&
993                     (next->protection == entry->protection) &&
994                     (next->max_protection == entry->max_protection) &&
995                     (next->inheritance == entry->inheritance) &&
996                     (next->wired_count == entry->wired_count)) {
997                         if (map->first_free == next)
998                                 map->first_free = entry;
999                         if (map->hint == next)
1000                                 map->hint = entry;
1001                         vm_map_entry_unlink(map, next);
                             /* entry keeps its own start/offset; only the end grows. */
1002                         entry->end = next->end;
1003                         if (next->object.vm_object)
1004                                 vm_object_deallocate(next->object.vm_object);
1005                         vm_map_entry_dispose(map, next);
1006                 }
1007         }
1008 }
1009 /*
1010  *      vm_map_clip_start:      [ internal use only ]
1011  *
1012  *      Asserts that the given entry begins at or after
1013  *      the specified address; if necessary,
1014  *      it splits the entry into two.
1015  */
1016 #define vm_map_clip_start(map, entry, startaddr) \
1017 { \
1018         if (startaddr > entry->start) \
1019                 _vm_map_clip_start(map, entry, startaddr); \
1020 }
1021
1022 /*
1023  *      This routine is called only when it is known that
1024  *      the entry must be split.
      *
      *      Splits "entry" at "start": a new entry describing
      *      [entry->start, start) is linked in front of it, and "entry"
      *      itself is trimmed to begin at "start" with its object offset
      *      advanced by the same amount.  Both halves end up referring to
      *      the same backing object.
1025  */
1026 static void
1027 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
1028 {
1029         vm_map_entry_t new_entry;
1030
1031         /*
              * Merge the entry with any compatible neighbors first, then
1032          * split off the front portion -- note that we must insert the new
1033          * entry BEFORE this one, so that this entry has the specified
1034          * starting address.
1035          */
1036         vm_map_simplify_entry(map, entry);
1037
1038         /*
1039          * If there is no object backing this entry, we might as well create
1040          * one now.  If we defer it, an object can get created after the map
1041          * is clipped, and individual objects will be created for the split-up
1042          * map.  This is a bit of a hack, but is also about the best place to
1043          * put this improvement.
1044          */
1045         if (entry->object.vm_object == NULL && !map->system_map) {
1046                 vm_object_t object;
1047                 object = vm_object_allocate(OBJT_DEFAULT,
1048                                 atop(entry->end - entry->start));
1049                 entry->object.vm_object = object;
1050                 entry->offset = 0;
1051         }
1052
             /* Start from a full copy, then trim each half to its own range. */
1053         new_entry = vm_map_entry_create(map);
1054         *new_entry = *entry;
1055
1056         new_entry->end = start;
1057         entry->offset += (start - entry->start);
1058         entry->start = start;
1059
1060         vm_map_entry_link(map, entry->prev, new_entry);
1061
             /*
              * Two entries now share the object, so take a second reference.
              * NOTE(review): for a system map the object pointer can still be
              * NULL here; presumably vm_object_reference tolerates that --
              * confirm.
              */
1062         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1063                 vm_object_reference(new_entry->object.vm_object);
1064         }
1065 }
1066
1067 /*
1068  *      vm_map_clip_end:        [ internal use only ]
1069  *
1070  *      Asserts that the given entry ends at or before
1071  *      the specified address; if necessary,
1072  *      it splits the entry into two.
1073  */
1074 #define vm_map_clip_end(map, entry, endaddr) \
1075 { \
1076         if (endaddr < entry->end) \
1077                 _vm_map_clip_end(map, entry, endaddr); \
1078 }
1079
1080 /*
1081  *      This routine is called only when it is known that
1082  *      the entry must be split.
      *
      *      Splits "entry" at "end": "entry" is trimmed to stop at "end"
      *      and a new entry describing [end, old entry->end) is linked
      *      right after it, with its object offset advanced to match.
      *      Both halves end up referring to the same backing object.
1083  */
1084 static void
1085 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1086 {
1087         vm_map_entry_t new_entry;
1088
1089         /*
1090          * If there is no object backing this entry, we might as well create
1091          * one now.  If we defer it, an object can get created after the map
1092          * is clipped, and individual objects will be created for the split-up
1093          * map.  This is a bit of a hack, but is also about the best place to
1094          * put this improvement.
1095          */
1096         if (entry->object.vm_object == NULL && !map->system_map) {
1097                 vm_object_t object;
1098                 object = vm_object_allocate(OBJT_DEFAULT,
1099                                 atop(entry->end - entry->start));
1100                 entry->object.vm_object = object;
1101                 entry->offset = 0;
1102         }
1103
1104         /*
1105          * Create a new entry and insert it AFTER the specified entry
1106          */
1107         new_entry = vm_map_entry_create(map);
1108         *new_entry = *entry;
1109
             /* entry->start is unchanged, so (end - entry->start) is the front size. */
1110         new_entry->start = entry->end = end;
1111         new_entry->offset += (end - entry->start);
1112
1113         vm_map_entry_link(map, entry, new_entry);
1114
             /*
              * Two entries now share the object, so take a second reference.
              * NOTE(review): for a system map the object pointer can still be
              * NULL here; presumably vm_object_reference tolerates that --
              * confirm.
              */
1115         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1116                 vm_object_reference(new_entry->object.vm_object);
1117         }
1118 }
1119
1120 /*
1121  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
1122  *
1123  *      Asserts that the starting and ending region
1124  *      addresses fall within the valid range of the map.
1125  */
1126 #define VM_MAP_RANGE_CHECK(map, start, end)             \
1127                 {                                       \
1128                 if (start < vm_map_min(map))            \
1129                         start = vm_map_min(map);        \
1130                 if (end > vm_map_max(map))              \
1131                         end = vm_map_max(map);          \
1132                 if (start > end)                        \
1133                         start = end;                    \
1134                 }
1135
1136 /*
1137  *      vm_map_submap:          [ kernel use only ]
1138  *
1139  *      Mark the given range as handled by a subordinate map.
1140  *
1141  *      This range must have been created with vm_map_find,
1142  *      and no other operations may have been performed on this
1143  *      range prior to calling vm_map_submap.
1144  *
1145  *      Only a limited number of operations can be performed
1146  *      within this range after calling vm_map_submap:
1147  *              vm_fault
1148  *      [Don't try vm_map_copy!]
1149  *
1150  *      To remove a submapping, one must first remove the
1151  *      range from the superior map, and then destroy the
1152  *      submap (if desired).  [Better yet, don't try it.]
      *
      *      Returns KERN_SUCCESS when, after clipping, a single entry
      *      covers exactly [start, end), is not copy-on-write and has no
      *      backing object; otherwise KERN_INVALID_ARGUMENT.  The range is
      *      first clamped to the map's bounds, and any clipping done is
      *      left in place even when the call fails.
1153  */
1154 int
1155 vm_map_submap(
1156         vm_map_t map,
1157         vm_offset_t start,
1158         vm_offset_t end,
1159         vm_map_t submap)
1160 {
1161         vm_map_entry_t entry;
1162         int result = KERN_INVALID_ARGUMENT;
1163
1164         GIANT_REQUIRED;
1165
1166         vm_map_lock(map);
1167
1168         VM_MAP_RANGE_CHECK(map, start, end);
1169
1170         if (vm_map_lookup_entry(map, start, &entry)) {
1171                 vm_map_clip_start(map, entry, start);
1172         } else
1173                 entry = entry->next;
1174
             /*
              * NOTE(review): on a lookup miss, entry->next may be &map->header
              * (other loops in this file test for that explicitly); confirm
              * that clipping it here is harmless or cannot happen.
              */
1175         vm_map_clip_end(map, entry, end);
1176
1177         if ((entry->start == start) && (entry->end == end) &&
1178             ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1179             (entry->object.vm_object == NULL)) {
1180                 entry->object.sub_map = submap;
1181                 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1182                 result = KERN_SUCCESS;
1183         }
1184         vm_map_unlock(map);
1185
1186         return (result);
1187 }
1188
1189 /*
1190  *      vm_map_protect:
1191  *
1192  *      Sets the protection of the specified address
1193  *      region in the target map.  If "set_max" is
1194  *      specified, the maximum protection is to be set;
1195  *      otherwise, only the current protection is affected.
      *
      *      With set_max, each entry's max_protection becomes new_prot and
      *      its current protection is reduced to (new_prot & old
      *      protection).
      *
      *      Returns KERN_INVALID_ARGUMENT if any entry in the range is a
      *      sub-map, KERN_PROTECTION_FAILURE if new_prot contains a bit not
      *      present in some entry's max_protection (this check is applied
      *      for set_max as well), and KERN_SUCCESS otherwise.  No
      *      protections are changed when an error is returned, although
      *      the first entry may already have been clipped at "start".
1196  */
1197 int
1198 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1199                vm_prot_t new_prot, boolean_t set_max)
1200 {
1201         vm_map_entry_t current;
1202         vm_map_entry_t entry;
1203
1204         GIANT_REQUIRED;
1205         vm_map_lock(map);
1206
1207         VM_MAP_RANGE_CHECK(map, start, end);
1208
1209         if (vm_map_lookup_entry(map, start, &entry)) {
1210                 vm_map_clip_start(map, entry, start);
1211         } else {
1212                 entry = entry->next;
1213         }
1214
1215         /*
1216          * Make a first pass to check for protection violations.
1217          */
1218         current = entry;
1219         while ((current != &map->header) && (current->start < end)) {
1220                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1221                         vm_map_unlock(map);
1222                         return (KERN_INVALID_ARGUMENT);
1223                 }
1224                 if ((new_prot & current->max_protection) != new_prot) {
1225                         vm_map_unlock(map);
1226                         return (KERN_PROTECTION_FAILURE);
1227                 }
1228                 current = current->next;
1229         }
1230
1231         /*
1232          * Go back and fix up protections. [Note that clipping is not
1233          * necessary the second time.]
1234          */
1235         current = entry;
1236         while ((current != &map->header) && (current->start < end)) {
1237                 vm_prot_t old_prot;
1238
1239                 vm_map_clip_end(map, current, end);
1240
1241                 old_prot = current->protection;
1242                 if (set_max)
1243                         current->protection =
1244                             (current->max_protection = new_prot) &
1245                             old_prot;
1246                 else
1247                         current->protection = new_prot;
1248
1249                 /*
1250                  * Update physical map if necessary. Worry about copy-on-write
1251                  * here -- CHECK THIS XXX
                      *
                      * MASK strips VM_PROT_WRITE for entries marked
                      * MAP_ENTRY_COW, so the pmap is never handed write
                      * permission for them here.
1252                  */
1253                 if (current->protection != old_prot) {
1254 #define MASK(entry)     (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1255                                                         VM_PROT_ALL)
1256                         pmap_protect(map->pmap, current->start,
1257                             current->end,
1258                             current->protection & MASK(current));
1259 #undef  MASK
1260                 }
1261                 vm_map_simplify_entry(map, current);
1262                 current = current->next;
1263         }
1264         vm_map_unlock(map);
1265         return (KERN_SUCCESS);
1266 }
1267
1268 /*
1269  *      vm_map_madvise:
1270  *
1271  *      This routine traverses a process's map handling the madvise
1272  *      system call.  Advisories are classified as either those affecting
1273  *      the vm_map_entry structure, or those affecting the underlying
1274  *      objects.
      *
      *      MADV_NORMAL, MADV_SEQUENTIAL, MADV_RANDOM, MADV_NOSYNC,
      *      MADV_AUTOSYNC, MADV_NOCORE and MADV_CORE modify map entries and
      *      are applied under the exclusive map lock.  MADV_WILLNEED,
      *      MADV_DONTNEED and MADV_FREE are forwarded to vm_object_madvise
      *      under the read lock.  Sub-map entries are skipped in both cases.
      *
      *      Returns KERN_INVALID_ARGUMENT for any other "behav" (without
      *      taking the lock), and 0 otherwise.
1275  */
1276 int
1277 vm_map_madvise(
1278         vm_map_t map,
1279         vm_offset_t start, 
1280         vm_offset_t end,
1281         int behav)
1282 {
1283         vm_map_entry_t current, entry;
1284         int modify_map = 0;
1285
1286         GIANT_REQUIRED;
1287
1288         /*
1289          * Some madvise calls directly modify the vm_map_entry, in which case
1290          * we need to use an exclusive lock on the map and we need to perform
1291          * various clipping operations.  Otherwise we only need a read-lock
1292          * on the map.
1293          */
1294         switch(behav) {
1295         case MADV_NORMAL:
1296         case MADV_SEQUENTIAL:
1297         case MADV_RANDOM:
1298         case MADV_NOSYNC:
1299         case MADV_AUTOSYNC:
1300         case MADV_NOCORE:
1301         case MADV_CORE:
1302                 modify_map = 1;
1303                 vm_map_lock(map);
1304                 break;
1305         case MADV_WILLNEED:
1306         case MADV_DONTNEED:
1307         case MADV_FREE:
1308                 vm_map_lock_read(map);
1309                 break;
1310         default:
1311                 return (KERN_INVALID_ARGUMENT);
1312         }
1313
1314         /*
1315          * Locate starting entry and clip if necessary.
1316          */
1317         VM_MAP_RANGE_CHECK(map, start, end);
1318
1319         if (vm_map_lookup_entry(map, start, &entry)) {
1320                 if (modify_map)
1321                         vm_map_clip_start(map, entry, start);
1322         } else {
1323                 entry = entry->next;
1324         }
1325
1326         if (modify_map) {
1327                 /*
1328                  * madvise behaviors that are implemented in the vm_map_entry.
1329                  *
1330                  * We clip the vm_map_entry so that behavioral changes are
1331                  * limited to the specified address range.
1332                  */
1333                 for (current = entry;
1334                      (current != &map->header) && (current->start < end);
1335                      current = current->next
1336                 ) {
1337                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1338                                 continue;
1339
1340                         vm_map_clip_end(map, current, end);
1341
1342                         switch (behav) {
1343                         case MADV_NORMAL:
1344                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1345                                 break;
1346                         case MADV_SEQUENTIAL:
1347                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1348                                 break;
1349                         case MADV_RANDOM:
1350                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1351                                 break;
1352                         case MADV_NOSYNC:
1353                                 current->eflags |= MAP_ENTRY_NOSYNC;
1354                                 break;
1355                         case MADV_AUTOSYNC:
1356                                 current->eflags &= ~MAP_ENTRY_NOSYNC;
1357                                 break;
1358                         case MADV_NOCORE:
1359                                 current->eflags |= MAP_ENTRY_NOCOREDUMP;
1360                                 break;
1361                         case MADV_CORE:
1362                                 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1363                                 break;
1364                         default:
1365                                 break;
1366                         }
                             /* Re-merge with neighbors that now carry the same flags. */
1367                         vm_map_simplify_entry(map, current);
1368                 }
1369                 vm_map_unlock(map);
1370         } else {
1371                 vm_pindex_t pindex;
1372                 int count;
1373
1374                 /*
1375                  * madvise behaviors that are implemented in the underlying
1376                  * vm_object.
1377                  *
1378                  * Since we don't clip the vm_map_entry, we have to clip
1379                  * the vm_object pindex and count.
1380                  */
1381                 for (current = entry;
1382                      (current != &map->header) && (current->start < end);
1383                      current = current->next
1384                 ) {
1385                         vm_offset_t useStart;
1386
1387                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1388                                 continue;
1389
                             /* Start with the whole entry, in pages. */
1390                         pindex = OFF_TO_IDX(current->offset);
1391                         count = atop(current->end - current->start);
1392                         useStart = current->start;
1393
                             /* Drop the pages that lie before "start" ... */
1394                         if (current->start < start) {
1395                                 pindex += atop(start - current->start);
1396                                 count -= atop(start - current->start);
1397                                 useStart = start;
1398                         }
                             /* ... and the pages that lie at or after "end". */
1399                         if (current->end > end)
1400                                 count -= atop(current->end - end);
1401
1402                         if (count <= 0)
1403                                 continue;
1404
1405                         vm_object_madvise(current->object.vm_object,
1406                                           pindex, count, behav);
1407                         if (behav == MADV_WILLNEED) {
1408                                 pmap_object_init_pt(
1409                                     map->pmap, 
1410                                     useStart,
1411                                     current->object.vm_object,
1412                                     pindex, 
1413                                     (count << PAGE_SHIFT),
1414                                     MAP_PREFAULT_MADVISE
1415                                 );
1416                         }
1417                 }
1418                 vm_map_unlock_read(map);
1419         }
1420         return (0);
1421 }       
1422
1423
1424 /*
1425  *      vm_map_inherit:
1426  *
1427  *      Sets the inheritance of the specified address
1428  *      range in the target map.  Inheritance
1429  *      affects how the map will be shared with
1430  *      child maps at the time of vm_map_fork.
      *
      *      new_inheritance must be VM_INHERIT_NONE, VM_INHERIT_COPY or
      *      VM_INHERIT_SHARE; any other value yields KERN_INVALID_ARGUMENT
      *      before the map is locked.  Otherwise the range is clamped to
      *      the map's bounds, entries are clipped to it, and KERN_SUCCESS
      *      is returned.
1431  */
1432 int
1433 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1434                vm_inherit_t new_inheritance)
1435 {
1436         vm_map_entry_t entry;
1437         vm_map_entry_t temp_entry;
1438
1439         GIANT_REQUIRED;
1440
1441         switch (new_inheritance) {
1442         case VM_INHERIT_NONE:
1443         case VM_INHERIT_COPY:
1444         case VM_INHERIT_SHARE:
1445                 break;
1446         default:
1447                 return (KERN_INVALID_ARGUMENT);
1448         }
1449
1450         vm_map_lock(map);
1451
1452         VM_MAP_RANGE_CHECK(map, start, end);
1453
             /* On a lookup miss, begin with the entry following the gap. */
1454         if (vm_map_lookup_entry(map, start, &temp_entry)) {
1455                 entry = temp_entry;
1456                 vm_map_clip_start(map, entry, start);
1457         } else
1458                 entry = temp_entry->next;
1459
1460         while ((entry != &map->header) && (entry->start < end)) {
1461                 vm_map_clip_end(map, entry, end);
1462
1463                 entry->inheritance = new_inheritance;
1464
                     /* Re-merge with neighbors that now match. */
1465                 vm_map_simplify_entry(map, entry);
1466
1467                 entry = entry->next;
1468         }
1469
1470         vm_map_unlock(map);
1471         return (KERN_SUCCESS);
1472 }
1473
1474 /*
1475  * Implement the semantics of mlock
      *
      * new_pageable == TRUE removes user wiring from [start, end);
      * new_pageable == FALSE user-wires the range.
      *
      * Returns KERN_INVALID_ADDRESS if "start" does not fall inside a map
      * entry (or if the entry being wired cannot be found again after the
      * map lock had to be re-acquired), the error from vm_fault_user_wire
      * if faulting the pages in fails, and KERN_SUCCESS otherwise.  On the
      * error paths taken after wiring has begun, the part of the range
      * already wired is unwired again by a recursive call before returning.
1476  */
1477 int
1478 vm_map_user_pageable(
1479         vm_map_t map,
1480         vm_offset_t start,
1481         vm_offset_t end,
1482         boolean_t new_pageable)
1483 {
1484         vm_map_entry_t entry;
1485         vm_map_entry_t start_entry;
1486         vm_offset_t estart;
1487         vm_offset_t eend;
1488         int rv;
1489
1490         vm_map_lock(map);
1491         VM_MAP_RANGE_CHECK(map, start, end);
1492
1493         if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1494                 vm_map_unlock(map);
1495                 return (KERN_INVALID_ADDRESS);
1496         }
1497
1498         if (new_pageable) {
1499
1500                 entry = start_entry;
1501                 vm_map_clip_start(map, entry, start);
1502
1503                 /*
1504                  * Now decrement the wiring count for each region. If a region
1505                  * becomes completely unwired, unwire its physical pages and
1506                  * mappings.
                      *
                      * Only entries carrying MAP_ENTRY_USER_WIRED are touched;
                      * all others are left as they are.
1507                  */
1508                 while ((entry != &map->header) && (entry->start < end)) {
1509                         if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1510                                 vm_map_clip_end(map, entry, end);
1511                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1512                                 entry->wired_count--;
1513                                 if (entry->wired_count == 0)
1514                                         vm_fault_unwire(map, entry->start, entry->end);
1515                         }
1516                         vm_map_simplify_entry(map,entry);
1517                         entry = entry->next;
1518                 }
1519         } else {
1520
1521                 entry = start_entry;
1522
1523                 while ((entry != &map->header) && (entry->start < end)) {
1524
                             /* Already user-wired: nothing to do for this entry. */
1525                         if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1526                                 entry = entry->next;
1527                                 continue;
1528                         }
1529                         
                             /*
                              * Wired by some other means: just add the user
                              * wiring on top.  The entry is not clipped and no
                              * pages are faulted in on this path.
                              */
1530                         if (entry->wired_count != 0) {
1531                                 entry->wired_count++;
1532                                 entry->eflags |= MAP_ENTRY_USER_WIRED;
1533                                 entry = entry->next;
1534                                 continue;
1535                         }
1536
1537                         /* Here on entry being newly wired */
1538
                             /*
                              * Create the shadow or zero-fill object before
                              * clipping, so the pieces share a single object.
                              */
1539                         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1540                                 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1541                                 if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
1542
1543                                         vm_object_shadow(&entry->object.vm_object,
1544                                             &entry->offset,
1545                                             atop(entry->end - entry->start));
1546                                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1547
1548                                 } else if (entry->object.vm_object == NULL &&
1549                                            !map->system_map) {
1550
1551                                         entry->object.vm_object =
1552                                             vm_object_allocate(OBJT_DEFAULT,
1553                                                 atop(entry->end - entry->start));
1554                                         entry->offset = (vm_offset_t) 0;
1555
1556                                 }
1557                         }
1558
1559                         vm_map_clip_start(map, entry, start);
1560                         vm_map_clip_end(map, entry, end);
1561
1562                         entry->wired_count++;
1563                         entry->eflags |= MAP_ENTRY_USER_WIRED;
                             /* Remember the bounds; entry may not survive unlocking. */
1564                         estart = entry->start;
1565                         eend = entry->end;
1566
1567                         /* First we need to allow map modifications */
1568                         vm_map_set_recursive(map);
1569                         vm_map_lock_downgrade(map);
1570                         map->timestamp++;
1571
1572                         rv = vm_fault_user_wire(map, entry->start, entry->end);
1573                         if (rv) {
1574
                                     /* Back out this entry's wiring before unlocking. */
1575                                 entry->wired_count--;
1576                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1577
1578                                 vm_map_clear_recursive(map);
1579                                 vm_map_unlock(map);
1580                         
1581                                 /*
1582                                  * At this point, the map is unlocked, and
1583                                  * entry might no longer be valid.  Use copy
1584                                  * of entry start value obtained while entry
1585                                  * was valid.
1586                                  */
1587                                 (void) vm_map_user_pageable(map, start, estart,
1588                                                             TRUE);
1589                                 return rv;
1590                         }
1591
1592                         vm_map_clear_recursive(map);
                             /*
                              * A non-zero return from vm_map_lock_upgrade is
                              * handled by taking the exclusive lock afresh and
                              * looking the entry up again by address.
                              */
1593                         if (vm_map_lock_upgrade(map)) {
1594                                 vm_map_lock(map);
1595                                 if (vm_map_lookup_entry(map, estart, &entry) 
1596                                     == FALSE) {
1597                                         vm_map_unlock(map);
1598                                         /*
1599                                          * vm_fault_user_wire succeeded, thus
1600                                          * the area between start and eend
1601                                          * is wired and has to be unwired
1602                                          * here as part of the cleanup.
1603                                          */
1604                                         (void) vm_map_user_pageable(map,
1605                                                                     start,
1606                                                                     eend,
1607                                                                     TRUE);
1608                                         return (KERN_INVALID_ADDRESS);
1609                                 }
1610                         }
                             /*
                              * entry is deliberately not advanced here: it now
                              * has MAP_ENTRY_USER_WIRED set, so the test at the
                              * top of the loop steps past it on the next pass.
                              */
1611                         vm_map_simplify_entry(map,entry);
1612                 }
1613         }
1614         map->timestamp++;
1615         vm_map_unlock(map);
1616         return KERN_SUCCESS;
1617 }
1618
1619 /*
1620  *      vm_map_pageable:
1621  *
1622  *      Sets the pageability of the specified address
1623  *      range in the target map.  Regions specified
1624  *      as not pageable require locked-down physical
1625  *      memory and physical page maps.
1626  *
1627  *      The map must not be locked, but a reference
1628  *      must remain to the map throughout the call.
1629  */
1630 int
1631 vm_map_pageable(
1632         vm_map_t map,
1633         vm_offset_t start,
1634         vm_offset_t end,
1635         boolean_t new_pageable)
1636 {
1637         vm_map_entry_t entry;
1638         vm_map_entry_t start_entry;
1639         vm_offset_t failed = 0;
1640         int rv;
1641
1642         GIANT_REQUIRED;
1643
1644         vm_map_lock(map);
1645
1646         VM_MAP_RANGE_CHECK(map, start, end);
1647
1648         /*
1649          * Only one pageability change may take place at one time, since
1650          * vm_fault assumes it will be called only once for each
1651          * wiring/unwiring.  Therefore, we have to make sure we're actually
1652          * changing the pageability for the entire region.  We do so before
1653          * making any changes.
1654          */
1655         if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1656                 vm_map_unlock(map);
1657                 return (KERN_INVALID_ADDRESS);
1658         }
1659         entry = start_entry;
1660
1661         /*
1662          * Actions are rather different for wiring and unwiring, so we have
1663          * two separate cases.
1664          */
1665         if (new_pageable) {
1666                 vm_map_clip_start(map, entry, start);
1667
1668                 /*
1669                  * Unwiring.  First ensure that the range to be unwired is
1670                  * really wired down and that there are no holes.
1671                  */
1672                 while ((entry != &map->header) && (entry->start < end)) {
1673                         if (entry->wired_count == 0 ||
1674                             (entry->end < end &&
1675                                 (entry->next == &map->header ||
1676                                     entry->next->start > entry->end))) {
1677                                 vm_map_unlock(map);
1678                                 return (KERN_INVALID_ARGUMENT);
1679                         }
1680                         entry = entry->next;
1681                 }
1682
1683                 /*
1684                  * Now decrement the wiring count for each region. If a region
1685                  * becomes completely unwired, unwire its physical pages and
1686                  * mappings.
1687                  */
1688                 entry = start_entry;
1689                 while ((entry != &map->header) && (entry->start < end)) {
1690                         vm_map_clip_end(map, entry, end);
1691
1692                         entry->wired_count--;
1693                         if (entry->wired_count == 0)
1694                                 vm_fault_unwire(map, entry->start, entry->end);
1695
1696                         vm_map_simplify_entry(map, entry);
1697
1698                         entry = entry->next;
1699                 }
1700         } else {
1701                 /*
1702                  * Wiring.  We must do this in two passes:
1703                  *
1704                  * 1.  Holding the write lock, we create any shadow or zero-fill
1705                  * objects that need to be created. Then we clip each map
1706                  * entry to the region to be wired and increment its wiring
1707                  * count.  We create objects before clipping the map entries
1708                  * to avoid object proliferation.
1709                  *
1710                  * 2.  We downgrade to a read lock, and call vm_fault_wire to
1711                  * fault in the pages for any newly wired area (wired_count is
1712                  * 1).
1713                  *
1714                  * Downgrading to a read lock for vm_fault_wire avoids a possible
1715                  * deadlock with another process that may have faulted on one
1716                  * of the pages to be wired (it would mark the page busy,
1717                  * blocking us, then in turn block on the map lock that we
1718                  * hold).  Because of problems in the recursive lock package,
1719                  * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
1720                  * any actions that require the write lock must be done
1721                  * beforehand.  Because we keep the read lock on the map, the
1722                  * copy-on-write status of the entries we modify here cannot
1723                  * change.
1724                  */
1725
1726                 /*
1727                  * Pass 1.
1728                  */
1729                 while ((entry != &map->header) && (entry->start < end)) {
1730                         if (entry->wired_count == 0) {
1731
1732                                 /*
1733                                  * Perform actions of vm_map_lookup that need
1734                                  * the write lock on the map: create a shadow
1735                                  * object for a copy-on-write region, or an
1736                                  * object for a zero-fill region.
1737                                  *
1738                                  * We don't have to do this for entries that
1739                                  * point to sub maps, because we won't
1740                                  * hold the lock on the sub map.
1741                                  */
1742                                 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1743                                         int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1744                                         if (copyflag &&
1745                                             ((entry->protection & VM_PROT_WRITE) != 0)) {
1746
1747                                                 vm_object_shadow(&entry->object.vm_object,
1748                                                     &entry->offset,
1749                                                     atop(entry->end - entry->start));
1750                                                 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1751                                         } else if (entry->object.vm_object == NULL &&
1752                                                    !map->system_map) {
1753                                                 entry->object.vm_object =
1754                                                     vm_object_allocate(OBJT_DEFAULT,
1755                                                         atop(entry->end - entry->start));
1756                                                 entry->offset = (vm_offset_t) 0;
1757                                         }
1758                                 }
1759                         }
1760                         vm_map_clip_start(map, entry, start);
1761                         vm_map_clip_end(map, entry, end);
1762                         entry->wired_count++;
1763
1764                         /*
1765                          * Check for holes
1766                          */
1767                         if (entry->end < end &&
1768                             (entry->next == &map->header ||
1769                                 entry->next->start > entry->end)) {
1770                                 /*
1771                                  * Found one.  Object creation actions do not
1772                                  * need to be undone, but the wired counts
1773                                  * need to be restored.
1774                                  */
1775                                 while (entry != &map->header && entry->end > start) {
1776                                         entry->wired_count--;
1777                                         entry = entry->prev;
1778                                 }
1779                                 vm_map_unlock(map);
1780                                 return (KERN_INVALID_ARGUMENT);
1781                         }
1782                         entry = entry->next;
1783                 }
1784
1785                 /*
1786                  * Pass 2.
1787                  */
1788
1789                 /*
1790                  * HACK HACK HACK HACK
1791                  *
1792                  * If we are wiring in the kernel map or a submap of it,
1793                  * unlock the map to avoid deadlocks.  We trust that the
1794                  * kernel is well-behaved, and therefore will not do
1795                  * anything destructive to this region of the map while
1796                  * we have it unlocked.  We cannot trust user processes
1797                  * to do the same.
1798                  *
1799                  * HACK HACK HACK HACK
1800                  */
1801                 if (vm_map_pmap(map) == kernel_pmap) {
1802                         vm_map_unlock(map);     /* trust me ... */
1803                 } else {
1804                         vm_map_lock_downgrade(map);
1805                 }
1806
1807                 rv = 0;
1808                 entry = start_entry;
1809                 while (entry != &map->header && entry->start < end) {
1810                         /*
1811                          * If vm_fault_wire fails for any page we need to undo
1812                          * what has been done.  We decrement the wiring count
1813                          * for those pages which have not yet been wired (now)
1814                          * and unwire those that have (later).
1815                          *
1816                          * XXX this violates the locking protocol on the map,
1817                          * needs to be fixed.
1818                          */
1819                         if (rv)
1820                                 entry->wired_count--;
1821                         else if (entry->wired_count == 1) {
1822                                 rv = vm_fault_wire(map, entry->start, entry->end);
1823                                 if (rv) {
1824                                         failed = entry->start;
1825                                         entry->wired_count--;
1826                                 }
1827                         }
1828                         entry = entry->next;
1829                 }
1830
1831                 if (vm_map_pmap(map) == kernel_pmap) {
1832                         vm_map_lock(map);
1833                 }
1834                 if (rv) {
1835                         vm_map_unlock(map);
1836                         (void) vm_map_pageable(map, start, failed, TRUE);
1837                         return (rv);
1838                 }
1839                 /*
1840                  * An exclusive lock on the map is needed in order to call
1841                  * vm_map_simplify_entry().  If the current lock on the map
1842                  * is only a shared lock, an upgrade is needed.
1843                  */
1844                 if (vm_map_pmap(map) != kernel_pmap &&
1845                     vm_map_lock_upgrade(map)) {
1846                         vm_map_lock(map);
1847                         if (vm_map_lookup_entry(map, start, &start_entry) ==
1848                             FALSE) {
1849                                 vm_map_unlock(map);
1850                                 return KERN_SUCCESS;
1851                         }
1852                 }
1853                 vm_map_simplify_entry(map, start_entry);
1854         }
1855
1856         vm_map_unlock(map);
1857
1858         return (KERN_SUCCESS);
1859 }
1860
1861 /*
1862  * vm_map_clean
1863  *
1864  * Push any dirty cached pages in the address range to their pager.
1865  * If syncio is TRUE, dirty pages are written synchronously.
1866  * If invalidate is TRUE, any cached pages are freed as well.
1867  *
1868  * Returns an error if any part of the specified range is not mapped.
1869  */
1870 int
1871 vm_map_clean(
1872         vm_map_t map,
1873         vm_offset_t start,
1874         vm_offset_t end,
1875         boolean_t syncio,
1876         boolean_t invalidate)
1877 {
1878         vm_map_entry_t current;
1879         vm_map_entry_t entry;
1880         vm_size_t size;
1881         vm_object_t object;
1882         vm_ooffset_t offset;
1883
1884         GIANT_REQUIRED;
1885
1886         vm_map_lock_read(map);
1887         VM_MAP_RANGE_CHECK(map, start, end);
1888         if (!vm_map_lookup_entry(map, start, &entry)) {
1889                 vm_map_unlock_read(map);
1890                 return (KERN_INVALID_ADDRESS);
1891         }
1892         /*
1893          * Make a first pass to check for holes.
1894          */
1895         for (current = entry; current->start < end; current = current->next) {
1896                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1897                         vm_map_unlock_read(map);
1898                         return (KERN_INVALID_ARGUMENT);
1899                 }
1900                 if (end > current->end &&
1901                     (current->next == &map->header ||
1902                         current->end != current->next->start)) {
1903                         vm_map_unlock_read(map);
1904                         return (KERN_INVALID_ADDRESS);
1905                 }
1906         }
1907
1908         if (invalidate)
1909                 pmap_remove(vm_map_pmap(map), start, end);
1910         /*
1911          * Make a second pass, cleaning/uncaching pages from the indicated
1912          * objects as we go.
1913          */
1914         for (current = entry; current->start < end; current = current->next) {
1915                 offset = current->offset + (start - current->start);
1916                 size = (end <= current->end ? end : current->end) - start;
1917                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1918                         vm_map_t smap;
1919                         vm_map_entry_t tentry;
1920                         vm_size_t tsize;
1921
1922                         smap = current->object.sub_map;
1923                         vm_map_lock_read(smap);
1924                         (void) vm_map_lookup_entry(smap, offset, &tentry);
1925                         tsize = tentry->end - offset;
1926                         if (tsize < size)
1927                                 size = tsize;
1928                         object = tentry->object.vm_object;
1929                         offset = tentry->offset + (offset - tentry->start);
1930                         vm_map_unlock_read(smap);
1931                 } else {
1932                         object = current->object.vm_object;
1933                 }
1934                 /*
1935                  * Note that there is absolutely no sense in writing out
1936                  * anonymous objects, so we track down the vnode object
1937                  * to write out.
1938                  * We invalidate (remove) all pages from the address space
1939                  * anyway, for semantic correctness.
1940                  *
1941                  * note: certain anonymous maps, such as MAP_NOSYNC maps,
1942                  * may start out with a NULL object.
1943                  */
1944                 while (object && object->backing_object) {
1945                         object = object->backing_object;
1946                         offset += object->backing_object_offset;
1947                         if (object->size < OFF_TO_IDX(offset + size))
1948                                 size = IDX_TO_OFF(object->size) - offset;
1949                 }
1950                 if (object && (object->type == OBJT_VNODE) && 
1951                     (current->protection & VM_PROT_WRITE)) {
1952                         /*
1953                          * Flush pages if writing is allowed, invalidate them
1954                          * if invalidation requested.  Pages undergoing I/O
1955                          * will be ignored by vm_object_page_remove().
1956                          *
1957                          * We cannot lock the vnode and then wait for paging
1958                          * to complete without deadlocking against vm_fault.
1959                          * Instead we simply call vm_object_page_remove() and
1960                          * allow it to block internally on a page-by-page 
1961                          * basis when it encounters pages undergoing async 
1962                          * I/O.
1963                          */
1964                         int flags;
1965
1966                         vm_object_reference(object);
1967                         vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread);
1968                         flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1969                         flags |= invalidate ? OBJPC_INVAL : 0;
1970                         vm_object_page_clean(object,
1971                             OFF_TO_IDX(offset),
1972                             OFF_TO_IDX(offset + size + PAGE_MASK),
1973                             flags);
1974                         if (invalidate) {
1975                                 /*vm_object_pip_wait(object, "objmcl");*/
1976                                 vm_object_page_remove(object,
1977                                     OFF_TO_IDX(offset),
1978                                     OFF_TO_IDX(offset + size + PAGE_MASK),
1979                                     FALSE);
1980                         }
1981                         VOP_UNLOCK(object->handle, 0, curthread);
1982                         vm_object_deallocate(object);
1983                 }
1984                 start += size;
1985         }
1986
1987         vm_map_unlock_read(map);
1988         return (KERN_SUCCESS);
1989 }
1990
1991 /*
1992  *      vm_map_entry_unwire:    [ internal use only ]
1993  *
1994  *      Make the region specified by this entry pageable.
1995  *
1996  *      The map in question should be locked.
1997  *      [This is the reason for this routine's existence.]
1998  */
1999 static void 
2000 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
2001 {
2002         vm_fault_unwire(map, entry->start, entry->end);
2003         entry->wired_count = 0;
2004 }
2005
2006 /*
2007  *      vm_map_entry_delete:    [ internal use only ]
2008  *
2009  *      Deallocate the given entry from the target map.
2010  */
2011 static void
2012 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
2013 {
2014         vm_map_entry_unlink(map, entry);
2015         map->size -= entry->end - entry->start;
2016
2017         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
2018                 vm_object_deallocate(entry->object.vm_object);
2019         }
2020
2021         vm_map_entry_dispose(map, entry);
2022 }
2023
2024 /*
2025  *      vm_map_delete:  [ internal use only ]
2026  *
2027  *      Deallocates the given address range from the target
2028  *      map.
2029  */
2030 int
2031 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
2032 {
2033         vm_object_t object;
2034         vm_map_entry_t entry;
2035         vm_map_entry_t first_entry;
2036
2037         GIANT_REQUIRED;
2038
2039         /*
2040          * Find the start of the region, and clip it
2041          */
2042         if (!vm_map_lookup_entry(map, start, &first_entry))
2043                 entry = first_entry->next;
2044         else {
2045                 entry = first_entry;
2046                 vm_map_clip_start(map, entry, start);
2047                 /*
2048                  * Fix the lookup hint now, rather than each time though the
2049                  * loop.
2050                  */
2051                 SAVE_HINT(map, entry->prev);
2052         }
2053
2054         /*
2055          * Save the free space hint
2056          */
2057         if (entry == &map->header) {
2058                 map->first_free = &map->header;
2059         } else if (map->first_free->start >= start) {
2060                 map->first_free = entry->prev;
2061         }
2062
2063         /*
2064          * Step through all entries in this region
2065          */
2066         while ((entry != &map->header) && (entry->start < end)) {
2067                 vm_map_entry_t next;
2068                 vm_offset_t s, e;
2069                 vm_pindex_t offidxstart, offidxend, count;
2070
2071                 vm_map_clip_end(map, entry, end);
2072
2073                 s = entry->start;
2074                 e = entry->end;
2075                 next = entry->next;
2076
2077                 offidxstart = OFF_TO_IDX(entry->offset);
2078                 count = OFF_TO_IDX(e - s);
2079                 object = entry->object.vm_object;
2080
2081                 /*
2082                  * Unwire before removing addresses from the pmap; otherwise,
2083                  * unwiring will put the entries back in the pmap.
2084                  */
2085                 if (entry->wired_count != 0) {
2086                         vm_map_entry_unwire(map, entry);
2087                 }
2088
2089                 offidxend = offidxstart + count;
2090
2091                 if ((object == kernel_object) || (object == kmem_object)) {
2092                         vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2093                 } else {
2094                         pmap_remove(map->pmap, s, e);
2095                         if (object != NULL &&
2096                             object->ref_count != 1 &&
2097                             (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2098                             (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2099                                 vm_object_collapse(object);
2100                                 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2101                                 if (object->type == OBJT_SWAP) {
2102                                         swap_pager_freespace(object, offidxstart, count);
2103                                 }
2104                                 if (offidxend >= object->size &&
2105                                     offidxstart < object->size) {
2106                                         object->size = offidxstart;
2107                                 }
2108                         }
2109                 }
2110
2111                 /*
2112                  * Delete the entry (which may delete the object) only after
2113                  * removing all pmap entries pointing to its pages.
2114                  * (Otherwise, its page frames may be reallocated, and any
2115                  * modify bits will be set in the wrong object!)
2116                  */
2117                 vm_map_entry_delete(map, entry);
2118                 entry = next;
2119         }
2120         return (KERN_SUCCESS);
2121 }
2122
2123 /*
2124  *      vm_map_remove:
2125  *
2126  *      Remove the given address range from the target map.
2127  *      This is the exported form of vm_map_delete.
2128  */
2129 int
2130 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2131 {
2132         int result, s = 0;
2133
2134         GIANT_REQUIRED;
2135
2136         if (map == kmem_map)
2137                 s = splvm();
2138
2139         vm_map_lock(map);
2140         VM_MAP_RANGE_CHECK(map, start, end);
2141         result = vm_map_delete(map, start, end);
2142         vm_map_unlock(map);
2143
2144         if (map == kmem_map)
2145                 splx(s);
2146
2147         return (result);
2148 }
2149
2150 /*
2151  *      vm_map_check_protection:
2152  *
2153  *      Assert that the target map allows the specified
2154  *      privilege on the entire address region given.
2155  *      The entire region must be allocated.
2156  */
2157 boolean_t
2158 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2159                         vm_prot_t protection)
2160 {
2161         vm_map_entry_t entry;
2162         vm_map_entry_t tmp_entry;
2163
2164         GIANT_REQUIRED;
2165
2166         vm_map_lock_read(map);
2167         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2168                 vm_map_unlock_read(map);
2169                 return (FALSE);
2170         }
2171         entry = tmp_entry;
2172
2173         while (start < end) {
2174                 if (entry == &map->header) {
2175                         vm_map_unlock_read(map);
2176                         return (FALSE);
2177                 }
2178                 /*
2179                  * No holes allowed!
2180                  */
2181                 if (start < entry->start) {
2182                         vm_map_unlock_read(map);
2183                         return (FALSE);
2184                 }
2185                 /*
2186                  * Check protection associated with entry.
2187                  */
2188                 if ((entry->protection & protection) != protection) {
2189                         vm_map_unlock_read(map);
2190                         return (FALSE);
2191                 }
2192                 /* go to next entry */
2193                 start = entry->end;
2194                 entry = entry->next;
2195         }
2196         vm_map_unlock_read(map);
2197         return (TRUE);
2198 }
2199
2200 /*
2201  * Split the pages in a map entry into a new object.  This affords
2202  * easier removal of unused pages, and keeps object inheritance from
2203  * being a negative impact on memory usage.
2204  */
2205 static void
2206 vm_map_split(vm_map_entry_t entry)
2207 {
2208         vm_page_t m;
2209         vm_object_t orig_object, new_object, source;
2210         vm_offset_t s, e;
2211         vm_pindex_t offidxstart, offidxend, idx;
2212         vm_size_t size;
2213         vm_ooffset_t offset;
2214
2215         GIANT_REQUIRED;
2216
2217         orig_object = entry->object.vm_object;
2218         if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
2219                 return;
2220         if (orig_object->ref_count <= 1)
2221                 return;
2222
2223         offset = entry->offset;
2224         s = entry->start;
2225         e = entry->end;
2226
2227         offidxstart = OFF_TO_IDX(offset);
2228         offidxend = offidxstart + OFF_TO_IDX(e - s);
2229         size = offidxend - offidxstart;
2230
2231         new_object = vm_pager_allocate(orig_object->type,
2232                 NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
2233         if (new_object == NULL)
2234                 return;
2235
2236         source = orig_object->backing_object;
2237         if (source != NULL) {
2238                 vm_object_reference(source);    /* Referenced by new_object */
2239                 TAILQ_INSERT_TAIL(&source->shadow_head,
2240                                   new_object, shadow_list);
2241                 vm_object_clear_flag(source, OBJ_ONEMAPPING);
2242                 new_object->backing_object_offset = 
2243                         orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
2244                 new_object->backing_object = source;
2245                 source->shadow_count++;
2246                 source->generation++;
2247         }
2248
2249         for (idx = 0; idx < size; idx++) {
2250                 vm_page_t m;
2251
2252         retry:
2253                 m = vm_page_lookup(orig_object, offidxstart + idx);
2254                 if (m == NULL)
2255                         continue;
2256
2257                 /*
2258                  * We must wait for pending I/O to complete before we can
2259                  * rename the page.
2260                  *
2261                  * We do not have to VM_PROT_NONE the page as mappings should
2262                  * not be changed by this operation.
2263                  */
2264                 if (vm_page_sleep_busy(m, TRUE, "spltwt"))
2265                         goto retry;
2266                         
2267                 vm_page_busy(m);
2268                 vm_page_rename(m, new_object, idx);
2269                 /* page automatically made dirty by rename and cache handled */
2270                 vm_page_busy(m);
2271         }
2272
2273         if (orig_object->type == OBJT_SWAP) {
2274                 vm_object_pip_add(orig_object, 1);
2275                 /*
2276                  * copy orig_object pages into new_object
2277                  * and destroy unneeded pages in
2278                  * shadow object.
2279                  */
2280                 swap_pager_copy(orig_object, new_object, offidxstart, 0);
2281                 vm_object_pip_wakeup(orig_object);
2282         }
2283
2284         for (idx = 0; idx < size; idx++) {
2285                 m = vm_page_lookup(new_object, idx);
2286                 if (m) {
2287                         vm_page_wakeup(m);
2288                 }
2289         }
2290
2291         entry->object.vm_object = new_object;
2292         entry->offset = 0LL;
2293         vm_object_deallocate(orig_object);
2294 }
2295
2296 /*
2297  *      vm_map_copy_entry:
2298  *
2299  *      Copies the contents of the source entry to the destination
2300  *      entry.  The entries *must* be aligned properly.
2301  */
2302 static void
2303 vm_map_copy_entry(
2304         vm_map_t src_map,
2305         vm_map_t dst_map,
2306         vm_map_entry_t src_entry, 
2307         vm_map_entry_t dst_entry)
2308 {
2309         vm_object_t src_object;
2310
2311         if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2312                 return;
2313
2314         if (src_entry->wired_count == 0) {
2315
2316                 /*
2317                  * If the source entry is marked needs_copy, it is already
2318                  * write-protected.
2319                  */
2320                 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2321                         pmap_protect(src_map->pmap,
2322                             src_entry->start,
2323                             src_entry->end,
2324                             src_entry->protection & ~VM_PROT_WRITE);
2325                 }
2326
2327                 /*
2328                  * Make a copy of the object.
2329                  */
2330                 if ((src_object = src_entry->object.vm_object) != NULL) {
2331
2332                         if ((src_object->handle == NULL) &&
2333                                 (src_object->type == OBJT_DEFAULT ||
2334                                  src_object->type == OBJT_SWAP)) {
2335                                 vm_object_collapse(src_object);
2336                                 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2337                                         vm_map_split(src_entry);
2338                                         src_object = src_entry->object.vm_object;
2339                                 }
2340                         }
2341
2342                         vm_object_reference(src_object);
2343                         vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2344                         dst_entry->object.vm_object = src_object;
2345                         src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2346                         dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2347                         dst_entry->offset = src_entry->offset;
2348                 } else {
2349                         dst_entry->object.vm_object = NULL;
2350                         dst_entry->offset = 0;
2351                 }
2352
2353                 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2354                     dst_entry->end - dst_entry->start, src_entry->start);
2355         } else {
2356                 /*
2357                  * Of course, wired down pages can't be set copy-on-write.
2358                  * Cause wired pages to be copied into the new map by
2359                  * simulating faults (the new pages are pageable)
2360                  */
2361                 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2362         }
2363 }
2364
2365 /*
2366  * vmspace_fork:
2367  * Create a new process vmspace structure and vm_map
2368  * based on those of an existing process.  The new map
2369  * is based on the old map, according to the inheritance
2370  * values on the regions in that map.
2371  *
2372  * The source map must not be locked.
2373  */
2374 struct vmspace *
2375 vmspace_fork(struct vmspace *vm1)
2376 {
2377         struct vmspace *vm2;
2378         vm_map_t old_map = &vm1->vm_map;
2379         vm_map_t new_map;
2380         vm_map_entry_t old_entry;
2381         vm_map_entry_t new_entry;
2382         vm_object_t object;
2383
2384         GIANT_REQUIRED;
2385
2386         vm_map_lock(old_map);
2387         old_map->infork = 1;
2388
2389         vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2390         bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2391             (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
2392         new_map = &vm2->vm_map; /* XXX */
2393         new_map->timestamp = 1;
2394
2395         old_entry = old_map->header.next;
2396
2397         while (old_entry != &old_map->header) {
2398                 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2399                         panic("vm_map_fork: encountered a submap");
2400
2401                 switch (old_entry->inheritance) {
2402                 case VM_INHERIT_NONE:
2403                         break;
2404
2405                 case VM_INHERIT_SHARE:
2406                         /*
2407                          * Clone the entry, creating the shared object if necessary.
2408                          */
2409                         object = old_entry->object.vm_object;
2410                         if (object == NULL) {
2411                                 object = vm_object_allocate(OBJT_DEFAULT,
2412                                         atop(old_entry->end - old_entry->start));
2413                                 old_entry->object.vm_object = object;
2414                                 old_entry->offset = (vm_offset_t) 0;
2415                         }
2416
2417                         /*
2418                          * Add the reference before calling vm_object_shadow
2419                          * to insure that a shadow object is created.
2420                          */
2421                         vm_object_reference(object);
2422                         if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2423                                 vm_object_shadow(&old_entry->object.vm_object,
2424                                         &old_entry->offset,
2425                                         atop(old_entry->end - old_entry->start));
2426                                 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2427                                 /* Transfer the second reference too. */
2428                                 vm_object_reference(
2429                                     old_entry->object.vm_object);
2430                                 vm_object_deallocate(object);
2431                                 object = old_entry->object.vm_object;
2432                         }
2433                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
2434
2435                         /*
2436                          * Clone the entry, referencing the shared object.
2437                          */
2438                         new_entry = vm_map_entry_create(new_map);
2439                         *new_entry = *old_entry;
2440                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2441                         new_entry->wired_count = 0;
2442
2443                         /*
2444                          * Insert the entry into the new map -- we know we're
2445                          * inserting at the end of the new map.
2446                          */
2447                         vm_map_entry_link(new_map, new_map->header.prev,
2448                             new_entry);
2449
2450                         /*
2451                          * Update the physical map
2452                          */
2453                         pmap_copy(new_map->pmap, old_map->pmap,
2454                             new_entry->start,
2455                             (old_entry->end - old_entry->start),
2456                             old_entry->start);
2457                         break;
2458
2459                 case VM_INHERIT_COPY:
2460                         /*
2461                          * Clone the entry and link into the map.
2462                          */
2463                         new_entry = vm_map_entry_create(new_map);
2464                         *new_entry = *old_entry;
2465                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2466                         new_entry->wired_count = 0;
2467                         new_entry->object.vm_object = NULL;
2468                         vm_map_entry_link(new_map, new_map->header.prev,
2469                             new_entry);
2470                         vm_map_copy_entry(old_map, new_map, old_entry,
2471                             new_entry);
2472                         break;
2473                 }
2474                 old_entry = old_entry->next;
2475         }
2476
2477         new_map->size = old_map->size;
2478         old_map->infork = 0;
2479         vm_map_unlock(old_map);
2480
2481         return (vm2);
2482 }
2483
2484 int
2485 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2486               vm_prot_t prot, vm_prot_t max, int cow)
2487 {
2488         vm_map_entry_t prev_entry;
2489         vm_map_entry_t new_stack_entry;
2490         vm_size_t      init_ssize;
2491         int            rv;
2492
2493         GIANT_REQUIRED;
2494
2495         if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2496                 return (KERN_NO_SPACE);
2497
2498         if (max_ssize < sgrowsiz)
2499                 init_ssize = max_ssize;
2500         else
2501                 init_ssize = sgrowsiz;
2502
2503         vm_map_lock(map);
2504
2505         /* If addr is already mapped, no go */
2506         if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2507                 vm_map_unlock(map);
2508                 return (KERN_NO_SPACE);
2509         }
2510
2511         /* If we can't accomodate max_ssize in the current mapping,
2512          * no go.  However, we need to be aware that subsequent user
2513          * mappings might map into the space we have reserved for
2514          * stack, and currently this space is not protected.  
2515          * 
2516          * Hopefully we will at least detect this condition 
2517          * when we try to grow the stack.
2518          */
2519         if ((prev_entry->next != &map->header) &&
2520             (prev_entry->next->start < addrbos + max_ssize)) {
2521                 vm_map_unlock(map);
2522                 return (KERN_NO_SPACE);
2523         }
2524
2525         /* We initially map a stack of only init_ssize.  We will
2526          * grow as needed later.  Since this is to be a grow 
2527          * down stack, we map at the top of the range.
2528          *
2529          * Note: we would normally expect prot and max to be
2530          * VM_PROT_ALL, and cow to be 0.  Possibly we should
2531          * eliminate these as input parameters, and just
2532          * pass these values here in the insert call.
2533          */
2534         rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2535                            addrbos + max_ssize, prot, max, cow);
2536
2537         /* Now set the avail_ssize amount */
2538         if (rv == KERN_SUCCESS){
2539                 if (prev_entry != &map->header)
2540                         vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2541                 new_stack_entry = prev_entry->next;
2542                 if (new_stack_entry->end   != addrbos + max_ssize ||
2543                     new_stack_entry->start != addrbos + max_ssize - init_ssize)
2544                         panic ("Bad entry start/end for new stack entry");
2545                 else 
2546                         new_stack_entry->avail_ssize = max_ssize - init_ssize;
2547         }
2548
2549         vm_map_unlock(map);
2550         return (rv);
2551 }
2552
2553 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2554  * desired address is already mapped, or if we successfully grow
2555  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2556  * stack range (this is strange, but preserves compatibility with
2557  * the grow function in vm_machdep.c).
2558  */
2559 int
2560 vm_map_growstack (struct proc *p, vm_offset_t addr)
2561 {
2562         vm_map_entry_t prev_entry;
2563         vm_map_entry_t stack_entry;
2564         vm_map_entry_t new_stack_entry;
2565         struct vmspace *vm = p->p_vmspace;
2566         vm_map_t map = &vm->vm_map;
2567         vm_offset_t    end;
2568         int      grow_amount;
2569         int      rv;
2570         int      is_procstack;
2571
2572         GIANT_REQUIRED;
2573         
2574 Retry:
2575         vm_map_lock_read(map);
2576
2577         /* If addr is already in the entry range, no need to grow.*/
2578         if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2579                 vm_map_unlock_read(map);
2580                 return (KERN_SUCCESS);
2581         }
2582
2583         if ((stack_entry = prev_entry->next) == &map->header) {
2584                 vm_map_unlock_read(map);
2585                 return (KERN_SUCCESS);
2586         } 
2587         if (prev_entry == &map->header) 
2588                 end = stack_entry->start - stack_entry->avail_ssize;
2589         else
2590                 end = prev_entry->end;
2591
2592         /* This next test mimics the old grow function in vm_machdep.c.
2593          * It really doesn't quite make sense, but we do it anyway
2594          * for compatibility.
2595          *
2596          * If not growable stack, return success.  This signals the
2597          * caller to proceed as he would normally with normal vm.
2598          */
2599         if (stack_entry->avail_ssize < 1 ||
2600             addr >= stack_entry->start ||
2601             addr <  stack_entry->start - stack_entry->avail_ssize) {
2602                 vm_map_unlock_read(map);
2603                 return (KERN_SUCCESS);
2604         } 
2605         
2606         /* Find the minimum grow amount */
2607         grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2608         if (grow_amount > stack_entry->avail_ssize) {
2609                 vm_map_unlock_read(map);
2610                 return (KERN_NO_SPACE);
2611         }
2612
2613         /* If there is no longer enough space between the entries
2614          * nogo, and adjust the available space.  Note: this 
2615          * should only happen if the user has mapped into the
2616          * stack area after the stack was created, and is
2617          * probably an error.
2618          *
2619          * This also effectively destroys any guard page the user
2620          * might have intended by limiting the stack size.
2621          */
2622         if (grow_amount > stack_entry->start - end) {
2623                 if (vm_map_lock_upgrade(map))
2624                         goto Retry;
2625
2626                 stack_entry->avail_ssize = stack_entry->start - end;
2627
2628                 vm_map_unlock(map);
2629                 return (KERN_NO_SPACE);
2630         }
2631
2632         is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2633
2634         /* If this is the main process stack, see if we're over the 
2635          * stack limit.
2636          */
2637         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2638                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2639                 vm_map_unlock_read(map);
2640                 return (KERN_NO_SPACE);
2641         }
2642
2643         /* Round up the grow amount modulo SGROWSIZ */
2644         grow_amount = roundup (grow_amount, sgrowsiz);
2645         if (grow_amount > stack_entry->avail_ssize) {
2646                 grow_amount = stack_entry->avail_ssize;
2647         }
2648         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2649                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2650                 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2651                               ctob(vm->vm_ssize);
2652         }
2653
2654         if (vm_map_lock_upgrade(map))
2655                 goto Retry;
2656
2657         /* Get the preliminary new entry start value */
2658         addr = stack_entry->start - grow_amount;
2659
2660         /* If this puts us into the previous entry, cut back our growth
2661          * to the available space.  Also, see the note above.
2662          */
2663         if (addr < end) {
2664                 stack_entry->avail_ssize = stack_entry->start - end;
2665                 addr = end;
2666         }
2667
2668         rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2669                            VM_PROT_ALL,
2670                            VM_PROT_ALL,
2671                            0);
2672
2673         /* Adjust the available stack space by the amount we grew. */
2674         if (rv == KERN_SUCCESS) {
2675                 if (prev_entry != &map->header)
2676                         vm_map_clip_end(map, prev_entry, addr);
2677                 new_stack_entry = prev_entry->next;
2678                 if (new_stack_entry->end   != stack_entry->start  ||
2679                     new_stack_entry->start != addr)
2680                         panic ("Bad stack grow start/end in new stack entry");
2681                 else {
2682                         new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2683                                                         (new_stack_entry->end -
2684                                                          new_stack_entry->start);
2685                         if (is_procstack)
2686                                 vm->vm_ssize += btoc(new_stack_entry->end -
2687                                                      new_stack_entry->start);
2688                 }
2689         }
2690
2691         vm_map_unlock(map);
2692         return (rv);
2693 }
2694
2695 /*
2696  * Unshare the specified VM space for exec.  If other processes are
2697  * mapped to it, then create a new one.  The new vmspace is null.
2698  */
2699 void
2700 vmspace_exec(struct proc *p) 
2701 {
2702         struct vmspace *oldvmspace = p->p_vmspace;
2703         struct vmspace *newvmspace;
2704         vm_map_t map = &p->p_vmspace->vm_map;
2705
2706         GIANT_REQUIRED;
2707         newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
2708         bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2709             (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2710         /*
2711          * This code is written like this for prototype purposes.  The
2712          * goal is to avoid running down the vmspace here, but let the
2713          * other process's that are still using the vmspace to finally
2714          * run it down.  Even though there is little or no chance of blocking
2715          * here, it is a good idea to keep this form for future mods.
2716          */
2717         p->p_vmspace = newvmspace;
2718         pmap_pinit2(vmspace_pmap(newvmspace));
2719         vmspace_free(oldvmspace);
2720         if (p == curthread->td_proc)            /* XXXKSE ? */
2721                 pmap_activate(curthread);
2722 }
2723
2724 /*
2725  * Unshare the specified VM space for forcing COW.  This
2726  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2727  */
2728 void
2729 vmspace_unshare(struct proc *p)
2730 {
2731         struct vmspace *oldvmspace = p->p_vmspace;
2732         struct vmspace *newvmspace;
2733
2734         GIANT_REQUIRED;
2735         if (oldvmspace->vm_refcnt == 1)
2736                 return;
2737         newvmspace = vmspace_fork(oldvmspace);
2738         p->p_vmspace = newvmspace;
2739         pmap_pinit2(vmspace_pmap(newvmspace));
2740         vmspace_free(oldvmspace);
2741         if (p == curthread->td_proc)            /* XXXKSE ? */
2742                 pmap_activate(curthread);
2743 }
2744
2745 /*
2746  *      vm_map_lookup:
2747  *
2748  *      Finds the VM object, offset, and
2749  *      protection for a given virtual address in the
2750  *      specified map, assuming a page fault of the
2751  *      type specified.
2752  *
2753  *      Leaves the map in question locked for read; return
2754  *      values are guaranteed until a vm_map_lookup_done
2755  *      call is performed.  Note that the map argument
2756  *      is in/out; the returned map must be used in
2757  *      the call to vm_map_lookup_done.
2758  *
2759  *      A handle (out_entry) is returned for use in
2760  *      vm_map_lookup_done, to make that fast.
2761  *
2762  *      If a lookup is requested with "write protection"
2763  *      specified, the map may be changed to perform virtual
2764  *      copying operations, although the data referenced will
2765  *      remain the same.
2766  */
2767 int
2768 vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
2769               vm_offset_t vaddr,
2770               vm_prot_t fault_typea,
2771               vm_map_entry_t *out_entry,        /* OUT */
2772               vm_object_t *object,              /* OUT */
2773               vm_pindex_t *pindex,              /* OUT */
2774               vm_prot_t *out_prot,              /* OUT */
2775               boolean_t *wired)                 /* OUT */
2776 {
2777         vm_map_entry_t entry;
2778         vm_map_t map = *var_map;
2779         vm_prot_t prot;
2780         vm_prot_t fault_type = fault_typea;
2781
2782         GIANT_REQUIRED;
2783 RetryLookup:;
2784         /*
2785          * Lookup the faulting address.
2786          */
2787
2788         vm_map_lock_read(map);
2789 #define RETURN(why) \
2790                 { \
2791                 vm_map_unlock_read(map); \
2792                 return (why); \
2793                 }
2794
2795         /*
2796          * If the map has an interesting hint, try it before calling full
2797          * blown lookup routine.
2798          */
2799         entry = map->hint;
2800         *out_entry = entry;
2801         if ((entry == &map->header) ||
2802             (vaddr < entry->start) || (vaddr >= entry->end)) {
2803                 vm_map_entry_t tmp_entry;
2804
2805                 /*
2806                  * Entry was either not a valid hint, or the vaddr was not
2807                  * contained in the entry, so do a full lookup.
2808                  */
2809                 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
2810                         RETURN(KERN_INVALID_ADDRESS);
2811
2812                 entry = tmp_entry;
2813                 *out_entry = entry;
2814         }
2815         
2816         /*
2817          * Handle submaps.
2818          */
2819         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2820                 vm_map_t old_map = map;
2821
2822                 *var_map = map = entry->object.sub_map;
2823                 vm_map_unlock_read(old_map);
2824                 goto RetryLookup;
2825         }
2826
2827         /*
2828          * Check whether this task is allowed to have this page.
2829          * Note the special case for MAP_ENTRY_COW
2830          * pages with an override.  This is to implement a forced
2831          * COW for debuggers.
2832          */
2833         if (fault_type & VM_PROT_OVERRIDE_WRITE)
2834                 prot = entry->max_protection;
2835         else
2836                 prot = entry->protection;
2837         fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2838         if ((fault_type & prot) != fault_type) {
2839                         RETURN(KERN_PROTECTION_FAILURE);
2840         }
2841         if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2842             (entry->eflags & MAP_ENTRY_COW) &&
2843             (fault_type & VM_PROT_WRITE) &&
2844             (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2845                 RETURN(KERN_PROTECTION_FAILURE);
2846         }
2847
2848         /*
2849          * If this page is not pageable, we have to get it for all possible
2850          * accesses.
2851          */
2852         *wired = (entry->wired_count != 0);
2853         if (*wired)
2854                 prot = fault_type = entry->protection;
2855
2856         /*
2857          * If the entry was copy-on-write, we either ...
2858          */
2859         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2860                 /*
2861                  * If we want to write the page, we may as well handle that
2862                  * now since we've got the map locked.
2863                  *
2864                  * If we don't need to write the page, we just demote the
2865                  * permissions allowed.
2866                  */
2867                 if (fault_type & VM_PROT_WRITE) {
2868                         /*
2869                          * Make a new object, and place it in the object
2870                          * chain.  Note that no new references have appeared
2871                          * -- one just moved from the map to the new
2872                          * object.
2873                          */
2874                         if (vm_map_lock_upgrade(map))
2875                                 goto RetryLookup;
2876                         vm_object_shadow(
2877                             &entry->object.vm_object,
2878                             &entry->offset,
2879                             atop(entry->end - entry->start));
2880                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2881                         vm_map_lock_downgrade(map);
2882                 } else {
2883                         /*
2884                          * We're attempting to read a copy-on-write page --
2885                          * don't allow writes.
2886                          */
2887                         prot &= ~VM_PROT_WRITE;
2888                 }
2889         }
2890
2891         /*
2892          * Create an object if necessary.
2893          */
2894         if (entry->object.vm_object == NULL &&
2895             !map->system_map) {
2896                 if (vm_map_lock_upgrade(map)) 
2897                         goto RetryLookup;
2898                 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2899                     atop(entry->end - entry->start));
2900                 entry->offset = 0;
2901                 vm_map_lock_downgrade(map);
2902         }
2903
2904         /*
2905          * Return the object/offset from this entry.  If the entry was
2906          * copy-on-write or empty, it has been fixed up.
2907          */
2908         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2909         *object = entry->object.vm_object;
2910
2911         /*
2912          * Return whether this is the only map sharing this data.
2913          */
2914         *out_prot = prot;
2915         return (KERN_SUCCESS);
2916
2917 #undef  RETURN
2918 }
2919
2920 /*
2921  *      vm_map_lookup_done:
2922  *
2923  *      Releases locks acquired by a vm_map_lookup
2924  *      (according to the handle returned by that lookup).
2925  */
2926 void
2927 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
2928 {
2929         /*
2930          * Unlock the main-level map
2931          */
2932         GIANT_REQUIRED;
2933         vm_map_unlock_read(map);
2934 }
2935
2936 /*
2937  * Implement uiomove with VM operations.  This handles (and collateral changes)
2938  * support every combination of source object modification, and COW type
2939  * operations.
2940  */
2941 int
2942 vm_uiomove(
2943         vm_map_t mapa,
2944         vm_object_t srcobject,
2945         off_t cp,
2946         int cnta,
2947         vm_offset_t uaddra,
2948         int *npages)
2949 {
2950         vm_map_t map;
2951         vm_object_t first_object, oldobject, object;
2952         vm_map_entry_t entry;
2953         vm_prot_t prot;
2954         boolean_t wired;
2955         int tcnt, rv;
2956         vm_offset_t uaddr, start, end, tend;
2957         vm_pindex_t first_pindex, osize, oindex;
2958         off_t ooffset;
2959         int cnt;
2960
2961         GIANT_REQUIRED;
2962
2963         if (npages)
2964                 *npages = 0;
2965
2966         cnt = cnta;
2967         uaddr = uaddra;
2968
2969         while (cnt > 0) {
2970                 map = mapa;
2971
2972                 if ((vm_map_lookup(&map, uaddr,
2973                         VM_PROT_READ, &entry, &first_object,
2974                         &first_pindex, &prot, &wired)) != KERN_SUCCESS) {
2975                         return EFAULT;
2976                 }
2977
2978                 vm_map_clip_start(map, entry, uaddr);
2979
2980                 tcnt = cnt;
2981                 tend = uaddr + tcnt;
2982                 if (tend > entry->end) {
2983                         tcnt = entry->end - uaddr;
2984                         tend = entry->end;
2985                 }
2986
2987                 vm_map_clip_end(map, entry, tend);
2988
2989                 start = entry->start;
2990                 end = entry->end;
2991
2992                 osize = atop(tcnt);
2993
2994                 oindex = OFF_TO_IDX(cp);
2995                 if (npages) {
2996                         vm_pindex_t idx;
2997                         for (idx = 0; idx < osize; idx++) {
2998                                 vm_page_t m;
2999                                 if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
3000                                         vm_map_lookup_done(map, entry);
3001                                         return 0;
3002                                 }
3003                                 /*
3004                                  * disallow busy or invalid pages, but allow
3005                                  * m->busy pages if they are entirely valid.
3006                                  */
3007                                 if ((m->flags & PG_BUSY) ||
3008                                         ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
3009                                         vm_map_lookup_done(map, entry);
3010                                         return 0;
3011                                 }
3012                         }
3013                 }
3014
3015 /*
3016  * If we are changing an existing map entry, just redirect
3017  * the object, and change mappings.
3018  */
3019                 if ((first_object->type == OBJT_VNODE) &&
3020                         ((oldobject = entry->object.vm_object) == first_object)) {
3021
3022                         if ((entry->offset != cp) || (oldobject != srcobject)) {
3023                                 /*
3024                                 * Remove old window into the file
3025                                 */
3026                                 pmap_remove (map->pmap, uaddr, tend);
3027
3028                                 /*
3029                                 * Force copy on write for mmaped regions
3030                                 */
3031                                 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3032
3033                                 /*
3034                                 * Point the object appropriately
3035                                 */
3036                                 if (oldobject != srcobject) {
3037
3038                                 /*
3039                                 * Set the object optimization hint flag
3040                                 */
3041                                         vm_object_set_flag(srcobject, OBJ_OPT);
3042                                         vm_object_reference(srcobject);
3043                                         entry->object.vm_object = srcobject;
3044
3045                                         if (oldobject) {
3046                                                 vm_object_deallocate(oldobject);
3047                                         }
3048                                 }
3049
3050                                 entry->offset = cp;
3051                                 map->timestamp++;
3052                         } else {
3053                                 pmap_remove (map->pmap, uaddr, tend);
3054                         }
3055
3056                 } else if ((first_object->ref_count == 1) &&
3057                         (first_object->size == osize) &&
3058                         ((first_object->type == OBJT_DEFAULT) ||
3059                                 (first_object->type == OBJT_SWAP)) ) {
3060
3061                         oldobject = first_object->backing_object;
3062
3063                         if ((first_object->backing_object_offset != cp) ||
3064                                 (oldobject != srcobject)) {
3065                                 /*
3066                                 * Remove old window into the file
3067                                 */
3068                                 pmap_remove (map->pmap, uaddr, tend);
3069
3070                                 /*
3071                                  * Remove unneeded old pages
3072                                  */
3073                                 vm_object_page_remove(first_object, 0, 0, 0);
3074
3075                                 /*
3076                                  * Invalidate swap space
3077                                  */
3078                                 if (first_object->type == OBJT_SWAP) {
3079                                         swap_pager_freespace(first_object,
3080                                                 0,
3081                                                 first_object->size);
3082                                 }
3083
3084                                 /*
3085                                  * Force copy on write for mmaped regions
3086                                  */
3087                                 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3088
3089                                 /*
3090                                  * Point the object appropriately
3091                                  */
3092                                 if (oldobject != srcobject) {
3093                                         /*
3094                                          * Set the object optimization hint flag
3095                                          */
3096                                         vm_object_set_flag(srcobject, OBJ_OPT);
3097                                         vm_object_reference(srcobject);
3098
3099                                         if (oldobject) {
3100                                                 TAILQ_REMOVE(&oldobject->shadow_head,
3101                                                         first_object, shadow_list);
3102                                                 oldobject->shadow_count--;
3103                                                 /* XXX bump generation? */
3104                                                 vm_object_deallocate(oldobject);
3105                                         }
3106
3107                                         TAILQ_INSERT_TAIL(&srcobject->shadow_head,
3108                                                 first_object, shadow_list);
3109                                         srcobject->shadow_count++;
3110                                         /* XXX bump generation? */
3111
3112                                         first_object->backing_object = srcobject;
3113                                 }
3114                                 first_object->backing_object_offset = cp;
3115                                 map->timestamp++;
3116                         } else {
3117                                 pmap_remove (map->pmap, uaddr, tend);
3118                         }
3119 /*
3120  * Otherwise, we have to do a logical mmap.
3121  */
3122                 } else {
3123
3124                         vm_object_set_flag(srcobject, OBJ_OPT);
3125                         vm_object_reference(srcobject);
3126
3127                         pmap_remove (map->pmap, uaddr, tend);
3128
3129                         vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3130                         vm_map_lock_upgrade(map);
3131
3132                         if (entry == &map->header) {
3133                                 map->first_free = &map->header;
3134                         } else if (map->first_free->start >= start) {
3135                                 map->first_free = entry->prev;
3136                         }
3137
3138                         SAVE_HINT(map, entry->prev);
3139                         vm_map_entry_delete(map, entry);
3140
3141                         object = srcobject;
3142                         ooffset = cp;
3143
3144                         rv = vm_map_insert(map, object, ooffset, start, tend,
3145                                 VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
3146
3147                         if (rv != KERN_SUCCESS)
3148                                 panic("vm_uiomove: could not insert new entry: %d", rv);
3149                 }
3150
3151 /*
3152  * Map the window directly, if it is already in memory
3153  */
3154                 pmap_object_init_pt(map->pmap, uaddr,
3155                         srcobject, oindex, tcnt, 0);
3156
3157                 map->timestamp++;
3158                 vm_map_unlock(map);
3159
3160                 cnt -= tcnt;
3161                 uaddr += tcnt;
3162                 cp += tcnt;
3163                 if (npages)
3164                         *npages += osize;
3165         }
3166         return 0;
3167 }
3168
3169 /*
3170  * Performs the copy_on_write operations necessary to allow the virtual copies
3171  * into user space to work.  This has to be called for write(2) system calls
3172  * from other processes, file unlinking, and file size shrinkage.
3173  */
3174 void
3175 vm_freeze_copyopts(vm_object_t object, vm_pindex_t froma, vm_pindex_t toa)
3176 {
3177         int rv;
3178         vm_object_t robject;
3179         vm_pindex_t idx;
3180
3181         GIANT_REQUIRED;
3182         if ((object == NULL) ||
3183                 ((object->flags & OBJ_OPT) == 0))
3184                 return;
3185
3186         if (object->shadow_count > object->ref_count)
3187                 panic("vm_freeze_copyopts: sc > rc");
3188
3189         while ((robject = TAILQ_FIRST(&object->shadow_head)) != NULL) {
3190                 vm_pindex_t bo_pindex;
3191                 vm_page_t m_in, m_out;
3192
3193                 bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
3194
3195                 vm_object_reference(robject);
3196
3197                 vm_object_pip_wait(robject, "objfrz");
3198
3199                 if (robject->ref_count == 1) {
3200                         vm_object_deallocate(robject);
3201                         continue;
3202                 }
3203
3204                 vm_object_pip_add(robject, 1);
3205
3206                 for (idx = 0; idx < robject->size; idx++) {
3207
3208                         m_out = vm_page_grab(robject, idx,
3209                                                 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3210
3211                         if (m_out->valid == 0) {
3212                                 m_in = vm_page_grab(object, bo_pindex + idx,
3213                                                 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3214                                 if (m_in->valid == 0) {
3215                                         rv = vm_pager_get_pages(object, &m_in, 1, 0);
3216                                         if (rv != VM_PAGER_OK) {
3217                                                 printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex);
3218                                                 continue;
3219                                         }
3220                                         vm_page_deactivate(m_in);
3221                                 }
3222
3223                                 vm_page_protect(m_in, VM_PROT_NONE);
3224                                 pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out));
3225                                 m_out->valid = m_in->valid;
3226                                 vm_page_dirty(m_out);
3227                                 vm_page_activate(m_out);
3228                                 vm_page_wakeup(m_in);
3229                         }
3230                         vm_page_wakeup(m_out);
3231                 }
3232
3233                 object->shadow_count--;
3234                 object->ref_count--;
3235                 TAILQ_REMOVE(&object->shadow_head, robject, shadow_list);
3236                 robject->backing_object = NULL;
3237                 robject->backing_object_offset = 0;
3238
3239                 vm_object_pip_wakeup(robject);
3240                 vm_object_deallocate(robject);
3241         }
3242
3243         vm_object_clear_flag(object, OBJ_OPT);
3244 }
3245
3246 #include "opt_ddb.h"
3247 #ifdef DDB
3248 #include <sys/kernel.h>
3249
3250 #include <ddb/ddb.h>
3251
3252 /*
3253  *      vm_map_print:   [ debug ]
3254  */
3255 DB_SHOW_COMMAND(map, vm_map_print)
3256 {
3257         static int nlines;
3258         /* XXX convert args. */
3259         vm_map_t map = (vm_map_t)addr;
3260         boolean_t full = have_addr;
3261
3262         vm_map_entry_t entry;
3263
3264         db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3265             (void *)map,
3266             (void *)map->pmap, map->nentries, map->timestamp);
3267         nlines++;
3268
3269         if (!full && db_indent)
3270                 return;
3271
3272         db_indent += 2;
3273         for (entry = map->header.next; entry != &map->header;
3274             entry = entry->next) {
3275                 db_iprintf("map entry %p: start=%p, end=%p\n",
3276                     (void *)entry, (void *)entry->start, (void *)entry->end);
3277                 nlines++;
3278                 {
3279                         static char *inheritance_name[4] =
3280                         {"share", "copy", "none", "donate_copy"};
3281
3282                         db_iprintf(" prot=%x/%x/%s",
3283                             entry->protection,
3284                             entry->max_protection,
3285                             inheritance_name[(int)(unsigned char)entry->inheritance]);
3286                         if (entry->wired_count != 0)
3287                                 db_printf(", wired");
3288                 }
3289                 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3290                         /* XXX no %qd in kernel.  Truncate entry->offset. */
3291                         db_printf(", share=%p, offset=0x%lx\n",
3292                             (void *)entry->object.sub_map,
3293                             (long)entry->offset);
3294                         nlines++;
3295                         if ((entry->prev == &map->header) ||
3296                             (entry->prev->object.sub_map !=
3297                                 entry->object.sub_map)) {
3298                                 db_indent += 2;
3299                                 vm_map_print((db_expr_t)(intptr_t)
3300                                              entry->object.sub_map,
3301                                              full, 0, (char *)0);
3302                                 db_indent -= 2;
3303                         }
3304                 } else {
3305                         /* XXX no %qd in kernel.  Truncate entry->offset. */
3306                         db_printf(", object=%p, offset=0x%lx",
3307                             (void *)entry->object.vm_object,
3308                             (long)entry->offset);
3309                         if (entry->eflags & MAP_ENTRY_COW)
3310                                 db_printf(", copy (%s)",
3311                                     (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3312                         db_printf("\n");
3313                         nlines++;
3314
3315                         if ((entry->prev == &map->header) ||
3316                             (entry->prev->object.vm_object !=
3317                                 entry->object.vm_object)) {
3318                                 db_indent += 2;
3319                                 vm_object_print((db_expr_t)(intptr_t)
3320                                                 entry->object.vm_object,
3321                                                 full, 0, (char *)0);
3322                                 nlines += 4;
3323                                 db_indent -= 2;
3324                         }
3325                 }
3326         }
3327         db_indent -= 2;
3328         if (db_indent == 0)
3329                 nlines = 0;
3330 }
3331
3332
3333 DB_SHOW_COMMAND(procvm, procvm)
3334 {
3335         struct proc *p;
3336
3337         if (have_addr) {
3338                 p = (struct proc *) addr;
3339         } else {
3340                 p = curproc;
3341         }
3342
3343         db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3344             (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3345             (void *)vmspace_pmap(p->p_vmspace));
3346
3347         vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3348 }
3349
3350 #endif /* DDB */