1 /*
2  * Copyright (c) 1991, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      from: @(#)vm_object.c   8.5 (Berkeley) 3/22/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66
67 /*
68  *      Virtual memory object module.
69  */
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/lock.h>
74 #include <sys/mman.h>
75 #include <sys/mount.h>
76 #include <sys/mutex.h>
77 #include <sys/proc.h>           /* for curproc, pageproc */
78 #include <sys/socket.h>
79 #include <sys/vnode.h>
80 #include <sys/vmmeter.h>
81 #include <sys/sx.h>
82
83 #include <vm/vm.h>
84 #include <vm/vm_param.h>
85 #include <vm/pmap.h>
86 #include <vm/vm_map.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_page.h>
89 #include <vm/vm_pageout.h>
90 #include <vm/vm_pager.h>
91 #include <vm/vm_zone.h>
92 #include <vm/swap_pager.h>
93 #include <vm/vm_kern.h>
94 #include <vm/vm_extern.h>
95
96 static void     vm_object_qcollapse __P((vm_object_t object));
97
98 /*
99  *      Virtual memory objects maintain the actual data
100  *      associated with allocated virtual memory.  A given
101  *      page of memory exists within exactly one object.
102  *
103  *      An object is only deallocated when all "references"
104  *      are given up.  Only one "reference" to a given
105  *      region of an object should be writeable.
106  *
107  *      Associated with each object is a list of all resident
108  *      memory pages belonging to that object; this list is
109  *      maintained by the "vm_page" module, and locked by the object's
110  *      lock.
111  *
112  *      Each object also records a "pager" routine which is
113  *      used to retrieve (and store) pages to the proper backing
114  *      storage.  In addition, objects may be backed by other
115  *      objects from which they were virtual-copied.
116  *
117  *      The only items within the object structure which are
118  *      modified after time of creation are:
119  *              reference count         locked by object's lock
120  *              pager routine           locked by object's lock
121  *
122  */
123
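/*
 * Illustrative sketch (not part of the original file): the reference
 * lifecycle described above, expressed with the primitives defined
 * below.  The object size and the second reference are hypothetical;
 * real callers usually obtain objects through the pager or fault paths.
 */
#if 0
	vm_object_t obj;

	obj = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(64 * PAGE_SIZE));
	vm_object_reference(obj);	/* e.g. a second mapping of the object */
	/* ... use the object ... */
	vm_object_deallocate(obj);	/* drop the extra reference */
	vm_object_deallocate(obj);	/* last reference: object is terminated */
#endif
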
124 struct object_q vm_object_list;
125 static struct mtx vm_object_list_mtx;   /* lock for object list and count */
126 static long vm_object_count;            /* count of all objects */
127 vm_object_t kernel_object;
128 vm_object_t kmem_object;
129 static struct vm_object kernel_object_store;
130 static struct vm_object kmem_object_store;
131 extern int vm_pageout_page_count;
132
133 static long object_collapses;
134 static long object_bypasses;
135 static int next_index;
136 static vm_zone_t obj_zone;
137 static struct vm_zone obj_zone_store;
138 static int object_hash_rand;
139 #define VM_OBJECTS_INIT 256
140 static struct vm_object vm_objects_init[VM_OBJECTS_INIT];
141
142 void
143 _vm_object_allocate(objtype_t type, vm_size_t size, vm_object_t object)
144 {
145         int incr;
146
147         GIANT_REQUIRED;
148
149         TAILQ_INIT(&object->memq);
150         TAILQ_INIT(&object->shadow_head);
151
152         object->type = type;
153         object->size = size;
154         object->ref_count = 1;
155         object->flags = 0;
156         if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
157                 vm_object_set_flag(object, OBJ_ONEMAPPING);
158         object->paging_in_progress = 0;
159         object->resident_page_count = 0;
160         object->shadow_count = 0;
161         object->pg_color = next_index;
162         if (size > (PQ_L2_SIZE / 3 + PQ_PRIME1))
163                 incr = PQ_L2_SIZE / 3 + PQ_PRIME1;
164         else
165                 incr = size;
166         next_index = (next_index + incr) & PQ_L2_MASK;
167         object->handle = NULL;
168         object->backing_object = NULL;
169         object->backing_object_offset = (vm_ooffset_t) 0;
170         /*
171          * Try to generate a number that will spread objects out in the
172          * hash table.  We 'wipe' new objects across the hash in 128 page
173          * increments plus 1 more to offset it a little more by the time
174          * it wraps around.
175          */
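	/*
	 * Added note: since object_hash_rand is updated from hash_rand at
	 * the end of this routine, successive allocations see values of
	 * R, R - 129, R - 258, ... for the previous value R.
	 */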
176         object->hash_rand = object_hash_rand - 129;
177
178         object->generation++;
179
180         TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
181         vm_object_count++;
182         object_hash_rand = object->hash_rand;
183 }
184
185 /*
186  *      vm_object_init:
187  *
188  *      Initialize the VM objects module.
189  */
190 void
191 vm_object_init(void)
192 {
193         GIANT_REQUIRED;
194
195         TAILQ_INIT(&vm_object_list);
196         mtx_init(&vm_object_list_mtx, "vm object_list", MTX_DEF);
197         vm_object_count = 0;
198         
199         kernel_object = &kernel_object_store;
200         _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
201             kernel_object);
202
203         kmem_object = &kmem_object_store;
204         _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
205             kmem_object);
206
207         obj_zone = &obj_zone_store;
208         zbootinit(obj_zone, "VM OBJECT", sizeof (struct vm_object),
209                 vm_objects_init, VM_OBJECTS_INIT);
210 }
211
212 void
213 vm_object_init2(void)
214 {
215         zinitna(obj_zone, NULL, NULL, 0, 0, 0, 1);
216 }
217
218 void
219 vm_object_set_flag(vm_object_t object, u_short bits)
220 {
221         GIANT_REQUIRED;
222         object->flags |= bits;
223 }
224
225 void
226 vm_object_clear_flag(vm_object_t object, u_short bits)
227 {
228         GIANT_REQUIRED;
229         object->flags &= ~bits;
230 }
231
232 void
233 vm_object_pip_add(vm_object_t object, short i)
234 {
235         GIANT_REQUIRED;
236         object->paging_in_progress += i;
237 }
238
239 void
240 vm_object_pip_subtract(vm_object_t object, short i)
241 {
242         GIANT_REQUIRED;
243         object->paging_in_progress -= i;
244 }
245
246 void
247 vm_object_pip_wakeup(vm_object_t object)
248 {
249         GIANT_REQUIRED;
250         object->paging_in_progress--;
251         if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
252                 vm_object_clear_flag(object, OBJ_PIPWNT);
253                 wakeup(object);
254         }
255 }
256
257 void
258 vm_object_pip_wakeupn(vm_object_t object, short i)
259 {
260         GIANT_REQUIRED;
261         if (i)
262                 object->paging_in_progress -= i;
263         if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
264                 vm_object_clear_flag(object, OBJ_PIPWNT);
265                 wakeup(object);
266         }
267 }
268
269 void
270 vm_object_pip_sleep(vm_object_t object, char *waitid)
271 {
272         GIANT_REQUIRED;
273         if (object->paging_in_progress) {
274                 int s = splvm();
275                 if (object->paging_in_progress) {
276                         vm_object_set_flag(object, OBJ_PIPWNT);
277                         tsleep(object, PVM, waitid, 0);
278                 }
279                 splx(s);
280         }
281 }
282
283 void
284 vm_object_pip_wait(vm_object_t object, char *waitid)
285 {
286         GIANT_REQUIRED;
287         while (object->paging_in_progress)
288                 vm_object_pip_sleep(object, waitid);
289 }
290
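/*
 * Illustrative sketch (not in the original source): the usual pairing of
 * the paging-in-progress helpers above.  A consumer brackets pager I/O
 * or a page scan with pip_add/pip_wakeup so that vm_object_pip_wait()
 * callers, such as vm_object_terminate(), block until the work is done.
 */
#if 0
	vm_object_pip_add(object, 1);
	/* ... issue and wait for pager I/O on the object ... */
	vm_object_pip_wakeup(object);
#endif
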
291 /*
292  *      vm_object_allocate:
293  *
294  *      Returns a new object with the given size.
295  */
296
297 vm_object_t
298 vm_object_allocate(objtype_t type, vm_size_t size)
299 {
300         vm_object_t result;
301
302         GIANT_REQUIRED;
303
304         result = (vm_object_t) zalloc(obj_zone);
305         _vm_object_allocate(type, size, result);
306
307         return (result);
308 }
309
310
311 /*
312  *      vm_object_reference:
313  *
314  *      Gets another reference to the given object.
315  */
316 void
317 vm_object_reference(vm_object_t object)
318 {
319         GIANT_REQUIRED;
320
321         if (object == NULL)
322                 return;
323
324 #if 0
325         /* object can be re-referenced during final cleaning */
326         KASSERT(!(object->flags & OBJ_DEAD),
327             ("vm_object_reference: attempting to reference dead obj"));
328 #endif
329
330         object->ref_count++;
331         if (object->type == OBJT_VNODE) {
332                 while (vget((struct vnode *) object->handle, LK_RETRY|LK_NOOBJ, curthread)) {
333                         printf("vm_object_reference: delay in getting object\n");
334                 }
335         }
336 }
337
338 /*
339  * handle deallocating an object of type OBJT_VNODE
340  */
341 void
342 vm_object_vndeallocate(vm_object_t object)
343 {
344         struct vnode *vp = (struct vnode *) object->handle;
345
346         GIANT_REQUIRED;
347         KASSERT(object->type == OBJT_VNODE,
348             ("vm_object_vndeallocate: not a vnode object"));
349         KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
350 #ifdef INVARIANTS
351         if (object->ref_count == 0) {
352                 vprint("vm_object_vndeallocate", vp);
353                 panic("vm_object_vndeallocate: bad object reference count");
354         }
355 #endif
356
357         object->ref_count--;
358         if (object->ref_count == 0) {
359                 vp->v_flag &= ~VTEXT;
360                 vm_object_clear_flag(object, OBJ_OPT);
361         }
362         /*
363          * vrele may need a vop lock
364          */
365         vrele(vp);
366 }
367
368 /*
369  *      vm_object_deallocate:
370  *
371  *      Release a reference to the specified object,
372  *      gained either through a vm_object_allocate
373  *      or a vm_object_reference call.  When all references
374  *      are gone, storage associated with this object
375  *      may be relinquished.
376  *
377  *      No object may be locked.
378  */
379 void
380 vm_object_deallocate(vm_object_t object)
381 {
382         vm_object_t temp;
383
384         GIANT_REQUIRED;
385
386         while (object != NULL) {
387
388                 if (object->type == OBJT_VNODE) {
389                         vm_object_vndeallocate(object);
390                         return;
391                 }
392
393                 KASSERT(object->ref_count != 0,
394                         ("vm_object_deallocate: object deallocated too many times: %d", object->type));
395
396                 /*
397                  * If the reference count goes to 0 we start calling
398                  * vm_object_terminate() on the object chain.
399                  * A ref count of 1 may be a special case depending on the
400                  * shadow count being 0 or 1.
401                  */
402                 object->ref_count--;
403                 if (object->ref_count > 1) {
404                         return;
405                 } else if (object->ref_count == 1) {
406                         if (object->shadow_count == 0) {
407                                 vm_object_set_flag(object, OBJ_ONEMAPPING);
408                         } else if ((object->shadow_count == 1) &&
409                             (object->handle == NULL) &&
410                             (object->type == OBJT_DEFAULT ||
411                              object->type == OBJT_SWAP)) {
412                                 vm_object_t robject;
413
414                                 robject = TAILQ_FIRST(&object->shadow_head);
415                                 KASSERT(robject != NULL,
416                                     ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
417                                          object->ref_count,
418                                          object->shadow_count));
419                                 if ((robject->handle == NULL) &&
420                                     (robject->type == OBJT_DEFAULT ||
421                                      robject->type == OBJT_SWAP)) {
422
423                                         robject->ref_count++;
424
425                                         while (
426                                                 robject->paging_in_progress ||
427                                                 object->paging_in_progress
428                                         ) {
429                                                 vm_object_pip_sleep(robject, "objde1");
430                                                 vm_object_pip_sleep(object, "objde2");
431                                         }
432
433                                         if (robject->ref_count == 1) {
434                                                 robject->ref_count--;
435                                                 object = robject;
436                                                 goto doterm;
437                                         }
438
439                                         object = robject;
440                                         vm_object_collapse(object);
441                                         continue;
442                                 }
443                         }
444
445                         return;
446
447                 }
448
449 doterm:
450
451                 temp = object->backing_object;
452                 if (temp) {
453                         TAILQ_REMOVE(&temp->shadow_head, object, shadow_list);
454                         temp->shadow_count--;
455                         if (temp->ref_count == 0)
456                                 vm_object_clear_flag(temp, OBJ_OPT);
457                         temp->generation++;
458                         object->backing_object = NULL;
459                 }
460                 /*
461                  * Don't double-terminate, we could be in a termination
462                  * recursion due to the terminate having to sync data
463                  * to disk.
464                  */
465                 if ((object->flags & OBJ_DEAD) == 0)
466                         vm_object_terminate(object);
467                 object = temp;
468         }
469 }
470
471 /*
472  *      vm_object_terminate actually destroys the specified object, freeing
473  *      up all previously used resources.
474  *
475  *      The object must be locked.
476  *      This routine may block.
477  */
478 void
479 vm_object_terminate(vm_object_t object)
480 {
481         vm_page_t p;
482         int s;
483
484         GIANT_REQUIRED;
485
486         /*
487          * Make sure no one uses us.
488          */
489         vm_object_set_flag(object, OBJ_DEAD);
490
491         /*
492          * wait for the pageout daemon to be done with the object
493          */
494         vm_object_pip_wait(object, "objtrm");
495
496         KASSERT(!object->paging_in_progress,
497                 ("vm_object_terminate: pageout in progress"));
498
499         /*
500          * Clean and free the pages, as appropriate. All references to the
501          * object are gone, so we don't need to lock it.
502          */
503         if (object->type == OBJT_VNODE) {
504                 struct vnode *vp;
505
506                 /*
507                  * Freeze optimized copies.
508                  */
509                 vm_freeze_copyopts(object, 0, object->size);
510
511                 /*
512                  * Clean pages and flush buffers.
513                  */
514                 vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
515
516                 vp = (struct vnode *) object->handle;
517                 vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
518         }
519
520         KASSERT(object->ref_count == 0, 
521                 ("vm_object_terminate: object with references, ref_count=%d",
522                 object->ref_count));
523
524         /*
525          * Now free any remaining pages. For internal objects, this also
526          * removes them from paging queues. Don't free wired pages, just
527          * remove them from the object. 
528          */
529         s = splvm();
530         while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
531                 KASSERT(!p->busy && (p->flags & PG_BUSY) == 0,
532                         ("vm_object_terminate: freeing busy page %p "
533                         "p->busy = %d, p->flags %x\n", p, p->busy, p->flags));
534                 if (p->wire_count == 0) {
535                         vm_page_busy(p);
536                         vm_page_free(p);
537                         cnt.v_pfree++;
538                 } else {
539                         vm_page_busy(p);
540                         vm_page_remove(p);
541                 }
542         }
543         splx(s);
544
545         /*
546          * Let the pager know object is dead.
547          */
548         vm_pager_deallocate(object);
549
550         /*
551          * Remove the object from the global object list.
552          */
553         mtx_lock(&vm_object_list_mtx);
554         TAILQ_REMOVE(&vm_object_list, object, object_list);
555         mtx_unlock(&vm_object_list_mtx);
556
557         wakeup(object);
558
559         /*
560          * Free the space for the object.
561          */
562         zfree(obj_zone, object);
563 }
564
565 /*
566  *      vm_object_page_clean
567  *
568  *      Clean all dirty pages in the specified range of object.  Leaves page 
569  *      on whatever queue it is currently on.   If NOSYNC is set then do not
570  *      write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
571  *      leaving the object dirty.
572  *
573  *      Odd semantics: if end == 0, we clean from start to the end of the object.
574  *
575  *      The object must be locked.
576  */
577
578 void
579 vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int flags)
580 {
581         vm_page_t p, np, tp;
582         vm_offset_t tstart, tend;
583         vm_pindex_t pi;
584         int s;
585         struct vnode *vp;
586         int runlen;
587         int maxf;
588         int chkb;
589         int maxb;
590         int i;
591         int clearobjflags;
592         int pagerflags;
593         vm_page_t maf[vm_pageout_page_count];
594         vm_page_t mab[vm_pageout_page_count];
595         vm_page_t ma[vm_pageout_page_count];
596         int curgeneration;
597
598         GIANT_REQUIRED;
599
600         if (object->type != OBJT_VNODE ||
601                 (object->flags & OBJ_MIGHTBEDIRTY) == 0)
602                 return;
603
604         pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : 0;
605         pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;
606
607         vp = object->handle;
608
609         vm_object_set_flag(object, OBJ_CLEANING);
610
611         tstart = start;
612         if (end == 0) {
613                 tend = object->size;
614         } else {
615                 tend = end;
616         }
617
618         /*
619          * Generally set CLEANCHK interlock and make the page read-only so
620          * we can then clear the object flags.
621          *
622          * However, if this is a nosync mmap then the object is likely to 
623          * stay dirty so do not mess with the page and do not clear the
624          * object flags.
625          */
626
627         clearobjflags = 1;
628
629         TAILQ_FOREACH(p, &object->memq, listq) {
630                 vm_page_flag_set(p, PG_CLEANCHK);
631                 if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
632                         clearobjflags = 0;
633                 else
634                         vm_page_protect(p, VM_PROT_READ);
635         }
636
637         if (clearobjflags && (tstart == 0) && (tend == object->size)) {
638                 struct vnode *vp;
639
640                 vm_object_clear_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
641                 if (object->type == OBJT_VNODE &&
642                     (vp = (struct vnode *)object->handle) != NULL) {
643                         if (vp->v_flag & VOBJDIRTY) {
644                                 mtx_lock(&vp->v_interlock);
645                                 vp->v_flag &= ~VOBJDIRTY;
646                                 mtx_unlock(&vp->v_interlock);
647                         }
648                 }
649         }
650
651 rescan:
652         curgeneration = object->generation;
653
654         for (p = TAILQ_FIRST(&object->memq); p; p = np) {
655                 np = TAILQ_NEXT(p, listq);
656
657                 pi = p->pindex;
658                 if (((p->flags & PG_CLEANCHK) == 0) ||
659                         (pi < tstart) || (pi >= tend) ||
660                         (p->valid == 0) ||
661                         ((p->queue - p->pc) == PQ_CACHE)) {
662                         vm_page_flag_clear(p, PG_CLEANCHK);
663                         continue;
664                 }
665
666                 vm_page_test_dirty(p);
667                 if ((p->dirty & p->valid) == 0) {
668                         vm_page_flag_clear(p, PG_CLEANCHK);
669                         continue;
670                 }
671
672                 /*
673                  * If we have been asked to skip nosync pages and this is a
674                  * nosync page, skip it.  Note that the object flags were
675                  * not cleared in this case so we do not have to set them.
676                  */
677                 if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
678                         vm_page_flag_clear(p, PG_CLEANCHK);
679                         continue;
680                 }
681
682                 s = splvm();
683                 while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
684                         if (object->generation != curgeneration) {
685                                 splx(s);
686                                 goto rescan;
687                         }
688                 }
689
690                 maxf = 0;
691                 for (i = 1; i < vm_pageout_page_count; i++) {
692                         if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
693                                 if ((tp->flags & PG_BUSY) ||
694                                         (tp->flags & PG_CLEANCHK) == 0 ||
695                                         (tp->busy != 0))
696                                         break;
697                                 if ((tp->queue - tp->pc) == PQ_CACHE) {
698                                         vm_page_flag_clear(tp, PG_CLEANCHK);
699                                         break;
700                                 }
701                                 vm_page_test_dirty(tp);
702                                 if ((tp->dirty & tp->valid) == 0) {
703                                         vm_page_flag_clear(tp, PG_CLEANCHK);
704                                         break;
705                                 }
706                                 maf[ i - 1 ] = tp;
707                                 maxf++;
708                                 continue;
709                         }
710                         break;
711                 }
712
713                 maxb = 0;
714                 chkb = vm_pageout_page_count -  maxf;
715                 if (chkb) {
716                         for (i = 1; i < chkb; i++) {
717                                 if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
718                                         if ((tp->flags & PG_BUSY) ||
719                                                 (tp->flags & PG_CLEANCHK) == 0 ||
720                                                 (tp->busy != 0))
721                                                 break;
722                                         if ((tp->queue - tp->pc) == PQ_CACHE) {
723                                                 vm_page_flag_clear(tp, PG_CLEANCHK);
724                                                 break;
725                                         }
726                                         vm_page_test_dirty(tp);
727                                         if ((tp->dirty & tp->valid) == 0) {
728                                                 vm_page_flag_clear(tp, PG_CLEANCHK);
729                                                 break;
730                                         }
731                                         mab[ i - 1 ] = tp;
732                                         maxb++;
733                                         continue;
734                                 }
735                                 break;
736                         }
737                 }
738
739                 for (i = 0; i < maxb; i++) {
740                         int index = (maxb - i) - 1;
741                         ma[index] = mab[i];
742                         vm_page_flag_clear(ma[index], PG_CLEANCHK);
743                 }
744                 vm_page_flag_clear(p, PG_CLEANCHK);
745                 ma[maxb] = p;
746                 for (i = 0 ; i < maxf; i++) {
747                         int index = (maxb + i) + 1;
748                         ma[index] = maf[i];
749                         vm_page_flag_clear(ma[index], PG_CLEANCHK);
750                 }
751                 runlen = maxb + maxf + 1;
752
753                 splx(s);
754                 vm_pageout_flush(ma, runlen, pagerflags);
755                 for (i = 0; i < runlen; i++) {
756                         if (ma[i]->valid & ma[i]->dirty) {
757                                 vm_page_protect(ma[i], VM_PROT_READ);
758                                 vm_page_flag_set(ma[i], PG_CLEANCHK);
759                         }
760                 }
761                 if (object->generation != curgeneration)
762                         goto rescan;
763         }
764
765 #if 0
766         VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
767 #endif
768
769         vm_object_clear_flag(object, OBJ_CLEANING);
770         return;
771 }
772
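/*
 * Illustrative only: the whole-object, synchronous form of the call
 * above, as used by vm_object_terminate() earlier in this file.
 * Passing start == end == 0 selects the entire object.
 */
#if 0
	vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
#endif
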
773 /*
774  * Same as vm_object_pmap_copy, except range checking really
775  * works, and is meant for small sections of an object.
776  *
777  * This code protects resident pages by making them read-only
778  * and is typically called on a fork or split when a page
779  * is converted to copy-on-write.  
780  *
781  * NOTE: If the page is already at VM_PROT_NONE, calling
782  * vm_page_protect will have no effect.
783  */
784
785 void
786 vm_object_pmap_copy_1(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
787 {
788         vm_pindex_t idx;
789         vm_page_t p;
790
791         GIANT_REQUIRED;
792
793         if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0)
794                 return;
795
796         for (idx = start; idx < end; idx++) {
797                 p = vm_page_lookup(object, idx);
798                 if (p == NULL)
799                         continue;
800                 vm_page_protect(p, VM_PROT_READ);
801         }
802 }
803
804 /*
805  *      vm_object_pmap_remove:
806  *
807  *      Removes all physical pages in the specified
808  *      object range from all physical maps.
809  *
810  *      The object must *not* be locked.
811  */
812 void
813 vm_object_pmap_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
814 {
815         vm_page_t p;
816
817         GIANT_REQUIRED;
818         if (object == NULL)
819                 return;
820         TAILQ_FOREACH(p, &object->memq, listq) {
821                 if (p->pindex >= start && p->pindex < end)
822                         vm_page_protect(p, VM_PROT_NONE);
823         }
824         if ((start == 0) && (object->size == end))
825                 vm_object_clear_flag(object, OBJ_WRITEABLE);
826 }
827
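/*
 * Illustrative only: revoking every mapping of an object with the
 * routine above.  Using start 0 and end object->size also clears
 * OBJ_WRITEABLE, as noted in the code.
 */
#if 0
	vm_object_pmap_remove(object, 0, object->size);
#endif
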
828 /*
829  *      vm_object_madvise:
830  *
831  *      Implements the madvise function at the object/page level.
832  *
833  *      MADV_WILLNEED   (any object)
834  *
835  *          Activate the specified pages if they are resident.
836  *
837  *      MADV_DONTNEED   (any object)
838  *
839  *          Deactivate the specified pages if they are resident.
840  *
841  *      MADV_FREE       (OBJT_DEFAULT/OBJT_SWAP objects,
842  *                       OBJ_ONEMAPPING only)
843  *
844  *          Deactivate and clean the specified pages if they are
845  *          resident.  This permits the process to reuse the pages
846  *          without faulting or the kernel to reclaim the pages
847  *          without I/O.
848  */
849 void
850 vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
851 {
852         vm_pindex_t end, tpindex;
853         vm_object_t tobject;
854         vm_page_t m;
855
856         GIANT_REQUIRED;
857         if (object == NULL)
858                 return;
859
860         end = pindex + count;
861
862         /*
863          * Locate and adjust resident pages
864          */
865
866         for (; pindex < end; pindex += 1) {
867 relookup:
868                 tobject = object;
869                 tpindex = pindex;
870 shadowlookup:
871                 /*
872                  * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
873                  * and those pages must be OBJ_ONEMAPPING.
874                  */
875                 if (advise == MADV_FREE) {
876                         if ((tobject->type != OBJT_DEFAULT &&
877                              tobject->type != OBJT_SWAP) ||
878                             (tobject->flags & OBJ_ONEMAPPING) == 0) {
879                                 continue;
880                         }
881                 }
882
883                 m = vm_page_lookup(tobject, tpindex);
884
885                 if (m == NULL) {
886                         /*
887                          * There may be swap even if there is no backing page
888                          */
889                         if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
890                                 swap_pager_freespace(tobject, tpindex, 1);
891
892                         /*
893                          * next object
894                          */
895                         tobject = tobject->backing_object;
896                         if (tobject == NULL)
897                                 continue;
898                         tpindex += OFF_TO_IDX(tobject->backing_object_offset);
899                         goto shadowlookup;
900                 }
901
902                 /*
903                  * If the page is busy or not in a normal active state,
904                  * we skip it.  If the page is not managed there are no
905                  * page queues to mess with.  Things can break if we mess
906                  * with pages in any of the below states.
907                  */
908                 if (
909                     m->hold_count ||
910                     m->wire_count ||
911                     (m->flags & PG_UNMANAGED) ||
912                     m->valid != VM_PAGE_BITS_ALL
913                 ) {
914                         continue;
915                 }
916
917                 if (vm_page_sleep_busy(m, TRUE, "madvpo"))
918                         goto relookup;
919
920                 if (advise == MADV_WILLNEED) {
921                         vm_page_activate(m);
922                 } else if (advise == MADV_DONTNEED) {
923                         vm_page_dontneed(m);
924                 } else if (advise == MADV_FREE) {
925                         /*
926                          * Mark the page clean.  This will allow the page
927                          * to be freed up by the system.  However, such pages
928                          * are often reused quickly by malloc()/free()
929                          * so we do not do anything that would cause
930                          * a page fault if we can help it.
931                          *
932                          * Specifically, we do not try to actually free
933                          * the page now nor do we try to put it in the
934                          * cache (which would cause a page fault on reuse).
935                          *
936                          * But we do make the page as freeable as we
937                          * can without actually taking the step of unmapping
938                          * it.
939                          */
940                         pmap_clear_modify(m);
941                         m->dirty = 0;
942                         m->act_count = 0;
943                         vm_page_dontneed(m);
944                         if (tobject->type == OBJT_SWAP)
945                                 swap_pager_freespace(tobject, tpindex, 1);
946                 }
947         }       
948 }
949
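/*
 * Illustrative only (hypothetical object and range): how an madvise()
 * style caller might invoke the routine above for a single object,
 * deactivating one resident page starting at pindex 0.
 */
#if 0
	vm_object_madvise(object, 0, 1, MADV_DONTNEED);
#endif
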
950 /*
951  *      vm_object_shadow:
952  *
953  *      Create a new object which is backed by the
954  *      specified existing object range.  The source
955  *      object reference is deallocated.
956  *
957  *      The new object and offset into that object
958  *      are returned in the source parameters.
959  */
960
961 void
962 vm_object_shadow(
963         vm_object_t *object,    /* IN/OUT */
964         vm_ooffset_t *offset,   /* IN/OUT */
965         vm_size_t length)
966 {
967         vm_object_t source;
968         vm_object_t result;
969
970         GIANT_REQUIRED;
971         source = *object;
972
973         /*
974          * Don't create the new object if the old object isn't shared.
975          */
976
977         if (source != NULL &&
978             source->ref_count == 1 &&
979             source->handle == NULL &&
980             (source->type == OBJT_DEFAULT ||
981              source->type == OBJT_SWAP))
982                 return;
983
984         /*
985          * Allocate a new object with the given length
986          */
987         result = vm_object_allocate(OBJT_DEFAULT, length);
988         KASSERT(result != NULL, ("vm_object_shadow: no object for shadowing"));
989
990         /*
991          * The new object shadows the source object, adding a reference to it.
992          * Our caller changes his reference to point to the new object,
993          * removing a reference to the source object.  Net result: no change
994          * of reference count.
995          *
996          * Try to optimize the result object's page color when shadowing
997          * in order to maintain page coloring consistency in the combined 
998          * shadowed object.
999          */
1000         result->backing_object = source;
1001         if (source) {
1002                 TAILQ_INSERT_TAIL(&source->shadow_head, result, shadow_list);
1003                 source->shadow_count++;
1004                 source->generation++;
1005                 result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & PQ_L2_MASK;
1006         }
1007
1008         /*
1009          * Store the offset into the source object, and fix up the offset into
1010          * the new object.
1011          */
1012
1013         result->backing_object_offset = *offset;
1014
1015         /*
1016          * Return the new object and offset
1017          */
1018
1019         *offset = 0;
1020         *object = result;
1021 }
1022
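/*
 * Illustrative only (hypothetical caller and locals): the IN/OUT
 * convention of vm_object_shadow() above.  A copy-on-write caller
 * passes the addresses of its own object pointer and offset; when the
 * source object is shared, both are replaced so that they now name the
 * new shadow object at offset 0.
 */
#if 0
	vm_object_shadow(&entry_object, &entry_offset, entry_size);
	/* entry_object is now the shadow; entry_offset has been reset to 0. */
#endif
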
1023 #define OBSC_TEST_ALL_SHADOWED  0x0001
1024 #define OBSC_COLLAPSE_NOWAIT    0x0002
1025 #define OBSC_COLLAPSE_WAIT      0x0004
1026
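/*
 * Summary of the scan below: OBSC_TEST_ALL_SHADOWED only checks whether
 * the parent (or its pager) covers every page the backing object holds
 * within the parent's range; OBSC_COLLAPSE_NOWAIT and OBSC_COLLAPSE_WAIT
 * actually migrate or free backing pages, skipping busy pages in the
 * NOWAIT case and sleeping on them in the WAIT case.
 */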
1027 static __inline int
1028 vm_object_backing_scan(vm_object_t object, int op)
1029 {
1030         int s;
1031         int r = 1;
1032         vm_page_t p;
1033         vm_object_t backing_object;
1034         vm_pindex_t backing_offset_index;
1035
1036         s = splvm();
1037         GIANT_REQUIRED;
1038
1039         backing_object = object->backing_object;
1040         backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
1041
1042         /*
1043          * Initial conditions
1044          */
1045
1046         if (op & OBSC_TEST_ALL_SHADOWED) {
1047                 /*
1048                  * We do not want to have to test for the existence of
1049                  * swap pages in the backing object.  XXX but with the
1050                  * new swapper this would be pretty easy to do.
1051                  *
1052                  * XXX what about anonymous MAP_SHARED memory that hasn't
1053                  * been ZFOD faulted yet?  If we do not test for this, the
1054                  * shadow test may succeed! XXX
1055                  */
1056                 if (backing_object->type != OBJT_DEFAULT) {
1057                         splx(s);
1058                         return(0);
1059                 }
1060         }
1061         if (op & OBSC_COLLAPSE_WAIT) {
1062                 vm_object_set_flag(backing_object, OBJ_DEAD);
1063         }
1064
1065         /*
1066          * Our scan
1067          */
1068
1069         p = TAILQ_FIRST(&backing_object->memq);
1070         while (p) {
1071                 vm_page_t next = TAILQ_NEXT(p, listq);
1072                 vm_pindex_t new_pindex = p->pindex - backing_offset_index;
1073
1074                 if (op & OBSC_TEST_ALL_SHADOWED) {
1075                         vm_page_t pp;
1076
1077                         /*
1078                          * Ignore pages outside the parent object's range
1079                          * and outside the parent object's mapping of the 
1080                          * backing object.
1081                          *
1082                          * note that we do not busy the backing object's
1083                          * page.
1084                          */
1085
1086                         if (
1087                             p->pindex < backing_offset_index ||
1088                             new_pindex >= object->size
1089                         ) {
1090                                 p = next;
1091                                 continue;
1092                         }
1093
1094                         /*
1095                          * See if the parent has the page or if the parent's
1096                          * object pager has the page.  If the parent has the
1097                          * page but the page is not valid, the parent's
1098                          * object pager must have the page.
1099                          *
1100                          * If this fails, the parent does not completely shadow
1101                          * the object and we might as well give up now.
1102                          */
1103
1104                         pp = vm_page_lookup(object, new_pindex);
1105                         if (
1106                             (pp == NULL || pp->valid == 0) &&
1107                             !vm_pager_has_page(object, new_pindex, NULL, NULL)
1108                         ) {
1109                                 r = 0;
1110                                 break;
1111                         }
1112                 }
1113
1114                 /*
1115                  * Check for busy page
1116                  */
1117
1118                 if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
1119                         vm_page_t pp;
1120
1121                         if (op & OBSC_COLLAPSE_NOWAIT) {
1122                                 if (
1123                                     (p->flags & PG_BUSY) ||
1124                                     !p->valid || 
1125                                     p->hold_count || 
1126                                     p->wire_count ||
1127                                     p->busy
1128                                 ) {
1129                                         p = next;
1130                                         continue;
1131                                 }
1132                         } else if (op & OBSC_COLLAPSE_WAIT) {
1133                                 if (vm_page_sleep_busy(p, TRUE, "vmocol")) {
1134                                         /*
1135                                          * If we slept, anything could have
1136                                          * happened.  Since the object is
1137                                          * marked dead, the backing offset
1138                                          * should not have changed so we
1139                                          * just restart our scan.
1140                                          */
1141                                         p = TAILQ_FIRST(&backing_object->memq);
1142                                         continue;
1143                                 }
1144                         }
1145
1146                         /* 
1147                          * Busy the page
1148                          */
1149                         vm_page_busy(p);
1150
1151                         KASSERT(
1152                             p->object == backing_object,
1153                             ("vm_object_qcollapse(): object mismatch")
1154                         );
1155
1156                         /*
1157                          * Destroy any associated swap
1158                          */
1159                         if (backing_object->type == OBJT_SWAP) {
1160                                 swap_pager_freespace(
1161                                     backing_object, 
1162                                     p->pindex,
1163                                     1
1164                                 );
1165                         }
1166
1167                         if (
1168                             p->pindex < backing_offset_index ||
1169                             new_pindex >= object->size
1170                         ) {
1171                                 /*
1172                                  * Page is out of the parent object's range, we 
1173                                  * can simply destroy it. 
1174                                  */
1175                                 vm_page_protect(p, VM_PROT_NONE);
1176                                 vm_page_free(p);
1177                                 p = next;
1178                                 continue;
1179                         }
1180
1181                         pp = vm_page_lookup(object, new_pindex);
1182                         if (
1183                             pp != NULL ||
1184                             vm_pager_has_page(object, new_pindex, NULL, NULL)
1185                         ) {
1186                                 /*
1187                                  * page already exists in parent OR swap exists
1188                                  * for this location in the parent.  Destroy 
1189                                  * the original page from the backing object.
1190                                  *
1191                                  * Leave the parent's page alone
1192                                  */
1193                                 vm_page_protect(p, VM_PROT_NONE);
1194                                 vm_page_free(p);
1195                                 p = next;
1196                                 continue;
1197                         }
1198
1199                         /*
1200                          * Page does not exist in parent, rename the
1201                          * page from the backing object to the main object. 
1202                          *
1203                          * If the page was mapped to a process, it can remain 
1204                          * mapped through the rename.
1205                          */
1206                         if ((p->queue - p->pc) == PQ_CACHE)
1207                                 vm_page_deactivate(p);
1208
1209                         vm_page_rename(p, object, new_pindex);
1210                         /* page automatically made dirty by rename */
1211                 }
1212                 p = next;
1213         }
1214         splx(s);
1215         return(r);
1216 }
1217
1218
1219 /*
1220  * this version of collapse allows the operation to occur earlier and
1221  * when paging_in_progress is true for an object...  This is not a complete
1222  * operation, but should plug 99.9% of the rest of the leaks.
1223  */
1224 static void
1225 vm_object_qcollapse(vm_object_t object)
1226 {
1227         vm_object_t backing_object = object->backing_object;
1228
1229         GIANT_REQUIRED;
1230
1231         if (backing_object->ref_count != 1)
1232                 return;
1233
1234         backing_object->ref_count += 2;
1235
1236         vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
1237
1238         backing_object->ref_count -= 2;
1239 }
1240
1241 /*
1242  *      vm_object_collapse:
1243  *
1244  *      Collapse an object with the object backing it.
1245  *      Pages in the backing object are moved into the
1246  *      parent, and the backing object is deallocated.
1247  */
1248 void
1249 vm_object_collapse(vm_object_t object)
1250 {
1251         GIANT_REQUIRED;
1252         
1253         while (TRUE) {
1254                 vm_object_t backing_object;
1255
1256                 /*
1257                  * Verify that the conditions are right for collapse:
1258                  *
1259                  * The object exists and the backing object exists.
1260                  */
1261                 if (object == NULL)
1262                         break;
1263
1264                 if ((backing_object = object->backing_object) == NULL)
1265                         break;
1266
1267                 /*
1268          * We check the backing object first, because it is most likely
1269          * not collapsible.
1270                  */
1271                 if (backing_object->handle != NULL ||
1272                     (backing_object->type != OBJT_DEFAULT &&
1273                      backing_object->type != OBJT_SWAP) ||
1274                     (backing_object->flags & OBJ_DEAD) ||
1275                     object->handle != NULL ||
1276                     (object->type != OBJT_DEFAULT &&
1277                      object->type != OBJT_SWAP) ||
1278                     (object->flags & OBJ_DEAD)) {
1279                         break;
1280                 }
1281
1282                 if (
1283                     object->paging_in_progress != 0 ||
1284                     backing_object->paging_in_progress != 0
1285                 ) {
1286                         vm_object_qcollapse(object);
1287                         break;
1288                 }
1289
1290                 /*
1291                  * We know that we can either collapse the backing object (if
1292                  * the parent is the only reference to it) or (perhaps) have
1293                  * the parent bypass the object if the parent happens to shadow
1294                  * all the resident pages in the entire backing object.
1295                  *
1296                  * This is ignoring pager-backed pages such as swap pages.
1297                  * vm_object_backing_scan fails the shadowing test in this
1298                  * case.
1299                  */
1300
1301                 if (backing_object->ref_count == 1) {
1302                         /*
1303                          * If there is exactly one reference to the backing
1304                          * object, we can collapse it into the parent.  
1305                          */
1306
1307                         vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
1308
1309                         /*
1310                          * Move the pager from backing_object to object.
1311                          */
1312
1313                         if (backing_object->type == OBJT_SWAP) {
1314                                 vm_object_pip_add(backing_object, 1);
1315
1316                                 /*
1317                                  * scrap the paging_offset junk and do a 
1318                                  * discrete copy.  This also removes major 
1319                                  * assumptions about how the swap-pager 
1320                                  * works from where it doesn't belong.  The
1321                                  * new swapper is able to optimize the
1322                                  * destroy-source case.
1323                                  */
1324
1325                                 vm_object_pip_add(object, 1);
1326                                 swap_pager_copy(
1327                                     backing_object,
1328                                     object,
1329                                     OFF_TO_IDX(object->backing_object_offset), TRUE);
1330                                 vm_object_pip_wakeup(object);
1331
1332                                 vm_object_pip_wakeup(backing_object);
1333                         }
1334                         /*
1335                          * Object now shadows whatever backing_object did.
1336                          * Note that the reference to 
1337                          * backing_object->backing_object moves from within 
1338                          * backing_object to within object.
1339                          */
1340
1341                         TAILQ_REMOVE(
1342                             &object->backing_object->shadow_head, 
1343                             object,
1344                             shadow_list
1345                         );
1346                         object->backing_object->shadow_count--;
1347                         object->backing_object->generation++;
1348                         if (backing_object->backing_object) {
1349                                 TAILQ_REMOVE(
1350                                     &backing_object->backing_object->shadow_head,
1351                                     backing_object, 
1352                                     shadow_list
1353                                 );
1354                                 backing_object->backing_object->shadow_count--;
1355                                 backing_object->backing_object->generation++;
1356                         }
1357                         object->backing_object = backing_object->backing_object;
1358                         if (object->backing_object) {
1359                                 TAILQ_INSERT_TAIL(
1360                                     &object->backing_object->shadow_head,
1361                                     object, 
1362                                     shadow_list
1363                                 );
1364                                 object->backing_object->shadow_count++;
1365                                 object->backing_object->generation++;
1366                         }
1367
1368                         object->backing_object_offset +=
1369                             backing_object->backing_object_offset;
1370
1371                         /*
1372                          * Discard backing_object.
1373                          *
1374                          * Since the backing object has no pages, no pager left,
1375                          * and no object references within it, all that is
1376                          * necessary is to dispose of it.
1377                          */
1378                         KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
1379                         KASSERT(TAILQ_FIRST(&backing_object->memq) == NULL, ("backing_object %p somehow has left over pages during collapse!", backing_object));
1380
1381                         TAILQ_REMOVE(
1382                             &vm_object_list, 
1383                             backing_object,
1384                             object_list
1385                         );
1386                         vm_object_count--;
1387
1388                         zfree(obj_zone, backing_object);
1389
1390                         object_collapses++;
1391                 } else {
1392                         vm_object_t new_backing_object;
1393
1394                         /*
1395                          * If we do not entirely shadow the backing object,
1396                          * there is nothing we can do so we give up.
1397                          */
1398
1399                         if (vm_object_backing_scan(object, OBSC_TEST_ALL_SHADOWED) == 0) {
1400                                 break;
1401                         }
1402
1403                         /*
1404                          * Make the parent shadow the next object in the
1405                          * chain.  Deallocating backing_object will not remove
1406                          * it, since its reference count is at least 2.
1407                          */
1408
1409                         TAILQ_REMOVE(
1410                             &backing_object->shadow_head,
1411                             object,
1412                             shadow_list
1413                         );
1414                         backing_object->shadow_count--;
1415                         backing_object->generation++;
1416
1417                         new_backing_object = backing_object->backing_object;
1418                         if ((object->backing_object = new_backing_object) != NULL) {
1419                                 vm_object_reference(new_backing_object);
1420                                 TAILQ_INSERT_TAIL(
1421                                     &new_backing_object->shadow_head,
1422                                     object,
1423                                     shadow_list
1424                                 );
1425                                 new_backing_object->shadow_count++;
1426                                 new_backing_object->generation++;
1427                                 object->backing_object_offset +=
1428                                         backing_object->backing_object_offset;
1429                         }
1430
1431                         /*
1432                          * Drop the reference count on backing_object. Since
1433                          * its ref_count was at least 2, it will not vanish,
1434                          * so calling vm_object_deallocate() here simply
1435                          * decrements the count.
1436                          */
1437                         vm_object_deallocate(backing_object);
1438                         object_bypasses++;
1439                 }
1440
1441                 /*
1442                  * Try again with this object's new backing object.
1443                  */
1444         }
1445 }
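
/*
 * To recap the two ways the loop above can retire a backing object: when
 * its ref_count is 1 it ends up with no pages and no pager, is unlinked
 * from vm_object_list and freed outright (a full collapse, counted in
 * object_collapses); otherwise, if the parent completely shadows it, the
 * parent is simply re-pointed at the backing object's own backing object
 * and drops its reference (a bypass, counted in object_bypasses).
 * vm_object_coalesce() below calls vm_object_collapse() on prev_object
 * before deciding whether that object can be extended in place.
 */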
1446
1447 /*
1448  *      vm_object_page_remove: [internal]
1449  *
1450  *      Removes all physical pages in the specified object range from the
1451  *      object's list of pages; if clean_only is TRUE, dirty pages are kept.
1452  *
1453  *      The object must be locked.
1454  */
1455 void
1456 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, boolean_t clean_only)
1457 {
1458         vm_page_t p, next;
1459         unsigned int size;
1460         int all;
1461
1462         GIANT_REQUIRED;
1463
1464         if (object == NULL ||
1465             object->resident_page_count == 0)
1466                 return;
1467
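        /*
         * A start and end of zero means every resident page in the object
         * should be removed.
         */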
1468         all = ((end == 0) && (start == 0));
1469
1470         /*
1471          * Since physically-backed objects do not use managed pages, we can't
1472          * remove pages from the object (we must instead remove the page
1473          * references, and then destroy the object).
1474          */
1475         KASSERT(object->type != OBJT_PHYS, ("attempt to remove pages from a physical object"));
1476
1477         vm_object_pip_add(object, 1);
1478 again:
1479         size = end - start;
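        /*
         * For a full removal, or a range covering a large fraction of the
         * resident pages, walk the object's page list once; otherwise look
         * up each page index in the range individually.
         */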
1480         if (all || size > object->resident_page_count / 4) {
1481                 for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) {
1482                         next = TAILQ_NEXT(p, listq);
1483                         if (all || ((start <= p->pindex) && (p->pindex < end))) {
1484                                 if (p->wire_count != 0) {
1485                                         vm_page_protect(p, VM_PROT_NONE);
1486                                         if (!clean_only)
1487                                                 p->valid = 0;
1488                                         continue;
1489                                 }
1490
1491                                 /*
1492                                  * The busy flags are only cleared at
1493                                  * interrupt -- minimize the spl transitions
1494                                  */
1495
1496                                 if (vm_page_sleep_busy(p, TRUE, "vmopar"))
1497                                         goto again;
1498
1499                                 if (clean_only && p->valid) {
1500                                         vm_page_test_dirty(p);
1501                                         if (p->valid & p->dirty)
1502                                                 continue;
1503                                 }
1504
1505                                 vm_page_busy(p);
1506                                 vm_page_protect(p, VM_PROT_NONE);
1507                                 vm_page_free(p);
1508                         }
1509                 }
1510         } else {
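                /*
                 * Small range: probe each index in [start, start + size)
                 * with vm_page_lookup() instead of scanning the whole memq.
                 */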
1511                 while (size > 0) {
1512                         if ((p = vm_page_lookup(object, start)) != 0) {
1513
1514                                 if (p->wire_count != 0) {
1515                                         vm_page_protect(p, VM_PROT_NONE);
1516                                         if (!clean_only)
1517                                                 p->valid = 0;
1518                                         start += 1;
1519                                         size -= 1;
1520                                         continue;
1521                                 }
1522
1523                                 /*
1524                                  * The busy flags are only cleared at
1525                                  * interrupt -- minimize the spl transitions
1526                                  */
1527                                 if (vm_page_sleep_busy(p, TRUE, "vmopar"))
1528                                         goto again;
1529
1530                                 if (clean_only && p->valid) {
1531                                         vm_page_test_dirty(p);
1532                                         if (p->valid & p->dirty) {
1533                                                 start += 1;
1534                                                 size -= 1;
1535                                                 continue;
1536                                         }
1537                                 }
1538
1539                                 vm_page_busy(p);
1540                                 vm_page_protect(p, VM_PROT_NONE);
1541                                 vm_page_free(p);
1542                         }
1543                         start += 1;
1544                         size -= 1;
1545                 }
1546         }
1547         vm_object_pip_wakeup(object);
1548 }
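
/*
 * Illustrative use only (the caller and the 'newsize' variable here are
 * hypothetical): a pager truncating an object to 'newsize' bytes might
 * discard the now-stale tail pages with something like
 *
 *      vm_object_page_remove(object, OFF_TO_IDX(newsize), object->size,
 *          FALSE);
 *
 * removing every page from the index containing 'newsize' up to the
 * object's current size, dirty pages included.
 */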
1549
1550 /*
1551  *      Routine:        vm_object_coalesce
1552  *      Function:       Coalesces two objects backing up adjoining
1553  *                      regions of memory into a single object.
1554  *
1555  *      returns TRUE if objects were combined.
1556  *
1557  *      NOTE:   Only works at the moment if the second object is NULL -
1558  *              if it's not, which object do we lock first?
1559  *
1560  *      Parameters:
1561  *              prev_object     First object to coalesce
1562  *              prev_pindex     Page index in prev_object at which the
1563  *                              previously mapped region begins
1564  *              prev_size       Size (in bytes) of reference to prev_object
1565  *              next_size       Size (in bytes) of the region to append
1566  *
1567  *              (The second object is currently always NULL and is not
1568  *              passed; see the NOTE above.)
1568  *
1569  *      Conditions:
1570  *      The object must *not* be locked.
1571  */
1572 boolean_t
1573 vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex, vm_size_t prev_size, vm_size_t next_size)
1574 {
1575         vm_pindex_t next_pindex;
1576
1577         GIANT_REQUIRED;
1578
1579         if (prev_object == NULL) {
1580                 return (TRUE);
1581         }
1582
1583         if (prev_object->type != OBJT_DEFAULT &&
1584             prev_object->type != OBJT_SWAP) {
1585                 return (FALSE);
1586         }
1587
1588         /*
1589          * Try to collapse the object first
1590          */
1591         vm_object_collapse(prev_object);
1592
1593         /*
1594          * Can't coalesce if the object has more than one reference, is
1595          * paged out, shadows another object, or has a copy elsewhere; any
1596          * of these mean pages not mapped to prev_entry may be in use anyway.
1597          */
1598
1599         if (prev_object->backing_object != NULL) {
1600                 return (FALSE);
1601         }
1602
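        /*
         * Convert both sizes from bytes to pages; next_pindex is the page
         * index just past the region already mapped to prev_object.
         */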
1603         prev_size >>= PAGE_SHIFT;
1604         next_size >>= PAGE_SHIFT;
1605         next_pindex = prev_pindex + prev_size;
1606
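        /*
         * A shared object may only be grown in place, i.e. when the new
         * region begins exactly at the object's current end.
         */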
1607         if ((prev_object->ref_count > 1) &&
1608             (prev_object->size != next_pindex)) {
1609                 return (FALSE);
1610         }
1611
1612         /*
1613          * Remove any pages that may still be in the object from a previous
1614          * deallocation.
1615          */
1616         if (next_pindex < prev_object->size) {
1617                 vm_object_page_remove(prev_object,
1618                                       next_pindex,
1619                                       next_pindex + next_size, FALSE);
1620                 if (prev_object->type == OBJT_SWAP)
1621                         swap_pager_freespace(prev_object,
1622                                              next_pindex, next_size);
1623         }
1624
1625         /*
1626          * Extend the object if necessary.
1627          */
1628         if (next_pindex + next_size > prev_object->size)
1629                 prev_object->size = next_pindex + next_size;
1630
1631         return (TRUE);
1632 }
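
/*
 * vm_object_coalesce() is intended for callers such as the map code
 * (e.g. vm_map_insert() extending the previous map entry) that want to
 * grow an existing anonymous object in place rather than chain a new
 * object in front of it.
 */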
1633
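/*
 * Mark the object writeable and possibly dirty (OBJ_WRITEABLE |
 * OBJ_MIGHTBEDIRTY) and, for vnode-backed objects, set VOBJDIRTY on the
 * underlying vnode so that the vnode sync paths know the object's pages
 * may need to be written back.
 */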
1634 void
1635 vm_object_set_writeable_dirty(vm_object_t object)
1636 {
1637         struct vnode *vp;
1638
1639         vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
1640         if (object->type == OBJT_VNODE &&
1641             (vp = (struct vnode *)object->handle) != NULL) {
1642                 if ((vp->v_flag & VOBJDIRTY) == 0) {
1643                         mtx_lock(&vp->v_interlock);
1644                         vp->v_flag |= VOBJDIRTY;
1645                         mtx_unlock(&vp->v_interlock);
1646                 }
1647         }
1648 }
1649
1650 #include "opt_ddb.h"
1651 #ifdef DDB
1652 #include <sys/kernel.h>
1653
1654 #include <sys/cons.h>
1655
1656 #include <ddb/ddb.h>
1657
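/*
 * Helper for the DDB checks below: returns non-zero if 'object' backs any
 * entry of 'map', descending into submaps and following each entry's
 * backing-object chain.  A NULL entry means "check every entry in the map".
 */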
1658 static int
1659 _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
1660 {
1661         vm_map_t tmpm;
1662         vm_map_entry_t tmpe;
1663         vm_object_t obj;
1664         int entcount;
1665
1666         if (map == 0)
1667                 return 0;
1668
1669         if (entry == 0) {
1670                 tmpe = map->header.next;
1671                 entcount = map->nentries;
1672                 while (entcount-- && (tmpe != &map->header)) {
1673                         if (_vm_object_in_map(map, object, tmpe)) {
1674                                 return 1;
1675                         }
1676                         tmpe = tmpe->next;
1677                 }
1678         } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
1679                 tmpm = entry->object.sub_map;
1680                 tmpe = tmpm->header.next;
1681                 entcount = tmpm->nentries;
1682                 while (entcount-- && tmpe != &tmpm->header) {
1683                         if (_vm_object_in_map(tmpm, object, tmpe)) {
1684                                 return 1;
1685                         }
1686                         tmpe = tmpe->next;
1687                 }
1688         } else if ((obj = entry->object.vm_object) != NULL) {
1689                 for (; obj; obj = obj->backing_object)
1690                         if (obj == object) {
1691                                 return 1;
1692                         }
1693         }
1694         return 0;
1695 }
1696
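/*
 * Returns non-zero if the object is mapped anywhere: in some process's
 * vmspace or in one of the kernel's own maps (kernel_map, kmem_map,
 * pager_map, buffer_map).
 */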
1697 static int
1698 vm_object_in_map(vm_object_t object)
1699 {
1700         struct proc *p;
1701
1702         /* sx_slock(&allproc_lock); */
1703         LIST_FOREACH(p, &allproc, p_list) {
1704                 if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
1705                         continue;
1706                 if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
1707                         /* sx_sunlock(&allproc_lock); */
1708                         return 1;
1709                 }
1710         }
1711         /* sx_sunlock(&allproc_lock); */
1712         if (_vm_object_in_map(kernel_map, object, 0))
1713                 return 1;
1714         if (_vm_object_in_map(kmem_map, object, 0))
1715                 return 1;
1716         if (_vm_object_in_map(pager_map, object, 0))
1717                 return 1;
1718         if (_vm_object_in_map(buffer_map, object, 0))
1719                 return 1;
1720         return 0;
1721 }
1722
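/*
 * DDB "show vmochk": sanity-check every handle-less default or swap
 * object, reporting any that have a zero reference count or that are not
 * mapped anywhere.
 */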
1723 DB_SHOW_COMMAND(vmochk, vm_object_check)
1724 {
1725         vm_object_t object;
1726
1727         /*
1728          * make sure that internal objs are in a map somewhere
1729          * and none have zero ref counts.
1730          */
1731         TAILQ_FOREACH(object, &vm_object_list, object_list) {
1732                 if (object->handle == NULL &&
1733                     (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
1734                         if (object->ref_count == 0) {
1735                                 db_printf("vmochk: internal obj has zero ref count: %ld\n",
1736                                         (long)object->size);
1737                         }
1738                         if (!vm_object_in_map(object)) {
1739                                 db_printf(
1740                         "vmochk: internal obj is not in a map: "
1741                         "ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
1742                                     object->ref_count, (u_long)object->size, 
1743                                     (u_long)object->size,
1744                                     (void *)object->backing_object);
1745                         }
1746                 }
1747         }
1748 }
1749
1750 /*
1751  *      vm_object_print:        [ debug ]
1752  */
1753 DB_SHOW_COMMAND(object, vm_object_print_static)
1754 {
1755         /* XXX convert args. */
1756         vm_object_t object = (vm_object_t)addr;
1757         boolean_t full = have_addr;
1758
1759         vm_page_t p;
1760
1761         /* XXX count is an (unused) arg.  Avoid shadowing it. */
1762 #define count   was_count
1763
1764         int count;
1765
1766         if (object == NULL)
1767                 return;
1768
1769         db_iprintf(
1770             "Object %p: type=%d, size=0x%lx, res=%d, ref=%d, flags=0x%x\n",
1771             object, (int)object->type, (u_long)object->size,
1772             object->resident_page_count, object->ref_count, object->flags);
1773         /*
1774          * XXX no %qd in kernel.  Truncate object->backing_object_offset.
1775          */
1776         db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%lx\n",
1777             object->shadow_count, 
1778             object->backing_object ? object->backing_object->ref_count : 0,
1779             object->backing_object, (long)object->backing_object_offset);
1780
1781         if (!full)
1782                 return;
1783
1784         db_indent += 2;
1785         count = 0;
1786         TAILQ_FOREACH(p, &object->memq, listq) {
1787                 if (count == 0)
1788                         db_iprintf("memory:=");
1789                 else if (count == 6) {
1790                         db_printf("\n");
1791                         db_iprintf(" ...");
1792                         count = 0;
1793                 } else
1794                         db_printf(",");
1795                 count++;
1796
1797                 db_printf("(off=0x%lx,page=0x%lx)",
1798                     (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p));
1799         }
1800         if (count != 0)
1801                 db_printf("\n");
1802         db_indent -= 2;
1803 }
1804
1805 /* XXX. */
1806 #undef count
1807
1808 /* XXX need this non-static entry for calling from vm_map_print. */
1809 void
1810 vm_object_print(
1811         /* db_expr_t */ long addr,
1812         boolean_t have_addr,
1813         /* db_expr_t */ long count,
1814         char *modif)
1815 {
1816         vm_object_print_static(addr, have_addr, count, modif);
1817 }
1818
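/*
 * DDB "show vmopag": for each object (looking at its first 128 pages at
 * most), print runs of physically contiguous resident pages as
 * index/run-length/starting-physical-address triples, pausing after each
 * screenful of output.
 */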
1819 DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
1820 {
1821         vm_object_t object;
1822         int nl = 0;
1823         int c;
1824
1825         TAILQ_FOREACH(object, &vm_object_list, object_list) {
1826                 vm_pindex_t idx, fidx;
1827                 vm_pindex_t osize;
1828                 vm_offset_t pa = -1, padiff;
1829                 int rcount;
1830                 vm_page_t m;
1831
1832                 db_printf("new object: %p\n", (void *)object);
1833                 if (nl > 18) {
1834                         c = cngetc();
1835                         if (c != ' ')
1836                                 return;
1837                         nl = 0;
1838                 }
1839                 nl++;
1840                 rcount = 0;
1841                 fidx = 0;
1842                 osize = object->size;
1843                 if (osize > 128)
1844                         osize = 128;
1845                 for (idx = 0; idx < osize; idx++) {
1846                         m = vm_page_lookup(object, idx);
1847                         if (m == NULL) {
1848                                 if (rcount) {
1849                                         db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
1850                                                 (long)fidx, rcount, (long)pa);
1851                                         if (nl > 18) {
1852                                                 c = cngetc();
1853                                                 if (c != ' ')
1854                                                         return;
1855                                                 nl = 0;
1856                                         }
1857                                         nl++;
1858                                         rcount = 0;
1859                                 }
1860                                 continue;
1861                         }
1862
1863
1864                         if (rcount &&
1865                                 (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
1866                                 ++rcount;
1867                                 continue;
1868                         }
1869                         if (rcount) {
1870                                 padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
1871                                 padiff >>= PAGE_SHIFT;
1872                                 padiff &= PQ_L2_MASK;
1873                                 if (padiff == 0) {
1874                                         pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
1875                                         ++rcount;
1876                                         continue;
1877                                 }
1878                                 db_printf(" index(%ld)run(%d)pa(0x%lx)",
1879                                         (long)fidx, rcount, (long)pa);
1880                                 db_printf("pd(%ld)\n", (long)padiff);
1881                                 if (nl > 18) {
1882                                         c = cngetc();
1883                                         if (c != ' ')
1884                                                 return;
1885                                         nl = 0;
1886                                 }
1887                                 nl++;
1888                         }
1889                         fidx = idx;
1890                         pa = VM_PAGE_TO_PHYS(m);
1891                         rcount = 1;
1892                 }
1893                 if (rcount) {
1894                         db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
1895                                 (long)fidx, rcount, (long)pa);
1896                         if (nl > 18) {
1897                                 c = cngetc();
1898                                 if (c != ' ')
1899                                         return;
1900                                 nl = 0;
1901                         }
1902                         nl++;
1903                 }
1904         }
1905 }
1906 #endif /* DDB */