sys/vm/vm_object.c

   1 /*-
   2  * Copyright (c) 1991, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * This code is derived from software contributed to Berkeley by
   6  * The Mach Operating System project at Carnegie-Mellon University.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  * 4. Neither the name of the University nor the names of its contributors
  17  *    may be used to endorse or promote products derived from this software
  18  *    without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  *
  32  *      from: @(#)vm_object.c   8.5 (Berkeley) 3/22/94
  33  *
  34  *
  35  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  36  * All rights reserved.
  37  *
  38  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  39  *
  40  * Permission to use, copy, modify and distribute this software and
  41  * its documentation is hereby granted, provided that both the copyright
  42  * notice and this permission notice appear in all copies of the
  43  * software, derivative works or modified versions, and any portions
  44  * thereof, and that both notices appear in supporting documentation.
  45  *
  46  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  47  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  48  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  49  *
  50  * Carnegie Mellon requests users of this software to return to
  51  *
  52  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  53  *  School of Computer Science
  54  *  Carnegie Mellon University
  55  *  Pittsburgh PA 15213-3890
  56  *
  57  * any improvements or extensions that they make and grant Carnegie the
  58  * rights to redistribute these changes.
  59  */
  60
  61 /*
  62  *      Virtual memory object module.
  63  */
  64
  65 #include <sys/cdefs.h>
  66 __FBSDID("$FreeBSD$");
  67
  68 #include <sys/param.h>
  69 #include <sys/systm.h>
  70 #include <sys/lock.h>
  71 #include <sys/mman.h>
  72 #include <sys/mount.h>
  73 #include <sys/kernel.h>
  74 #include <sys/sysctl.h>
  75 #include <sys/mutex.h>
  76 #include <sys/proc.h>           /* for curproc, pageproc */
  77 #include <sys/socket.h>
  78 #include <sys/vnode.h>
  79 #include <sys/vmmeter.h>
  80 #include <sys/sx.h>
  81
  82 #include <vm/vm.h>
  83 #include <vm/vm_param.h>
  84 #include <vm/pmap.h>
  85 #include <vm/vm_map.h>
  86 #include <vm/vm_object.h>
  87 #include <vm/vm_page.h>
  88 #include <vm/vm_pageout.h>
  89 #include <vm/vm_pager.h>
  90 #include <vm/swap_pager.h>
  91 #include <vm/vm_kern.h>
  92 #include <vm/vm_extern.h>
  93 #include <vm/uma.h>
  94
  95 #define EASY_SCAN_FACTOR       8
  96
  97 #define MSYNC_FLUSH_HARDSEQ     0x01
  98 #define MSYNC_FLUSH_SOFTSEQ     0x02
  99
 100 /*
 101  * msync / VM object flushing optimizations
 102  */
 103 static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ | MSYNC_FLUSH_SOFTSEQ;
 104 SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags,
 105         CTLFLAG_RW, &msync_flush_flags, 0, "");
 106
 107 static int old_msync;
 108 SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
 109     "Use old (insecure) msync behavior");
 110
 111 static void     vm_object_qcollapse(vm_object_t object);
 112 static int      vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags);
 113
 114 /*
 115  *      Virtual memory objects maintain the actual data
 116  *      associated with allocated virtual memory.  A given
 117  *      page of memory exists within exactly one object.
 118  *
 119  *      An object is only deallocated when all "references"
 120  *      are given up.  Only one "reference" to a given
 121  *      region of an object should be writeable.
 122  *
 123  *      Associated with each object is a list of all resident
 124  *      memory pages belonging to that object; this list is
 125  *      maintained by the "vm_page" module, and locked by the object's
 126  *      lock.
 127  *
 128  *      Each object also records a "pager" routine which is
 129  *      used to retrieve (and store) pages to the proper backing
 130  *      storage.  In addition, objects may be backed by other
 131  *      objects from which they were virtual-copied.
 132  *
 133  *      The only items within the object structure which are
 134  *      modified after time of creation are:
 135  *              reference count         locked by object's lock
 136  *              pager routine           locked by object's lock
 137  *
 138  */
 139
 140 struct object_q vm_object_list;
 141 struct mtx vm_object_list_mtx;  /* lock for object list and count */
 142
 143 struct vm_object kernel_object_store;
 144 struct vm_object kmem_object_store;
 145
 146 static long object_collapses;
 147 static long object_bypasses;
 148
 149 /*
 150  * next_index determines the page color that is assigned to the next
 151  * allocated object.  Accesses to next_index are not synchronized
 152  * because the effects of two or more object allocations using
 153  * next_index simultaneously are inconsequential.  At any given time,
 154  * numerous objects have the same page color.
 155  */
 156 static int next_index;
 157
 158 static uma_zone_t obj_zone;
 159
 160 static int vm_object_zinit(void *mem, int size, int flags);
 161
 162 #ifdef INVARIANTS
 163 static void vm_object_zdtor(void *mem, int size, void *arg);
 164
 165 static void
 166 vm_object_zdtor(void *mem, int size, void *arg)
 167 {
 168         vm_object_t object;
 169
 170         object = (vm_object_t)mem;
 171         KASSERT(TAILQ_EMPTY(&object->memq),
 172             ("object %p has resident pages",
 173             object));
 174         KASSERT(object->paging_in_progress == 0,
 175             ("object %p paging_in_progress = %d",
 176             object, object->paging_in_progress));
 177         KASSERT(object->resident_page_count == 0,
 178             ("object %p resident_page_count = %d",
 179             object, object->resident_page_count));
 180         KASSERT(object->shadow_count == 0,
 181             ("object %p shadow_count = %d",
 182             object, object->shadow_count));
 183 }
 184 #endif
 185
 186 static int
 187 vm_object_zinit(void *mem, int size, int flags)
 188 {
 189         vm_object_t object;
 190
 191         object = (vm_object_t)mem;
 192         bzero(&object->mtx, sizeof(object->mtx));
 193         VM_OBJECT_LOCK_INIT(object, "standard object");
 194
 195         /* These are true for any object that has been freed */
 196         object->paging_in_progress = 0;
 197         object->resident_page_count = 0;
 198         object->shadow_count = 0;
 199         return (0);
 200 }
 201
 202 void
 203 _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
 204 {
 205         int incr;
 206
 207         TAILQ_INIT(&object->memq);
 208         LIST_INIT(&object->shadow_head);
 209
 210         object->root = NULL;
 211         object->type = type;
 212         object->size = size;
 213         object->generation = 1;
 214         object->ref_count = 1;
 215         object->flags = 0;
 216         if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
 217                 object->flags = OBJ_ONEMAPPING;
 218         if (size > (PQ_L2_SIZE / 3 + PQ_PRIME1))
 219                 incr = PQ_L2_SIZE / 3 + PQ_PRIME1;
 220         else
 221                 incr = size;
 222         object->pg_color = next_index;
 223         next_index = (object->pg_color + incr) & PQ_L2_MASK;
 224         object->handle = NULL;
 225         object->backing_object = NULL;
 226         object->backing_object_offset = (vm_ooffset_t) 0;
 227
 228         mtx_lock(&vm_object_list_mtx);
 229         TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
 230         mtx_unlock(&vm_object_list_mtx);
 231 }
 232
 233 /*
 234  *      vm_object_init:
 235  *
 236  *      Initialize the VM objects module.
 237  */
 238 void
 239 vm_object_init(void)
 240 {
 241         TAILQ_INIT(&vm_object_list);
 242         mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);
 243
 244         VM_OBJECT_LOCK_INIT(&kernel_object_store, "kernel object");
 245         _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
 246             kernel_object);
 247
 248         VM_OBJECT_LOCK_INIT(&kmem_object_store, "kmem object");
 249         _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
 250             kmem_object);
 251
 252         /*
 253          * The lock portion of struct vm_object must be type stable due
 254          * to vm_pageout_fallback_object_lock locking a vm object
 255          * without holding any references to it.
 256          */
 257         obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
 258 #ifdef INVARIANTS
 259             vm_object_zdtor,
 260 #else
 261             NULL,
 262 #endif
 263             vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
 264 }
 265
 266 void
 267 vm_object_clear_flag(vm_object_t object, u_short bits)
 268 {
 269
 270         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 271         object->flags &= ~bits;
 272 }
 273
 274 void
 275 vm_object_pip_add(vm_object_t object, short i)
 276 {
 277
 278         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 279         object->paging_in_progress += i;
 280 }
 281
 282 void
 283 vm_object_pip_subtract(vm_object_t object, short i)
 284 {
 285
 286         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 287         object->paging_in_progress -= i;
 288 }
 289
 290 void
 291 vm_object_pip_wakeup(vm_object_t object)
 292 {
 293
 294         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 295         object->paging_in_progress--;
 296         if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
 297                 vm_object_clear_flag(object, OBJ_PIPWNT);
 298                 wakeup(object);
 299         }
 300 }
 301
 302 void
 303 vm_object_pip_wakeupn(vm_object_t object, short i)
 304 {
 305
 306         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 307         if (i)
 308                 object->paging_in_progress -= i;
 309         if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
 310                 vm_object_clear_flag(object, OBJ_PIPWNT);
 311                 wakeup(object);
 312         }
 313 }
 314
 315 void
 316 vm_object_pip_wait(vm_object_t object, char *waitid)
 317 {
 318
 319         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 320         while (object->paging_in_progress) {
 321                 object->flags |= OBJ_PIPWNT;
 322                 msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0);
 323         }
 324 }
 325
 326 /*
 327  *      vm_object_allocate:
 328  *
 329  *      Returns a new object with the given size.
 330  */
 331 vm_object_t
 332 vm_object_allocate(objtype_t type, vm_pindex_t size)
 333 {
 334         vm_object_t object;
 335
 336         object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
 337         _vm_object_allocate(type, size, object);
 338         return (object);
 339 }
 340
 341
 342 /*
 343  *      vm_object_reference:
 344  *
 345  *      Gets another reference to the given object.  Note: OBJ_DEAD
 346  *      objects can be referenced during final cleaning.
 347  */
 348 void
 349 vm_object_reference(vm_object_t object)
 350 {
 351         struct vnode *vp;
 352         int flags;
 353
 354         if (object == NULL)
 355                 return;
 356         VM_OBJECT_LOCK(object);
 357         object->ref_count++;
 358         if (object->type == OBJT_VNODE) {
 359                 vp = object->handle;
 360                 VI_LOCK(vp);
 361                 VM_OBJECT_UNLOCK(object);
 362                 for (flags = LK_INTERLOCK; vget(vp, flags, curthread);
 363                      flags = 0)
 364                         printf("vm_object_reference: delay in vget\n");
 365         } else
 366                 VM_OBJECT_UNLOCK(object);
 367 }
 368
 369 /*
 370  *      vm_object_reference_locked:
 371  *
 372  *      Gets another reference to the given object.
 373  *
 374  *      The object must be locked.
 375  */
 376 void
 377 vm_object_reference_locked(vm_object_t object)
 378 {
 379         struct vnode *vp;
 380
 381         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 382         KASSERT((object->flags & OBJ_DEAD) == 0,
 383             ("vm_object_reference_locked: dead object referenced"));
 384         object->ref_count++;
 385         if (object->type == OBJT_VNODE) {
 386                 vp = object->handle;
 387                 vref(vp);
 388         }
 389 }
 390
 391 /*
 392  * Handle deallocating an object of type OBJT_VNODE.
 393  */
 394 void
 395 vm_object_vndeallocate(vm_object_t object)
 396 {
 397         struct vnode *vp = (struct vnode *) object->handle;
 398
 399         VFS_ASSERT_GIANT(vp->v_mount);
 400         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 401         KASSERT(object->type == OBJT_VNODE,
 402             ("vm_object_vndeallocate: not a vnode object"));
 403         KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
 404 #ifdef INVARIANTS
 405         if (object->ref_count == 0) {
 406                 vprint("vm_object_vndeallocate", vp);
 407                 panic("vm_object_vndeallocate: bad object reference count");
 408         }
 409 #endif
 410
 411         object->ref_count--;
 412         if (object->ref_count == 0) {
 413                 mp_fixme("Unlocked vflag access.");
 414                 vp->v_vflag &= ~VV_TEXT;
 415         }
 416         VM_OBJECT_UNLOCK(object);
 417         /*
 418          * vrele may need a vop lock
 419          */
 420         vrele(vp);
 421 }
 422
 423 /*
 424  *      vm_object_deallocate:
 425  *
 426  *      Release a reference to the specified object,
 427  *      gained either through a vm_object_allocate
 428  *      or a vm_object_reference call.  When all references
 429  *      are gone, storage associated with this object
 430  *      may be relinquished.
 431  *
 432  *      No object may be locked.
 433  */
 434 void
 435 vm_object_deallocate(vm_object_t object)
 436 {
 437         vm_object_t temp;
 438
 439         while (object != NULL) {
 440                 int vfslocked;
 441                 /*
 442                  * In general, the object should be locked when working with
 443                  * its type.  In this case, in order to maintain proper lock
 444                  * ordering, an exception is possible because a vnode-backed
 445                  * object never changes its type.
 446                  */
 447                 vfslocked = 0;
 448                 if (object->type == OBJT_VNODE) {
 449                         struct vnode *vp = (struct vnode *) object->handle;
 450                         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 451                 }
 452                 VM_OBJECT_LOCK(object);
 453                 if (object->type == OBJT_VNODE) {
 454                         vm_object_vndeallocate(object);
 455                         VFS_UNLOCK_GIANT(vfslocked);
 456                         return;
 457                 }
 458
 459                 KASSERT(object->ref_count != 0,
 460                         ("vm_object_deallocate: object deallocated too many times: %d", object->type));
 461
 462                 /*
 463                  * If the reference count goes to 0 we start calling
 464                  * vm_object_terminate() on the object chain.
 465                  * A ref count of 1 may be a special case depending on the
 466                  * shadow count being 0 or 1.
 467                  */
 468                 object->ref_count--;
 469                 if (object->ref_count > 1) {
 470                         VM_OBJECT_UNLOCK(object);
 471                         return;
 472                 } else if (object->ref_count == 1) {
 473                         if (object->shadow_count == 0) {
 474                                 vm_object_set_flag(object, OBJ_ONEMAPPING);
 475                         } else if ((object->shadow_count == 1) &&
 476                             (object->handle == NULL) &&
 477                             (object->type == OBJT_DEFAULT ||
 478                              object->type == OBJT_SWAP)) {
 479                                 vm_object_t robject;
 480
 481                                 robject = LIST_FIRST(&object->shadow_head);
 482                                 KASSERT(robject != NULL,
 483                                     ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
 484                                          object->ref_count,
 485                                          object->shadow_count));
 486                                 if (!VM_OBJECT_TRYLOCK(robject)) {
 487                                         /*
 488                                          * Avoid a potential deadlock.
 489                                          */
 490                                         object->ref_count++;
 491                                         VM_OBJECT_UNLOCK(object);
 492                                         /*
 493                                          * More likely than not the thread
 494                                          * holding robject's lock has lower
 495                                          * priority than the current thread.
 496                                          * Let the lower priority thread run.
 497                                          */
 498                                         tsleep(&proc0, PVM, "vmo_de", 1);
 499                                         continue;
 500                                 }
 501                                 /*
 502                                  * Collapse object into its shadow unless its
 503                                  * shadow is dead.  In that case, object will
 504                                  * be deallocated by the thread that is
 505                                  * deallocating its shadow.
 506                                  */
 507                                 if ((robject->flags & OBJ_DEAD) == 0 &&
 508                                     (robject->handle == NULL) &&
 509                                     (robject->type == OBJT_DEFAULT ||
 510                                      robject->type == OBJT_SWAP)) {
 511
 512                                         robject->ref_count++;
 513 retry:
 514                                         if (robject->paging_in_progress) {
 515                                                 VM_OBJECT_UNLOCK(object);
 516                                                 vm_object_pip_wait(robject,
 517                                                     "objde1");
 518                                                 VM_OBJECT_LOCK(object);
 519                                                 goto retry;
 520                                         } else if (object->paging_in_progress) {
 521                                                 VM_OBJECT_UNLOCK(robject);
 522                                                 object->flags |= OBJ_PIPWNT;
 523                                                 msleep(object,
 524                                                     VM_OBJECT_MTX(object),
 525                                                     PDROP | PVM, "objde2", 0);
 526                                                 VM_OBJECT_LOCK(robject);
 527                                                 VM_OBJECT_LOCK(object);
 528                                                 goto retry;
 529                                         }
 530                                         VM_OBJECT_UNLOCK(object);
 531                                         if (robject->ref_count == 1) {
 532                                                 robject->ref_count--;
 533                                                 object = robject;
 534                                                 goto doterm;
 535                                         }
 536                                         object = robject;
 537                                         vm_object_collapse(object);
 538                                         VM_OBJECT_UNLOCK(object);
 539                                         continue;
 540                                 }
 541                                 VM_OBJECT_UNLOCK(robject);
 542                         }
 543                         VM_OBJECT_UNLOCK(object);
 544                         return;
 545                 }
 546 doterm:
 547                 temp = object->backing_object;
 548                 if (temp != NULL) {
 549                         VM_OBJECT_LOCK(temp);
 550                         LIST_REMOVE(object, shadow_list);
 551                         temp->shadow_count--;
 552                         temp->generation++;
 553                         VM_OBJECT_UNLOCK(temp);
 554                         object->backing_object = NULL;
 555                 }
 556                 /*
 557                  * Don't double-terminate, we could be in a termination
 558                  * recursion due to the terminate having to sync data
 559                  * to disk.
 560                  */
 561                 if ((object->flags & OBJ_DEAD) == 0)
 562                         vm_object_terminate(object);
 563                 else
 564                         VM_OBJECT_UNLOCK(object);
 565                 object = temp;
 566         }
 567 }
 568
 569 /*
 570  *      vm_object_terminate actually destroys the specified object, freeing
 571  *      up all previously used resources.
 572  *
 573  *      The object must be locked.
 574  *      This routine may block.
 575  */
 576 void
 577 vm_object_terminate(vm_object_t object)
 578 {
 579         vm_page_t p;
 580
 581         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 582
 583         /*
 584          * Make sure no one uses us.
 585          */
 586         vm_object_set_flag(object, OBJ_DEAD);
 587
 588         /*
 589          * wait for the pageout daemon to be done with the object
 590          */
 591         vm_object_pip_wait(object, "objtrm");
 592
 593         KASSERT(!object->paging_in_progress,
 594                 ("vm_object_terminate: pageout in progress"));
 595
 596         /*
 597          * Clean and free the pages, as appropriate. All references to the
 598          * object are gone, so we don't need to lock it.
 599          */
 600         if (object->type == OBJT_VNODE) {
 601                 struct vnode *vp = (struct vnode *)object->handle;
 602
 603                 /*
 604                  * Clean pages and flush buffers.
 605                  */
 606                 vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
 607                 VM_OBJECT_UNLOCK(object);
 608
 609                 vinvalbuf(vp, V_SAVE, NULL, 0, 0);
 610
 611                 VM_OBJECT_LOCK(object);
 612         }
 613
 614         KASSERT(object->ref_count == 0,
 615                 ("vm_object_terminate: object with references, ref_count=%d",
 616                 object->ref_count));
 617
 618         /*
 619          * Now free any remaining pages. For internal objects, this also
 620          * removes them from paging queues. Don't free wired pages, just
 621          * remove them from the object.
 622          */
 623         vm_page_lock_queues();
 624         while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
 625                 KASSERT(!p->busy && (p->flags & PG_BUSY) == 0,
 626                         ("vm_object_terminate: freeing busy page %p "
 627                         "p->busy = %d, p->flags %x\n", p, p->busy, p->flags));
 628                 if (p->wire_count == 0) {
 629                         vm_page_free(p);
 630                         cnt.v_pfree++;
 631                 } else {
 632                         vm_page_remove(p);
 633                 }
 634         }
 635         vm_page_unlock_queues();
 636
 637         /*
 638          * Let the pager know object is dead.
 639          */
 640         vm_pager_deallocate(object);
 641         VM_OBJECT_UNLOCK(object);
 642
 643         /*
 644          * Remove the object from the global object list.
 645          */
 646         mtx_lock(&vm_object_list_mtx);
 647         TAILQ_REMOVE(&vm_object_list, object, object_list);
 648         mtx_unlock(&vm_object_list_mtx);
 649
 650         /*
 651          * Free the space for the object.
 652          */
 653         uma_zfree(obj_zone, object);
 654 }
 655
 656 /*
 657  *      vm_object_page_clean
 658  *
 659  *      Clean all dirty pages in the specified range of object.  Leaves page
 660  *      on whatever queue it is currently on.   If NOSYNC is set then do not
 661  *      write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
 662  *      leaving the object dirty.
 663  *
 664  *      When stuffing pages asynchronously, allow clustering.  XXX we need a
 665  *      synchronous clustering mode implementation.
 666  *
 667  *      Odd semantics: if start == end, we clean everything.
 668  *
 669  *      The object must be locked.
 670  */
 671 void
 672 vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int flags)
 673 {
 674         vm_page_t p, np;
 675         vm_pindex_t tstart, tend;
 676         vm_pindex_t pi;
 677         int clearobjflags;
 678         int pagerflags;
 679         int curgeneration;
 680
 681         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 682         if (object->type != OBJT_VNODE ||
 683                 (object->flags & OBJ_MIGHTBEDIRTY) == 0)
 684                 return;
 685
 686         pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
 687         pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;
 688
 689         vm_object_set_flag(object, OBJ_CLEANING);
 690
 691         tstart = start;
 692         if (end == 0) {
 693                 tend = object->size;
 694         } else {
 695                 tend = end;
 696         }
 697
 698         vm_page_lock_queues();
 699         /*
 700          * If the caller is smart and only msync()s a range he knows is
 701          * dirty, we may be able to avoid an object scan.  This results in
 702          * a phenominal improvement in performance.  We cannot do this
 703          * as a matter of course because the object may be huge - e.g.
 704          * the size might be in the gigabytes or terrabytes.
 705          */
 706         if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) {
 707                 vm_pindex_t tscan;
 708                 int scanlimit;
 709                 int scanreset;
 710
 711                 scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
 712                 if (scanreset < 16)
 713                         scanreset = 16;
 714                 pagerflags |= VM_PAGER_IGNORE_CLEANCHK;
 715
 716                 scanlimit = scanreset;
 717                 tscan = tstart;
 718                 while (tscan < tend) {
 719                         curgeneration = object->generation;
 720                         p = vm_page_lookup(object, tscan);
 721                         if (p == NULL || p->valid == 0 ||
 722                             (p->queue - p->pc) == PQ_CACHE) {
 723                                 if (--scanlimit == 0)
 724                                         break;
 725                                 ++tscan;
 726                                 continue;
 727                         }
 728                         vm_page_test_dirty(p);
 729                         if ((p->dirty & p->valid) == 0) {
 730                                 if (--scanlimit == 0)
 731                                         break;
 732                                 ++tscan;
 733                                 continue;
 734                         }
 735                         /*
 736                          * If we have been asked to skip nosync pages and
 737                          * this is a nosync page, we can't continue.
 738                          */
 739                         if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
 740                                 if (--scanlimit == 0)
 741                                         break;
 742                                 ++tscan;
 743                                 continue;
 744                         }
 745                         scanlimit = scanreset;
 746
 747                         /*
 748                          * This returns 0 if it was unable to busy the first
 749                          * page (i.e. had to sleep).
 750                          */
 751                         tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags);
 752                 }
 753
 754                 /*
 755                  * If everything was dirty and we flushed it successfully,
 756                  * and the requested range is not the entire object, we
 757                  * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can
 758                  * return immediately.
 759                  */
 760                 if (tscan >= tend && (tstart || tend < object->size)) {
 761                         vm_page_unlock_queues();
 762                         vm_object_clear_flag(object, OBJ_CLEANING);
 763                         return;
 764                 }
 765                 pagerflags &= ~VM_PAGER_IGNORE_CLEANCHK;
 766         }
 767
 768         /*
 769          * Generally set CLEANCHK interlock and make the page read-only so
 770          * we can then clear the object flags.
 771          *
 772          * However, if this is a nosync mmap then the object is likely to
 773          * stay dirty so do not mess with the page and do not clear the
 774          * object flags.
 775          */
 776         clearobjflags = 1;
 777         TAILQ_FOREACH(p, &object->memq, listq) {
 778                 vm_page_flag_set(p, PG_CLEANCHK);
 779                 if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
 780                         clearobjflags = 0;
 781                 else
 782                         pmap_page_protect(p, VM_PROT_READ);
 783         }
 784
 785         if (clearobjflags && (tstart == 0) && (tend == object->size)) {
 786                 struct vnode *vp;
 787
 788                 vm_object_clear_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
 789                 if (object->type == OBJT_VNODE &&
 790                     (vp = (struct vnode *)object->handle) != NULL) {
 791                         VI_LOCK(vp);
 792                         if (vp->v_iflag & VI_OBJDIRTY)
 793                                 vp->v_iflag &= ~VI_OBJDIRTY;
 794                         VI_UNLOCK(vp);
 795                 }
 796         }
 797
 798 rescan:
 799         curgeneration = object->generation;
 800
 801         for (p = TAILQ_FIRST(&object->memq); p; p = np) {
 802                 int n;
 803
 804                 np = TAILQ_NEXT(p, listq);
 805
 806 again:
 807                 pi = p->pindex;
 808                 if (((p->flags & PG_CLEANCHK) == 0) ||
 809                         (pi < tstart) || (pi >= tend) ||
 810                         (p->valid == 0) ||
 811                         ((p->queue - p->pc) == PQ_CACHE)) {
 812                         vm_page_flag_clear(p, PG_CLEANCHK);
 813                         continue;
 814                 }
 815
 816                 vm_page_test_dirty(p);
 817                 if ((p->dirty & p->valid) == 0) {
 818                         vm_page_flag_clear(p, PG_CLEANCHK);
 819                         continue;
 820                 }
 821
 822                 /*
 823                  * If we have been asked to skip nosync pages and this is a
 824                  * nosync page, skip it.  Note that the object flags were
 825                  * not cleared in this case so we do not have to set them.
 826                  */
 827                 if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
 828                         vm_page_flag_clear(p, PG_CLEANCHK);
 829                         continue;
 830                 }
 831
 832                 n = vm_object_page_collect_flush(object, p,
 833                         curgeneration, pagerflags);
 834                 if (n == 0)
 835                         goto rescan;
 836
 837                 if (object->generation != curgeneration)
 838                         goto rescan;
 839
 840                 /*
 841                  * Try to optimize the next page.  If we can't we pick up
 842                  * our (random) scan where we left off.
 843                  */
 844                 if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) {
 845                         if ((p = vm_page_lookup(object, pi + n)) != NULL)
 846                                 goto again;
 847                 }
 848         }
 849         vm_page_unlock_queues();
 850 #if 0
 851         VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
 852 #endif
 853
 854         vm_object_clear_flag(object, OBJ_CLEANING);
 855         return;
 856 }
 857
 858 static int
 859 vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags)
 860 {
 861         int runlen;
 862         int maxf;
 863         int chkb;
 864         int maxb;
 865         int i;
 866         vm_pindex_t pi;
 867         vm_page_t maf[vm_pageout_page_count];
 868         vm_page_t mab[vm_pageout_page_count];
 869         vm_page_t ma[vm_pageout_page_count];
 870
 871         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 872         pi = p->pindex;
 873         while (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) {
 874                 vm_page_lock_queues();
 875                 if (object->generation != curgeneration) {
 876                         return(0);
 877                 }
 878         }
 879         maxf = 0;
 880         for(i = 1; i < vm_pageout_page_count; i++) {
 881                 vm_page_t tp;
 882
 883                 if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
 884                         if ((tp->flags & PG_BUSY) ||
 885                                 ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
 886                                  (tp->flags & PG_CLEANCHK) == 0) ||
 887                                 (tp->busy != 0))
 888                                 break;
 889                         if((tp->queue - tp->pc) == PQ_CACHE) {
 890                                 vm_page_flag_clear(tp, PG_CLEANCHK);
 891                                 break;
 892                         }
 893                         vm_page_test_dirty(tp);
 894                         if ((tp->dirty & tp->valid) == 0) {
 895                                 vm_page_flag_clear(tp, PG_CLEANCHK);
 896                                 break;
 897                         }
 898                         maf[ i - 1 ] = tp;
 899                         maxf++;
 900                         continue;
 901                 }
 902                 break;
 903         }
 904
 905         maxb = 0;
 906         chkb = vm_pageout_page_count -  maxf;
 907         if (chkb) {
 908                 for(i = 1; i < chkb;i++) {
 909                         vm_page_t tp;
 910
 911                         if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
 912                                 if ((tp->flags & PG_BUSY) ||
 913                                         ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
 914                                          (tp->flags & PG_CLEANCHK) == 0) ||
 915                                         (tp->busy != 0))
 916                                         break;
 917                                 if ((tp->queue - tp->pc) == PQ_CACHE) {
 918                                         vm_page_flag_clear(tp, PG_CLEANCHK);
 919                                         break;
 920                                 }
 921                                 vm_page_test_dirty(tp);
 922                                 if ((tp->dirty & tp->valid) == 0) {
 923                                         vm_page_flag_clear(tp, PG_CLEANCHK);
 924                                         break;
 925                                 }
 926                                 mab[ i - 1 ] = tp;
 927                                 maxb++;
 928                                 continue;
 929                         }
 930                         break;
 931                 }
 932         }
 933
 934         for(i = 0; i < maxb; i++) {
 935                 int index = (maxb - i) - 1;
 936                 ma[index] = mab[i];
 937                 vm_page_flag_clear(ma[index], PG_CLEANCHK);
 938         }
 939         vm_page_flag_clear(p, PG_CLEANCHK);
 940         ma[maxb] = p;
 941         for(i = 0; i < maxf; i++) {
 942                 int index = (maxb + i) + 1;
 943                 ma[index] = maf[i];
 944                 vm_page_flag_clear(ma[index], PG_CLEANCHK);
 945         }
 946         runlen = maxb + maxf + 1;
 947
 948         vm_pageout_flush(ma, runlen, pagerflags);
 949         for (i = 0; i < runlen; i++) {
 950                 if (ma[i]->valid & ma[i]->dirty) {
 951                         pmap_page_protect(ma[i], VM_PROT_READ);
 952                         vm_page_flag_set(ma[i], PG_CLEANCHK);
 953
 954                         /*
 955                          * maxf will end up being the actual number of pages
 956                          * we wrote out contiguously, non-inclusive of the
 957                          * first page.  We do not count look-behind pages.
 958                          */
 959                         if (i >= maxb + 1 && (maxf > i - maxb - 1))
 960                                 maxf = i - maxb - 1;
 961                 }
 962         }
 963         return(maxf + 1);
 964 }
 965
 966 /*
 967  * Note that there is absolutely no sense in writing out
 968  * anonymous objects, so we track down the vnode object
 969  * to write out.
 970  * We invalidate (remove) all pages from the address space
 971  * for semantic correctness.
 972  *
 973  * Note: certain anonymous maps, such as MAP_NOSYNC maps,
 974  * may start out with a NULL object.
 975  */
 976 void
 977 vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
 978     boolean_t syncio, boolean_t invalidate)
 979 {
 980         vm_object_t backing_object;
 981         struct vnode *vp;
 982         int flags;
 983
 984         if (object == NULL)
 985                 return;
 986         VM_OBJECT_LOCK(object);
 987         while ((backing_object = object->backing_object) != NULL) {
 988                 VM_OBJECT_LOCK(backing_object);
 989                 offset += object->backing_object_offset;
 990                 VM_OBJECT_UNLOCK(object);
 991                 object = backing_object;
 992                 if (object->size < OFF_TO_IDX(offset + size))
 993                         size = IDX_TO_OFF(object->size) - offset;
 994         }
 995         /*
 996          * Flush pages if writing is allowed, invalidate them
 997          * if invalidation requested.  Pages undergoing I/O
 998          * will be ignored by vm_object_page_remove().
 999          *
1000          * We cannot lock the vnode and then wait for paging
1001          * to complete without deadlocking against vm_fault.
1002          * Instead we simply call vm_object_page_remove() and
1003          * allow it to block internally on a page-by-page
1004          * basis when it encounters pages undergoing async
1005          * I/O.
1006          */
1007         if (object->type == OBJT_VNODE &&
1008             (object->flags & OBJ_MIGHTBEDIRTY) != 0) {
1009                 int vfslocked;
1010                 vp = object->handle;
1011                 VM_OBJECT_UNLOCK(object);
1012                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1013                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
1014                 flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1015                 flags |= invalidate ? OBJPC_INVAL : 0;
1016                 VM_OBJECT_LOCK(object);
1017                 vm_object_page_clean(object,
1018                     OFF_TO_IDX(offset),
1019                     OFF_TO_IDX(offset + size + PAGE_MASK),
1020                     flags);
1021                 VM_OBJECT_UNLOCK(object);
1022                 VOP_UNLOCK(vp, 0, curthread);
1023                 VFS_UNLOCK_GIANT(vfslocked);
1024                 VM_OBJECT_LOCK(object);
1025         }
1026         if ((object->type == OBJT_VNODE ||
1027              object->type == OBJT_DEVICE) && invalidate) {
1028                 boolean_t purge;
1029                 purge = old_msync || (object->type == OBJT_DEVICE);
1030                 vm_object_page_remove(object,
1031                     OFF_TO_IDX(offset),
1032                     OFF_TO_IDX(offset + size + PAGE_MASK),
1033                     purge ? FALSE : TRUE);
1034         }
1035         VM_OBJECT_UNLOCK(object);
1036 }
1037
1038 /*
1039  *      vm_object_madvise:
1040  *
1041  *      Implements the madvise function at the object/page level.
1042  *
1043  *      MADV_WILLNEED   (any object)
1044  *
1045  *          Activate the specified pages if they are resident.
1046  *
1047  *      MADV_DONTNEED   (any object)
1048  *
1049  *          Deactivate the specified pages if they are resident.
1050  *
1051  *      MADV_FREE       (OBJT_DEFAULT/OBJT_SWAP objects,
1052  *                       OBJ_ONEMAPPING only)
1053  *
1054  *          Deactivate and clean the specified pages if they are
1055  *          resident.  This permits the process to reuse the pages
1056  *          without faulting or the kernel to reclaim the pages
1057  *          without I/O.
1058  */
1059 void
1060 vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
1061 {
1062         vm_pindex_t end, tpindex;
1063         vm_object_t backing_object, tobject;
1064         vm_page_t m;
1065
1066         if (object == NULL)
1067                 return;
1068         VM_OBJECT_LOCK(object);
1069         end = pindex + count;
1070         /*
1071          * Locate and adjust resident pages
1072          */
1073         for (; pindex < end; pindex += 1) {
1074 relookup:
1075                 tobject = object;
1076                 tpindex = pindex;
1077 shadowlookup:
1078                 /*
1079                  * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
1080                  * and those pages must be OBJ_ONEMAPPING.
1081                  */
1082                 if (advise == MADV_FREE) {
1083                         if ((tobject->type != OBJT_DEFAULT &&
1084                              tobject->type != OBJT_SWAP) ||
1085                             (tobject->flags & OBJ_ONEMAPPING) == 0) {
1086                                 goto unlock_tobject;
1087                         }
1088                 }
1089                 m = vm_page_lookup(tobject, tpindex);
1090                 if (m == NULL) {
1091                         /*
1092                          * There may be swap even if there is no backing page
1093                          */
1094                         if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1095                                 swap_pager_freespace(tobject, tpindex, 1);
1096                         /*
1097                          * next object
1098                          */
1099                         backing_object = tobject->backing_object;
1100                         if (backing_object == NULL)
1101                                 goto unlock_tobject;
1102                         VM_OBJECT_LOCK(backing_object);
1103                         tpindex += OFF_TO_IDX(tobject->backing_object_offset);
1104                         if (tobject != object)
1105                                 VM_OBJECT_UNLOCK(tobject);
1106                         tobject = backing_object;
1107                         goto shadowlookup;
1108                 }
1109                 /*
1110                  * If the page is busy or not in a normal active state,
1111                  * we skip it.  If the page is not managed there are no
1112                  * page queues to mess with.  Things can break if we mess
1113                  * with pages in any of the below states.
1114                  */
1115                 vm_page_lock_queues();
1116                 if (m->hold_count ||
1117                     m->wire_count ||
1118                     (m->flags & PG_UNMANAGED) ||
1119                     m->valid != VM_PAGE_BITS_ALL) {
1120                         vm_page_unlock_queues();
1121                         goto unlock_tobject;
1122                 }
1123                 if ((m->flags & PG_BUSY) || m->busy) {
1124                         vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
1125                         if (object != tobject)
1126                                 VM_OBJECT_UNLOCK(object);
1127                         VM_OBJECT_UNLOCK(tobject);
1128                         msleep(m, &vm_page_queue_mtx, PDROP | PVM, "madvpo", 0);
1129                         VM_OBJECT_LOCK(object);
1130                         goto relookup;
1131                 }
1132                 if (advise == MADV_WILLNEED) {
1133                         vm_page_activate(m);
1134                 } else if (advise == MADV_DONTNEED) {
1135                         vm_page_dontneed(m);
1136                 } else if (advise == MADV_FREE) {
1137                         /*
1138                          * Mark the page clean.  This will allow the page
1139                          * to be freed up by the system.  However, such pages
1140                          * are often reused quickly by malloc()/free()
1141                          * so we do not do anything that would cause
1142                          * a page fault if we can help it.
1143                          *
1144                          * Specifically, we do not try to actually free
1145                          * the page now nor do we try to put it in the
1146                          * cache (which would cause a page fault on reuse).
1147                          *
1148                          * But we do make the page is freeable as we
1149                          * can without actually taking the step of unmapping
1150                          * it.
1151                          */
1152                         pmap_clear_modify(m);
1153                         m->dirty = 0;
1154                         m->act_count = 0;
1155                         vm_page_dontneed(m);
1156                 }
1157                 vm_page_unlock_queues();
1158                 if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1159                         swap_pager_freespace(tobject, tpindex, 1);
1160 unlock_tobject:
1161                 if (tobject != object)
1162                         VM_OBJECT_UNLOCK(tobject);
1163         }
1164         VM_OBJECT_UNLOCK(object);
1165 }
1166
1167 /*
1168  *      vm_object_shadow:
1169  *
1170  *      Create a new object which is backed by the
1171  *      specified existing object range.  The source
1172  *      object reference is deallocated.
1173  *
1174  *      The new object and offset into that object
1175  *      are returned in the source parameters.
1176  */
1177 void
1178 vm_object_shadow(
1179         vm_object_t *object,    /* IN/OUT */
1180         vm_ooffset_t *offset,   /* IN/OUT */
1181         vm_size_t length)
1182 {
1183         vm_object_t source;
1184         vm_object_t result;
1185
1186         source = *object;
1187
1188         /*
1189          * Don't create the new object if the old object isn't shared.
1190          */
1191         if (source != NULL) {
1192                 VM_OBJECT_LOCK(source);
1193                 if (source->ref_count == 1 &&
1194                     source->handle == NULL &&
1195                     (source->type == OBJT_DEFAULT ||
1196                      source->type == OBJT_SWAP)) {
1197                         VM_OBJECT_UNLOCK(source);
1198                         return;
1199                 }
1200                 VM_OBJECT_UNLOCK(source);
1201         }
1202
1203         /*
1204          * Allocate a new object with the given length.
1205          */
1206         result = vm_object_allocate(OBJT_DEFAULT, length);
1207
1208         /*
1209          * The new object shadows the source object, adding a reference to it.
1210          * Our caller changes his reference to point to the new object,
1211          * removing a reference to the source object.  Net result: no change
1212          * of reference count.
1213          *
1214          * Try to optimize the result object's page color when shadowing
1215          * in order to maintain page coloring consistency in the combined
1216          * shadowed object.
1217          */
1218         result->backing_object = source;
1219         /*
1220          * Store the offset into the source object, and fix up the offset into
1221          * the new object.
1222          */
1223         result->backing_object_offset = *offset;
1224         if (source != NULL) {
1225                 VM_OBJECT_LOCK(source);
1226                 LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
1227                 source->shadow_count++;
1228                 source->generation++;
1229                 if (length < source->size)
1230                         length = source->size;
1231                 if (length > PQ_L2_SIZE / 3 + PQ_PRIME1 ||
1232                     source->generation > 1)
1233                         length = PQ_L2_SIZE / 3 + PQ_PRIME1;
1234                 result->pg_color = (source->pg_color +
1235                     length * source->generation) & PQ_L2_MASK;
1236                 result->flags |= source->flags & OBJ_NEEDGIANT;
1237                 VM_OBJECT_UNLOCK(source);
1238                 next_index = (result->pg_color + PQ_L2_SIZE / 3 + PQ_PRIME1) &
1239                     PQ_L2_MASK;
1240         }
1241
1242
1243         /*
1244          * Return the new things
1245          */
1246         *offset = 0;
1247         *object = result;
1248 }
1249
1250 /*
1251  *      vm_object_split:
1252  *
1253  * Split the pages in a map entry into a new object.  This affords
1254  * easier removal of unused pages, and keeps object inheritance from
1255  * being a negative impact on memory usage.
1256  */
1257 void
1258 vm_object_split(vm_map_entry_t entry)
1259 {
1260         vm_page_t m;
1261         vm_object_t orig_object, new_object, source;
1262         vm_pindex_t offidxstart, offidxend;
1263         vm_size_t idx, size;
1264
1265         orig_object = entry->object.vm_object;
1266         if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
1267                 return;
1268         if (orig_object->ref_count <= 1)
1269                 return;
1270         VM_OBJECT_UNLOCK(orig_object);
1271
1272         offidxstart = OFF_TO_IDX(entry->offset);
1273         offidxend = offidxstart + OFF_TO_IDX(entry->end - entry->start);
1274         size = offidxend - offidxstart;
1275
1276         /*
1277          * If swap_pager_copy() is later called, it will convert new_object
1278          * into a swap object.
1279          */
1280         new_object = vm_object_allocate(OBJT_DEFAULT, size);
1281
1282         VM_OBJECT_LOCK(new_object);
1283         VM_OBJECT_LOCK(orig_object);
1284         source = orig_object->backing_object;
1285         if (source != NULL) {
1286                 VM_OBJECT_LOCK(source);
1287                 LIST_INSERT_HEAD(&source->shadow_head,
1288                                   new_object, shadow_list);
1289                 source->shadow_count++;
1290                 source->generation++;
1291                 vm_object_reference_locked(source);     /* for new_object */
1292                 vm_object_clear_flag(source, OBJ_ONEMAPPING);
1293                 VM_OBJECT_UNLOCK(source);
1294                 new_object->backing_object_offset =
1295                         orig_object->backing_object_offset + entry->offset;
1296                 new_object->backing_object = source;
1297         }
1298         new_object->flags |= orig_object->flags & OBJ_NEEDGIANT;
1299         vm_page_lock_queues();
1300         for (idx = 0; idx < size; idx++) {
1301         retry:
1302                 m = vm_page_lookup(orig_object, offidxstart + idx);
1303                 if (m == NULL)
1304                         continue;
1305
1306                 /*
1307                  * We must wait for pending I/O to complete before we can
1308                  * rename the page.
1309                  *
1310                  * We do not have to VM_PROT_NONE the page as mappings should
1311                  * not be changed by this operation.
1312                  */
1313                 if ((m->flags & PG_BUSY) || m->busy) {
1314                         vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
1315                         VM_OBJECT_UNLOCK(orig_object);
1316                         VM_OBJECT_UNLOCK(new_object);
1317                         msleep(m, &vm_page_queue_mtx, PDROP | PVM, "spltwt", 0);
1318                         VM_OBJECT_LOCK(new_object);
1319                         VM_OBJECT_LOCK(orig_object);
1320                         vm_page_lock_queues();
1321                         goto retry;
1322                 }
1323                 vm_page_rename(m, new_object, idx);
1324                 /* page automatically made dirty by rename and cache handled */
1325                 vm_page_busy(m);
1326         }
1327         vm_page_unlock_queues();
1328         if (orig_object->type == OBJT_SWAP) {
1329                 /*
1330                  * swap_pager_copy() can sleep, in which case the orig_object's
1331                  * and new_object's locks are released and reacquired.
1332                  */
1333                 swap_pager_copy(orig_object, new_object, offidxstart, 0);
1334         }
1335         VM_OBJECT_UNLOCK(orig_object);
1336         vm_page_lock_queues();
1337         TAILQ_FOREACH(m, &new_object->memq, listq)
1338                 vm_page_wakeup(m);
1339         vm_page_unlock_queues();
1340         VM_OBJECT_UNLOCK(new_object);
1341         entry->object.vm_object = new_object;
1342         entry->offset = 0LL;
1343         vm_object_deallocate(orig_object);
1344         VM_OBJECT_LOCK(new_object);
1345 }
1346
1347 #define OBSC_TEST_ALL_SHADOWED  0x0001
1348 #define OBSC_COLLAPSE_NOWAIT    0x0002
1349 #define OBSC_COLLAPSE_WAIT      0x0004
1350
1351 static int
1352 vm_object_backing_scan(vm_object_t object, int op)
1353 {
1354         int r = 1;
1355         vm_page_t p;
1356         vm_object_t backing_object;
1357         vm_pindex_t backing_offset_index;
1358
1359         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1360         VM_OBJECT_LOCK_ASSERT(object->backing_object, MA_OWNED);
1361
1362         backing_object = object->backing_object;
1363         backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
1364
1365         /*
1366          * Initial conditions
1367          */
1368         if (op & OBSC_TEST_ALL_SHADOWED) {
1369                 /*
1370                  * We do not want to have to test for the existence of
1371                  * swap pages in the backing object.  XXX but with the
1372                  * new swapper this would be pretty easy to do.
1373                  *
1374                  * XXX what about anonymous MAP_SHARED memory that hasn't
1375                  * been ZFOD faulted yet?  If we do not test for this, the
1376                  * shadow test may succeed! XXX
1377                  */
1378                 if (backing_object->type != OBJT_DEFAULT) {
1379                         return (0);
1380                 }
1381         }
1382         if (op & OBSC_COLLAPSE_WAIT) {
1383                 vm_object_set_flag(backing_object, OBJ_DEAD);
1384         }
1385
1386         /*
1387          * Our scan
1388          */
1389         p = TAILQ_FIRST(&backing_object->memq);
1390         while (p) {
1391                 vm_page_t next = TAILQ_NEXT(p, listq);
1392                 vm_pindex_t new_pindex = p->pindex - backing_offset_index;
1393
1394                 if (op & OBSC_TEST_ALL_SHADOWED) {
1395                         vm_page_t pp;
1396
1397                         /*
1398                          * Ignore pages outside the parent object's range
1399                          * and outside the parent object's mapping of the
1400                          * backing object.
1401                          *
1402                          * note that we do not busy the backing object's
1403                          * page.
1404                          */
1405                         if (
1406                             p->pindex < backing_offset_index ||
1407                             new_pindex >= object->size
1408                         ) {
1409                                 p = next;
1410                                 continue;
1411                         }
1412
1413                         /*
1414                          * See if the parent has the page or if the parent's
1415                          * object pager has the page.  If the parent has the
1416                          * page but the page is not valid, the parent's
1417                          * object pager must have the page.
1418                          *
1419                          * If this fails, the parent does not completely shadow
1420                          * the object and we might as well give up now.
1421                          */
1422
1423                         pp = vm_page_lookup(object, new_pindex);
1424                         if (
1425                             (pp == NULL || pp->valid == 0) &&
1426                             !vm_pager_has_page(object, new_pindex, NULL, NULL)
1427                         ) {
1428                                 r = 0;
1429                                 break;
1430                         }
1431                 }
1432
1433                 /*
1434                  * Check for busy page
1435                  */
1436                 if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
1437                         vm_page_t pp;
1438
1439                         if (op & OBSC_COLLAPSE_NOWAIT) {
1440                                 if ((p->flags & PG_BUSY) ||
1441                                     !p->valid ||
1442                                     p->busy) {
1443                                         p = next;
1444                                         continue;
1445                                 }
1446                         } else if (op & OBSC_COLLAPSE_WAIT) {
1447                                 if ((p->flags & PG_BUSY) || p->busy) {
1448                                         vm_page_lock_queues();
1449                                         vm_page_flag_set(p,
1450                                             PG_WANTED | PG_REFERENCED);
1451                                         VM_OBJECT_UNLOCK(backing_object);
1452                                         VM_OBJECT_UNLOCK(object);
1453                                         msleep(p, &vm_page_queue_mtx,
1454                                             PDROP | PVM, "vmocol", 0);
1455                                         VM_OBJECT_LOCK(object);
1456                                         VM_OBJECT_LOCK(backing_object);
1457                                         /*
1458                                          * If we slept, anything could have
1459                                          * happened.  Since the object is
1460                                          * marked dead, the backing offset
1461                                          * should not have changed so we
1462                                          * just restart our scan.
1463                                          */
1464                                         p = TAILQ_FIRST(&backing_object->memq);
1465                                         continue;
1466                                 }
1467                         }
1468
1469                         KASSERT(
1470                             p->object == backing_object,
1471                             ("vm_object_backing_scan: object mismatch")
1472                         );
1473
1474                         /*
1475                          * Destroy any associated swap
1476                          */
1477                         if (backing_object->type == OBJT_SWAP) {
1478                                 swap_pager_freespace(
1479                                     backing_object,
1480                                     p->pindex,
1481                                     1
1482                                 );
1483                         }
1484
1485                         if (
1486                             p->pindex < backing_offset_index ||
1487                             new_pindex >= object->size
1488                         ) {
1489                                 /*
1490                                  * Page is out of the parent object's range, we
1491                                  * can simply destroy it.
1492                                  */
1493                                 vm_page_lock_queues();
1494                                 KASSERT(!pmap_page_is_mapped(p),
1495                                     ("freeing mapped page %p", p));
1496                                 if (p->wire_count == 0)
1497                                         vm_page_free(p);
1498                                 else
1499                                         vm_page_remove(p);
1500                                 vm_page_unlock_queues();
1501                                 p = next;
1502                                 continue;
1503                         }
1504
1505                         pp = vm_page_lookup(object, new_pindex);
1506                         if (
1507                             pp != NULL ||
1508                             vm_pager_has_page(object, new_pindex, NULL, NULL)
1509                         ) {
1510                                 /*
1511                                  * page already exists in parent OR swap exists
1512                                  * for this location in the parent.  Destroy
1513                                  * the original page from the backing object.
1514                                  *
1515                                  * Leave the parent's page alone
1516                                  */
1517                                 vm_page_lock_queues();
1518                                 KASSERT(!pmap_page_is_mapped(p),
1519                                     ("freeing mapped page %p", p));
1520                                 if (p->wire_count == 0)
1521                                         vm_page_free(p);
1522                                 else
1523                                         vm_page_remove(p);
1524                                 vm_page_unlock_queues();
1525                                 p = next;
1526                                 continue;
1527                         }
1528
1529                         /*
1530                          * Page does not exist in parent, rename the
1531                          * page from the backing object to the main object.
1532                          *
1533                          * If the page was mapped to a process, it can remain
1534                          * mapped through the rename.
1535                          */
1536                         vm_page_lock_queues();
1537                         vm_page_rename(p, object, new_pindex);
1538                         vm_page_unlock_queues();
1539                         /* page automatically made dirty by rename */
1540                 }
1541                 p = next;
1542         }
1543         return (r);
1544 }
1545
1546
1547 /*
1548  * this version of collapse allows the operation to occur earlier and
1549  * when paging_in_progress is true for an object...  This is not a complete
1550  * operation, but should plug 99.9% of the rest of the leaks.
1551  */
1552 static void
1553 vm_object_qcollapse(vm_object_t object)
1554 {
1555         vm_object_t backing_object = object->backing_object;
1556
1557         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1558         VM_OBJECT_LOCK_ASSERT(backing_object, MA_OWNED);
1559
1560         if (backing_object->ref_count != 1)
1561                 return;
1562
1563         vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
1564 }
1565
1566 /*
1567  *      vm_object_collapse:
1568  *
1569  *      Collapse an object with the object backing it.
1570  *      Pages in the backing object are moved into the
1571  *      parent, and the backing object is deallocated.
1572  */
1573 void
1574 vm_object_collapse(vm_object_t object)
1575 {
1576         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1577
1578         while (TRUE) {
1579                 vm_object_t backing_object;
1580
1581                 /*
1582                  * Verify that the conditions are right for collapse:
1583                  *
1584                  * The object exists and the backing object exists.
1585                  */
1586                 if ((backing_object = object->backing_object) == NULL)
1587                         break;
1588
1589                 /*
1590                  * we check the backing object first, because it is most likely
1591                  * not collapsable.
1592                  */
1593                 VM_OBJECT_LOCK(backing_object);
1594                 if (backing_object->handle != NULL ||
1595                     (backing_object->type != OBJT_DEFAULT &&
1596                      backing_object->type != OBJT_SWAP) ||
1597                     (backing_object->flags & OBJ_DEAD) ||
1598                     object->handle != NULL ||
1599                     (object->type != OBJT_DEFAULT &&
1600                      object->type != OBJT_SWAP) ||
1601                     (object->flags & OBJ_DEAD)) {
1602                         VM_OBJECT_UNLOCK(backing_object);
1603                         break;
1604                 }
1605
1606                 if (
1607                     object->paging_in_progress != 0 ||
1608                     backing_object->paging_in_progress != 0
1609                 ) {
1610                         vm_object_qcollapse(object);
1611                         VM_OBJECT_UNLOCK(backing_object);
1612                         break;
1613                 }
1614                 /*
1615                  * We know that we can either collapse the backing object (if
1616                  * the parent is the only reference to it) or (perhaps) have
1617                  * the parent bypass the object if the parent happens to shadow
1618                  * all the resident pages in the entire backing object.
1619                  *
1620                  * This is ignoring pager-backed pages such as swap pages.
1621                  * vm_object_backing_scan fails the shadowing test in this
1622                  * case.
1623                  */
1624                 if (backing_object->ref_count == 1) {
1625                         /*
1626                          * If there is exactly one reference to the backing
1627                          * object, we can collapse it into the parent.
1628                          */
1629                         vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
1630
1631                         /*
1632                          * Move the pager from backing_object to object.
1633                          */
1634                         if (backing_object->type == OBJT_SWAP) {
1635                                 /*
1636                                  * swap_pager_copy() can sleep, in which case
1637                                  * the backing_object's and object's locks are
1638                                  * released and reacquired.
1639                                  */
1640                                 swap_pager_copy(
1641                                     backing_object,
1642                                     object,
1643                                     OFF_TO_IDX(object->backing_object_offset), TRUE);
1644                         }
1645                         /*
1646                          * Object now shadows whatever backing_object did.
1647                          * Note that the reference to
1648                          * backing_object->backing_object moves from within
1649                          * backing_object to within object.
1650                          */
1651                         LIST_REMOVE(object, shadow_list);
1652                         backing_object->shadow_count--;
1653                         backing_object->generation++;
1654                         if (backing_object->backing_object) {
1655                                 VM_OBJECT_LOCK(backing_object->backing_object);
1656                                 LIST_REMOVE(backing_object, shadow_list);
1657                                 LIST_INSERT_HEAD(
1658                                     &backing_object->backing_object->shadow_head,
1659                                     object, shadow_list);
1660                                 /*
1661                                  * The shadow_count has not changed.
1662                                  */
1663                                 backing_object->backing_object->generation++;
1664                                 VM_OBJECT_UNLOCK(backing_object->backing_object);
1665                         }
1666                         object->backing_object = backing_object->backing_object;
1667                         object->backing_object_offset +=
1668                             backing_object->backing_object_offset;
1669
1670                         /*
1671                          * Discard backing_object.
1672                          *
1673                          * Since the backing object has no pages, no pager left,
1674                          * and no object references within it, all that is
1675                          * necessary is to dispose of it.
1676                          */
1677                         KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
1678                         VM_OBJECT_UNLOCK(backing_object);
1679
1680                         mtx_lock(&vm_object_list_mtx);
1681                         TAILQ_REMOVE(
1682                             &vm_object_list,
1683                             backing_object,
1684                             object_list
1685                         );
1686                         mtx_unlock(&vm_object_list_mtx);
1687
1688                         uma_zfree(obj_zone, backing_object);
1689
1690                         object_collapses++;
1691                 } else {
1692                         vm_object_t new_backing_object;
1693
1694                         /*
1695                          * If we do not entirely shadow the backing object,
1696                          * there is nothing we can do so we give up.
1697                          */
1698                         if (vm_object_backing_scan(object, OBSC_TEST_ALL_SHADOWED) == 0) {
1699                                 VM_OBJECT_UNLOCK(backing_object);
1700                                 break;
1701                         }
1702
1703                         /*
1704                          * Make the parent shadow the next object in the
1705                          * chain.  Deallocating backing_object will not remove
1706                          * it, since its reference count is at least 2.
1707                          */
1708                         LIST_REMOVE(object, shadow_list);
1709                         backing_object->shadow_count--;
1710                         backing_object->generation++;
1711
1712                         new_backing_object = backing_object->backing_object;
1713                         if ((object->backing_object = new_backing_object) != NULL) {
1714                                 VM_OBJECT_LOCK(new_backing_object);
1715                                 LIST_INSERT_HEAD(
1716                                     &new_backing_object->shadow_head,
1717                                     object,
1718                                     shadow_list
1719                                 );
1720                                 new_backing_object->shadow_count++;
1721                                 new_backing_object->generation++;
1722                                 vm_object_reference_locked(new_backing_object);
1723                                 VM_OBJECT_UNLOCK(new_backing_object);
1724                                 object->backing_object_offset +=
1725                                         backing_object->backing_object_offset;
1726                         }
1727
1728                         /*
1729                          * Drop the reference count on backing_object. Since
1730                          * its ref_count was at least 2, it will not vanish.
1731                          */
1732                         backing_object->ref_count--;
1733                         VM_OBJECT_UNLOCK(backing_object);
1734                         object_bypasses++;
1735                 }
1736
1737                 /*
1738                  * Try again with this object's new backing object.
1739                  */
1740         }
1741 }
1742
1743 /*
1744  *      vm_object_page_remove:
1745  *
1746  *      Removes all physical pages in the given range from the
1747  *      object's list of pages.  If the range's end is zero, all
1748  *      physical pages from the range's start to the end of the object
1749  *      are deleted.
1750  *
1751  *      The object must be locked.
1752  */
1753 void
1754 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
1755     boolean_t clean_only)
1756 {
1757         vm_page_t p, next;
1758
1759         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1760         if (object->resident_page_count == 0)
1761                 return;
1762
1763         /*
1764          * Since physically-backed objects do not use managed pages, we can't
1765          * remove pages from the object (we must instead remove the page
1766          * references, and then destroy the object).
1767          */
1768         KASSERT(object->type != OBJT_PHYS,
1769             ("attempt to remove pages from a physical object"));
1770
1771         vm_object_pip_add(object, 1);
1772 again:
1773         vm_page_lock_queues();
1774         if ((p = TAILQ_FIRST(&object->memq)) != NULL) {
1775                 if (p->pindex < start) {
1776                         p = vm_page_splay(start, object->root);
1777                         if ((object->root = p)->pindex < start)
1778                                 p = TAILQ_NEXT(p, listq);
1779                 }
1780         }
1781         /*
1782          * Assert: the variable p is either (1) the page with the
1783          * least pindex greater than or equal to the parameter pindex
1784          * or (2) NULL.
1785          */
1786         for (;
1787              p != NULL && (p->pindex < end || end == 0);
1788              p = next) {
1789                 next = TAILQ_NEXT(p, listq);
1790
1791                 if (p->wire_count != 0) {
1792                         pmap_remove_all(p);
1793                         if (!clean_only)
1794                                 p->valid = 0;
1795                         continue;
1796                 }
1797                 if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
1798                         goto again;
1799                 if (clean_only && p->valid) {
1800                         pmap_page_protect(p, VM_PROT_READ | VM_PROT_EXECUTE);
1801                         if (p->valid & p->dirty)
1802                                 continue;
1803                 }
1804                 pmap_remove_all(p);
1805                 vm_page_free(p);
1806         }
1807         vm_page_unlock_queues();
1808         vm_object_pip_wakeup(object);
1809 }
1810
1811 /*
1812  *      Routine:        vm_object_coalesce
1813  *      Function:       Coalesces two objects backing up adjoining
1814  *                      regions of memory into a single object.
1815  *
1816  *      returns TRUE if objects were combined.
1817  *
1818  *      NOTE:   Only works at the moment if the second object is NULL -
1819  *              if it's not, which object do we lock first?
1820  *
1821  *      Parameters:
1822  *              prev_object     First object to coalesce
1823  *              prev_offset     Offset into prev_object
1824  *              prev_size       Size of reference to prev_object
1825  *              next_size       Size of reference to the second object
1826  *
1827  *      Conditions:
1828  *      The object must *not* be locked.
1829  */
1830 boolean_t
1831 vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
1832         vm_size_t prev_size, vm_size_t next_size)
1833 {
1834         vm_pindex_t next_pindex;
1835
1836         if (prev_object == NULL)
1837                 return (TRUE);
1838         VM_OBJECT_LOCK(prev_object);
1839         if (prev_object->type != OBJT_DEFAULT &&
1840             prev_object->type != OBJT_SWAP) {
1841                 VM_OBJECT_UNLOCK(prev_object);
1842                 return (FALSE);
1843         }
1844
1845         /*
1846          * Try to collapse the object first
1847          */
1848         vm_object_collapse(prev_object);
1849
1850         /*
1851          * Can't coalesce if: . more than one reference . paged out . shadows
1852          * another object . has a copy elsewhere (any of which mean that the
1853          * pages not mapped to prev_entry may be in use anyway)
1854          */
1855         if (prev_object->backing_object != NULL) {
1856                 VM_OBJECT_UNLOCK(prev_object);
1857                 return (FALSE);
1858         }
1859
1860         prev_size >>= PAGE_SHIFT;
1861         next_size >>= PAGE_SHIFT;
1862         next_pindex = OFF_TO_IDX(prev_offset) + prev_size;
1863
1864         if ((prev_object->ref_count > 1) &&
1865             (prev_object->size != next_pindex)) {
1866                 VM_OBJECT_UNLOCK(prev_object);
1867                 return (FALSE);
1868         }
1869
1870         /*
1871          * Remove any pages that may still be in the object from a previous
1872          * deallocation.
1873          */
1874         if (next_pindex < prev_object->size) {
1875                 vm_object_page_remove(prev_object,
1876                                       next_pindex,
1877                                       next_pindex + next_size, FALSE);
1878                 if (prev_object->type == OBJT_SWAP)
1879                         swap_pager_freespace(prev_object,
1880                                              next_pindex, next_size);
1881         }
1882
1883         /*
1884          * Extend the object if necessary.
1885          */
1886         if (next_pindex + next_size > prev_object->size)
1887                 prev_object->size = next_pindex + next_size;
1888
1889         VM_OBJECT_UNLOCK(prev_object);
1890         return (TRUE);
1891 }
1892
1893 void
1894 vm_object_set_writeable_dirty(vm_object_t object)
1895 {
1896         struct vnode *vp;
1897
1898         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1899         if ((object->flags & (OBJ_MIGHTBEDIRTY|OBJ_WRITEABLE)) ==
1900             (OBJ_MIGHTBEDIRTY|OBJ_WRITEABLE))
1901                 return;
1902         vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
1903         if (object->type == OBJT_VNODE &&
1904             (vp = (struct vnode *)object->handle) != NULL) {
1905                 VI_LOCK(vp);
1906                 vp->v_iflag |= VI_OBJDIRTY;
1907                 VI_UNLOCK(vp);
1908         }
1909 }
1910
1911 #include "opt_ddb.h"
1912 #ifdef DDB
1913 #include <sys/kernel.h>
1914
1915 #include <sys/cons.h>
1916
1917 #include <ddb/ddb.h>
1918
1919 static int
1920 _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
1921 {
1922         vm_map_t tmpm;
1923         vm_map_entry_t tmpe;
1924         vm_object_t obj;
1925         int entcount;
1926
1927         if (map == 0)
1928                 return 0;
1929
1930         if (entry == 0) {
1931                 tmpe = map->header.next;
1932                 entcount = map->nentries;
1933                 while (entcount-- && (tmpe != &map->header)) {
1934                         if (_vm_object_in_map(map, object, tmpe)) {
1935                                 return 1;
1936                         }
1937                         tmpe = tmpe->next;
1938                 }
1939         } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
1940                 tmpm = entry->object.sub_map;
1941                 tmpe = tmpm->header.next;
1942                 entcount = tmpm->nentries;
1943                 while (entcount-- && tmpe != &tmpm->header) {
1944                         if (_vm_object_in_map(tmpm, object, tmpe)) {
1945                                 return 1;
1946                         }
1947                         tmpe = tmpe->next;
1948                 }
1949         } else if ((obj = entry->object.vm_object) != NULL) {
1950                 for (; obj; obj = obj->backing_object)
1951                         if (obj == object) {
1952                                 return 1;
1953                         }
1954         }
1955         return 0;
1956 }
1957
1958 static int
1959 vm_object_in_map(vm_object_t object)
1960 {
1961         struct proc *p;
1962
1963         /* sx_slock(&allproc_lock); */
1964         LIST_FOREACH(p, &allproc, p_list) {
1965                 if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
1966                         continue;
1967                 if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
1968                         /* sx_sunlock(&allproc_lock); */
1969                         return 1;
1970                 }
1971         }
1972         /* sx_sunlock(&allproc_lock); */
1973         if (_vm_object_in_map(kernel_map, object, 0))
1974                 return 1;
1975         if (_vm_object_in_map(kmem_map, object, 0))
1976                 return 1;
1977         if (_vm_object_in_map(pager_map, object, 0))
1978                 return 1;
1979         if (_vm_object_in_map(buffer_map, object, 0))
1980                 return 1;
1981         return 0;
1982 }
1983
1984 DB_SHOW_COMMAND(vmochk, vm_object_check)
1985 {
1986         vm_object_t object;
1987
1988         /*
1989          * make sure that internal objs are in a map somewhere
1990          * and none have zero ref counts.
1991          */
1992         TAILQ_FOREACH(object, &vm_object_list, object_list) {
1993                 if (object->handle == NULL &&
1994                     (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
1995                         if (object->ref_count == 0) {
1996                                 db_printf("vmochk: internal obj has zero ref count: %ld\n",
1997                                         (long)object->size);
1998                         }
1999                         if (!vm_object_in_map(object)) {
2000                                 db_printf(
2001                         "vmochk: internal obj is not in a map: "
2002                         "ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
2003                                     object->ref_count, (u_long)object->size,
2004                                     (u_long)object->size,
2005                                     (void *)object->backing_object);
2006                         }
2007                 }
2008         }
2009 }
2010
2011 /*
2012  *      vm_object_print:        [ debug ]
2013  */
2014 DB_SHOW_COMMAND(object, vm_object_print_static)
2015 {
2016         /* XXX convert args. */
2017         vm_object_t object = (vm_object_t)addr;
2018         boolean_t full = have_addr;
2019
2020         vm_page_t p;
2021
2022         /* XXX count is an (unused) arg.  Avoid shadowing it. */
2023 #define count   was_count
2024
2025         int count;
2026
2027         if (object == NULL)
2028                 return;
2029
2030         db_iprintf(
2031             "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x\n",
2032             object, (int)object->type, (uintmax_t)object->size,
2033             object->resident_page_count, object->ref_count, object->flags);
2034         db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
2035             object->shadow_count,
2036             object->backing_object ? object->backing_object->ref_count : 0,
2037             object->backing_object, (uintmax_t)object->backing_object_offset);
2038
2039         if (!full)
2040                 return;
2041
2042         db_indent += 2;
2043         count = 0;
2044         TAILQ_FOREACH(p, &object->memq, listq) {
2045                 if (count == 0)
2046                         db_iprintf("memory:=");
2047                 else if (count == 6) {
2048                         db_printf("\n");
2049                         db_iprintf(" ...");
2050                         count = 0;
2051                 } else
2052                         db_printf(",");
2053                 count++;
2054
2055                 db_printf("(off=0x%jx,page=0x%jx)",
2056                     (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
2057         }
2058         if (count != 0)
2059                 db_printf("\n");
2060         db_indent -= 2;
2061 }
2062
2063 /* XXX. */
2064 #undef count
2065
2066 /* XXX need this non-static entry for calling from vm_map_print. */
2067 void
2068 vm_object_print(
2069         /* db_expr_t */ long addr,
2070         boolean_t have_addr,
2071         /* db_expr_t */ long count,
2072         char *modif)
2073 {
2074         vm_object_print_static(addr, have_addr, count, modif);
2075 }
2076
2077 DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
2078 {
2079         vm_object_t object;
2080         int nl = 0;
2081         int c;
2082
2083         TAILQ_FOREACH(object, &vm_object_list, object_list) {
2084                 vm_pindex_t idx, fidx;
2085                 vm_pindex_t osize;
2086                 vm_paddr_t pa = -1, padiff;
2087                 int rcount;
2088                 vm_page_t m;
2089
2090                 db_printf("new object: %p\n", (void *)object);
2091                 if (nl > 18) {
2092                         c = cngetc();
2093                         if (c != ' ')
2094                                 return;
2095                         nl = 0;
2096                 }
2097                 nl++;
2098                 rcount = 0;
2099                 fidx = 0;
2100                 osize = object->size;
2101                 if (osize > 128)
2102                         osize = 128;
2103                 for (idx = 0; idx < osize; idx++) {
2104                         m = vm_page_lookup(object, idx);
2105                         if (m == NULL) {
2106                                 if (rcount) {
2107                                         db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2108                                                 (long)fidx, rcount, (long)pa);
2109                                         if (nl > 18) {
2110                                                 c = cngetc();
2111                                                 if (c != ' ')
2112                                                         return;
2113                                                 nl = 0;
2114                                         }
2115                                         nl++;
2116                                         rcount = 0;
2117                                 }
2118                                 continue;
2119                         }
2120
2121
2122                         if (rcount &&
2123                                 (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
2124                                 ++rcount;
2125                                 continue;
2126                         }
2127                         if (rcount) {
2128                                 padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
2129                                 padiff >>= PAGE_SHIFT;
2130                                 padiff &= PQ_L2_MASK;
2131                                 if (padiff == 0) {
2132                                         pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
2133                                         ++rcount;
2134                                         continue;
2135                                 }
2136                                 db_printf(" index(%ld)run(%d)pa(0x%lx)",
2137                                         (long)fidx, rcount, (long)pa);
2138                                 db_printf("pd(%ld)\n", (long)padiff);
2139                                 if (nl > 18) {
2140                                         c = cngetc();
2141                                         if (c != ' ')
2142                                                 return;
2143                                         nl = 0;
2144                                 }
2145                                 nl++;
2146                         }
2147                         fidx = idx;
2148                         pa = VM_PAGE_TO_PHYS(m);
2149                         rcount = 1;
2150                 }
2151                 if (rcount) {
2152                         db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2153                                 (long)fidx, rcount, (long)pa);
2154                         if (nl > 18) {
2155                                 c = cngetc();
2156                                 if (c != ' ')
2157                                         return;
2158                                 nl = 0;
2159                         }
2160                         nl++;
2161                 }
2162         }
2163 }
2164 #endif /* DDB */