sys/vm/vnode_pager.c

   1 /*
   2  * Copyright (c) 1990 University of Utah.
   3  * Copyright (c) 1991 The Regents of the University of California.
   4  * All rights reserved.
   5  * Copyright (c) 1993, 1994 John S. Dyson
   6  * Copyright (c) 1995, David Greenman
   7  *
   8  * This code is derived from software contributed to Berkeley by
   9  * the Systems Programming Group of the University of Utah Computer
  10  * Science Department.
  11  *
  12  * Redistribution and use in source and binary forms, with or without
  13  * modification, are permitted provided that the following conditions
  14  * are met:
  15  * 1. Redistributions of source code must retain the above copyright
  16  *    notice, this list of conditions and the following disclaimer.
  17  * 2. Redistributions in binary form must reproduce the above copyright
  18  *    notice, this list of conditions and the following disclaimer in the
  19  *    documentation and/or other materials provided with the distribution.
  20  * 3. All advertising materials mentioning features or use of this software
  21  *    must display the following acknowledgement:
  22  *      This product includes software developed by the University of
  23  *      California, Berkeley and its contributors.
  24  * 4. Neither the name of the University nor the names of its contributors
  25  *    may be used to endorse or promote products derived from this software
  26  *    without specific prior written permission.
  27  *
  28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  38  * SUCH DAMAGE.
  39  *
  40  *      from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91
  41  * $FreeBSD$
  42  */
  43
  44 /*
  45  * Page to/from files (vnodes).
  46  */
  47
  48 /*
  49  * TODO:
  50  *      Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will
  51  *      greatly re-simplify the vnode_pager.
  52  */
  53
  54 #include <sys/param.h>
  55 #include <sys/systm.h>
  56 #include <sys/proc.h>
  57 #include <sys/vnode.h>
  58 #include <sys/mount.h>
  59 #include <sys/bio.h>
  60 #include <sys/buf.h>
  61 #include <sys/vmmeter.h>
  62 #include <sys/conf.h>
  63
  64 #include <vm/vm.h>
  65 #include <vm/vm_object.h>
  66 #include <vm/vm_page.h>
  67 #include <vm/vm_pager.h>
  68 #include <vm/vm_map.h>
  69 #include <vm/vnode_pager.h>
  70 #include <vm/vm_extern.h>
  71
  72 static void vnode_pager_init __P((void));
  73 static vm_offset_t vnode_pager_addr __P((struct vnode *vp, vm_ooffset_t address,
  74                                          int *run));
  75 static void vnode_pager_iodone __P((struct buf *bp));
  76 static int vnode_pager_input_smlfs __P((vm_object_t object, vm_page_t m));
  77 static int vnode_pager_input_old __P((vm_object_t object, vm_page_t m));
  78 static void vnode_pager_dealloc __P((vm_object_t));
  79 static int vnode_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
  80 static void vnode_pager_putpages __P((vm_object_t, vm_page_t *, int, boolean_t, int *));
  81 static boolean_t vnode_pager_haspage __P((vm_object_t, vm_pindex_t, int *, int *));
  82
  83 struct pagerops vnodepagerops = {       /* pager operations vector for the vnode pager */
  84         vnode_pager_init,               /* one-time setup: sizes vnode_pbuf_freecnt */
  85         vnode_pager_alloc,              /* find or create the VM object for a vnode */
  86         vnode_pager_dealloc,            /* detach the object from its vnode */
  87         vnode_pager_getpages,           /* page-in, forwarded to VOP_GETPAGES */
  88         vnode_pager_putpages,           /* page-out entry point */
  89         vnode_pager_haspage,            /* is a page backed by the file? */
  90         NULL                            /* no handler supplied for the last slot;
                                              * NOTE(review): confirm the slot's meaning
                                              * against struct pagerops */
  91 };
  92
  93 int vnode_pbuf_freecnt;         /* counter handed to getpbuf()/relpbuf() by this
                                      * pager; set to nswbuf / 2 + 1 in
                                      * vnode_pager_init() */
  94
  95 void
  96 vnode_pager_init(void)
  97 {
  98
  99         vnode_pbuf_freecnt = nswbuf / 2 + 1;
 100 }
 101
 102 /*
 103  * Allocate (or lookup) pager for a vnode.
 104  * Handle is a vnode pointer.
 105  */
 106 vm_object_t
 107 vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
 108                   vm_ooffset_t offset)
 109 {
 110         vm_object_t object;
 111         struct vnode *vp;
 112
 113         GIANT_REQUIRED;
 114
 115         /*
 116          * Pageout to vnode, no can do yet.
 117          */
 118         if (handle == NULL)
 119                 return (NULL);
 120
 121         vp = (struct vnode *) handle;
 122
 123         /*
 124          * Prevent race condition when allocating the object. This
 125          * can happen with NFS vnodes since the nfsnode isn't locked.
 126          */
 127         while (vp->v_flag & VOLOCK) {
 128                 vp->v_flag |= VOWANT;
 129                 tsleep(vp, PVM, "vnpobj", 0);
 130         }
 131         vp->v_flag |= VOLOCK;
 132
 133         /*
 134          * If the object is being terminated, wait for it to
 135          * go away.
 136          */
 137         while (((object = vp->v_object) != NULL) &&
 138                 (object->flags & OBJ_DEAD)) {
 139                 tsleep(object, PVM, "vadead", 0);
 140         }
 141
 142         if (vp->v_usecount == 0)
 143                 panic("vnode_pager_alloc: no vnode reference");
 144
 145         if (object == NULL) {
 146                 /*
 147                  * And an object of the appropriate size
 148                  */
 149                 object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size)));
 150                 object->flags = 0;
 151
 152                 object->un_pager.vnp.vnp_size = size;
 153
 154                 object->handle = handle;
 155                 vp->v_object = object;
 156                 vp->v_usecount++;
 157         } else {
 158                 object->ref_count++;
 159                 vp->v_usecount++;
 160         }
 161
 162         vp->v_flag &= ~VOLOCK;
 163         if (vp->v_flag & VOWANT) {
 164                 vp->v_flag &= ~VOWANT;
 165                 wakeup(vp);
 166         }
 167         return (object);
 168 }
 169
 170 static void
 171 vnode_pager_dealloc(object)
 172         vm_object_t object;
 173 {
 174         struct vnode *vp = object->handle;
 175
 176         GIANT_REQUIRED;
 177         if (vp == NULL)
 178                 panic("vnode_pager_dealloc: pager already dealloced");
 179
 180         vm_object_pip_wait(object, "vnpdea");
 181
 182         object->handle = NULL;
 183         object->type = OBJT_DEAD;
 184         vp->v_object = NULL;
 185         vp->v_flag &= ~(VTEXT | VOBJBUF);
 186 }
 187
 188 static boolean_t
 189 vnode_pager_haspage(object, pindex, before, after)
 190         vm_object_t object;
 191         vm_pindex_t pindex;
 192         int *before;
 193         int *after;
 194 {
 195         struct vnode *vp = object->handle;
 196         daddr_t bn;
 197         int err;
 198         daddr_t reqblock;
 199         int poff;
 200         int bsize;
 201         int pagesperblock, blocksperpage;
 202
 203         GIANT_REQUIRED;
 204         /*
 205          * If no vp or vp is doomed or marked transparent to VM, we do not
 206          * have the page.
 207          */
 208         if ((vp == NULL) || (vp->v_flag & VDOOMED))
 209                 return FALSE;
 210
 211         /*
 212          * If filesystem no longer mounted or offset beyond end of file we do
 213          * not have the page.
 214          */
 215         if ((vp->v_mount == NULL) ||
 216             (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size))
 217                 return FALSE;
 218
 219         bsize = vp->v_mount->mnt_stat.f_iosize;
 220         pagesperblock = bsize / PAGE_SIZE;
 221         blocksperpage = 0;
 222         if (pagesperblock > 0) {
 223                 reqblock = pindex / pagesperblock;
 224         } else {
 225                 blocksperpage = (PAGE_SIZE / bsize);
 226                 reqblock = pindex * blocksperpage;
 227         }
 228         err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
 229                 after, before);
 230         if (err)
 231                 return TRUE;
 232         if ( bn == -1)
 233                 return FALSE;
 234         if (pagesperblock > 0) {
 235                 poff = pindex - (reqblock * pagesperblock);
 236                 if (before) {
 237                         *before *= pagesperblock;
 238                         *before += poff;
 239                 }
 240                 if (after) {
 241                         int numafter;
 242                         *after *= pagesperblock;
 243                         numafter = pagesperblock - (poff + 1);
 244                         if (IDX_TO_OFF(pindex + numafter) > object->un_pager.vnp.vnp_size) {
 245                                 numafter = OFF_TO_IDX((object->un_pager.vnp.vnp_size - IDX_TO_OFF(pindex)));
 246                         }
 247                         *after += numafter;
 248                 }
 249         } else {
 250                 if (before) {
 251                         *before /= blocksperpage;
 252                 }
 253
 254                 if (after) {
 255                         *after /= blocksperpage;
 256                 }
 257         }
 258         return TRUE;
 259 }
 260
 261 /*
 262  * Lets the VM system know about a change in size for a file.
 263  * We adjust our own internal size and flush any cached pages in
 264  * the associated object that are affected by the size change.
 265  *
 266  * Note: this routine may be invoked as a result of a pager put
 267  * operation (possibly at object termination time), so we must be careful.
 268  */
 269 void
 270 vnode_pager_setsize(vp, nsize)
 271         struct vnode *vp;
 272         vm_ooffset_t nsize;
 273 {
 274         vm_pindex_t nobjsize;
 275         vm_object_t object = vp->v_object;
 276
 277         GIANT_REQUIRED;
 278
 279         if (object == NULL)
 280                 return;
 281
 282         /*
 283          * Hasn't changed size
 284          */
 285         if (nsize == object->un_pager.vnp.vnp_size)
 286                 return;
 287
 288         nobjsize = OFF_TO_IDX(nsize + PAGE_MASK);
 289
 290         /*
 291          * File has shrunk. Toss any cached pages beyond the new EOF.
 292          */
 293         if (nsize < object->un_pager.vnp.vnp_size) {
 294                 vm_freeze_copyopts(object, OFF_TO_IDX(nsize), object->size);
 295                 if (nobjsize < object->size) {
 296                         vm_object_page_remove(object, nobjsize, object->size,
 297                                 FALSE);
 298                 }
 299                 /*
 300                  * this gets rid of garbage at the end of a page that is now
 301                  * only partially backed by the vnode...
 302                  */
 303                 if (nsize & PAGE_MASK) {
 304                         vm_offset_t kva;
 305                         vm_page_t m;
 306
 307                         m = vm_page_lookup(object, OFF_TO_IDX(nsize));
 308                         if (m) {
 309                                 int base = (int)nsize & PAGE_MASK;
 310                                 int size = PAGE_SIZE - base;
 311
 312                                 /*
 313                                  * Clear out partial-page garbage in case
 314                                  * the page has been mapped.
 315                                  */
 316                                 kva = vm_pager_map_page(m);
 317                                 bzero((caddr_t)kva + base, size);
 318                                 vm_pager_unmap_page(kva);
 319
 320                                 /*
 321                                  * Clear out partial-page dirty bits.  This
 322                                  * has the side effect of setting the valid
 323                                  * bits, but that is ok.  There are a bunch
 324                                  * of places in the VM system where we expected
 325                                  * m->dirty == VM_PAGE_BITS_ALL.  The file EOF
 326                                  * case is one of them.  If the page is still
 327                                  * partially dirty, make it fully dirty.
 328                                  */
 329                                 vm_page_set_validclean(m, base, size);
 330                                 if (m->dirty != 0)
 331                                         m->dirty = VM_PAGE_BITS_ALL;
 332                         }
 333                 }
 334         }
 335         object->un_pager.vnp.vnp_size = nsize;
 336         object->size = nobjsize;
 337 }
 338
 339 /*
 340  * calculate the linear (byte) disk address of specified virtual
 341  * file address
 342  */
 343 static vm_offset_t
 344 vnode_pager_addr(vp, address, run)
 345         struct vnode *vp;
 346         vm_ooffset_t address;
 347         int *run;
 348 {
 349         int rtaddress;
 350         int bsize;
 351         daddr_t block;
 352         struct vnode *rtvp;
 353         int err;
 354         daddr_t vblock;
 355         int voffset;
 356
 357         GIANT_REQUIRED;
 358         if ((int) address < 0)
 359                 return -1;
 360
 361         if (vp->v_mount == NULL)
 362                 return -1;
 363
 364         bsize = vp->v_mount->mnt_stat.f_iosize;
 365         vblock = address / bsize;
 366         voffset = address % bsize;
 367
 368         err = VOP_BMAP(vp, vblock, &rtvp, &block, run, NULL);
 369
 370         if (err || (block == -1))
 371                 rtaddress = -1;
 372         else {
 373                 rtaddress = block + voffset / DEV_BSIZE;
 374                 if( run) {
 375                         *run += 1;
 376                         *run *= bsize/PAGE_SIZE;
 377                         *run -= voffset/PAGE_SIZE;
 378                 }
 379         }
 380
 381         return rtaddress;
 382 }
 383
 384 /*
 385  * interrupt routine for I/O completion
 386  */
 387 static void
 388 vnode_pager_iodone(bp)
 389         struct buf *bp;
 390 {
 391         bp->b_flags |= B_DONE;
 392         wakeup(bp);
 393 }
 394
 395 /*
 396  * small block file system vnode pager input
 397  */
 398 static int
 399 vnode_pager_input_smlfs(object, m)
 400         vm_object_t object;
 401         vm_page_t m;
 402 {
 403         int i;
 404         int s;
 405         struct vnode *dp, *vp;
 406         struct buf *bp;
 407         vm_offset_t kva;
 408         int fileaddr;
 409         vm_offset_t bsize;
 410         int error = 0;
 411
 412         GIANT_REQUIRED;
 413
 414         vp = object->handle;
 415         if (vp->v_mount == NULL)
 416                 return VM_PAGER_BAD;
 417
 418         bsize = vp->v_mount->mnt_stat.f_iosize;
 419
 420         VOP_BMAP(vp, 0, &dp, 0, NULL, NULL);
 421
 422         kva = vm_pager_map_page(m);
 423
 424         for (i = 0; i < PAGE_SIZE / bsize; i++) {
 425
 426                 if (vm_page_bits(i * bsize, bsize) & m->valid)
 427                         continue;
 428
 429                 fileaddr = vnode_pager_addr(vp,
 430                         IDX_TO_OFF(m->pindex) + i * bsize, (int *)0);
 431                 if (fileaddr != -1) {
 432                         bp = getpbuf(&vnode_pbuf_freecnt);
 433
 434                         /* build a minimal buffer header */
 435                         bp->b_iocmd = BIO_READ;
 436                         bp->b_iodone = vnode_pager_iodone;
 437                         bp->b_rcred = bp->b_wcred = curproc->p_ucred;
 438                         if (bp->b_rcred != NOCRED)
 439                                 crhold(bp->b_rcred);
 440                         if (bp->b_wcred != NOCRED)
 441                                 crhold(bp->b_wcred);
 442                         bp->b_data = (caddr_t) kva + i * bsize;
 443                         bp->b_blkno = fileaddr;
 444                         pbgetvp(dp, bp);
 445                         bp->b_bcount = bsize;
 446                         bp->b_bufsize = bsize;
 447                         bp->b_runningbufspace = bp->b_bufsize;
 448                         runningbufspace += bp->b_runningbufspace;
 449
 450                         /* do the input */
 451                         BUF_STRATEGY(bp);
 452
 453                         /* we definitely need to be at splvm here */
 454
 455                         s = splvm();
 456                         while ((bp->b_flags & B_DONE) == 0) {
 457                                 tsleep(bp, PVM, "vnsrd", 0);
 458                         }
 459                         splx(s);
 460                         if ((bp->b_ioflags & BIO_ERROR) != 0)
 461                                 error = EIO;
 462
 463                         /*
 464                          * free the buffer header back to the swap buffer pool
 465                          */
 466                         relpbuf(bp, &vnode_pbuf_freecnt);
 467                         if (error)
 468                                 break;
 469
 470                         vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
 471                 } else {
 472                         vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
 473                         bzero((caddr_t) kva + i * bsize, bsize);
 474                 }
 475         }
 476         vm_pager_unmap_page(kva);
 477         pmap_clear_modify(m);
 478         vm_page_flag_clear(m, PG_ZERO);
 479         if (error) {
 480                 return VM_PAGER_ERROR;
 481         }
 482         return VM_PAGER_OK;
 483
 484 }
 485
 486
 487 /*
 488  * old style vnode pager output routine
 489  */
 490 static int
 491 vnode_pager_input_old(object, m)
 492         vm_object_t object;
 493         vm_page_t m;
 494 {
 495         struct uio auio;
 496         struct iovec aiov;
 497         int error;
 498         int size;
 499         vm_offset_t kva;
 500         struct vnode *vp;
 501
 502         GIANT_REQUIRED;
 503         error = 0;
 504
 505         /*
 506          * Return failure if beyond current EOF
 507          */
 508         if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
 509                 return VM_PAGER_BAD;
 510         } else {
 511                 size = PAGE_SIZE;
 512                 if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
 513                         size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);
 514
 515                 /*
 516                  * Allocate a kernel virtual address and initialize so that
 517                  * we can use VOP_READ/WRITE routines.
 518                  */
 519                 kva = vm_pager_map_page(m);
 520
 521                 vp = object->handle;
 522                 aiov.iov_base = (caddr_t) kva;
 523                 aiov.iov_len = size;
 524                 auio.uio_iov = &aiov;
 525                 auio.uio_iovcnt = 1;
 526                 auio.uio_offset = IDX_TO_OFF(m->pindex);
 527                 auio.uio_segflg = UIO_SYSSPACE;
 528                 auio.uio_rw = UIO_READ;
 529                 auio.uio_resid = size;
 530                 auio.uio_procp = curproc;
 531
 532                 error = VOP_READ(vp, &auio, 0, curproc->p_ucred);
 533                 if (!error) {
 534                         int count = size - auio.uio_resid;
 535
 536                         if (count == 0)
 537                                 error = EINVAL;
 538                         else if (count != PAGE_SIZE)
 539                                 bzero((caddr_t) kva + count, PAGE_SIZE - count);
 540                 }
 541                 vm_pager_unmap_page(kva);
 542         }
 543         pmap_clear_modify(m);
 544         vm_page_undirty(m);
 545         vm_page_flag_clear(m, PG_ZERO);
 546         if (!error)
 547                 m->valid = VM_PAGE_BITS_ALL;
 548         return error ? VM_PAGER_ERROR : VM_PAGER_OK;
 549 }
 550
 551 /*
 552  * generic vnode pager input routine
 553  */
 554
 555 /*
 556  * Local media VFS's that do not implement their own VOP_GETPAGES
 557  * should have their VOP_GETPAGES should call to
 558  * vnode_pager_generic_getpages() to implement the previous behaviour.
 559  *
 560  * All other FS's should use the bypass to get to the local media
 561  * backing vp's VOP_GETPAGES.
 562  */
 563 static int
 564 vnode_pager_getpages(object, m, count, reqpage)
 565         vm_object_t object;
 566         vm_page_t *m;
 567         int count;
 568         int reqpage;
 569 {
 570         int rtval;
 571         struct vnode *vp;
 572         int bytes = count * PAGE_SIZE;
 573
 574         GIANT_REQUIRED;
 575         vp = object->handle;
 576         rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0);
 577         KASSERT(rtval != EOPNOTSUPP,
 578             ("vnode_pager: FS getpages not implemented\n"));
 579         return rtval;
 580 }
 581
 582
 583 /*
 584  * This is now called from local media FS's to operate against their
 585  * own vnodes if they fail to implement VOP_GETPAGES.
      *
      * vp         vnode to read from; its v_object supplies the file size
      * m          array of pages handed in by the caller
      * bytecount  size of the request in bytes; bytecount / PAGE_SIZE pages
      *            of m[] are considered
      * reqpage    index in m[] of the page the caller actually needs
      *
      * Returns VM_PAGER_BAD if the vnode has no mount.  If VOP_BMAP() fails
      * the work is handed to vnode_pager_input_old(); if the filesystem block
      * size is less than half a page and the filesystem is not NFS it is
      * handed to vnode_pager_input_smlfs().  In both cases every page other
      * than m[reqpage] is freed first.  Otherwise one contiguous run of pages
      * containing reqpage is read from the underlying device through a pbuf
      * and VM_PAGER_OK or VM_PAGER_ERROR is returned.
 586  */
 587 int
 588 vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
 589         struct vnode *vp;
 590         vm_page_t *m;
 591         int bytecount;
 592         int reqpage;
 593 {
 594         vm_object_t object;
 595         vm_offset_t kva;
 596         off_t foff, tfoff, nextoff;
 597         int i, size, bsize, first, firstaddr;
 598         struct vnode *dp;
 599         int runpg;
 600         int runend;
 601         struct buf *bp;
 602         int s;
 603         int count;
 604         int error = 0;
 605
 606         GIANT_REQUIRED;
 607         object = vp->v_object;
 608         count = bytecount / PAGE_SIZE;
 609
 610         if (vp->v_mount == NULL)
 611                 return VM_PAGER_BAD;
 612
 613         bsize = vp->v_mount->mnt_stat.f_iosize;
 614
 615         /* get the UNDERLYING device for the file with VOP_BMAP() */
 616
 617         /*
 618          * originally, we did not check for an error return value -- assuming
 619          * an fs always has a bmap entry point -- that assumption is wrong!!!
 620          */
 621         foff = IDX_TO_OFF(m[reqpage]->pindex);
 622
 623         /*
 624          * if we can't bmap, use old VOP code
 625          */
 626         if (VOP_BMAP(vp, 0, &dp, 0, NULL, NULL)) {
 627                 for (i = 0; i < count; i++) {
 628                         if (i != reqpage) {
 629                                 vm_page_free(m[i]);
 630                         }
 631                 }
 632                 cnt.v_vnodein++;
 633                 cnt.v_vnodepgsin++;
 634                 return vnode_pager_input_old(object, m[reqpage]);
 635
 636                 /*
 637                  * if the blocksize is smaller than a page size, then use
 638                  * special small filesystem code.  NFS sometimes has a small
 639                  * blocksize, but it can handle large reads itself.
 640                  */
 641         } else if ((PAGE_SIZE / bsize) > 1 &&
 642             (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) {
 643                 for (i = 0; i < count; i++) {
 644                         if (i != reqpage) {
 645                                 vm_page_free(m[i]);
 646                         }
 647                 }
 648                 cnt.v_vnodein++;
 649                 cnt.v_vnodepgsin++;
 650                 return vnode_pager_input_smlfs(object, m[reqpage]);
 651         }
 652
 653         /*
 654          * If we have a completely valid page available to us, we can
 655          * clean up and return.  Otherwise we have to re-read the
 656          * media.
 657          */
 658
 659         if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
 660                 for (i = 0; i < count; i++) {
 661                         if (i != reqpage)
 662                                 vm_page_free(m[i]);
 663                 }
 664                 return VM_PAGER_OK;
 665         }
             /* any partially valid contents are discarded; the page is re-read */
 666         m[reqpage]->valid = 0;
 667
 668         /*
 669          * here on direct device I/O
 670          */
 671
 672         firstaddr = -1;
 673         /*
 674          * calculate the run that includes the required page
              *
              * The array is walked one run at a time, with `first' holding the
              * index at which the current run starts.  A page with no disk
              * address is freed and skipped.  A run that ends at or before
              * reqpage is freed as a whole.  The loop stops at the first run
              * that extends past reqpage, after freeing any pages beyond the
              * end of that run and shrinking `count' to match.
 675          */
 676         for(first = 0, i = 0; i < count; i = runend) {
 677                 firstaddr = vnode_pager_addr(vp,
 678                         IDX_TO_OFF(m[i]->pindex), &runpg);
 679                 if (firstaddr == -1) {
 680                         if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
 681                                 /* XXX no %qd in kernel. */
 682                                 panic("vnode_pager_getpages: unexpected missing page: firstaddr: %d, foff: 0x%lx%08lx, vnp_size: 0x%lx%08lx",
 683                                  firstaddr, (u_long)(foff >> 32),
 684                                  (u_long)(u_int32_t)foff,
 685                                  (u_long)(u_int32_t)
 686                                  (object->un_pager.vnp.vnp_size >> 32),
 687                                  (u_long)(u_int32_t)
 688                                  object->un_pager.vnp.vnp_size);
 689                         }
 690                         vm_page_free(m[i]);
 691                         runend = i + 1;
 692                         first = runend;
 693                         continue;
 694                 }
 695                 runend = i + runpg;
 696                 if (runend <= reqpage) {
 697                         int j;
 698                         for (j = i; j < runend; j++) {
 699                                 vm_page_free(m[j]);
 700                         }
 701                 } else {
 702                         if (runpg < (count - first)) {
 703                                 for (i = first + runpg; i < count; i++)
 704                                         vm_page_free(m[i]);
 705                                 count = first + runpg;
 706                         }
 707                         break;
 708                 }
 709                 first = runend;
 710         }
 711
 712         /*
 713          * the first and last page have been calculated now, move input pages
 714          * to be zero based...
 715          */
 716         if (first != 0) {
 717                 for (i = first; i < count; i++) {
 718                         m[i - first] = m[i];
 719                 }
 720                 count -= first;
 721                 reqpage -= first;
 722         }
 723
 724         /*
 725          * calculate the file virtual address for the transfer
 726          */
 727         foff = IDX_TO_OFF(m[0]->pindex);
 728
 729         /*
 730          * calculate the size of the transfer
              * (clipped so that it does not extend past the recorded file size)
 731          */
 732         size = count * PAGE_SIZE;
 733         if ((foff + size) > object->un_pager.vnp.vnp_size)
 734                 size = object->un_pager.vnp.vnp_size - foff;
 735
 736         /*
 737          * round up physical size for real devices.
 738          */
 739         if (dp->v_type == VBLK || dp->v_type == VCHR) {
 740                 int secmask = dp->v_rdev->si_bsize_phys - 1;
 741                 KASSERT(secmask < PAGE_SIZE, ("vnode_pager_generic_getpages: sector size %d too large\n", secmask + 1));
 742                 size = (size + secmask) & ~secmask;
 743         }
 744
 745         bp = getpbuf(&vnode_pbuf_freecnt);
 746         kva = (vm_offset_t) bp->b_data;
 747
 748         /*
 749          * and map the pages to be read into the kva
 750          */
 751         pmap_qenter(kva, m, count);
 752
 753         /* build a minimal buffer header */
 754         bp->b_iocmd = BIO_READ;
 755         bp->b_iodone = vnode_pager_iodone;
 756         /* B_PHYS is not set, but it is nice to fill this in */
 757         bp->b_rcred = bp->b_wcred = curproc->p_ucred;
 758         if (bp->b_rcred != NOCRED)
 759                 crhold(bp->b_rcred);
 760         if (bp->b_wcred != NOCRED)
 761                 crhold(bp->b_wcred);
 762         bp->b_blkno = firstaddr;
 763         pbgetvp(dp, bp);
 764         bp->b_bcount = size;
 765         bp->b_bufsize = size;
 766         bp->b_runningbufspace = bp->b_bufsize;
 767         runningbufspace += bp->b_runningbufspace;
 768
 769         cnt.v_vnodein++;
 770         cnt.v_vnodepgsin += count;
 771
 772         /* do the input */
 773         BUF_STRATEGY(bp);
 774
 775         s = splvm();
 776         /* we definitely need to be at splvm here */
 777
             /* sleep until vnode_pager_iodone() sets B_DONE and wakes us */
 778         while ((bp->b_flags & B_DONE) == 0) {
 779                 tsleep(bp, PVM, "vnread", 0);
 780         }
 781         splx(s);
 782         if ((bp->b_ioflags & BIO_ERROR) != 0)
 783                 error = EIO;
 784
             /* on success, zero the tail of the mapping that the read did not cover */
 785         if (!error) {
 786                 if (size != count * PAGE_SIZE)
 787                         bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
 788         }
 789         pmap_qremove(kva, count);
 790
 791         /*
 792          * free the buffer header back to the swap buffer pool
 793          */
 794         relpbuf(bp, &vnode_pbuf_freecnt);
 795
             /*
              * Walk the pages that took part in the transfer and update their
              * valid/dirty state.  Pages other than reqpage are additionally
              * queued and woken on success, or freed on error.
              */
 796         for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) {
 797                 vm_page_t mt;
 798
 799                 nextoff = tfoff + PAGE_SIZE;
 800                 mt = m[i];
 801
 802                 if (nextoff <= object->un_pager.vnp.vnp_size) {
 803                         /*
 804                          * Read filled up entire page.
 805                          */
 806                         mt->valid = VM_PAGE_BITS_ALL;
 807                         vm_page_undirty(mt);    /* should be an assert? XXX */
 808                         pmap_clear_modify(mt);
 809                 } else {
 810                         /*
 811                          * Read did not fill up entire page.  Since this
 812                          * is getpages, the page may be mapped, so we have
 813                          * to zero the invalid portions of the page even
 814                          * though we aren't setting them valid.
 815                          *
 816                          * Currently we do not set the entire page valid,
 817                          * we just try to clear the piece that we couldn't
 818                          * read.
 819                          */
 820                         vm_page_set_validclean(mt, 0,
 821                             object->un_pager.vnp.vnp_size - tfoff);
 822                         /* handled by vm_fault now */
 823                         /* vm_page_zero_invalid(mt, FALSE); */
 824                 }
 825
 826                 vm_page_flag_clear(mt, PG_ZERO);
 827                 if (i != reqpage) {
 828
 829                         /*
 830                          * whether or not to leave the page activated is up in
 831                          * the air, but we should put the page on a page queue
 832                          * somewhere. (it already is in the object). Result:
 833                          * It appears that empirical results show that
 834                          * deactivating pages is best.
 835                          */
 836
 837                         /*
 838                          * just in case someone was asking for this page we
 839                          * now tell them that it is ok to use
 840                          */
 841                         if (!error) {
 842                                 if (mt->flags & PG_WANTED)
 843                                         vm_page_activate(mt);
 844                                 else
 845                                         vm_page_deactivate(mt);
 846                                 vm_page_wakeup(mt);
 847                         } else {
 848                                 vm_page_free(mt);
 849                         }
 850                 }
 851         }
 852         if (error) {
 853                 printf("vnode_pager_getpages: I/O read error\n");
 854         }
 855         return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
 856 }
 857
 858 /*
 859  * EOPNOTSUPP is no longer legal.  For local media VFS's that do not
 860  * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call to
 861  * vnode_pager_generic_putpages() to implement the previous behaviour.
 862  *
 863  * All other FS's should use the bypass to get to the local media
 864  * backing vp's VOP_PUTPAGES.
 865  */
 866 static void
 867 vnode_pager_putpages(object, m, count, sync, rtvals)
 868         vm_object_t object;
 869         vm_page_t *m;
 870         int count;
 871         boolean_t sync;
 872         int *rtvals;
 873 {
 874         int rtval;
 875         struct vnode *vp;
 876         struct mount *mp;
 877         int bytes = count * PAGE_SIZE;
 878
 879         GIANT_REQUIRED;
 880         /*
 881          * Force synchronous operation if we are extremely low on memory
 882          * to prevent a low-memory deadlock.  VOP operations often need to
 883          * allocate more memory to initiate the I/O ( i.e. do a BMAP
 884          * operation ).  The swapper handles the case by limiting the amount
 885          * of asynchronous I/O, but that sort of solution doesn't scale well
 886          * for the vnode pager without a lot of work.
 887          *
 888          * Also, the backing vnode's iodone routine may not wake the pageout
 889          * daemon up.  This should be probably be addressed XXX.
 890          */
 891
 892         if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min)
 893                 sync |= OBJPC_SYNC;
 894
 895         /*
 896          * Call device-specific putpages function
 897          */
 898
 899         vp = object->handle;
 900         if (vp->v_type != VREG)
 901                 mp = NULL;
 902         (void)vn_start_write(vp, &mp, V_WAIT);
 903         rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
 904         KASSERT(rtval != EOPNOTSUPP,
 905             ("vnode_pager: stale FS putpages\n"));
 906         vn_finished_write(mp);
 907 }
 908
 909
 910 /*
 911  * This is now called from local media FS's to operate against their
 912  * own vnodes if they fail to implement VOP_PUTPAGES.
 913  *
 914  * This is typically called indirectly via the pageout daemon and
 915  * clustering has already typically occured, so in general we ask the
 916  * underlying filesystem to write the data out asynchronously rather
 917  * then delayed.
 918  */
 919 int
 920 vnode_pager_generic_putpages(vp, m, bytecount, flags, rtvals)
 921         struct vnode *vp;
 922         vm_page_t *m;
 923         int bytecount;
 924         int flags;
 925         int *rtvals;
 926 {
 927         int i;
 928         vm_object_t object;
 929         int count;
 930
 931         int maxsize, ncount;
 932         vm_ooffset_t poffset;
 933         struct uio auio;
 934         struct iovec aiov;
 935         int error;
 936         int ioflags;
 937
 938         GIANT_REQUIRED;
 939         object = vp->v_object;
 940         count = bytecount / PAGE_SIZE;
 941
 942         for (i = 0; i < count; i++)
 943                 rtvals[i] = VM_PAGER_AGAIN;
 944
 945         if ((int) m[0]->pindex < 0) {
 946                 printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%x)\n",
 947                         (long)m[0]->pindex, m[0]->dirty);
 948                 rtvals[0] = VM_PAGER_BAD;
 949                 return VM_PAGER_BAD;
 950         }
 951
 952         maxsize = count * PAGE_SIZE;
 953         ncount = count;
 954
 955         poffset = IDX_TO_OFF(m[0]->pindex);
 956         if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
 957                 if (object->un_pager.vnp.vnp_size > poffset)
 958                         maxsize = object->un_pager.vnp.vnp_size - poffset;
 959                 else
 960                         maxsize = 0;
 961                 ncount = btoc(maxsize);
 962                 if (ncount < count) {
 963                         for (i = ncount; i < count; i++) {
 964                                 rtvals[i] = VM_PAGER_BAD;
 965                         }
 966                 }
 967         }
 968
 969         /*
 970          * pageouts are already clustered, use IO_ASYNC t o force a bawrite()
 971          * rather then a bdwrite() to prevent paging I/O from saturating
 972          * the buffer cache.
 973          */
 974         ioflags = IO_VMIO;
 975         ioflags |= (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL)) ? IO_SYNC: IO_ASYNC;
 976         ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0;
 977
 978         aiov.iov_base = (caddr_t) 0;
 979         aiov.iov_len = maxsize;
 980         auio.uio_iov = &aiov;
 981         auio.uio_iovcnt = 1;
 982         auio.uio_offset = poffset;
 983         auio.uio_segflg = UIO_NOCOPY;
 984         auio.uio_rw = UIO_WRITE;
 985         auio.uio_resid = maxsize;
 986         auio.uio_procp = (struct proc *) 0;
 987         error = VOP_WRITE(vp, &auio, ioflags, curproc->p_ucred);
 988         cnt.v_vnodeout++;
 989         cnt.v_vnodepgsout += ncount;
 990
 991         if (error) {
 992                 printf("vnode_pager_putpages: I/O error %d\n", error);
 993         }
 994         if (auio.uio_resid) {
 995                 printf("vnode_pager_putpages: residual I/O %d at %lu\n",
 996                     auio.uio_resid, (u_long)m[0]->pindex);
 997         }
 998         for (i = 0; i < ncount; i++) {
 999                 rtvals[i] = VM_PAGER_OK;
1000         }
1001         return rtvals[0];
1002 }
1003
1004 struct vnode *
1005 vnode_pager_lock(object)
1006         vm_object_t object;
1007 {
1008         struct proc *p = curproc;       /* XXX */
1009
1010         GIANT_REQUIRED;
1011
1012         for (; object != NULL; object = object->backing_object) {
1013                 if (object->type != OBJT_VNODE)
1014                         continue;
1015                 if (object->flags & OBJ_DEAD) {
1016                         return NULL;
1017                 }
1018
1019                 /* XXX; If object->handle can change, we need to cache it. */
1020                 while (vget(object->handle,
1021                         LK_NOPAUSE | LK_SHARED | LK_RETRY | LK_CANRECURSE, p)) {
1022                         if ((object->flags & OBJ_DEAD) || (object->type != OBJT_VNODE))
1023                                 return NULL;
1024                         printf("vnode_pager_lock: retrying\n");
1025                 }
1026                 return object->handle;
1027         }
1028         return NULL;
1029 }