From f86e46e1ac425e799ad8becb31c3194c616181c2 Mon Sep 17 00:00:00 2001 From: gjb Date: Tue, 10 Sep 2013 19:00:32 +0000 Subject: [PATCH] Reintegrate r250907 (previously reverted as r254754) plus MFS r254947, r255443: r250907 (scottl): MFC r248830, r250027, r250409: Several fixes and improvements to sendfile() r254947 (kib): NFS implementation of VOP_READ() sometimes upgrades the vnode lock, which causes drop of the shared lock and sleep for exclusive. As result, busying of the page before the call to vn_rdwr() makes NFS code to wait for vnode lock while page is busy, which contradicts the proper order of vnode lock -> busy. r255443 (des): Fix the length calculation for the final block of a sendfile(2) transmission which could be tricked into rounding up to the nearest page size, leaking up to a page of kernel memory. [13:11] Approved by: re (delphij) Sponsored by: The FreeBSD Foundation git-svn-id: svn://svn.freebsd.org/base/releng/9.2@255456 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- sys/kern/uipc_syscalls.c | 94 ++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 38 deletions(-) diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 6ea7d348..309310a2 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -1835,9 +1835,11 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, struct mbuf *m = NULL; struct sf_buf *sf; struct vm_page *pg; + struct vattr va; off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0; int error, hdrlen = 0, mnw = 0; int vfslocked; + int bsize; struct sendfile_sync *sfs = NULL; /* @@ -1852,6 +1854,18 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, vfslocked = VFS_LOCK_GIANT(vp->v_mount); vn_lock(vp, LK_SHARED | LK_RETRY); if (vp->v_type == VREG) { + bsize = vp->v_mount->mnt_stat.f_iosize; + if (uap->nbytes == 0) { + error = VOP_GETATTR(vp, &va, td->td_ucred); + if (error != 0) { + VOP_UNLOCK(vp, 0); + VFS_UNLOCK_GIANT(vfslocked); + obj = NULL; + goto out; + } + rem = va.va_size; + } else + rem = uap->nbytes; obj = vp->v_object; if (obj != NULL) { /* @@ -1869,7 +1883,8 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, obj = NULL; } } - } + } else + bsize = 0; /* silence gcc */ VOP_UNLOCK(vp, 0); VFS_UNLOCK_GIANT(vfslocked); if (obj == NULL) { @@ -1962,11 +1977,20 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, * The outer loop checks the state and available space of the socket * and takes care of the overall progress. */ - for (off = uap->offset, rem = uap->nbytes; ; ) { - struct mbuf *mtail = NULL; - int loopbytes = 0; - int space = 0; - int done = 0; + for (off = uap->offset; ; ) { + struct mbuf *mtail; + int loopbytes; + int space; + int done; + + if ((uap->nbytes != 0 && uap->nbytes == fsbytes) || + (uap->nbytes == 0 && va.va_size == fsbytes)) + break; + + mtail = NULL; + loopbytes = 0; + space = 0; + done = 0; /* * Check the socket state for ongoing connection, @@ -2034,6 +2058,20 @@ retry_space: */ space -= hdrlen; + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + error = vn_lock(vp, LK_SHARED); + if (error != 0) { + VFS_UNLOCK_GIANT(vfslocked); + goto done; + } + error = VOP_GETATTR(vp, &va, td->td_ucred); + if (error != 0 || off >= va.va_size) { + VOP_UNLOCK(vp, 0); + VFS_UNLOCK_GIANT(vfslocked); + goto done; + } + VFS_UNLOCK_GIANT(vfslocked); + /* * Loop and construct maximum sized mbuf chain to be bulk * dumped into socket buffer. @@ -2043,25 +2081,19 @@ retry_space: vm_offset_t pgoff; struct mbuf *m0; - VM_OBJECT_LOCK(obj); /* * Calculate the amount to transfer. * Not to exceed a page, the EOF, * or the passed in nbytes. */ pgoff = (vm_offset_t)(off & PAGE_MASK); - xfsize = omin(PAGE_SIZE - pgoff, - obj->un_pager.vnp.vnp_size - uap->offset - - fsbytes - loopbytes); - if (uap->nbytes) - rem = (uap->nbytes - fsbytes - loopbytes); - else - rem = obj->un_pager.vnp.vnp_size - - uap->offset - fsbytes - loopbytes; - xfsize = omin(rem, xfsize); + rem = va.va_size - uap->offset; + if (uap->nbytes != 0) + rem = omin(rem, uap->nbytes); + rem -= fsbytes + loopbytes; + xfsize = omin(PAGE_SIZE - pgoff, rem); xfsize = omin(space - loopbytes, xfsize); if (xfsize <= 0) { - VM_OBJECT_UNLOCK(obj); done = 1; /* all data sent */ break; } @@ -2071,6 +2103,7 @@ retry_space: * if not found or wait and loop if busy. */ pindex = OFF_TO_IDX(off); + VM_OBJECT_LOCK(obj); pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY); @@ -2088,42 +2121,25 @@ retry_space: else if (uap->flags & SF_NODISKIO) error = EBUSY; else { - int bsize; ssize_t resid; - /* - * Ensure that our page is still around - * when the I/O completes. - */ - vm_page_io_start(pg); VM_OBJECT_UNLOCK(obj); /* * Get the page from backing store. - */ - vfslocked = VFS_LOCK_GIANT(vp->v_mount); - error = vn_lock(vp, LK_SHARED); - if (error != 0) - goto after_read; - bsize = vp->v_mount->mnt_stat.f_iosize; - - /* * XXXMAC: Because we don't have fp->f_cred * here, we pass in NOCRED. This is probably * wrong, but is consistent with our original * implementation. */ + vfslocked = VFS_LOCK_GIANT(vp->v_mount); error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td); - VOP_UNLOCK(vp, 0); - after_read: VFS_UNLOCK_GIANT(vfslocked); - VM_OBJECT_LOCK(obj); - vm_page_io_finish(pg); - if (!error) - VM_OBJECT_UNLOCK(obj); + if (error) + VM_OBJECT_LOCK(obj); mbstat.sf_iocnt++; } if (error) { @@ -2174,7 +2190,7 @@ retry_space: m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); if (m0 == NULL) { error = (mnw ? EAGAIN : ENOBUFS); - sf_buf_mext((void *)sf_buf_kva(sf), sf); + sf_buf_mext(NULL, sf); break; } MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, @@ -2202,6 +2218,8 @@ retry_space: } } + VOP_UNLOCK(vp, 0); + /* Add the buffer chain to the socket buffer. */ if (m != NULL) { int mlen, err; -- 2.45.0