From b21daa362e81f23e997ac0e930609881326fa00e Mon Sep 17 00:00:00 2001 From: asomers Date: Fri, 21 Jun 2019 21:44:31 +0000 Subject: [PATCH] fusefs: correctly handle short reads A fuse server may return a short read for three reasons: * The file is opened with FOPEN_DIRECT_IO. In this case, the short read should be returned directly to userland. We already handled this case correctly. * The file was truncated server-side, and the read hit EOF. In this case, the kernel should update the file size. Fixed in the case of VOP_READ. Fixing this for VOP_GETPAGES is TODO. * The file is opened in writeback mode, there are dirty buffers past what the server thinks is the file's EOF, and the read hit what the server thinks is the file's EOF. In this case, the client is trying to read a hole, and should zero-fill it. We already handled this case, and I added a test for it. Sponsored by: The FreeBSD Foundation --- sys/fs/fuse/fuse_io.c | 56 ++++++++++++++++++++++++-------- tests/sys/fs/fusefs/read.cc | 63 ++++++++++++++++++++++++++++++++++++ tests/sys/fs/fusefs/write.cc | 48 +++++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 14 deletions(-) diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index ab5e18508d0..79b090007a3 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -348,8 +348,9 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, */ n = 0; - if (on < bcount) - n = MIN((unsigned)(bcount - on), uio->uio_resid); + if (on < bcount - bp->b_resid) + n = MIN((unsigned)(bcount - bp->b_resid - on), + uio->uio_resid); if (n > 0) { SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp); err = uiomove(bp->b_data + on, n, uio); @@ -357,6 +358,11 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, vfs_bio_brelse(bp, ioflag); SDT_PROBE4(fusefs, , io, read_bio_backend_end, err, uio->uio_resid, n, bp); + if (bp->b_resid > 0) { + /* Short read indicates EOF */ + (void)fuse_vnode_setsize(vp, uio->uio_offset); + break; + } } return (err); @@ -415,8 +421,13 @@ fuse_read_directbackend(struct vnode *vp, struct uio *uio, if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio))) break; - if (fdi.iosize < fri->size) + if (fdi.iosize < fri->size) { + /* + * Short read. Should only happen at EOF or with + * direct io. + */ break; + } } out: @@ -828,6 +839,7 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio, int fuse_io_strategy(struct vnode *vp, struct buf *bp) { + struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; struct ucred *cred; struct uio *uiop; @@ -888,19 +900,35 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp) if (!error && uiop->uio_resid) { /* - * If we had a short read with no error, we must have - * hit a file hole. We should zero-fill the remainder. - * This can also occur if the server hits the file EOF. - * - * Holes used to be able to occur due to pending - * writes, but that is not possible any longer. + * A short read with no error, when not using direct io, + * and when no writes are cached, indicates EOF. + * Update the file size accordingly. */ - int nread = bp->b_bcount - uiop->uio_resid; - int left = uiop->uio_resid; - - if (left > 0) + if (fuse_data_cache_mode != FUSE_CACHE_WB || + (fvdat->flag & FN_SIZECHANGE) == 0) { + SDT_PROBE2(fusefs, , io, trace, 1, + "Short read of a clean file"); + /* + * XXX To prevent lock order problems, we must + * truncate the file upstack + */ + } else { + /* + * If dirty writes _are_ cached beyond EOF, + * that indicates a newly created hole that the + * server doesn't know about. Fill it in. + * XXX: we don't currently track whether dirty + * writes are cached beyond EOF, before EOF, or + * both. + */ + SDT_PROBE2(fusefs, , io, trace, 1, + "Short read of a dirty file"); + int nread = bp->b_bcount - uiop->uio_resid; + int left = uiop->uio_resid; bzero((char *)bp->b_data + nread, left); - uiop->uio_resid = 0; + uiop->uio_resid = 0; + } + } if (error) { bp->b_ioflags |= BIO_ERROR; diff --git a/tests/sys/fs/fusefs/read.cc b/tests/sys/fs/fusefs/read.cc index a0b531ccba7..2ef9f6be55a 100644 --- a/tests/sys/fs/fusefs/read.cc +++ b/tests/sys/fs/fusefs/read.cc @@ -411,6 +411,69 @@ TEST_F(Read, eio) /* Deliberately leak fd. close(2) will be tested in release.cc */ } +/* + * If the server returns a short read when direct io is not in use, that + * indicates EOF and we should update the file size. + */ +TEST_F(ReadCacheable, eof) +{ + const char FULLPATH[] = "mountpoint/some_file.txt"; + const char RELPATH[] = "some_file.txt"; + const char *CONTENTS = "abcdefghijklmnop"; + uint64_t ino = 42; + int fd; + uint64_t offset = 100; + ssize_t bufsize = strlen(CONTENTS); + ssize_t partbufsize = 3 * bufsize / 4; + char buf[bufsize]; + struct stat sb; + + expect_lookup(RELPATH, ino, offset + bufsize); + expect_open(ino, 0, 1); + expect_read(ino, 0, offset + bufsize, offset + partbufsize, CONTENTS); + + fd = open(FULLPATH, O_RDONLY); + ASSERT_LE(0, fd) << strerror(errno); + + ASSERT_EQ(partbufsize, pread(fd, buf, bufsize, offset)) + << strerror(errno); + ASSERT_EQ(0, fstat(fd, &sb)); + EXPECT_EQ((off_t)(offset + partbufsize), sb.st_size); + /* Deliberately leak fd. close(2) will be tested in release.cc */ +} + +/* Like ReadCacheable.eof, but causes an entire buffer to be invalidated */ +TEST_F(ReadCacheable, eof_of_whole_buffer) +{ + const char FULLPATH[] = "mountpoint/some_file.txt"; + const char RELPATH[] = "some_file.txt"; + const char *CONTENTS = "abcdefghijklmnop"; + uint64_t ino = 42; + int fd; + ssize_t bufsize = strlen(CONTENTS); + off_t old_filesize = m_maxbcachebuf * 2 + bufsize; + char buf[bufsize]; + struct stat sb; + + expect_lookup(RELPATH, ino, old_filesize); + expect_open(ino, 0, 1); + expect_read(ino, 2 * m_maxbcachebuf, bufsize, bufsize, CONTENTS); + expect_read(ino, m_maxbcachebuf, m_maxbcachebuf, 0, CONTENTS); + + fd = open(FULLPATH, O_RDONLY); + ASSERT_LE(0, fd) << strerror(errno); + + /* Cache the third block */ + ASSERT_EQ(bufsize, pread(fd, buf, bufsize, m_maxbcachebuf * 2)) + << strerror(errno); + /* Try to read the 2nd block, but it's past EOF */ + ASSERT_EQ(0, pread(fd, buf, bufsize, m_maxbcachebuf)) + << strerror(errno); + ASSERT_EQ(0, fstat(fd, &sb)); + EXPECT_EQ((off_t)(m_maxbcachebuf), sb.st_size); + /* Deliberately leak fd. close(2) will be tested in release.cc */ +} + /* * With the keep_cache option, the kernel may keep its read cache across * multiple open(2)s. diff --git a/tests/sys/fs/fusefs/write.cc b/tests/sys/fs/fusefs/write.cc index 03364ca10e8..b59273385b3 100644 --- a/tests/sys/fs/fusefs/write.cc +++ b/tests/sys/fs/fusefs/write.cc @@ -930,6 +930,54 @@ TEST_F(WriteBackAsync, delay) /* Don't close the file because that would flush the cache */ } +/* + * In WriteBack mode, writes may be cached beyond what the server thinks is the + * EOF. In this case, a short read at EOF should _not_ cause fusefs to update + * the file's size. + */ +TEST_F(WriteBackAsync, eof) +{ + const char FULLPATH[] = "mountpoint/some_file.txt"; + const char RELPATH[] = "some_file.txt"; + const char *CONTENTS0 = "abcdefgh"; + const char *CONTENTS1 = "ijklmnop"; + uint64_t ino = 42; + int fd; + off_t offset = m_maxbcachebuf; + ssize_t wbufsize = strlen(CONTENTS1); + off_t old_filesize = (off_t)strlen(CONTENTS0); + ssize_t rbufsize = 2 * old_filesize; + char readbuf[rbufsize]; + size_t holesize = rbufsize - old_filesize; + char hole[holesize]; + struct stat sb; + ssize_t r; + + expect_lookup(RELPATH, ino, 0); + expect_open(ino, 0, 1); + expect_read(ino, 0, m_maxbcachebuf, old_filesize, CONTENTS0); + + fd = open(FULLPATH, O_RDWR); + EXPECT_LE(0, fd) << strerror(errno); + + /* Write and cache data beyond EOF */ + ASSERT_EQ(wbufsize, pwrite(fd, CONTENTS1, wbufsize, offset)) + << strerror(errno); + + /* Read from the old EOF */ + r = pread(fd, readbuf, rbufsize, 0); + ASSERT_LE(0, r) << strerror(errno); + EXPECT_EQ(rbufsize, r) << "read should've synthesized a hole"; + EXPECT_EQ(0, memcmp(CONTENTS0, readbuf, old_filesize)); + bzero(hole, holesize); + EXPECT_EQ(0, memcmp(hole, readbuf + old_filesize, holesize)); + + /* The file's size should still be what was established by pwrite */ + ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno); + EXPECT_EQ(offset + wbufsize, sb.st_size); + /* Deliberately leak fd. close(2) will be tested in release.cc */ +} + /* * Without direct_io, writes should be committed to cache */ -- 2.45.0