From 6922777a55d03afa8d5188f3d60c0c949a8c454c Mon Sep 17 00:00:00 2001 From: asomers Date: Thu, 13 Jun 2019 19:07:03 +0000 Subject: [PATCH] fusefs: fix a bug with WriteBack cacheing An errant vfs_bio_clrbuf snuck in in r348931. Surprisingly, it doesn't have any effect most of the time. But under some circumstances it cause the buffer to behave in a write-only fashion. Sponsored by: The FreeBSD Foundation --- sys/fs/fuse/fuse_io.c | 35 ++++++++++++++++--------- tests/sys/fs/fusefs/io.cc | 55 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 14 deletions(-) diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index da67ad5c9ab..c373300953f 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -267,7 +267,7 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, bool pages, } SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_start, "int", "int", "int", "int"); -SDT_PROBE_DEFINE2(fusefs, , io, read_bio_backend_feed, "int", "int"); +SDT_PROBE_DEFINE2(fusefs, , io, read_bio_backend_feed, "int", "struct buf*"); SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_end, "int", "ssize_t", "int", "struct buf*"); static int @@ -330,8 +330,7 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, if (on < bcount) n = MIN((unsigned)(bcount - on), uio->uio_resid); if (n > 0) { - SDT_PROBE2(fusefs, , io, read_bio_backend_feed, - n, n + (int)bp->b_resid); + SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp); err = uiomove(bp->b_data + on, n, uio); } vfs_bio_brelse(bp, ioflag); @@ -344,8 +343,8 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, SDT_PROBE_DEFINE1(fusefs, , io, read_directbackend_start, "struct fuse_read_in*"); -SDT_PROBE_DEFINE2(fusefs, , io, read_directbackend_complete, - "struct fuse_dispatcher*", "struct uio*"); +SDT_PROBE_DEFINE3(fusefs, , io, read_directbackend_complete, + "struct fuse_dispatcher*", "struct fuse_read_in*", "struct uio*"); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, @@ -390,8 +389,8 @@ fuse_read_directbackend(struct vnode *vp, struct uio *uio, if ((err = fdisp_wait_answ(&fdi))) goto out; - SDT_PROBE2(fusefs, , io, read_directbackend_complete, - fdi.iosize, uio); + SDT_PROBE3(fusefs, , io, read_directbackend_complete, + &fdi, fri, uio); if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio))) break; @@ -555,6 +554,7 @@ fuse_write_directbackend(struct vnode *vp, struct uio *uio, SDT_PROBE_DEFINE6(fusefs, , io, write_biobackend_start, "int64_t", "int", "int", "struct uio*", "int", "bool"); SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_append_race, "long", "int"); +SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_issue, "int", "struct buf*"); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, @@ -602,14 +602,14 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio, again: /* Get or create a buffer for the write */ direct_append = uio->uio_offset == filesize && n; - if ((off_t)lbn * biosize + on + n < filesize) { + if (uio->uio_offset + n < filesize) { extending = false; if ((off_t)(lbn + 1) * biosize < filesize) { /* Not the file's last block */ bcount = biosize; } else { /* The file's last block */ - bcount = filesize - (off_t)lbn *biosize; + bcount = filesize - (off_t)lbn * biosize; } } else { extending = true; @@ -650,8 +650,6 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio, break; } } - if (biosize > bcount) - vfs_bio_clrbuf(bp); SDT_PROBE6(fusefs, , io, write_biobackend_start, lbn, on, n, uio, bcount, direct_append); @@ -733,6 +731,7 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio, * reasons: the only way to know if a write is valid * if its actually written out.) */ + SDT_PROBE2(fusefs, , io, write_biobackend_issue, 0, bp); bwrite(bp); if (bp->b_error == EINTR) { err = EINTR; @@ -780,23 +779,33 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio, * already-written page whenever extending a file with * ftruncate or another write. */ + SDT_PROBE2(fusefs, , io, write_biobackend_issue, 1, bp); err = bwrite(bp); } else if (ioflag & IO_SYNC) { + SDT_PROBE2(fusefs, , io, write_biobackend_issue, 2, bp); err = bwrite(bp); } else if (vm_page_count_severe() || buf_dirty_count_severe() || (ioflag & IO_ASYNC)) { /* TODO: enable write clustering later */ + SDT_PROBE2(fusefs, , io, write_biobackend_issue, 3, bp); bawrite(bp); } else if (on == 0 && n == bcount) { - if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) + if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { + SDT_PROBE2(fusefs, , io, write_biobackend_issue, + 4, bp); bdwrite(bp); - else + } else { + SDT_PROBE2(fusefs, , io, write_biobackend_issue, + 5, bp); bawrite(bp); + } } else if (ioflag & IO_DIRECT) { + SDT_PROBE2(fusefs, , io, write_biobackend_issue, 6, bp); bawrite(bp); } else { bp->b_flags &= ~B_CLUSTEROK; + SDT_PROBE2(fusefs, , io, write_biobackend_issue, 7, bp); bdwrite(bp); } if (err) diff --git a/tests/sys/fs/fusefs/io.cc b/tests/sys/fs/fusefs/io.cc index 1627b22ff44..ea4ec80972c 100644 --- a/tests/sys/fs/fusefs/io.cc +++ b/tests/sys/fs/fusefs/io.cc @@ -51,6 +51,27 @@ const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; +static void compare(const void *tbuf, const void *controlbuf, off_t baseofs, + ssize_t size) +{ + int i; + + for (i = 0; i < size; i++) { + if (((const char*)tbuf)[i] != ((const char*)controlbuf)[i]) { + off_t ofs = baseofs + i; + FAIL() << "miscompare at offset " + << std::hex + << std::showbase + << ofs + << ". expected = " + << std::setw(2) + << (unsigned)((const uint8_t*)controlbuf)[i] + << " got = " + << (unsigned)((const uint8_t*)tbuf)[i]; + } + } +} + class Io: public FuseTest { public: int m_backing_fd, m_control_fd, m_test_fd; @@ -171,7 +192,7 @@ void do_read(ssize_t size, off_t offs) ASSERT_EQ(size, pread(m_control_fd, control_buf, size, offs)) << strerror(errno); - ASSERT_EQ(0, memcmp(test_buf, control_buf, size)); + compare(test_buf, control_buf, offs, size); free(control_buf); free(test_buf); @@ -308,3 +329,35 @@ TEST_F(Io, truncate_into_dirty_buffer2) do_ftruncate(truncsize2); close(m_test_fd); } + +/* + * Regression test for a bug introduced in r348931 + * + * Sequence of operations: + * 1) The first write reads lbn so it can modify it + * 2) The first write flushes lbn 3 immediately because it's the end of file + * 3) The first write then flushes lbn 4 because it's the end of the file + * 4) The second write modifies the cached versions of lbn 3 and 4 + * 5) The third write's getblkx invalidates lbn 4's B_CACHE because it's + * extending the buffer. Then it flushes lbn 4 because B_DELWRI was set but + * B_CACHE was clear. + * 6) fuse_write_biobackend erroneously called vfs_bio_clrbuf, putting the + * buffer into a weird write-only state. All read operations would return + * 0. Writes were apparently still processed, because the buffer's contents + * were correct when examined in a core dump. + * 7) The third write reads lbn 4 because cache is clear + * 9) uiomove dutifully copies new data into the buffer + * 10) The buffer's dirty is flushed to lbn 4 + * 11) The read returns all zeros because of step 6. + * + * Based on: + * fsx -WR -l 524388 -o 131072 -P /tmp -S6456 -q fsx.bin + */ +TEST_F(Io, resize_a_valid_buffer_while_extending) +{ + do_write(0x14530, 0x36ee6); /* [0x36ee6, 0x4b415] */ + do_write(0x1507c, 0x33256); /* [0x33256, 0x482d1] */ + do_write(0x175c, 0x4c03d); /* [0x4c03d, 0x4d798] */ + do_read(0xe277, 0x3599c); /* [0x3599c, 0x43c12] */ + close(m_test_fd); +} -- 2.45.0