From 0d912f6df88cdd665e80c7141216b47d3d86440d Mon Sep 17 00:00:00 2001 From: mav Date: Fri, 27 Mar 2015 08:53:59 +0000 Subject: [PATCH] MFC r280037: Rewrite virtio block device driver to work asynchronously and use the block I/O interface. Asynchronous operation, based on the r280026 change, avoids blocking the virtual CPU during I/O processing, which on slow/busy storage can take seconds. Use of the recently improved block I/O interface allows multiple requests to be processed at the same time, which improves random I/O performance on wide storage. Benchmarks of a virtual disk, backed by a ZVOL on a RAID10 pool of 4 HDDs, show a ~3.5x improvement in random read performance, with no degradation on linear I/O. Guest CPU usage during the test dropped from 100% to almost zero. git-svn-id: svn://svn.freebsd.org/base/stable/10@280744 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- usr.sbin/bhyve/block_if.c | 2 +- usr.sbin/bhyve/pci_virtio_block.c | 152 +++++++++++++++--------------- 2 files changed, 76 insertions(+), 78 deletions(-) diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c index ceb48fe98..68a9bae9c 100644 --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -54,7 +54,7 @@ __FBSDID("$FreeBSD$"); #define BLOCKIF_SIG 0xb109b109 -#define BLOCKIF_MAXREQ 33 +#define BLOCKIF_MAXREQ 64 #define BLOCKIF_NUMTHR 8 enum blockop { diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c index 7248b1fed..cdfd46610 100644 --- a/usr.sbin/bhyve/pci_virtio_block.c +++ b/usr.sbin/bhyve/pci_virtio_block.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include "bhyverun.h" #include "pci_emul.h" #include "virtio.h" +#include "block_if.h" #define VTBLK_RINGSZ 64 @@ -120,6 +121,13 @@ static int pci_vtblk_debug; #define DPRINTF(params) if (pci_vtblk_debug) printf params #define WPRINTF(params) printf params +struct pci_vtblk_ioreq { + struct blockif_req io_req; + struct pci_vtblk_softc *io_sc; + uint8_t *io_status; + uint16_t io_idx; +}; + /* * Per-device softc */ @@ 
-127,10 +135,10 @@ struct pci_vtblk_softc { struct virtio_softc vbsc_vs; pthread_mutex_t vsc_mtx; struct vqueue_info vbsc_vq; - int vbsc_fd; - int vbsc_ischr; - struct vtblk_config vbsc_cfg; + struct vtblk_config vbsc_cfg; + struct blockif_ctxt *bc; char vbsc_ident[VTBLK_BLK_ID_BYTES]; + struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ]; }; static void pci_vtblk_reset(void *); @@ -159,11 +167,35 @@ pci_vtblk_reset(void *vsc) vi_reset_dev(&sc->vbsc_vs); } +static void +pci_vtblk_done(struct blockif_req *br, int err) +{ + struct pci_vtblk_ioreq *io = br->br_param; + struct pci_vtblk_softc *sc = io->io_sc; + + /* convert errno into a virtio block error return */ + if (err == EOPNOTSUPP || err == ENOSYS) + *io->io_status = VTBLK_S_UNSUPP; + else if (err != 0) + *io->io_status = VTBLK_S_IOERR; + else + *io->io_status = VTBLK_S_OK; + + /* + * Return the descriptor back to the host. + * We wrote 1 byte (our status) to host. + */ + pthread_mutex_lock(&sc->vsc_mtx); + vq_relchain(&sc->vbsc_vq, io->io_idx, 1); + vq_endchains(&sc->vbsc_vq, 0); + pthread_mutex_unlock(&sc->vsc_mtx); +} + static void pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) { struct virtio_blk_hdr *vbh; - uint8_t *status; + struct pci_vtblk_ioreq *io; int i, n; int err; int iolen; @@ -184,11 +216,14 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) */ assert(n >= 2 && n <= VTBLK_MAXSEGS + 2); + io = &sc->vbsc_ios[idx]; assert((flags[0] & VRING_DESC_F_WRITE) == 0); assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); vbh = iov[0].iov_base; - - status = iov[--n].iov_base; + memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2)); + io->io_req.br_iovcnt = n - 2; + io->io_req.br_offset = vbh->vbh_sector * DEV_BSIZE; + io->io_status = iov[--n].iov_base; assert(iov[n].iov_len == 1); assert(flags[n] & VRING_DESC_F_WRITE); @@ -200,8 +235,6 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) type = vbh->vbh_type & ~VBH_FLAG_BARRIER; writeop = 
(type == VBH_OP_WRITE); - offset = vbh->vbh_sector * DEV_BSIZE; - iolen = 0; for (i = 1; i < n; i++) { /* @@ -217,48 +250,28 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", writeop ? "write" : "read/ident", iolen, i - 1, offset)); - err = 0; switch (type) { + case VBH_OP_READ: + err = blockif_read(sc->bc, &io->io_req); + break; case VBH_OP_WRITE: - if (pwritev(sc->vbsc_fd, iov + 1, i - 1, offset) < 0) - err = errno; + err = blockif_write(sc->bc, &io->io_req); break; - case VBH_OP_READ: - if (preadv(sc->vbsc_fd, iov + 1, i - 1, offset) < 0) - err = errno; + case VBH_OP_FLUSH: + case VBH_OP_FLUSH_OUT: + err = blockif_flush(sc->bc, &io->io_req); break; case VBH_OP_IDENT: /* Assume a single buffer */ strlcpy(iov[1].iov_base, sc->vbsc_ident, MIN(iov[1].iov_len, sizeof(sc->vbsc_ident))); - err = 0; - break; - case VBH_OP_FLUSH: - case VBH_OP_FLUSH_OUT: - if (sc->vbsc_ischr) { - if (ioctl(sc->vbsc_fd, DIOCGFLUSH)) - err = errno; - } else if (fsync(sc->vbsc_fd)) - err = errno; - break; + pci_vtblk_done(&io->io_req, 0); + return; default: - err = -ENOSYS; - break; + pci_vtblk_done(&io->io_req, EOPNOTSUPP); + return; } - - /* convert errno into a virtio block error return */ - if (err == -ENOSYS) - *status = VTBLK_S_UNSUPP; - else if (err != 0) - *status = VTBLK_S_IOERR; - else - *status = VTBLK_S_OK; - - /* - * Return the descriptor back to the host. - * We wrote 1 byte (our status) to host. - */ - vq_relchain(vq, idx, 1); + assert(err == 0); } static void @@ -268,19 +281,18 @@ pci_vtblk_notify(void *vsc, struct vqueue_info *vq) while (vq_has_descs(vq)) pci_vtblk_proc(sc, vq); - vq_endchains(vq, 1); /* Generate interrupt if appropriate. 
*/ } static int pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { - struct stat sbuf; + char bident[sizeof("XX:X:X")]; + struct blockif_ctxt *bctxt; MD5_CTX mdctx; u_char digest[16]; struct pci_vtblk_softc *sc; - off_t size, sts, sto; - int fd; - int sectsz; + off_t size; + int i, sectsz, sts, sto; if (opts == NULL) { printf("virtio-block: backing device required\n"); @@ -290,43 +302,26 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) /* * The supplied backing file has to exist */ - fd = open(opts, O_RDWR); - if (fd < 0) { + snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func); + bctxt = blockif_open(opts, bident); + if (bctxt == NULL) { perror("Could not open backing file"); return (1); } - if (fstat(fd, &sbuf) < 0) { - perror("Could not stat backing file"); - close(fd); - return (1); - } - - /* - * Deal with raw devices - */ - size = sbuf.st_size; - sectsz = DEV_BSIZE; - sts = sto = 0; - if (S_ISCHR(sbuf.st_mode)) { - if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || - ioctl(fd, DIOCGSECTORSIZE, &sectsz)) { - perror("Could not fetch dev blk/sector size"); - close(fd); - return (1); - } - assert(size != 0); - assert(sectsz != 0); - if (ioctl(fd, DIOCGSTRIPESIZE, &sts) == 0 && sts > 0) - ioctl(fd, DIOCGSTRIPEOFFSET, &sto); - } else - sts = sbuf.st_blksize; + size = blockif_size(bctxt); + sectsz = blockif_sectsz(bctxt); + blockif_psectsz(bctxt, &sts, &sto); sc = calloc(1, sizeof(struct pci_vtblk_softc)); - - /* record fd of storage device/file */ - sc->vbsc_fd = fd; - sc->vbsc_ischr = S_ISCHR(sbuf.st_mode); + sc->bc = bctxt; + for (i = 0; i < VTBLK_RINGSZ; i++) { + struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i]; + io->io_req.br_callback = pci_vtblk_done; + io->io_req.br_param = io; + io->io_sc = sc; + io->io_idx = i; + } pthread_mutex_init(&sc->vsc_mtx, NULL); @@ -375,8 +370,11 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pci_lintr_request(pi); - if (vi_intr_init(&sc->vbsc_vs, 1, 
fbsdrun_virtio_msix())) + if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) { + blockif_close(sc->bc); + free(sc); return (1); + } vi_set_io_bar(&sc->vbsc_vs, 0); return (0); } -- 2.45.0