2 * Copyright (c) 2011 NetApp, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/linker_set.h>
36 #include <sys/ioctl.h>
#define VTBLK_RINGSZ	64	/* number of descriptors in the virtqueue */

#define VTBLK_CFGSZ	28	/* size of the virtio-block config window */

/*
 * Register window for the device config space within the virtio i/o BAR.
 * All macro bodies are fully parenthesized so they expand safely inside
 * larger expressions (e.g. "offset + size > VTBLK_REGSZ").
 */
#define VTBLK_R_CFG		VTCFG_R_CFG0
#define VTBLK_R_CFG_END		(VTBLK_R_CFG + VTBLK_CFGSZ - 1)
#define VTBLK_R_MAX		VTBLK_R_CFG_END

#define VTBLK_REGSZ		(VTBLK_R_MAX + 1)	/* total i/o BAR size */

#define VTBLK_MAXSEGS	32	/* max data segments per request */

#define VTBLK_S_IOERR	1	/* per-request status byte: i/o error */

/* Host capability bits advertised to the guest. */
#define VTBLK_S_HOSTCAPS \
  ( 0x00000004 |	/* host maximum request segments */ \
    0x10000000 )	/* supports indirect descriptors */
/*
 * Host-side virtqueue state (fields of struct vring_hqueue; the struct
 * header line is elided in this excerpt).
 */
78 uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */
80 /* Host-context pointers to the queue */
81 struct virtio_desc *hq_dtable;
82 uint16_t *hq_avail_flags;
83 uint16_t *hq_avail_idx; /* monotonically increasing */
84 uint16_t *hq_avail_ring;
86 uint16_t *hq_used_flags;
87 uint16_t *hq_used_idx; /* monotonically increasing */
88 struct virtio_used *hq_used_ring;
/*
 * Guest-visible virtio-block configuration space (fields of struct
 * vtblk_config; header and some fields elided in this excerpt).
 */
95 uint64_t vbc_capacity; /* device size in DEV_BSIZE sectors */
96 uint32_t vbc_size_max;
101 uint32_t vbc_blk_size;
102 uint32_t vbc_sectors_max;
/* Compile-time check: the config struct must exactly fill the 28-byte window. */
104 CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ);
107 * Fixed-size block header
109 struct virtio_blk_hdr {
/* Request type codes stored in vbh_type (field declarations elided here). */
110 #define VBH_OP_READ 0
111 #define VBH_OP_WRITE 1
/*
 * Debug/warning print helpers.  DPRINTF output is gated by the runtime
 * flag pci_vtblk_debug; WPRINTF always prints.  Both are wrapped in
 * do { } while (0) so they behave as a single statement — the original
 * bare "if (flag) printf params" form silently captures a following
 * "else" (dangling-else hazard).
 */
static int pci_vtblk_debug;
#define DPRINTF(params) do {			\
	if (pci_vtblk_debug)			\
		printf params;			\
} while (0)
#define WPRINTF(params) do {			\
	printf params;				\
} while (0)
/*
 * Per-device soft state.  NOTE(review): several fields referenced later
 * (vbsc_fd, vbsc_status, vbsc_isr, vbsc_pfn, vbsc_lastq) are elided in
 * this excerpt.
 */
127 struct pci_vtblk_softc {
/* Back-pointer to the owning PCI device instance. */
128 struct pci_devinst *vbsc_pi;
/* Feature bits the guest acknowledged (written via VTCFG_R_GUESTCAP). */
133 uint32_t vbsc_features;
/* The single virtqueue and the guest-visible config space. */
135 struct vring_hqueue vbsc_q;
136 struct vtblk_config vbsc_cfg;
140 * Return the number of available descriptors in the vring taking care
141 * of the 16-bit index wraparound.
144 hq_num_avail(struct vring_hqueue *hq)
/* Guest publishes *hq_avail_idx; we have consumed up to hq_cur_aidx. */
148 if (*hq->hq_avail_idx >= hq->hq_cur_aidx)
149 ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx;
/* avail_idx wrapped past 16 bits: count across the wrap. */
151 ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1;
/* Sanity: the available count can never exceed the ring size. */
153 assert(ndesc >= 0 && ndesc <= hq->hq_size);
/*
 * Handle a guest write to the status register and latch the value.
 * NOTE(review): the condition guarding the reset message is elided in
 * this excerpt — presumably a write of 0 requests a device reset; the
 * reset itself appears to be log-only here.  Confirm against upstream.
 */
159 pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value)
162 DPRINTF(("vtblk: device reset requested !\n"));
165 sc->vbsc_status = value;
/*
 * Process the request at the head of the avail ring.  The guest must
 * supply a single indirect descriptor whose array holds the fixed
 * request header, one or more data descriptors, and a final writable
 * status byte.  The transfer is performed with preadv/pwritev against
 * the backing file, then the descriptor is posted to the used ring.
 * NOTE(review): several original lines are elided in this excerpt.
 */
169 pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq)
171 struct iovec iov[VTBLK_MAXSEGS];
172 struct virtio_blk_hdr *vbh;
173 struct virtio_desc *vd, *vid;
174 struct virtio_used *vu;
180 int uidx, aidx, didx;
/* Head indexes: next used-ring slot and next avail entry to consume. */
184 uidx = *hq->hq_used_idx;
185 aidx = hq->hq_cur_aidx;
186 didx = hq->hq_avail_ring[aidx % hq->hq_size];
187 assert(didx >= 0 && didx < hq->hq_size);
189 vd = &hq->hq_dtable[didx];
192 * Verify that the descriptor is indirect, and obtain
193 * the pointer to the indirect descriptor.
194 * There has to be space for at least 3 descriptors
195 * in the indirect descriptor array: the block header,
196 * 1 or more data descriptors, and a status byte.
198 assert(vd->vd_flags & VRING_DESC_F_INDIRECT);
200 nsegs = vd->vd_len / sizeof(struct virtio_desc);
202 assert(nsegs < VTBLK_MAXSEGS + 2);
204 vid = paddr_guest2host(vd->vd_addr);
/* Indirect descriptor tables may not nest. */
205 assert((vid->vd_flags & VRING_DESC_F_INDIRECT) == 0);
208 * The first descriptor will be the read-only fixed header
210 vbh = paddr_guest2host(vid[0].vd_addr);
211 assert(vid[0].vd_len == sizeof(struct virtio_blk_hdr));
212 assert(vid[0].vd_flags & VRING_DESC_F_NEXT);
213 assert((vid[0].vd_flags & VRING_DESC_F_WRITE) == 0);
215 writeop = (vbh->vbh_type == VBH_OP_WRITE);
/* vbh_sector is in DEV_BSIZE units; convert to a byte offset. */
217 offset = vbh->vbh_sector * DEV_BSIZE;
220 * Build up the iovec based on the guest's data descriptors
222 for (i = 1, iolen = 0; i < nsegs - 1; i++) {
223 iov[i-1].iov_base = paddr_guest2host(vid[i].vd_addr);
224 iov[i-1].iov_len = vid[i].vd_len;
225 iolen += vid[i].vd_len;
227 assert(vid[i].vd_flags & VRING_DESC_F_NEXT);
228 assert((vid[i].vd_flags & VRING_DESC_F_INDIRECT) == 0);
231 * - write op implies read-only descriptor,
232 * - read op implies write-only descriptor,
233 * therefore test the inverse of the descriptor bit
236 assert(((vid[i].vd_flags & VRING_DESC_F_WRITE) == 0) ==
240 /* Lastly, get the address of the status byte */
241 status = paddr_guest2host(vid[nsegs - 1].vd_addr);
242 assert(vid[nsegs - 1].vd_len == 1);
243 assert((vid[nsegs - 1].vd_flags & VRING_DESC_F_NEXT) == 0);
244 assert(vid[nsegs - 1].vd_flags & VRING_DESC_F_WRITE);
246 DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
247 writeop ? "write" : "read", iolen, nsegs - 2, offset));
250 err = pwritev(sc->vbsc_fd, iov, nsegs - 2, offset);
252 err = preadv(sc->vbsc_fd, iov, nsegs - 2, offset);
/* Report success or failure through the guest-visible status byte. */
255 *status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK;
258 * Return the single indirect descriptor back to the guest
260 vu = &hq->hq_used_ring[uidx % hq->hq_size];
/* Publish the new used index so the guest sees the completion. */
264 *hq->hq_used_idx += 1;
/*
 * Queue-notify ("kick") handler: drain every available descriptor
 * through pci_vtblk_proc(), then raise an MSI unless the guest has
 * suppressed interrupts via VRING_AVAIL_F_NO_INTERRUPT.
 */
268 pci_vtblk_qnotify(struct pci_vtblk_softc *sc)
270 struct vring_hqueue *hq = &sc->vbsc_q;
275 * Calculate number of ring entries to process
277 ndescs = hq_num_avail(hq);
283 * Process each pending entry; pci_vtblk_proc() consumes one
284 * avail-ring request per call and posts it to the used ring.
286 for (i = 0; i < ndescs; i++)
287 pci_vtblk_proc(sc, hq);
290 * Generate an interrupt if able
292 if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0 &&
295 pci_generate_msi(sc->vbsc_pi, 0);
/*
 * Called when the guest programs the queue PFN register: derive host
 * pointers to the descriptor table, avail ring and used ring inside
 * guest memory, following the legacy virtio virtqueue layout (desc
 * table, then avail ring, then the page-aligned used ring).
 */
301 pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn)
303 struct vring_hqueue *hq;
/* Remember the guest physical base so reads of the PFN reg can return it. */
305 sc->vbsc_pfn = pfn << VRING_PFN;
308 * Set up host pointers to the various parts of the
312 hq->hq_size = VTBLK_RINGSZ;
314 hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN);
/* Avail ring follows the descriptor table: flags, idx, then ring[]. */
315 hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size);
316 hq->hq_avail_idx = hq->hq_avail_flags + 1;
317 hq->hq_avail_ring = hq->hq_avail_flags + 2;
/* Used ring starts at the next aligned boundary past the avail ring. */
318 hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
320 hq->hq_used_idx = hq->hq_used_flags + 1;
321 hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
324 * Initialize queue indexes
/*
 * Device instance constructor.  Opens the backing file named by 'opts'
 * read/write, sizes the virtual disk from it, fills in the virtio-block
 * config space, and programs the PCI config registers, i/o BAR and MSI
 * capability.  NOTE(review): error-return lines and the malloc() NULL
 * check are elided in this excerpt — the allocation below is used
 * unchecked; confirm against upstream.
 */
330 pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
333 struct pci_vtblk_softc *sc;
337 printf("virtio-block: backing device required\n");
342 * Access to guest memory is required. Fail if
345 if (paddr_guest2host(0) == NULL)
349 * The supplied backing file has to exist
351 fd = open(opts, O_RDWR);
353 perror("Could not open backing file");
357 if (fstat(fd, &sbuf) < 0) {
358 perror("Could not stat backing file");
363 sc = malloc(sizeof(struct pci_vtblk_softc));
364 memset(sc, 0, sizeof(struct pci_vtblk_softc));
370 /* setup virtio block config space */
/* Capacity is reported in DEV_BSIZE sectors, rounded down. */
371 sc->vbsc_cfg.vbc_capacity = sbuf.st_size / DEV_BSIZE;
372 sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS;
373 sc->vbsc_cfg.vbc_blk_size = DEV_BSIZE;
374 sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */
375 sc->vbsc_cfg.vbc_geom_c = 0; /* no geometry */
376 sc->vbsc_cfg.vbc_geom_h = 0;
377 sc->vbsc_cfg.vbc_geom_s = 0;
378 sc->vbsc_cfg.vbc_sectors_max = 0;
380 /* initialize config space */
381 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
382 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
383 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
384 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
385 pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, VTBLK_REGSZ);
386 pci_emul_add_msicap(pi, 1);
/*
 * Guest write to the virtio i/o BAR: dispatch on register offset.
 * Writes to the host-capability register and the read-only config
 * window are logged and ignored.  NOTE(review): the switch header,
 * break statements and some case labels are elided in this excerpt.
 */
392 pci_vtblk_write(struct pci_devinst *pi, int baridx, int offset, int size,
395 struct pci_vtblk_softc *sc = pi->pi_arg;
/* Reject accesses that run past the register window. */
397 if (offset + size > VTBLK_REGSZ) {
398 DPRINTF(("vtblk_write: 2big, offset %d size %d\n",
/* Guest acknowledges features; mask to what the host offered. */
404 case VTCFG_R_GUESTCAP:
406 sc->vbsc_features = value & VTBLK_S_HOSTCAPS;
/* Guest programmed the queue PFN: set up host ring pointers. */
410 pci_vtblk_ring_init(sc, value);
414 sc->vbsc_lastq = value;
416 case VTCFG_R_QNOTIFY:
419 pci_vtblk_qnotify(sc);
423 pci_vtblk_update_status(sc, value);
425 case VTCFG_R_HOSTCAP:
428 case VTBLK_R_CFG ... VTBLK_R_CFG_END:
429 DPRINTF(("vtblk: write to readonly reg %d\n\r", offset));
432 DPRINTF(("vtblk: unknown i/o write offset %d\n\r", offset));
/*
 * Guest read of the virtio i/o BAR: return the value of the addressed
 * register.  Reading the ISR register clears it; reads within the
 * config window return bytes of vbsc_cfg.  NOTE(review): the switch
 * header, break statements and the final return are elided here.
 */
439 pci_vtblk_read(struct pci_devinst *pi, int baridx, int offset, int size)
441 struct pci_vtblk_softc *sc = pi->pi_arg;
/* Reject accesses that run past the register window. */
444 if (offset + size > VTBLK_REGSZ) {
445 DPRINTF(("vtblk_read: 2big, offset %d size %d\n",
451 case VTCFG_R_HOSTCAP:
453 value = VTBLK_S_HOSTCAPS;
455 case VTCFG_R_GUESTCAP:
457 value = sc->vbsc_features; /* XXX never read ? */
/* Report the queue PFN the guest previously programmed. */
461 value = sc->vbsc_pfn >> VRING_PFN;
/* Only queue 0 exists; any other selector reports size 0. */
464 value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ: 0;
468 value = sc->vbsc_lastq; /* XXX never read ? */
470 case VTCFG_R_QNOTIFY:
472 value = 0; /* XXX never read ? */
476 value = sc->vbsc_status;
480 value = sc->vbsc_isr;
481 sc->vbsc_isr = 0; /* a read clears this flag */
/* Byte-wise access into the config structure. */
483 case VTBLK_R_CFG ... VTBLK_R_CFG_END:
485 value = *((uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG);
488 DPRINTF(("vtblk: unknown i/o read offset %d\n\r", offset));
/*
 * Device-emulation registration: hooks this virtio-blk implementation
 * into the PCI emulation framework under the name "virtio-blk".
 * NOTE(review): the closing "};" line is elided in this excerpt.
 */
496 struct pci_devemu pci_de_vblk = {
497 .pe_emu = "virtio-blk",
498 .pe_init = pci_vtblk_init,
499 .pe_iow = pci_vtblk_write,
500 .pe_ior = pci_vtblk_read,
502 PCI_EMUL_SET(pci_de_vblk);