2 * XenBSD block device driver
4 * Copyright (c) 2009 Scott Long, Yahoo!
5 * Copyright (c) 2009 Frank Suchomel, Citrix
6 * Copyright (c) 2009 Doug F. Rabson, Citrix
7 * Copyright (c) 2005 Kip Macy
8 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
9 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
12 * Permission is hereby granted, free of charge, to any person obtaining a copy
13 * of this software and associated documentation files (the "Software"), to
14 * deal in the Software without restriction, including without limitation the
15 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
16 * sell copies of the Software, and to permit persons to whom the Software is
17 * furnished to do so, subject to the following conditions:
19 * The above copyright notice and this permission notice shall be included in
20 * all copies or substantial portions of the Software.
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 * DEALINGS IN THE SOFTWARE.
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/kernel.h>
43 #include <sys/module.h>
45 #include <machine/bus.h>
47 #include <machine/resource.h>
48 #include <machine/intr_machdep.h>
49 #include <machine/vmparam.h>
50 #include <sys/bus_dma.h>
52 #include <machine/_inttypes.h>
53 #include <machine/xen/xen-os.h>
54 #include <machine/xen/xenvar.h>
55 #include <machine/xen/xenfunc.h>
57 #include <xen/hypervisor.h>
58 #include <xen/xen_intr.h>
59 #include <xen/evtchn.h>
60 #include <xen/gnttab.h>
61 #include <xen/interface/grant_table.h>
62 #include <xen/interface/io/protocols.h>
63 #include <xen/xenbus/xenbusvar.h>
65 #include <geom/geom_disk.h>
67 #include <dev/xen/blkfront/block.h>
69 #include "xenbus_if.h"
72 static void xb_free_command(struct xb_command *cm);
73 static void xb_startio(struct xb_softc *sc);
74 static void blkfront_connect(struct xb_softc *);
75 static void blkfront_closing(device_t);
76 static int blkfront_detach(device_t);
77 static int setup_blkring(struct xb_softc *);
78 static void blkif_int(void *);
79 static void blkfront_initialize(struct xb_softc *);
80 static int blkif_completion(struct xb_command *);
81 static void blkif_free(struct xb_softc *);
82 static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int);
84 MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");
86 #define GRANT_INVALID_REF 0
88 /* Control whether runtime update of vbds is enabled. */
89 #define ENABLE_VBD_UPDATE 0
92 static void vbd_update(void);
95 #define BLKIF_STATE_DISCONNECTED 0
96 #define BLKIF_STATE_CONNECTED 1
97 #define BLKIF_STATE_SUSPENDED 2
100 static char *blkif_state_name[] = {
101 [BLKIF_STATE_DISCONNECTED] = "disconnected",
102 [BLKIF_STATE_CONNECTED] = "connected",
103 [BLKIF_STATE_SUSPENDED] = "closed",
106 static char * blkif_status_name[] = {
107 [BLKIF_INTERFACE_STATUS_CLOSED] = "closed",
108 [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
109 [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected",
110 [BLKIF_INTERFACE_STATUS_CHANGED] = "changed",
115 #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
117 #define DPRINTK(fmt, args...)
120 static int blkif_open(struct disk *dp);
121 static int blkif_close(struct disk *dp);
122 static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
123 static int blkif_queue_request(struct xb_softc *sc, struct xb_command *cm);
124 static void xb_strategy(struct bio *bp);
126 // In order to quiesce the device during kernel dumps, outstanding requests to
127 // DOM0 for disk reads/writes need to be accounted for.
128 static int xb_dump(void *, void *, vm_offset_t, off_t, size_t);
130 /* XXX move to xb_vbd.c when VBD update support is added */
133 #define XBD_SECTOR_SIZE 512 /* XXX: assume for now */
134 #define XBD_SECTOR_SHFT 9
137 * Translate Linux major/minor to an appropriate name and unit
138 * number. For HVM guests, this allows us to use the same drive names
139 * with blkfront as the emulated drives, easing transition slightly.
/*
 * Translate a Linux-style major/minor (vdevice) number into a FreeBSD
 * drive name and unit number via the info[] table below, so HVM guests
 * see the same drive names ("ad", "da") as the emulated devices would.
 * On a table hit, results are written through the *unit and *name
 * out-parameters.
 */
142 blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
144 static struct vdev_info {
150 {3, 6, 0, "ad"}, /* ide0 */
151 {22, 6, 2, "ad"}, /* ide1 */
152 {33, 6, 4, "ad"}, /* ide2 */
153 {34, 6, 6, "ad"}, /* ide3 */
154 {56, 6, 8, "ad"}, /* ide4 */
155 {57, 6, 10, "ad"}, /* ide5 */
156 {88, 6, 12, "ad"}, /* ide6 */
157 {89, 6, 14, "ad"}, /* ide7 */
158 {90, 6, 16, "ad"}, /* ide8 */
159 {91, 6, 18, "ad"}, /* ide9 */
161 {8, 4, 0, "da"}, /* scsi disk0 */
162 {65, 4, 16, "da"}, /* scsi disk1 */
163 {66, 4, 32, "da"}, /* scsi disk2 */
164 {67, 4, 48, "da"}, /* scsi disk3 */
165 {68, 4, 64, "da"}, /* scsi disk4 */
166 {69, 4, 80, "da"}, /* scsi disk5 */
167 {70, 4, 96, "da"}, /* scsi disk6 */
168 {71, 4, 112, "da"}, /* scsi disk7 */
169 {128, 4, 128, "da"}, /* scsi disk8 */
170 {129, 4, 144, "da"}, /* scsi disk9 */
171 {130, 4, 160, "da"}, /* scsi disk10 */
172 {131, 4, 176, "da"}, /* scsi disk11 */
173 {132, 4, 192, "da"}, /* scsi disk12 */
174 {133, 4, 208, "da"}, /* scsi disk13 */
175 {134, 4, 224, "da"}, /* scsi disk14 */
176 {135, 4, 240, "da"}, /* scsi disk15 */
178 {202, 4, 0, "xbd"}, /* xbd */
182 int major = vdevice >> 8;
183 int minor = vdevice & 0xff;
/* Bit 28 set marks an "extended" vdevice: unit is encoded directly. */
186 if (vdevice & (1 << 28)) {
187 *unit = (vdevice & ((1 << 28) - 1)) >> 8;
/* Scan the table; a zero major terminates the list. */
191 for (i = 0; info[i].major; i++) {
192 if (info[i].major == major) {
193 *unit = info[i].base + (minor >> info[i].shift);
194 *name = info[i].name;
/*
 * Create and publish the GEOM disk(9) device for this blkfront
 * instance: allocate a struct disk, fill in the method pointers and
 * geometry, and call disk_create().
 */
204 xlvbd_add(struct xb_softc *sc, blkif_sector_t sectors,
205 int vdevice, uint16_t vdisk_info, unsigned long sector_size)
210 blkfront_vdevice_to_unit(vdevice, &unit, &name);
/* Announce the legacy alias only when it differs from "xbd". */
214 if (strcmp(name, "xbd"))
215 device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit);
217 sc->xb_disk = disk_alloc();
218 sc->xb_disk->d_unit = sc->xb_unit;
219 sc->xb_disk->d_open = blkif_open;
220 sc->xb_disk->d_close = blkif_close;
221 sc->xb_disk->d_ioctl = blkif_ioctl;
222 sc->xb_disk->d_strategy = xb_strategy;
223 sc->xb_disk->d_dump = xb_dump;
224 sc->xb_disk->d_name = name;
225 sc->xb_disk->d_drv1 = sc;
226 sc->xb_disk->d_sectorsize = sector_size;
/* Media size derives from the back-end reported sector count. */
228 sc->xb_disk->d_mediasize = sectors * sector_size;
229 sc->xb_disk->d_maxsize = sc->max_request_size;
230 sc->xb_disk->d_flags = 0;
231 disk_create(sc->xb_disk, DISK_VERSION_00);
236 /************************ end VBD support *****************/
239 * Read/write routine for a buffer. Finds the proper unit, place it on
240 * the sortq and kick the controller.
/*
 * disk(9) strategy routine: enqueue the bio on the softc's bio queue
 * under xb_io_lock and kick the controller; an invalid request is
 * failed immediately with EINVAL.
 */
243 xb_strategy(struct bio *bp)
245 struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
/* Bad request: fail the bio without queueing it. */
249 bp->bio_error = EINVAL;
250 bp->bio_flags |= BIO_ERROR;
251 bp->bio_resid = bp->bio_bcount;
257 * Place it in the queue of disk activities for this disk
259 mtx_lock(&sc->xb_io_lock);
261 xb_enqueue_bio(sc, bp);
264 mtx_unlock(&sc->xb_io_lock);
/*
 * Complete the bio attached to a finished command, reporting a
 * non-OKAY back-end status via BIO_ERROR.
 */
269 xb_bio_complete(struct xb_softc *sc, struct xb_command *cm)
275 if ( unlikely(cm->status != BLKIF_RSP_OKAY) ) {
276 disk_err(bp, "disk error" , -1, 0);
277 printf(" status: %x\n", cm->status);
278 bp->bio_flags |= BIO_ERROR;
281 if (bp->bio_flags & BIO_ERROR)
290 // Quiesce the disk writes for a dump file before allowing the next buffer.
/*
 * Busy-wait until the busy-command queue drains, polling the shared
 * ring for responses with RING_FINAL_CHECK_FOR_RESPONSES.  Used by
 * the dump path, which cannot sleep.
 */
292 xb_quiesce(struct xb_softc *sc)
296 // While there are outstanding requests
297 while (!TAILQ_EMPTY(&sc->cm_busy)) {
298 RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, mtd);
300 /* Received request completions, update queue. */
303 if (!TAILQ_EMPTY(&sc->cm_busy)) {
305 * Still pending requests, wait for the disk i/o
313 /* Kernel dump function for a paravirtualized disk device */
/* Completion hook used while dumping: park finished commands on the
 * complete queue for xb_dump() to reap and status-check. */
315 xb_dump_complete(struct xb_command *cm)
318 xb_enqueue_complete(cm);
/*
 * disk(9) dump method: write `length` bytes at `offset` to the virtual
 * disk while the system is crashing.  Quiesces outstanding I/O, splits
 * the buffer into max_request_size chunks queued as BLKIF_OP_WRITE
 * commands, then polls for completion and checks each status.
 */
322 xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
325 struct disk *dp = arg;
326 struct xb_softc *sc = (struct xb_softc *) dp->d_drv1;
327 struct xb_command *cm;
335 xb_quiesce(sc); /* All quiet on the western front. */
338 * If this lock is held, then this module is failing, and a
339 * successful kernel dump is highly unlikely anyway.
341 mtx_lock(&sc->xb_io_lock);
343 /* Split the 64KB block as needed */
344 for (sbp=0; length > 0; sbp++) {
345 cm = xb_dequeue_free(sc);
/* Out of command slots: abort the dump. */
347 mtx_unlock(&sc->xb_io_lock);
348 device_printf(sc->xb_dev, "dump: no more commands?\n");
352 if (gnttab_alloc_grant_references(sc->max_request_segments,
353 &cm->gref_head) != 0) {
355 mtx_unlock(&sc->xb_io_lock);
356 device_printf(sc->xb_dev, "no more grant allocs?\n");
/* Clamp each chunk to the negotiated maximum request size. */
360 chunk = length > sc->max_request_size
361 ? sc->max_request_size : length;
364 cm->operation = BLKIF_OP_WRITE;
365 cm->sector_number = offset / dp->d_sectorsize;
366 cm->cm_complete = xb_dump_complete;
368 xb_enqueue_ready(cm);
372 virtual = (char *) virtual + chunk;
375 /* Tell DOM0 to do the I/O */
377 mtx_unlock(&sc->xb_io_lock);
379 /* Poll for the completion. */
380 xb_quiesce(sc); /* All quiet on the eastern front */
382 /* If there were any errors, bail out... */
383 while ((cm = xb_dequeue_complete(sc)) != NULL) {
384 if (cm->status != BLKIF_RSP_OKAY) {
385 device_printf(sc->xb_dev,
386 "Dump I/O failed at sector %jd\n",
/* Newbus probe: claim XenBus devices whose type is "vbd". */
398 blkfront_probe(device_t dev)
401 if (!strcmp(xenbus_get_type(dev), "vbd")) {
402 device_set_desc(dev, "Virtual Block Device");
411 * Setup supplies the backend dir, virtual device. We place an event
412 * channel and shared frame entries. We watch backend to wait if it's
/*
 * Newbus attach: read "virtual-device" from the XenStore, derive the
 * unit number/name, initialize the softc lock and queues, invalidate
 * the ring grant references, and announce XenbusStateInitialising so
 * the back-end publishes its protocol capabilities.
 */
416 blkfront_attach(device_t dev)
425 /* FIXME: Use dynamic device id if this is not set. */
426 error = xs_scanf(XST_NIL, xenbus_get_node(dev),
427 "virtual-device", NULL, "%i", &vdevice);
429 xenbus_dev_fatal(dev, error, "reading virtual-device");
430 device_printf(dev, "Couldn't determine virtual device.\n");
434 blkfront_vdevice_to_unit(vdevice, &unit, &name);
/* Keep the device's unit in sync with the legacy naming scheme. */
435 if (!strcmp(name, "xbd"))
436 device_set_unit(dev, unit);
438 sc = device_get_softc(dev);
439 mtx_init(&sc->xb_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
443 xb_initq_complete(sc);
/* No ring pages are granted yet. */
445 for (i = 0; i < XBF_MAX_RING_PAGES; i++)
446 sc->ring_ref[i] = GRANT_INVALID_REF;
449 sc->vdevice = vdevice;
450 sc->connected = BLKIF_STATE_DISCONNECTED;
452 /* Wait for backend device to publish its protocol capabilities. */
453 xenbus_set_state(dev, XenbusStateInitialising);
/*
 * Suspend method: mark the interface SUSPENDED so no new requests are
 * issued, then sleep (up to 30s per wait) until outstanding I/O on
 * cm_busy drains; the saved connection state is restored on exit.
 */
459 blkfront_suspend(device_t dev)
461 struct xb_softc *sc = device_get_softc(dev);
465 /* Prevent new requests being issued until we fix things up. */
466 mtx_lock(&sc->xb_io_lock);
467 saved_state = sc->connected;
468 sc->connected = BLKIF_STATE_SUSPENDED;
470 /* Wait for outstanding I/O to drain. */
472 while (TAILQ_EMPTY(&sc->cm_busy) == 0) {
473 if (msleep(&sc->cm_busy, &sc->xb_io_lock,
474 PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
479 mtx_unlock(&sc->xb_io_lock);
482 sc->connected = saved_state;
/* Resume method: re-run front-end initialization (e.g. after
 * migration or save/restore). */
488 blkfront_resume(device_t dev)
490 struct xb_softc *sc = device_get_softc(dev);
492 DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));
495 blkfront_initialize(sc);
/*
 * Negotiate transfer parameters with the back-end via the XenStore
 * (ring pages, request count/segments/size), clamp them to front-end
 * compile-time limits, allocate the DMA tag, shadow command array and
 * shared ring, publish the chosen values, and advance this device to
 * XenbusStateInitialised.
 */
500 blkfront_initialize(struct xb_softc *sc)
502 const char *otherend_path;
503 const char *node_path;
507 if (xenbus_get_state(sc->xb_dev) != XenbusStateInitialising) {
508 /* Initialization has already been performed. */
513 * Protocol defaults valid even if negotiation for a
517 sc->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE);
518 sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
519 sc->max_request_size = (sc->max_request_segments - 1) * PAGE_SIZE;
520 sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);
523 * Protocol negotiation.
525 * \note xs_gather() returns on the first encountered error, so
526 * we must use independent calls in order to guarantee
527 * we don't miss information in a sparsely populated back-end
530 otherend_path = xenbus_get_otherend_path(sc->xb_dev);
531 node_path = xenbus_get_node(sc->xb_dev);
/* Errors are ignored: the defaults above remain in force. */
532 (void)xs_scanf(XST_NIL, otherend_path,
533 "max-ring-pages", NULL, "%" PRIu32,
536 (void)xs_scanf(XST_NIL, otherend_path,
537 "max-requests", NULL, "%" PRIu32,
540 (void)xs_scanf(XST_NIL, otherend_path,
541 "max-request-segments", NULL, "%" PRIu32,
542 &sc->max_request_segments);
544 (void)xs_scanf(XST_NIL, otherend_path,
545 "max-request-size", NULL, "%" PRIu32,
546 &sc->max_request_size);
/* Clamp back-end advertised limits to our compile-time maximums. */
548 if (sc->ring_pages > XBF_MAX_RING_PAGES) {
549 device_printf(sc->xb_dev, "Back-end specified ring-pages of "
550 "%u limited to front-end limit of %zu.\n",
551 sc->ring_pages, XBF_MAX_RING_PAGES);
552 sc->ring_pages = XBF_MAX_RING_PAGES;
555 if (sc->max_requests > XBF_MAX_REQUESTS) {
556 device_printf(sc->xb_dev, "Back-end specified max_requests of "
557 "%u limited to front-end limit of %u.\n",
558 sc->max_requests, XBF_MAX_REQUESTS);
559 sc->max_requests = XBF_MAX_REQUESTS;
/* NOTE(review): "specificed" typos below are in runtime strings;
 * deliberately left untouched here. */
562 if (sc->max_request_segments > XBF_MAX_SEGMENTS_PER_REQUEST) {
563 device_printf(sc->xb_dev, "Back-end specificed "
564 "max_requests_segments of %u limited to "
565 "front-end limit of %u.\n",
566 sc->max_request_segments,
567 XBF_MAX_SEGMENTS_PER_REQUEST);
568 sc->max_request_segments = XBF_MAX_SEGMENTS_PER_REQUEST;
571 if (sc->max_request_size > XBF_MAX_REQUEST_SIZE) {
572 device_printf(sc->xb_dev, "Back-end specificed "
573 "max_request_size of %u limited to front-end "
574 "limit of %u.\n", sc->max_request_size,
575 XBF_MAX_REQUEST_SIZE);
576 sc->max_request_size = XBF_MAX_REQUEST_SIZE;
578 sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);
580 /* Allocate datastructures based on negotiated values. */
581 error = bus_dma_tag_create(NULL, /* parent */
582 512, PAGE_SIZE, /* algnmnt, boundary */
583 BUS_SPACE_MAXADDR, /* lowaddr */
584 BUS_SPACE_MAXADDR, /* highaddr */
585 NULL, NULL, /* filter, filterarg */
586 sc->max_request_size,
587 sc->max_request_segments,
588 PAGE_SIZE, /* maxsegsize */
589 BUS_DMA_ALLOCNOW, /* flags */
590 busdma_lock_mutex, /* lockfunc */
591 &sc->xb_io_lock, /* lockarg */
594 xenbus_dev_fatal(sc->xb_dev, error,
595 "Cannot allocate parent DMA tag\n");
599 /* Per-transaction data allocation. */
600 sc->shadow = malloc(sizeof(*sc->shadow) * sc->max_requests,
601 M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
602 if (sc->shadow == NULL) {
603 bus_dma_tag_destroy(sc->xb_io_dmat);
604 xenbus_dev_fatal(sc->xb_dev, error,
605 "Cannot allocate request structures\n");
/* Pre-allocate per-command grant-ref arrays and DMA maps. */
609 for (i = 0; i < sc->max_requests; i++) {
610 struct xb_command *cm;
613 cm->sg_refs = malloc(sizeof(grant_ref_t)
614 * sc->max_request_segments,
615 M_XENBLOCKFRONT, M_NOWAIT);
616 if (cm->sg_refs == NULL)
620 if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0)
625 if (setup_blkring(sc) != 0)
/* Publish our negotiated parameters back to the XenStore. */
628 error = xs_printf(XST_NIL, node_path,
629 "ring-pages","%u", sc->ring_pages);
631 xenbus_dev_fatal(sc->xb_dev, error,
632 "writing %s/ring-pages",
637 error = xs_printf(XST_NIL, node_path,
638 "max-requests","%u", sc->max_requests);
640 xenbus_dev_fatal(sc->xb_dev, error,
641 "writing %s/max-requests",
646 error = xs_printf(XST_NIL, node_path,
647 "max-request-segments","%u", sc->max_request_segments);
649 xenbus_dev_fatal(sc->xb_dev, error,
650 "writing %s/max-request-segments",
655 error = xs_printf(XST_NIL, node_path,
656 "max-request-size","%u", sc->max_request_size);
658 xenbus_dev_fatal(sc->xb_dev, error,
659 "writing %s/max-request-size",
664 error = xs_printf(XST_NIL, node_path, "event-channel",
665 "%u", irq_to_evtchn_port(sc->irq));
667 xenbus_dev_fatal(sc->xb_dev, error,
668 "writing %s/event-channel",
673 error = xs_printf(XST_NIL, node_path,
674 "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
676 xenbus_dev_fatal(sc->xb_dev, error,
677 "writing %s/protocol",
682 xenbus_set_state(sc->xb_dev, XenbusStateInitialised);
/*
 * Allocate the shared blkif ring (ring_pages * PAGE_SIZE), grant each
 * page to the back-end, publish the grant references ("ring-ref",
 * "ring-refN") in the XenStore, and bind the event-channel interrupt
 * handler (blkif_int).
 */
686 setup_blkring(struct xb_softc *sc)
688 blkif_sring_t *sring;
689 uintptr_t sring_page_addr;
693 sring = malloc(sc->ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
696 xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring");
699 SHARED_RING_INIT(sring);
700 FRONT_RING_INIT(&sc->ring, sring, sc->ring_pages * PAGE_SIZE);
/* Grant the back-end access to each page of the shared ring. */
702 for (i = 0, sring_page_addr = (uintptr_t)sring;
704 i++, sring_page_addr += PAGE_SIZE) {
706 error = xenbus_grant_ring(sc->xb_dev,
707 (vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->ring_ref[i]);
709 xenbus_dev_fatal(sc->xb_dev, error,
710 "granting ring_ref(%d)", i);
/* The first page uses the legacy unsuffixed "ring-ref" key. */
714 error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
715 "ring-ref","%u", sc->ring_ref[0]);
717 xenbus_dev_fatal(sc->xb_dev, error, "writing %s/ring-ref",
718 xenbus_get_node(sc->xb_dev));
/* Remaining pages are published as "ring-ref1".."ring-refN". */
721 for (i = 1; i < sc->ring_pages; i++) {
722 char ring_ref_name[]= "ring_refXX";
724 snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", i);
725 error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
726 ring_ref_name, "%u", sc->ring_ref[i]);
728 xenbus_dev_fatal(sc->xb_dev, error, "writing %s/%s",
729 xenbus_get_node(sc->xb_dev),
735 error = bind_listening_port_to_irqhandler(
736 xenbus_get_otherend_id(sc->xb_dev),
737 "xbd", (driver_intr_t *)blkif_int, sc,
738 INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq);
740 xenbus_dev_fatal(sc->xb_dev, error,
741 "bind_evtchn_to_irqhandler failed");
749 * Callback received when the backend's state changes.
/*
 * XenBus otherend-changed method: react to back-end state
 * transitions by (re)initializing, connecting, or closing the device.
 */
752 blkfront_backend_changed(device_t dev, XenbusState backend_state)
754 struct xb_softc *sc = device_get_softc(dev);
756 DPRINTK("backend_state=%d\n", backend_state);
758 switch (backend_state) {
759 case XenbusStateUnknown:
760 case XenbusStateInitialising:
761 case XenbusStateReconfigured:
762 case XenbusStateReconfiguring:
763 case XenbusStateClosed:
766 case XenbusStateInitWait:
767 case XenbusStateInitialised:
768 blkfront_initialize(sc);
771 case XenbusStateConnected:
772 blkfront_initialize(sc);
773 blkfront_connect(sc);
776 case XenbusStateClosing:
/* Refuse to close while the disk is in use. */
778 xenbus_dev_error(dev, -EBUSY,
779 "Device in use; refusing to close");
781 blkfront_closing(dev);
787 ** Invoked when the backend is finally 'ready' (and has published
788 ** the details about the physical device - #sectors, size, etc).
/*
 * Gather the back-end's published geometry ("sectors", "info",
 * "sector-size") and the optional "feature-barrier" flag, create the
 * disk(9) device on first connect, then mark the ring CONNECTED and
 * kick any pending requests.
 */
791 blkfront_connect(struct xb_softc *sc)
793 device_t dev = sc->xb_dev;
794 unsigned long sectors, sector_size;
796 int err, feature_barrier;
/* Already connected (or suspended): nothing to re-negotiate. */
798 if( (sc->connected == BLKIF_STATE_CONNECTED) ||
799 (sc->connected == BLKIF_STATE_SUSPENDED) )
802 DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));
/*
 * Fixed mis-encoded "&sect" (rendered as the section-sign entity):
 * the "%lu" conversions require the addresses of the local
 * `sectors` and `sector_size` variables.
 */
804 err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
805 "sectors", "%lu", &sectors,
806 "info", "%u", &binfo,
807 "sector-size", "%lu", &sector_size,
810 xenbus_dev_fatal(dev, err,
811 "reading backend fields at %s",
812 xenbus_get_otherend_path(dev));
/* feature-barrier is optional; absence is not fatal. */
815 err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
816 "feature-barrier", "%lu", &feature_barrier,
818 if (!err || feature_barrier)
819 sc->xb_flags |= XB_BARRIER;
821 if (sc->xb_disk == NULL) {
822 device_printf(dev, "%juMB <%s> at %s",
823 (uintmax_t) sectors / (1048576 / sector_size),
824 device_get_desc(dev),
825 xenbus_get_node(dev));
826 bus_print_child_footer(device_get_parent(dev), dev);
828 xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size);
831 (void)xenbus_set_state(dev, XenbusStateConnected);
833 /* Kick pending requests. */
834 mtx_lock(&sc->xb_io_lock);
835 sc->connected = BLKIF_STATE_CONNECTED;
837 sc->xb_flags |= XB_READY;
838 mtx_unlock(&sc->xb_io_lock);
842 * Handle the change of state of the backend to Closing. We must delete our
843 * device-layer structures now, to ensure that writes are flushed through to
844 * the backend. Once this is done, we can switch to Closed in
/*
 * Handle the back-end's transition to Closing: destroy our disk(9)
 * device so pending writes are flushed, then move to
 * XenbusStateClosed.
 */
848 blkfront_closing(device_t dev)
850 struct xb_softc *sc = device_get_softc(dev);
852 xenbus_set_state(dev, XenbusStateClosing);
854 DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));
856 if (sc->xb_disk != NULL) {
857 disk_destroy(sc->xb_disk);
861 xenbus_set_state(dev, XenbusStateClosed);
/* Newbus detach: release blkif resources and destroy the I/O lock. */
866 blkfront_detach(device_t dev)
868 struct xb_softc *sc = device_get_softc(dev);
870 DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));
873 mtx_destroy(&sc->xb_io_lock);
/* Push queued ring requests to the back-end and send the event-channel
 * notification if the ring macro says one is needed. */
880 flush_requests(struct xb_softc *sc)
884 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify);
887 notify_remote_via_irq(sc->irq);
/* Grant-table free-space callback: retry queued I/O (under the I/O
 * lock) once grant references become available again. */
891 blkif_restart_queue_callback(void *arg)
893 struct xb_softc *sc = arg;
895 mtx_lock(&sc->xb_io_lock);
899 mtx_unlock(&sc->xb_io_lock);
/* disk(9) open: mark the softc open; complains if the softc has
 * already been torn down. */
903 blkif_open(struct disk *dp)
905 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
908 printf("xb%d: not found", sc->xb_unit);
912 sc->xb_flags |= XB_OPEN;
/*
 * disk(9) close: clear XB_OPEN and, on last close, honor a deferred
 * XenbusStateClosing request from the back-end that was ignored while
 * the device was in use.
 */
918 blkif_close(struct disk *dp)
920 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
924 sc->xb_flags &= ~XB_OPEN;
925 if (--(sc->users) == 0) {
926 /* Check whether we have been instructed to close. We will
927 have ignored this request initially, as the device was
929 device_t dev = sc->xb_dev;
931 xenbus_read_driver_state(xenbus_get_otherend_path(dev));
933 if (state == XenbusStateClosing)
934 blkfront_closing(dev);
/* disk(9) ioctl entry point for the xbd device. */
940 blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
942 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
/* Return a command to the free pool; it must not still be linked on
 * any queue (asserted via XB_ON_XBQ_MASK). */
951 xb_free_command(struct xb_command *cm)
954 KASSERT((cm->cm_flags & XB_ON_XBQ_MASK) == 0,
955 ("Freeing command that is still on a queue\n"));
959 cm->cm_complete = NULL;
964 * blkif_queue_request
968 * id: for guest use only.
969 * operation: BLKIF_OP_{READ,WRITE,PROBE}
970 * buffer: buffer to read/write into. this should be a
971 * virtual address in the guest os.
/*
 * Pull the next bio off the bio queue and wrap it in a free command,
 * allocating the grant references it will need.  On resource shortage
 * the bio is requeued, a grant-free callback is armed, and the queue
 * is frozen (XB_FROZEN).
 */
973 static struct xb_command *
974 xb_bio_command(struct xb_softc *sc)
976 struct xb_command *cm;
979 if (unlikely(sc->connected != BLKIF_STATE_CONNECTED))
982 bp = xb_dequeue_bio(sc);
/* No free command slot: put the bio back for later. */
986 if ((cm = xb_dequeue_free(sc)) == NULL) {
987 xb_requeue_bio(sc, bp);
/* Out of grant references: arm a callback to restart the queue. */
991 if (gnttab_alloc_grant_references(sc->max_request_segments,
992 &cm->gref_head) != 0) {
993 gnttab_request_free_callback(&sc->callback,
994 blkif_restart_queue_callback, sc,
995 sc->max_request_segments);
996 xb_requeue_bio(sc, bp);
998 sc->xb_flags |= XB_FROZEN;
1003 cm->data = bp->bio_data;
1004 cm->datalen = bp->bio_bcount;
1005 cm->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
1007 cm->sector_number = (blkif_sector_t)bp->bio_pblkno;
/*
 * Load the command's data buffer into its DMA map; blkif_queue_cb()
 * finishes ring submission.  An EINPROGRESS (deferred) load freezes
 * the queue until the callback runs.
 */
1013 blkif_queue_request(struct xb_softc *sc, struct xb_command *cm)
1017 error = bus_dmamap_load(sc->xb_io_dmat, cm->map, cm->data, cm->datalen,
1018 blkif_queue_cb, cm, 0);
1019 if (error == EINPROGRESS) {
1020 printf("EINPROGRESS\n");
1021 sc->xb_flags |= XB_FROZEN;
1022 cm->cm_flags |= XB_CMD_FROZEN;
/*
 * bus_dma callback: build the blkif ring request(s) for a loaded
 * command.  Grants the back-end access to each data page, fills in the
 * segment descriptors (spilling extra segments into follow-on ring
 * slots), syncs the DMA map, and queues the command on the busy list.
 */
1030 blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
1032 struct xb_softc *sc;
1033 struct xb_command *cm;
1034 blkif_request_t *ring_req;
1035 struct blkif_request_segment *sg;
1036 struct blkif_request_segment *last_block_sg;
1037 grant_ref_t *sg_ref;
1038 vm_paddr_t buffer_ma;
1039 uint64_t fsect, lsect;
1047 //printf("%s: Start\n", __func__);
/* busdma load failure: fail the bio and recycle the command. */
1049 printf("error %d in blkif_queue_cb\n", error);
1050 cm->bp->bio_error = EIO;
1052 xb_free_command(cm);
1056 /* Fill out a communications ring structure. */
1057 ring_req = RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
1058 sc->ring.req_prod_pvt++;
1059 ring_req->id = cm->id;
1060 ring_req->operation = cm->operation;
1061 ring_req->sector_number = cm->sector_number;
1062 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
1063 ring_req->nr_segments = nsegs;
/* First batch of segments lives in the request header block. */
1066 block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK);
1068 last_block_sg = sg + block_segs;
1069 sg_ref = cm->sg_refs;
1073 while (sg < last_block_sg) {
1074 buffer_ma = segs->ds_addr;
/* First/last 512-byte sector touched within this page. */
1075 fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
1076 lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1;
1078 KASSERT(lsect <= 7, ("XEN disk driver data cannot "
1079 "cross a page boundary"));
1081 /* install a grant reference. */
1082 ref = gnttab_claim_grant_reference(&cm->gref_head);
1085 * GNTTAB_LIST_END == 0xffffffff, but it is private
1088 KASSERT(ref != ~0, ("grant_reference failed"));
/* Writes grant the back-end read-only access to the page. */
1090 gnttab_grant_foreign_access_ref(
1092 xenbus_get_otherend_id(sc->xb_dev),
1093 buffer_ma >> PAGE_SHIFT,
1094 ring_req->operation == BLKIF_OP_WRITE);
1097 *sg = (struct blkif_request_segment) {
1099 .first_sect = fsect,
1100 .last_sect = lsect };
/* Remaining segments continue in follow-on ring slots. */
1106 block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
1107 if (block_segs == 0)
1110 sg = BLKRING_GET_SG_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
1111 sc->ring.req_prod_pvt++;
1112 last_block_sg = sg + block_segs;
1115 if (cm->operation == BLKIF_OP_READ)
1116 op = BUS_DMASYNC_PREREAD;
1117 else if (cm->operation == BLKIF_OP_WRITE)
1118 op = BUS_DMASYNC_PREWRITE;
1121 bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
/* Return any grant references claimed but not consumed. */
1123 gnttab_free_grant_references(cm->gref_head);
1125 xb_enqueue_busy(cm);
1128 * This flag means that we're probably executing in the busdma swi
1129 * instead of in the startio context, so an explicit flush is needed.
1131 if (cm->cm_flags & XB_CMD_FROZEN)
1134 //printf("%s: Done\n", __func__);
1139 * Dequeue buffers and place them in the shared communication ring.
1140 * Return when no more requests can be accepted or all buffers have
1143 * Signal XEN once the ring has been filled out.
/*
 * Pull ready commands (and fresh bios) and queue them into the shared
 * ring while enough free slots remain and the queue is not frozen.
 * Caller must hold xb_io_lock (asserted).
 */
1146 xb_startio(struct xb_softc *sc)
1148 struct xb_command *cm;
1149 int error, queued = 0;
1151 mtx_assert(&sc->xb_io_lock, MA_OWNED);
1153 if (sc->connected != BLKIF_STATE_CONNECTED)
/* A full multi-block request must fit before we dequeue work. */
1156 while (RING_FREE_REQUESTS(&sc->ring) >= sc->max_request_blocks) {
1157 if (sc->xb_flags & XB_FROZEN)
1160 cm = xb_dequeue_ready(sc);
1163 cm = xb_bio_command(sc);
1168 if ((error = blkif_queue_request(sc, cm)) != 0) {
1169 printf("blkif_queue_request returned %d\n", error);
/*
 * Event-channel interrupt handler: walk the response ring, sync and
 * unload each completed command's DMA map, dispatch its completion
 * (bio path or cm_complete callback), then re-check the ring per the
 * standard final-check protocol.  Wakes a suspending thread once the
 * busy queue drains.
 */
1180 blkif_int(void *xsc)
1182 struct xb_softc *sc = xsc;
1183 struct xb_command *cm;
1184 blkif_response_t *bret;
1188 mtx_lock(&sc->xb_io_lock);
1190 if (unlikely(sc->connected == BLKIF_STATE_DISCONNECTED)) {
1191 mtx_unlock(&sc->xb_io_lock);
1196 rp = sc->ring.sring->rsp_prod;
1197 rmb(); /* Ensure we see queued responses up to 'rp'. */
1199 for (i = sc->ring.rsp_cons; i != rp;) {
1200 bret = RING_GET_RESPONSE(&sc->ring, i);
1201 cm = &sc->shadow[bret->id];
/* blkif_completion() returns the number of ring slots consumed. */
1204 i += blkif_completion(cm);
1206 if (cm->operation == BLKIF_OP_READ)
1207 op = BUS_DMASYNC_POSTREAD;
1208 else if (cm->operation == BLKIF_OP_WRITE)
1209 op = BUS_DMASYNC_POSTWRITE;
1212 bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
1213 bus_dmamap_unload(sc->xb_io_dmat, cm->map);
1216 * If commands are completing then resources are probably
1217 * being freed as well. It's a cheap assumption even when
1220 sc->xb_flags &= ~XB_FROZEN;
1223 * Directly call the i/o complete routine to save an
1224 * an indirection in the common case.
1226 cm->status = bret->status;
1228 xb_bio_complete(sc, cm);
1229 else if (cm->cm_complete)
1230 (cm->cm_complete)(cm);
1232 xb_free_command(cm);
1235 sc->ring.rsp_cons = i;
1237 if (i != sc->ring.req_prod_pvt) {
1239 RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, more_to_do);
1243 sc->ring.sring->rsp_event = i + 1;
/* Let blkfront_suspend() know the busy queue may have drained. */
1248 if (unlikely(sc->connected == BLKIF_STATE_SUSPENDED))
1249 wakeup(&sc->cm_busy);
1251 mtx_unlock(&sc->xb_io_lock);
/*
 * Tear down the device channel: mark DISCONNECTED, end foreign access
 * on the ring grants, free the shared ring, the per-command sg_refs
 * and DMA maps, the shadow array and DMA tag, and unbind the IRQ.
 */
1255 blkif_free(struct xb_softc *sc)
1257 uint8_t *sring_page_ptr;
1260 /* Prevent new requests being issued until we fix things up. */
1261 mtx_lock(&sc->xb_io_lock);
1262 sc->connected = BLKIF_STATE_DISCONNECTED;
1263 mtx_unlock(&sc->xb_io_lock);
1265 /* Free resources associated with old device channel. */
1266 if (sc->ring.sring != NULL) {
1267 sring_page_ptr = (uint8_t *)sc->ring.sring;
1268 for (i = 0; i < sc->ring_pages; i++) {
1269 if (sc->ring_ref[i] != GRANT_INVALID_REF) {
1270 gnttab_end_foreign_access_ref(sc->ring_ref[i]);
1271 sc->ring_ref[i] = GRANT_INVALID_REF;
1273 sring_page_ptr += PAGE_SIZE;
1275 free(sc->ring.sring, M_XENBLOCKFRONT);
1276 sc->ring.sring = NULL;
/* Release per-command resources allocated in blkfront_initialize(). */
1281 for (i = 0; i < sc->max_requests; i++) {
1282 struct xb_command *cm;
1284 cm = &sc->shadow[i];
1285 if (cm->sg_refs != NULL) {
1286 free(cm->sg_refs, M_XENBLOCKFRONT);
1290 bus_dmamap_destroy(sc->xb_io_dmat, cm->map);
1292 free(sc->shadow, M_XENBLOCKFRONT);
1295 bus_dma_tag_destroy(sc->xb_io_dmat);
1299 xb_initq_complete(sc);
1303 unbind_from_irqhandler(sc->irq);
/* Release a completed command's grant references and return the
 * number of ring slots (blocks) its segments occupied. */
1309 blkif_completion(struct xb_command *s)
1311 //printf("%s: Req %p(%d)\n", __func__, s, s->nseg);
1312 gnttab_end_foreign_access_references(s->nseg, s->sg_refs);
1313 return (BLKIF_SEGS_TO_BLOCKS(s->nseg));
1316 /* ** Driver registration ** */
/* Newbus method table, driver declaration and module registration
 * attaching the xbd driver under the xenbusb_front bus. */
1317 static device_method_t blkfront_methods[] = {
1318 /* Device interface */
1319 DEVMETHOD(device_probe, blkfront_probe),
1320 DEVMETHOD(device_attach, blkfront_attach),
1321 DEVMETHOD(device_detach, blkfront_detach),
1322 DEVMETHOD(device_shutdown, bus_generic_shutdown),
1323 DEVMETHOD(device_suspend, blkfront_suspend),
1324 DEVMETHOD(device_resume, blkfront_resume),
1326 /* Xenbus interface */
1327 DEVMETHOD(xenbus_otherend_changed, blkfront_backend_changed),
1332 static driver_t blkfront_driver = {
1335 sizeof(struct xb_softc),
1337 devclass_t blkfront_devclass;
1339 DRIVER_MODULE(xbd, xenbusb_front, blkfront_driver, blkfront_devclass, 0, 0);