 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * XenBSD block device driver
 *
 * Copyright (c) 2009 Frank Suchomel, Citrix
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>

#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus/xenbusvar.h>

#include <geom/geom_disk.h>

#include <dev/xen/blkfront/block.h>

#include "xenbus_if.h"
#define ASSERT(S) KASSERT(S, (#S))

static void xb_startio(struct xb_softc *sc);
static void connect(device_t, struct blkfront_info *);
static void blkfront_closing(device_t);
static int blkfront_detach(device_t);
static int talk_to_backend(device_t, struct blkfront_info *);
static int setup_blkring(device_t, struct blkfront_info *);
static void blkif_int(void *);
static void blkif_restart_queue(void *arg);
static void blkif_recover(struct blkfront_info *);
static void blkif_completion(struct blk_shadow *);
static void blkif_free(struct blkfront_info *, int);

#define GRANT_INVALID_REF 0
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)

LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

static void vbd_update(void);

#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "suspended",
};

static char *blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)

#ifdef DEBUG
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif
static grant_ref_t gref_head;
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
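/*
 * Sizing sketch (assuming 4 KB pages and the classic blkif ABI, where
 * BLKIF_MAX_SEGMENTS_PER_REQUEST is 11): __RING_SIZE yields 32 request
 * slots in the single shared page, so at most 32 * 11 = 352 pages
 * (~1.4 MB) of I/O can be outstanding at once.
 */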
static void kick_pending_request_queues(struct blkfront_info *);
static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
static int blkif_queue_request(struct bio *bp);
static void xb_strategy(struct bio *bp);

// In order to quiesce the device during kernel dumps, outstanding requests to
// DOM0 for disk reads/writes need to be accounted for.
static int blkif_queued_requests;
static int xb_dump(void *, void *, vm_offset_t, off_t, size_t);

/* XXX move to xb_vbd.c when VBD update support is added */

#define XBD_SECTOR_SIZE	512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT	9

static struct mtx blkif_io_lock;
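/*
 * In a PV guest, "physical" frame numbers are a guest-local fiction;
 * grant table entries and the shared ring must carry machine frame
 * numbers.  This helper converts a guest PFN to the MFN that Xen
 * expects, via the physical-to-machine map.
 */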
pfn_to_mfn(vm_paddr_t pfn)
{
	return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
}
/*
 * Translate the Linux major/minor device number to an appropriate name and
 * unit number.  For HVM guests, this allows us to use the same drive names
 * with blkfront as the emulated drives, easing the transition slightly.
 */
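/*
 * Worked example: vdevice 0x861 decodes to major 8, minor 0x61.  Major 8
 * is SCSI disk0 ("da", shift 4, base 0), so unit = 0 + (0x61 >> 4) = 6
 * and the disk attaches as da6.
 */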
blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
	static struct vdev_info {
		{3, 6, 0, "ad"},	/* ide0 */
		{22, 6, 2, "ad"},	/* ide1 */
		{33, 6, 4, "ad"},	/* ide2 */
		{34, 6, 6, "ad"},	/* ide3 */
		{56, 6, 8, "ad"},	/* ide4 */
		{57, 6, 10, "ad"},	/* ide5 */
		{88, 6, 12, "ad"},	/* ide6 */
		{89, 6, 14, "ad"},	/* ide7 */
		{90, 6, 16, "ad"},	/* ide8 */
		{91, 6, 18, "ad"},	/* ide9 */

		{8, 4, 0, "da"},	/* scsi disk0 */
		{65, 4, 16, "da"},	/* scsi disk1 */
		{66, 4, 32, "da"},	/* scsi disk2 */
		{67, 4, 48, "da"},	/* scsi disk3 */
		{68, 4, 64, "da"},	/* scsi disk4 */
		{69, 4, 80, "da"},	/* scsi disk5 */
		{70, 4, 96, "da"},	/* scsi disk6 */
		{71, 4, 112, "da"},	/* scsi disk7 */
		{128, 4, 128, "da"},	/* scsi disk8 */
		{129, 4, 144, "da"},	/* scsi disk9 */
		{130, 4, 160, "da"},	/* scsi disk10 */
		{131, 4, 176, "da"},	/* scsi disk11 */
		{132, 4, 192, "da"},	/* scsi disk12 */
		{133, 4, 208, "da"},	/* scsi disk13 */
		{134, 4, 224, "da"},	/* scsi disk14 */
		{135, 4, 240, "da"},	/* scsi disk15 */

		{202, 4, 0, "xbd"},	/* xbd */
	int major = vdevice >> 8;
	int minor = vdevice & 0xff;

	if (vdevice & (1 << 28)) {
		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
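		/*
		 * Illustrative decoding of the extended ID scheme: with
		 * bit 28 set, the layout is (1 << 28) | (disk << 8) |
		 * partition, so masking off bit 28 and shifting right by
		 * 8 recovers the disk number, e.g. 0x10000100 -> unit 1.
		 */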
	for (i = 0; info[i].major; i++) {
		if (info[i].major == major) {
			*unit = info[i].base + (minor >> info[i].shift);
			*name = info[i].name;
xlvbd_add(device_t dev, blkif_sector_t capacity,
    int vdevice, uint16_t vdisk_info, uint16_t sector_size,
    struct blkfront_info *info)

	blkfront_vdevice_to_unit(vdevice, &unit, &name);

	sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);

	if (strcmp(name, "xbd"))
		device_printf(dev, "attaching as %s%d\n", name, unit);

	memset(&sc->xb_disk, 0, sizeof(sc->xb_disk));
	sc->xb_disk = disk_alloc();
	sc->xb_disk->d_unit = sc->xb_unit;
	sc->xb_disk->d_open = blkif_open;
	sc->xb_disk->d_close = blkif_close;
	sc->xb_disk->d_ioctl = blkif_ioctl;
	sc->xb_disk->d_strategy = xb_strategy;
	sc->xb_disk->d_dump = xb_dump;
	sc->xb_disk->d_name = name;
	sc->xb_disk->d_drv1 = sc;
	sc->xb_disk->d_sectorsize = sector_size;

	sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;

	sc->xb_disk->d_maxsize = DFLTPHYS;
#else /* XXX: xen can't handle large single i/o requests */
	sc->xb_disk->d_maxsize = 4096;

	XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
	    xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
	    sc->xb_disk->d_mediasize);

	sc->xb_disk->d_flags = 0;
	disk_create(sc->xb_disk, DISK_VERSION_00);
	bioq_init(&sc->xb_bioq);
xlvbd_del(struct blkfront_info *info)

	disk_destroy(sc->xb_disk);

/************************ end VBD support *****************/
/*
 * Read/write routine for a buffer.  Finds the proper unit, places the
 * buffer on the sort queue, and kicks the controller.
 */
xb_strategy(struct bio *bp)
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;

	bp->bio_error = EINVAL;
	bp->bio_flags |= BIO_ERROR;

	/*
	 * Place it in the queue of disk activities for this disk.
	 */
	mtx_lock(&blkif_io_lock);
	bioq_disksort(&sc->xb_bioq, bp);
	mtx_unlock(&blkif_io_lock);

	/*
	 * Correctly set the bio to indicate a failed transfer.
	 */
	bp->bio_resid = bp->bio_bcount;
static void xb_quiesce(struct blkfront_info *info);
// Quiesce the disk writes for a dump file before allowing the next buffer.
xb_quiesce(struct blkfront_info *info)

	// While there are outstanding requests
	while (blkif_queued_requests) {
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, mtd);
		// Received request completions; update the queue.
		if (blkif_queued_requests) {
			// Still pending requests; wait for the disk i/o to complete.

// Some bio structures for dumping core
#define DUMP_BIO_NO 16	// 16 * 4KB = 64KB dump block
static struct bio xb_dump_bp[DUMP_BIO_NO];
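/*
 * The dump path runs in a context where sleeping in malloc(9) is not an
 * option, which is presumably why these bios are statically preallocated
 * instead of being taken from a pool at dump time.
 */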
// Kernel dump function for a paravirtualized disk device
xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)

	struct disk *dp = arg;
	struct xb_softc *sc = (struct xb_softc *) dp->d_drv1;

	xb_quiesce(sc->xb_info);	// All quiet on the western front.

	// If this lock is held, then this module is failing, and a successful
	// kernel dump is highly unlikely anyway.
	mtx_lock(&blkif_io_lock);

	// Split the 64KB block into 16 4KB blocks.
	for (sbp = 0; length > 0 && sbp < DUMP_BIO_NO; sbp++) {
		chunk = length > PAGE_SIZE ? PAGE_SIZE : length;
		xb_dump_bp[sbp].bio_disk = dp;
		xb_dump_bp[sbp].bio_pblkno = offset / dp->d_sectorsize;
		xb_dump_bp[sbp].bio_bcount = chunk;
		xb_dump_bp[sbp].bio_resid = chunk;
		xb_dump_bp[sbp].bio_data = virtual;
		xb_dump_bp[sbp].bio_cmd = BIO_WRITE;
		xb_dump_bp[sbp].bio_done = NULL;

		bioq_disksort(&sc->xb_bioq, &xb_dump_bp[sbp]);

		virtual = (char *) virtual + chunk;

	// Tell DOM0 to do the I/O.
	mtx_unlock(&blkif_io_lock);

	// Must wait for the completion: the dump routine reuses the same
	// 16 x 4KB buffer space.
	xb_quiesce(sc->xb_info);	// All quiet on the eastern front.

	// If there were any errors, bail out...
	for (mbp = 0; mbp < sbp; mbp++) {
		if ((rc = xb_dump_bp[mbp].bio_error))
			break;
blkfront_probe(device_t dev)

	if (!strcmp(xenbus_get_type(dev), "vbd")) {
		device_set_desc(dev, "Virtual Block Device");

/*
 * Setup supplies the backend directory and virtual device number.  We
 * place an event channel and shared frame entries there, then watch the
 * backend until it is ready.
 */
blkfront_attach(device_t dev)

	int error, vdevice, i, unit;
	struct blkfront_info *info;

	/* FIXME: Use dynamic device id if this is not set. */
	error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
	    "virtual-device", NULL, "%i", &vdevice);
		xenbus_dev_fatal(dev, error, "reading virtual-device");
		printf("couldn't find virtual device");

	blkfront_vdevice_to_unit(vdevice, &unit, &name);
	if (!strcmp(name, "xbd"))
		device_set_unit(dev, unit);

	info = device_get_softc(dev);

	for (i = 0; i < sizeof(*info); i++)
		if (((uint8_t *)info)[i] != 0)
			panic("non-null memory");

	info->shadow_free = 0;
	info->vdevice = vdevice;
	info->connected = BLKIF_STATE_DISCONNECTED;

	/* work queue needed? */
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i + 1;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* The frontend directory name is a number; use it as the handle. */
	info->handle = strtoul(strrchr(xenbus_get_node(dev), '/') + 1, NULL, 0);
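	/* e.g. a frontend node of "device/vbd/768" yields handle 768. */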
	error = talk_to_backend(dev, info);

blkfront_suspend(device_t dev)

	struct blkfront_info *info = device_get_softc(dev);

	/* Prevent new requests from being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_SUSPENDED;
	mtx_unlock(&blkif_io_lock);

blkfront_resume(device_t dev)

	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));

	err = talk_to_backend(dev, info);
	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
/* Common code used when first setting up, and when resuming. */
talk_to_backend(device_t dev, struct blkfront_info *info)

	const char *message = NULL;
	struct xenbus_transaction xbt;

	/* Create shared ring, alloc event channel. */
	err = setup_blkring(dev, info);

	err = xenbus_transaction_start(&xbt);
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_blkring;

	err = xenbus_printf(xbt, xenbus_get_node(dev),
	    "ring-ref", "%u", info->ring_ref);
		message = "writing ring-ref";
		goto abort_transaction;

	err = xenbus_printf(xbt, xenbus_get_node(dev),
	    "event-channel", "%u", irq_to_evtchn_port(info->irq));
		message = "writing event-channel";
		goto abort_transaction;

	err = xenbus_printf(xbt, xenbus_get_node(dev),
	    "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
		message = "writing protocol";
		goto abort_transaction;
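	/*
	 * Everything the backend needs to map the ring is now staged in
	 * the transaction: the ring page grant ("ring-ref"), the event
	 * channel port ("event-channel"), and the ring ABI ("protocol").
	 * Ending the transaction below publishes them atomically.
	 */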
	err = xenbus_transaction_end(xbt, 0);
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_blkring;

	xenbus_set_state(dev, XenbusStateInitialised);

 abort_transaction:
	xenbus_transaction_end(xbt, 1);
		xenbus_dev_fatal(dev, err, "%s", message);
setup_blkring(device_t dev, struct blkfront_info *info)

	blkif_sring_t *sring;

	info->ring_ref = GRANT_INVALID_REF;

	sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
		xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");

	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

	error = xenbus_grant_ring(dev,
	    (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref);
		free(sring, M_DEVBUF);
		info->ring.sring = NULL;

	error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
	    "xbd", (driver_intr_t *)blkif_int, info,
	    INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
		xenbus_dev_fatal(dev, error,
		    "bind_listening_port_to_irqhandler failed");
/*
 * Callback received when the backend's state changes.
 */
blkfront_backend_changed(device_t dev, XenbusState backend_state)

	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("backend_state=%d\n", backend_state);

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateClosed:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:

	case XenbusStateConnected:

	case XenbusStateClosing:
			xenbus_dev_error(dev, -EBUSY,
			    "Device in use; refusing to close");
			blkfront_closing(dev);

		bd = bdget(info->dev);
			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");

			xenbus_dev_error(dev, -EBUSY,
			    "Device in use; refusing to close");
			blkfront_closing(dev);
/*
** Invoked when the backend is finally 'ready' (and has produced the
** details about the physical device - #sectors, size, etc.).
*/
connect(device_t dev, struct blkfront_info *info)

	unsigned long sectors, sector_size;

	if ((info->connected == BLKIF_STATE_CONNECTED) ||
	    (info->connected == BLKIF_STATE_SUSPENDED))

	DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));

	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
	    "sectors", "%lu", &sectors,
	    "info", "%u", &binfo,
	    "sector-size", "%lu", &sector_size,
		xenbus_dev_fatal(dev, err,
		    "reading backend fields at %s",
		    xenbus_get_otherend_path(dev));

	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
	    "feature-barrier", "%lu", &info->feature_barrier,
		info->feature_barrier = 0;

	device_printf(dev, "%juMB <%s> at %s",
	    (uintmax_t) sectors / (1048576 / sector_size),
	    device_get_desc(dev),
	    xenbus_get_node(dev));
	bus_print_child_footer(device_get_parent(dev), dev);
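	/*
	 * e.g. 4194304 sectors of 512 bytes prints as
	 * 4194304 / (1048576 / 512) = 4194304 / 2048 = 2048 MB.
	 */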
	xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info);

	(void)xenbus_set_state(dev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);

/*
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
blkfront_closing(device_t dev)

	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));

	DPRINTK("Calling xlvbd_del\n");

	xenbus_set_state(dev, XenbusStateClosed);

blkfront_detach(device_t dev)

	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));
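/*
 * The shadow free list is threaded through the otherwise-unused req.id
 * fields: shadow_free holds the index of the first free slot, and each
 * free slot's req.id holds the index of the next one, making the get/add
 * operations below O(1).
 */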
GET_ID_FROM_FREELIST(struct blkfront_info *info)

	unsigned long nfree = info->shadow_free;

	KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
	info->shadow_free = info->shadow[nfree].req.id;
	info->shadow[nfree].req.id = 0x0fffffee;	/* debug */
	atomic_add_int(&blkif_queued_requests, 1);

ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)

	info->shadow[id].req.id = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
	atomic_subtract_int(&blkif_queued_requests, 1);
flush_requests(struct blkfront_info *info)

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

		notify_remote_via_irq(info->irq);

kick_pending_request_queues(struct blkfront_info *info)

	/* XXX check if we can't simplify */
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);
		/* Kick things off immediately. */
		do_blkif_request(info->rq);

	if (!RING_FULL(&info->ring)) {
		sc = LIST_FIRST(&xbsl_head);
		LIST_REMOVE(sc, entry);
		/* Re-enable calldowns. */
		blk_start_queue(di->rq);
		/* Kick things off immediately. */
		xb_startio(info->sc);

static void blkif_restart_queue(void *arg)

	struct blkfront_info *info = (struct blkfront_info *)arg;

	mtx_lock(&blkif_io_lock);
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);

static void blkif_restart_queue_callback(void *arg)

	struct blkfront_info *info = (struct blkfront_info *)arg;

	/* XXX BSD equiv? */
	schedule_work(&info->work);
blkif_open(struct disk *dp)

	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

		printf("xb%d: not found", sc->xb_unit);

	sc->xb_flags |= XB_OPEN;
	sc->xb_info->users++;

blkif_close(struct disk *dp)

	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

	sc->xb_flags &= ~XB_OPEN;
	if (--(sc->xb_info->users) == 0) {
		/*
		 * Check whether we have been instructed to close.  We will
		 * have ignored this request initially, as the device was
		 * still mounted.
		 */
		device_t dev = sc->xb_info->xbdev;

		    xenbus_read_driver_state(xenbus_get_otherend_path(dev));

		if (state == XenbusStateClosing)
			blkfront_closing(dev);

blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)

	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
/*
 * blkif_queue_request
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into.  This should be a
 *   virtual address in the guest OS.
 */
static int blkif_queue_request(struct bio *bp)

	vm_paddr_t buffer_ma;
	blkif_request_t *ring_req;
	uint64_t fsect, lsect;
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
	struct blkfront_info *info = sc->xb_info;

	if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))

	if (gnttab_alloc_grant_references(
	    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
		    blkif_restart_queue_callback,
		    BLKIF_MAX_SEGMENTS_PER_REQUEST);

	/* Check if the buffer is properly aligned. */
	if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
		int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
		    PAGE_SIZE;
		caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
		    M_NOWAIT);
		alignbuf = (char *)roundup2((u_long)newbuf, align);

		/* Save a copy of the current buffer. */
		bp->bio_driver1 = newbuf;
		bp->bio_driver2 = alignbuf;

		/* Copy the data for a write. */
		if (bp->bio_cmd == BIO_WRITE)
			bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
		alignbuf = bp->bio_data;

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&info->ring,
	    info->ring.req_prod_pvt);
	id = GET_ID_FROM_FREELIST(info);
	info->shadow[id].request = (unsigned long)bp;

	ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
	    BLKIF_OP_WRITE;
	ring_req->sector_number = (blkif_sector_t)bp->bio_pblkno;
	ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;

	ring_req->nr_segments = 0;	/* XXX not doing scatter/gather since buffer
					 * chaining is not supported.
					 */

	buffer_ma = vtomach(alignbuf);
	fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
	lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
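	/*
	 * Example (assuming 4 KB pages): a buffer starting 1024 bytes into
	 * its page with bio_bcount 2048 gives fsect = 1024 >> 9 = 2 and
	 * lsect = 2 + (2048 >> 9) - 1 = 5, i.e. sectors 2-5 of the page.
	 */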
	/* Install a grant reference. */
	ref = gnttab_claim_grant_reference(&gref_head);
	KASSERT(ref != -ENOSPC, ("grant_reference failed"));

	gnttab_grant_foreign_access_ref(
	    ref,
	    xenbus_get_otherend_id(info->xbdev),
	    buffer_ma >> PAGE_SHIFT,
	    ring_req->operation & 1);	/* read-only grant for writes: the
					 * backend only reads the buffer of
					 * a write request. */
	info->shadow[id].frame[ring_req->nr_segments] =
	    buffer_ma >> PAGE_SHIFT;

	ring_req->seg[ring_req->nr_segments] =
	    (struct blkif_request_segment) {
		.gref = ref,
		.first_sect = fsect,
		.last_sect = lsect };

	ring_req->nr_segments++;
	KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
	    ("XEN buffer must be sector aligned"));
	    ("XEN disk driver data cannot cross a page boundary"));

	buffer_ma &= ~PAGE_MASK;

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	gnttab_free_grant_references(gref_head);
/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
xb_startio(struct xb_softc *sc)

	struct blkfront_info *info = sc->xb_info;

	mtx_assert(&blkif_io_lock, MA_OWNED);

	while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {

		if (RING_FULL(&info->ring))

		if (blkif_queue_request(bp)) {
			bioq_insert_head(&sc->xb_bioq, bp);

	flush_requests(sc->xb_info);
	struct xb_softc *sc = NULL;
	blkif_response_t *bret;
	struct blkfront_info *info = xsc;

	mtx_lock(&blkif_io_lock);

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		mtx_unlock(&blkif_io_lock);

	rp = info->ring.sring->rsp_prod;
	rmb();	/* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {

		bret = RING_GET_RESPONSE(&info->ring, i);
		bp = (struct bio *)info->shadow[id].request;

		blkif_completion(&info->shadow[id]);
		ADD_ID_TO_FREELIST(info, id);

		switch (bret->operation) {
			/* Had an unaligned buffer that needs to be copied. */
			if (bp->bio_driver1)
				bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount);
		case BLKIF_OP_WRITE:

			/* Free the copy buffer. */
			if (bp->bio_driver1) {
				free(bp->bio_driver1, M_DEVBUF);
				bp->bio_driver1 = NULL;

			if (unlikely(bret->status != BLKIF_RSP_OKAY)) {
				printf("Bad return from blkdev data request: %x\n",
				bp->bio_flags |= BIO_ERROR;

			sc = (struct xb_softc *)bp->bio_disk->d_drv1;

			if (bp->bio_flags & BIO_ERROR)
				bp->bio_error = EIO;

			panic("received invalid operation");

	info->ring.rsp_cons = i;

	if (i != info->ring.req_prod_pvt) {
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);

		info->ring.sring->rsp_event = i + 1;

	kick_pending_request_queues(info);

	mtx_unlock(&blkif_io_lock);
blkif_free(struct blkfront_info *info, int suspend)

	/* Prevent new requests from being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = suspend ?
	    BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Free resources associated with the old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref,
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;

	unbind_from_irqhandler(info->irq);

blkif_completion(struct blk_shadow *s)

	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
blkif_recover(struct blkfront_info *info)

	blkif_request_t *req;
	struct blk_shadow *copy;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
	memcpy(copy, info->shadow, sizeof(info->shadow));

	/* Stage 2: Set up the free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i + 1;
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		if (copy[i].request == 0)

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(
		    &info->ring, info->ring.req_prod_pvt);

		/* We get a new request id, and must reset the shadow state. */
		req->id = GET_ID_FROM_FREELIST(info);
		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

		/* Rewrite any grant references invalidated by suspend/resume. */
		for (j = 0; j < req->nr_segments; j++)
			gnttab_grant_foreign_access_ref(
			    req->seg[j].gref,
			    xenbus_get_otherend_id(info->xbdev),
			    pfn_to_mfn(info->shadow[req->id].frame[j]),
			    0 /* assume not readonly */);

		info->shadow[req->id].req = *req;

		info->ring.req_prod_pvt++;

	free(copy, M_DEVBUF);

	xenbus_set_state(info->xbdev, XenbusStateConnected);

	/* Now safe for us to use the shared ring. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Send off requeued requests. */
	mtx_lock(&blkif_io_lock);
	flush_requests(info);

	/* Kick any other new requests queued since we resumed. */
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
/* ** Driver registration ** */
static device_method_t blkfront_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		blkfront_probe),
	DEVMETHOD(device_attach,	blkfront_attach),
	DEVMETHOD(device_detach,	blkfront_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	blkfront_suspend),
	DEVMETHOD(device_resume,	blkfront_resume),

	/* Xenbus interface */
	DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed),

static driver_t blkfront_driver = {
	sizeof(struct blkfront_info),

devclass_t blkfront_devclass;

DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0);

MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS);	/* XXX how does one enroll a lock? */