sys/dev/xen/blkfront/blkfront.c (FreeBSD stable/9)
1 /*
2  * XenBSD block device driver
3  *
4  * Copyright (c) 2009 Scott Long, Yahoo!
5  * Copyright (c) 2009 Frank Suchomel, Citrix
6  * Copyright (c) 2009 Doug F. Rabson, Citrix
7  * Copyright (c) 2005 Kip Macy
8  * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
9  * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
10  *
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a copy
13  * of this software and associated documentation files (the "Software"), to
14  * deal in the Software without restriction, including without limitation the
15  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
16  * sell copies of the Software, and to permit persons to whom the Software is
17  * furnished to do so, subject to the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included in
20  * all copies or substantial portions of the Software.
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/kernel.h>
37 #include <vm/vm.h>
38 #include <vm/pmap.h>
39
40 #include <sys/bio.h>
41 #include <sys/bus.h>
42 #include <sys/conf.h>
43 #include <sys/module.h>
44 #include <sys/sysctl.h>
45
46 #include <machine/bus.h>
47 #include <sys/rman.h>
48 #include <machine/resource.h>
49 #include <machine/intr_machdep.h>
50 #include <machine/vmparam.h>
51 #include <sys/bus_dma.h>
52
53 #include <machine/_inttypes.h>
54 #include <machine/xen/xen-os.h>
55 #include <machine/xen/xenvar.h>
56 #include <machine/xen/xenfunc.h>
57
58 #include <xen/hypervisor.h>
59 #include <xen/xen_intr.h>
60 #include <xen/evtchn.h>
61 #include <xen/gnttab.h>
62 #include <xen/interface/grant_table.h>
63 #include <xen/interface/io/protocols.h>
64 #include <xen/xenbus/xenbusvar.h>
65
66 #include <geom/geom_disk.h>
67
68 #include <dev/xen/blkfront/block.h>
69
70 #include "xenbus_if.h"
71
72 /* prototypes */
73 static void xb_free_command(struct xb_command *cm);
74 static void xb_startio(struct xb_softc *sc);
75 static void blkfront_connect(struct xb_softc *);
76 static void blkfront_closing(device_t);
77 static int blkfront_detach(device_t);
78 static int setup_blkring(struct xb_softc *);
79 static void blkif_int(void *);
80 static void blkfront_initialize(struct xb_softc *);
81 static int blkif_completion(struct xb_command *);
82 static void blkif_free(struct xb_softc *);
83 static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int);
84
85 static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");
86
87 #define GRANT_INVALID_REF 0
88
89 /* Control whether runtime update of vbds is enabled. */
90 #define ENABLE_VBD_UPDATE 0
91
92 #if ENABLE_VBD_UPDATE
93 static void vbd_update(void);
94 #endif
95
96 #define BLKIF_STATE_DISCONNECTED 0
97 #define BLKIF_STATE_CONNECTED    1
98 #define BLKIF_STATE_SUSPENDED    2
99
100 #ifdef notyet
101 static char *blkif_state_name[] = {
102         [BLKIF_STATE_DISCONNECTED] = "disconnected",
103         [BLKIF_STATE_CONNECTED]    = "connected",
104         [BLKIF_STATE_SUSPENDED]    = "closed",
105 };
106
107 static char * blkif_status_name[] = {
108         [BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
109         [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
110         [BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
111         [BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
112 };
113 #endif
114
115 #if 0
116 #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
117 #else
118 #define DPRINTK(fmt, args...) 
119 #endif
120
121 static int blkif_open(struct disk *dp);
122 static int blkif_close(struct disk *dp);
123 static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
124 static int blkif_queue_request(struct xb_softc *sc, struct xb_command *cm);
125 static void xb_strategy(struct bio *bp);
126
127 // In order to quiesce the device during kernel dumps, outstanding requests to
128 // DOM0 for disk reads/writes need to be accounted for.
129 static  int     xb_dump(void *, void *, vm_offset_t, off_t, size_t);
130
131 /* XXX move to xb_vbd.c when VBD update support is added */
132 #define MAX_VBDS 64
133
134 #define XBD_SECTOR_SIZE         512     /* XXX: assume for now */
135 #define XBD_SECTOR_SHFT         9
136
137 /*
138  * Translate Linux major/minor to an appropriate name and unit
139  * number. For HVM guests, this allows us to use the same drive names
140  * with blkfront as the emulated drives, easing transition slightly.
141  */
142 static void
143 blkfront_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name)
144 {
145         static struct vdev_info {
146                 int major;
147                 int shift;
148                 int base;
149                 const char *name;
150         } info[] = {
151                 {3,     6,      0,      "ada"}, /* ide0 */
152                 {22,    6,      2,      "ada"}, /* ide1 */
153                 {33,    6,      4,      "ada"}, /* ide2 */
154                 {34,    6,      6,      "ada"}, /* ide3 */
155                 {56,    6,      8,      "ada"}, /* ide4 */
156                 {57,    6,      10,     "ada"}, /* ide5 */
157                 {88,    6,      12,     "ada"}, /* ide6 */
158                 {89,    6,      14,     "ada"}, /* ide7 */
159                 {90,    6,      16,     "ada"}, /* ide8 */
160                 {91,    6,      18,     "ada"}, /* ide9 */
161
162                 {8,     4,      0,      "da"},  /* scsi disk0 */
163                 {65,    4,      16,     "da"},  /* scsi disk1 */
164                 {66,    4,      32,     "da"},  /* scsi disk2 */
165                 {67,    4,      48,     "da"},  /* scsi disk3 */
166                 {68,    4,      64,     "da"},  /* scsi disk4 */
167                 {69,    4,      80,     "da"},  /* scsi disk5 */
168                 {70,    4,      96,     "da"},  /* scsi disk6 */
169                 {71,    4,      112,    "da"},  /* scsi disk7 */
170                 {128,   4,      128,    "da"},  /* scsi disk8 */
171                 {129,   4,      144,    "da"},  /* scsi disk9 */
172                 {130,   4,      160,    "da"},  /* scsi disk10 */
173                 {131,   4,      176,    "da"},  /* scsi disk11 */
174                 {132,   4,      192,    "da"},  /* scsi disk12 */
175                 {133,   4,      208,    "da"},  /* scsi disk13 */
176                 {134,   4,      224,    "da"},  /* scsi disk14 */
177                 {135,   4,      240,    "da"},  /* scsi disk15 */
178
179                 {202,   4,      0,      "xbd"}, /* xbd */
180
181                 {0,     0,      0,      NULL},
182         };
183         int major = vdevice >> 8;
184         int minor = vdevice & 0xff;
185         int i;
186
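            /*
             * When bit 28 of the virtual device id is set, the unit number
             * is encoded in bits 8..27 and the device always attaches under
             * the "xbd" name.
             */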
187         if (vdevice & (1 << 28)) {
188                 *unit = (vdevice & ((1 << 28) - 1)) >> 8;
189                 *name = "xbd";
190                 return;
191         }
192
193         for (i = 0; info[i].major; i++) {
194                 if (info[i].major == major) {
195                         *unit = info[i].base + (minor >> info[i].shift);
196                         *name = info[i].name;
197                         return;
198                 }
199         }
200
201         *unit = minor >> 4;
202         *name = "xbd";
203 }
204
205 int
206 xlvbd_add(struct xb_softc *sc, blkif_sector_t sectors,
207     int vdevice, uint16_t vdisk_info, unsigned long sector_size)
208 {
209         int     unit, error = 0;
210         const char *name;
211
212         blkfront_vdevice_to_unit(vdevice, &unit, &name);
213
214         sc->xb_unit = unit;
215
216         if (strcmp(name, "xbd"))
217                 device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit);
218
219         sc->xb_disk = disk_alloc();
220         sc->xb_disk->d_unit = sc->xb_unit;
221         sc->xb_disk->d_open = blkif_open;
222         sc->xb_disk->d_close = blkif_close;
223         sc->xb_disk->d_ioctl = blkif_ioctl;
224         sc->xb_disk->d_strategy = xb_strategy;
225         sc->xb_disk->d_dump = xb_dump;
226         sc->xb_disk->d_name = name;
227         sc->xb_disk->d_drv1 = sc;
228         sc->xb_disk->d_sectorsize = sector_size;
229
230         sc->xb_disk->d_mediasize = sectors * sector_size;
231         sc->xb_disk->d_maxsize = sc->max_request_size;
232         sc->xb_disk->d_flags = 0;
233         disk_create(sc->xb_disk, DISK_VERSION);
234
235         return error;
236 }
237
238 /************************ end VBD support *****************/
239
240 /*
241  * Read/write routine for a buffer.  Finds the proper unit, places it on
242  * the sortq, and kicks the controller.
243  */
244 static void
245 xb_strategy(struct bio *bp)
246 {
247         struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
248
249         /* bogus disk? */
250         if (sc == NULL) {
251                 bp->bio_error = EINVAL;
252                 bp->bio_flags |= BIO_ERROR;
253                 bp->bio_resid = bp->bio_bcount;
254                 biodone(bp);
255                 return;
256         }
257
258         /*
259          * Place it in the queue of disk activities for this disk
260          */
261         mtx_lock(&sc->xb_io_lock);
262
263         xb_enqueue_bio(sc, bp);
264         xb_startio(sc);
265
266         mtx_unlock(&sc->xb_io_lock);
267         return;
268 }
269
270 static void
271 xb_bio_complete(struct xb_softc *sc, struct xb_command *cm)
272 {
273         struct bio *bp;
274
275         bp = cm->bp;
276
277         if ( unlikely(cm->status != BLKIF_RSP_OKAY) ) {
278                 disk_err(bp, "disk error" , -1, 0);
279                 printf(" status: %x\n", cm->status);
280                 bp->bio_flags |= BIO_ERROR;
281         }
282
283         if (bp->bio_flags & BIO_ERROR)
284                 bp->bio_error = EIO;
285         else
286                 bp->bio_resid = 0;
287
288         xb_free_command(cm);
289         biodone(bp);
290 }
291
292 // Quiesce the disk writes for a dump file before allowing the next buffer.
293 static void
294 xb_quiesce(struct xb_softc *sc)
295 {
296         int             mtd;
297
298         // While there are outstanding requests
299         while (!TAILQ_EMPTY(&sc->cm_busy)) {
300                 RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, mtd);
301                 if (mtd) {
302                         /* Received request completions, update queue. */
303                         blkif_int(sc);
304                 }
305                 if (!TAILQ_EMPTY(&sc->cm_busy)) {
306                         /*
307                          * Still pending requests, wait for the disk i/o
308                          * to complete.
309                          */
310                         HYPERVISOR_yield();
311                 }
312         }
313 }
314
315 /* Kernel dump function for a paravirtualized disk device */
316 static void
317 xb_dump_complete(struct xb_command *cm)
318 {
319
320         xb_enqueue_complete(cm);
321 }
322
323 static int
324 xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
325         size_t length)
326 {
327         struct  disk    *dp = arg;
328         struct xb_softc *sc = (struct xb_softc *) dp->d_drv1;
329         struct xb_command *cm;
330         size_t          chunk;
331         int             sbp;
332         int             rc = 0;
333
334         if (length <= 0)
335                 return (rc);
336
337         xb_quiesce(sc); /* All quiet on the western front. */
338
339         /*
340          * If this lock is held, then this module is failing, and a
341          * successful kernel dump is highly unlikely anyway.
342          */
343         mtx_lock(&sc->xb_io_lock);
344
345         /* Split the (up to 64KB) dump block into max_request_size chunks. */
346         for (sbp=0; length > 0; sbp++) {
347                 cm = xb_dequeue_free(sc);
348                 if (cm == NULL) {
349                         mtx_unlock(&sc->xb_io_lock);
350                         device_printf(sc->xb_dev, "dump: no more commands?\n");
351                         return (EBUSY);
352                 }
353
354                 if (gnttab_alloc_grant_references(sc->max_request_segments,
355                                                   &cm->gref_head) != 0) {
356                         xb_free_command(cm);
357                         mtx_unlock(&sc->xb_io_lock);
358                         device_printf(sc->xb_dev, "no more grant allocs?\n");
359                         return (EBUSY);
360                 }
361
362                 chunk = length > sc->max_request_size
363                       ? sc->max_request_size : length;
364                 cm->data = virtual;
365                 cm->datalen = chunk;
366                 cm->operation = BLKIF_OP_WRITE;
367                 cm->sector_number = offset / dp->d_sectorsize;
368                 cm->cm_complete = xb_dump_complete;
369
370                 xb_enqueue_ready(cm);
371
372                 length -= chunk;
373                 offset += chunk;
374                 virtual = (char *) virtual + chunk;
375         }
376
377         /* Tell DOM0 to do the I/O */
378         xb_startio(sc);
379         mtx_unlock(&sc->xb_io_lock);
380
381         /* Poll for the completion. */
382         xb_quiesce(sc); /* All quiet on the eastern front */
383
384         /* If there were any errors, bail out... */
385         while ((cm = xb_dequeue_complete(sc)) != NULL) {
386                 if (cm->status != BLKIF_RSP_OKAY) {
387                         device_printf(sc->xb_dev,
388                             "Dump I/O failed at sector %jd\n",
389                             cm->sector_number);
390                         rc = EIO;
391                 }
392                 xb_free_command(cm);
393         }
394
395         return (rc);
396 }
397
398
399 static int
400 blkfront_probe(device_t dev)
401 {
402 #ifdef XENHVM
403         int error;
404         char *type;
405 #endif
406
407         if (strcmp(xenbus_get_type(dev), "vbd") != 0)
408                 return (ENXIO);
409
410 #ifdef XENHVM
411         /*
412          * When running in an HVM domain, IDE disk emulation is
413          * disabled early in boot so that native drivers will
414          * not see emulated hardware.  However, CDROM device
415          * emulation cannot be disabled.
416          *
417          * Through use of FreeBSD's vm_guest and xen_hvm_domain()
418          * APIs, we could modify the native CDROM driver to fail its
419  * probe when running under Xen.  Unfortunately, the PV
420          * CDROM support in XenServer (up through at least version
421          * 6.2) isn't functional, so we instead rely on the emulated
422          * CDROM instance, and fail to attach the PV one here in
423          * the blkfront driver.
424          */
425         error = xs_read(XST_NIL, xenbus_get_node(dev),
426             "device-type", NULL, (void **) &type);
427         if (error)
428                 return (ENXIO);
429
430         if (strncmp(type, "cdrom", 5) == 0) {
431                 free(type, M_XENSTORE);
432                 return (ENXIO);
433         }
434         free(type, M_XENSTORE);
435 #endif
436
437         device_set_desc(dev, "Virtual Block Device");
438         device_quiet(dev);
439         return (0);
440 }
441
442 static void
443 xb_setup_sysctl(struct xb_softc *xb)
444 {
445         struct sysctl_ctx_list *sysctl_ctx = NULL;
446         struct sysctl_oid      *sysctl_tree = NULL;
447         
448         sysctl_ctx = device_get_sysctl_ctx(xb->xb_dev);
449         if (sysctl_ctx == NULL)
450                 return;
451
452         sysctl_tree = device_get_sysctl_tree(xb->xb_dev);
453         if (sysctl_tree == NULL)
454                 return;
455
456         SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
457                         "max_requests", CTLFLAG_RD, &xb->max_requests, -1,
458                         "maximum outstanding requests (negotiated)");
459
460         SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
461                         "max_request_segments", CTLFLAG_RD,
462                         &xb->max_request_segments, 0,
463                         "maximum number of pages per request (negotiated)");
464
465         SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
466                         "max_request_size", CTLFLAG_RD,
467                         &xb->max_request_size, 0,
468                         "maximum size in bytes of a request (negotiated)");
469
470         SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
471                         "ring_pages", CTLFLAG_RD,
472                         &xb->ring_pages, 0,
473                         "communication channel pages (negotiated)");
474 }
475
476 /*
477  * Setup supplies the backend directory and virtual device.  We place an
478  * event channel and shared frame entries, and watch the backend to see
479  * when it is ready.
480  */
481 static int
482 blkfront_attach(device_t dev)
483 {
484         struct xb_softc *sc;
485         const char *name;
486         uint32_t vdevice;
487         int error;
488         int i;
489         int unit;
490
491         /* FIXME: Use dynamic device id if this is not set. */
492         error = xs_scanf(XST_NIL, xenbus_get_node(dev),
493             "virtual-device", NULL, "%" PRIu32, &vdevice);
494         if (error) {
495                 xenbus_dev_fatal(dev, error, "reading virtual-device");
496                 device_printf(dev, "Couldn't determine virtual device.\n");
497                 return (error);
498         }
499
500         blkfront_vdevice_to_unit(vdevice, &unit, &name);
501         if (!strcmp(name, "xbd"))
502                 device_set_unit(dev, unit);
503
504         sc = device_get_softc(dev);
505         mtx_init(&sc->xb_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
506         xb_initq_free(sc);
507         xb_initq_busy(sc);
508         xb_initq_ready(sc);
509         xb_initq_complete(sc);
510         xb_initq_bio(sc);
511         for (i = 0; i < XBF_MAX_RING_PAGES; i++)
512                 sc->ring_ref[i] = GRANT_INVALID_REF;
513
514         sc->xb_dev = dev;
515         sc->vdevice = vdevice;
516         sc->connected = BLKIF_STATE_DISCONNECTED;
517
518         xb_setup_sysctl(sc);
519
520         /* Wait for backend device to publish its protocol capabilities. */
521         xenbus_set_state(dev, XenbusStateInitialising);
522
523         return (0);
524 }
525
526 static int
527 blkfront_suspend(device_t dev)
528 {
529         struct xb_softc *sc = device_get_softc(dev);
530         int retval;
531         int saved_state;
532
533         /* Prevent new requests being issued until we fix things up. */
534         mtx_lock(&sc->xb_io_lock);
535         saved_state = sc->connected;
536         sc->connected = BLKIF_STATE_SUSPENDED;
537
538         /* Wait for outstanding I/O to drain. */
539         retval = 0;
540         while (TAILQ_EMPTY(&sc->cm_busy) == 0) {
541                 if (msleep(&sc->cm_busy, &sc->xb_io_lock,
542                            PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
543                         retval = EBUSY;
544                         break;
545                 }
546         }
547         mtx_unlock(&sc->xb_io_lock);
548
549         if (retval != 0)
550                 sc->connected = saved_state;
551
552         return (retval);
553 }
554
555 static int
556 blkfront_resume(device_t dev)
557 {
558         struct xb_softc *sc = device_get_softc(dev);
559
560         DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));
561
562         blkif_free(sc);
563         blkfront_initialize(sc);
564         return (0);
565 }
566
567 static void
568 blkfront_initialize(struct xb_softc *sc)
569 {
570         const char *otherend_path;
571         const char *node_path;
572         uint32_t max_ring_page_order;
573         int error;
574         int i;
575
576         if (xenbus_get_state(sc->xb_dev) != XenbusStateInitialising) {
577                 /* Initialization has already been performed. */
578                 return;
579         }
580
581         /*
582          * Protocol defaults valid even if negotiation for a
583          * setting fails.
584          */
585         max_ring_page_order = 0;
586         sc->ring_pages = 1;
587         sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
588         sc->max_request_size = XBF_SEGS_TO_SIZE(sc->max_request_segments);
589         sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);
590
591         /*
592          * Protocol negotiation.
593          *
594          * \note xs_gather() returns on the first encountered error, so
595          *       we must use independent calls in order to guarantee
596          *       we don't miss information in a sparsely populated back-end
597          *       tree.
598          *
599          * \note xs_scanf() does not update variables for unmatched
600          *       fields.
601          */
602         otherend_path = xenbus_get_otherend_path(sc->xb_dev);
603         node_path = xenbus_get_node(sc->xb_dev);
604
605         /* Support both backend schemes for relaying ring page limits. */
606         (void)xs_scanf(XST_NIL, otherend_path,
607                        "max-ring-page-order", NULL, "%" PRIu32,
608                        &max_ring_page_order);
609         sc->ring_pages = 1 << max_ring_page_order;
610         (void)xs_scanf(XST_NIL, otherend_path,
611                        "max-ring-pages", NULL, "%" PRIu32,
612                        &sc->ring_pages);
613         if (sc->ring_pages < 1)
614                 sc->ring_pages = 1;
615
616         sc->max_requests = BLKIF_MAX_RING_REQUESTS(sc->ring_pages * PAGE_SIZE);
617         (void)xs_scanf(XST_NIL, otherend_path,
618                        "max-requests", NULL, "%" PRIu32,
619                        &sc->max_requests);
620
621         (void)xs_scanf(XST_NIL, otherend_path,
622                        "max-request-segments", NULL, "%" PRIu32,
623                        &sc->max_request_segments);
624
625         (void)xs_scanf(XST_NIL, otherend_path,
626                        "max-request-size", NULL, "%" PRIu32,
627                        &sc->max_request_size);
628
629         if (sc->ring_pages > XBF_MAX_RING_PAGES) {
630                 device_printf(sc->xb_dev, "Back-end specified ring-pages of "
631                               "%u limited to front-end limit of %zu.\n",
632                               sc->ring_pages, XBF_MAX_RING_PAGES);
633                 sc->ring_pages = XBF_MAX_RING_PAGES;
634         }
635
636         if (powerof2(sc->ring_pages) == 0) {
637                 uint32_t new_page_limit;
638
639                 new_page_limit = 0x01 << (fls(sc->ring_pages) - 1);
640                 device_printf(sc->xb_dev, "Back-end specified ring-pages of "
641                               "%u is not a power of 2. Limited to %u.\n",
642                               sc->ring_pages, new_page_limit);
643                 sc->ring_pages = new_page_limit;
644         }
645
646         if (sc->max_requests > XBF_MAX_REQUESTS) {
647                 device_printf(sc->xb_dev, "Back-end specified max_requests of "
648                               "%u limited to front-end limit of %u.\n",
649                               sc->max_requests, XBF_MAX_REQUESTS);
650                 sc->max_requests = XBF_MAX_REQUESTS;
651         }
652
653         if (sc->max_request_segments > XBF_MAX_SEGMENTS_PER_REQUEST) {
654                 device_printf(sc->xb_dev, "Back-end specified "
655                               "max_request_segments of %u limited to "
656                               "front-end limit of %u.\n",
657                               sc->max_request_segments,
658                               XBF_MAX_SEGMENTS_PER_REQUEST);
659                 sc->max_request_segments = XBF_MAX_SEGMENTS_PER_REQUEST;
660         }
661
662         if (sc->max_request_size > XBF_MAX_REQUEST_SIZE) {
663                 device_printf(sc->xb_dev, "Back-end specified "
664                               "max_request_size of %u limited to front-end "
665                               "limit of %u.\n", sc->max_request_size,
666                               XBF_MAX_REQUEST_SIZE);
667                 sc->max_request_size = XBF_MAX_REQUEST_SIZE;
668         }
669  
670         if (sc->max_request_size > XBF_SEGS_TO_SIZE(sc->max_request_segments)) {
671                 device_printf(sc->xb_dev, "Back-end specified "
672                               "max_request_size of %u limited to front-end "
673                               "limit of %u.  (Too few segments.)\n",
674                               sc->max_request_size,
675                               XBF_SEGS_TO_SIZE(sc->max_request_segments));
676                 sc->max_request_size =
677                     XBF_SEGS_TO_SIZE(sc->max_request_segments);
678         }
679
680         sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);
681
682         /* Allocate datastructures based on negotiated values. */
683         error = bus_dma_tag_create(bus_get_dma_tag(sc->xb_dev), /* parent */
684                                    512, PAGE_SIZE,      /* algnmnt, boundary */
685                                    BUS_SPACE_MAXADDR,   /* lowaddr */
686                                    BUS_SPACE_MAXADDR,   /* highaddr */
687                                    NULL, NULL,          /* filter, filterarg */
688                                    sc->max_request_size,
689                                    sc->max_request_segments,
690                                    PAGE_SIZE,           /* maxsegsize */
691                                    BUS_DMA_ALLOCNOW,    /* flags */
692                                    busdma_lock_mutex,   /* lockfunc */
693                                    &sc->xb_io_lock,     /* lockarg */
694                                    &sc->xb_io_dmat);
695         if (error != 0) {
696                 xenbus_dev_fatal(sc->xb_dev, error,
697                                  "Cannot allocate parent DMA tag\n");
698                 return;
699         }
700
701         /* Per-transaction data allocation. */
702         sc->shadow = malloc(sizeof(*sc->shadow) * sc->max_requests,
703                             M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
704         if (sc->shadow == NULL) {
705                 bus_dma_tag_destroy(sc->xb_io_dmat);
706                 xenbus_dev_fatal(sc->xb_dev, error,
707                                  "Cannot allocate request structures\n");
708                 return;
709         }
710
711         for (i = 0; i < sc->max_requests; i++) {
712                 struct xb_command *cm;
713
714                 cm = &sc->shadow[i];
715                 cm->sg_refs = malloc(sizeof(grant_ref_t)
716                                    * sc->max_request_segments,
717                                      M_XENBLOCKFRONT, M_NOWAIT);
718                 if (cm->sg_refs == NULL)
719                         break;
720                 cm->id = i;
721                 cm->cm_sc = sc;
722                 if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0)
723                         break;
724                 xb_free_command(cm);
725         }
726
727         if (setup_blkring(sc) != 0)
728                 return;
729
730         /* Support both backend schemes for relaying ring page limits. */
731         if (sc->ring_pages > 1) {
732                 error = xs_printf(XST_NIL, node_path,
733                                  "num-ring-pages","%u", sc->ring_pages);
734                 if (error) {
735                         xenbus_dev_fatal(sc->xb_dev, error,
736                                          "writing %s/num-ring-pages",
737                                          node_path);
738                         return;
739                 }
740
741                 error = xs_printf(XST_NIL, node_path,
742                                  "ring-page-order", "%u",
743                                  fls(sc->ring_pages) - 1);
744                 if (error) {
745                         xenbus_dev_fatal(sc->xb_dev, error,
746                                          "writing %s/ring-page-order",
747                                          node_path);
748                         return;
749                 }
750         }
751
752         error = xs_printf(XST_NIL, node_path,
753                          "max-requests","%u", sc->max_requests);
754         if (error) {
755                 xenbus_dev_fatal(sc->xb_dev, error,
756                                  "writing %s/max-requests",
757                                  node_path);
758                 return;
759         }
760
761         error = xs_printf(XST_NIL, node_path,
762                          "max-request-segments","%u", sc->max_request_segments);
763         if (error) {
764                 xenbus_dev_fatal(sc->xb_dev, error,
765                                  "writing %s/max-request-segments",
766                                  node_path);
767                 return;
768         }
769
770         error = xs_printf(XST_NIL, node_path,
771                          "max-request-size","%u", sc->max_request_size);
772         if (error) {
773                 xenbus_dev_fatal(sc->xb_dev, error,
774                                  "writing %s/max-request-size",
775                                  node_path);
776                 return;
777         }
778
779         error = xs_printf(XST_NIL, node_path, "event-channel",
780                           "%u", irq_to_evtchn_port(sc->irq));
781         if (error) {
782                 xenbus_dev_fatal(sc->xb_dev, error,
783                                  "writing %s/event-channel",
784                                  node_path);
785                 return;
786         }
787
788         error = xs_printf(XST_NIL, node_path,
789                           "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
790         if (error) {
791                 xenbus_dev_fatal(sc->xb_dev, error,
792                                  "writing %s/protocol",
793                                  node_path);
794                 return;
795         }
796
797         xenbus_set_state(sc->xb_dev, XenbusStateInitialised);
798 }
799
800 static int 
801 setup_blkring(struct xb_softc *sc)
802 {
803         blkif_sring_t *sring;
804         uintptr_t sring_page_addr;
805         int error;
806         int i;
807
808         sring = malloc(sc->ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
809                        M_NOWAIT|M_ZERO);
810         if (sring == NULL) {
811                 xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring");
812                 return (ENOMEM);
813         }
814         SHARED_RING_INIT(sring);
815         FRONT_RING_INIT(&sc->ring, sring, sc->ring_pages * PAGE_SIZE);
816
817         for (i = 0, sring_page_addr = (uintptr_t)sring;
818              i < sc->ring_pages;
819              i++, sring_page_addr += PAGE_SIZE) {
820
821                 error = xenbus_grant_ring(sc->xb_dev,
822                     (vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->ring_ref[i]);
823                 if (error) {
824                         xenbus_dev_fatal(sc->xb_dev, error,
825                                          "granting ring_ref(%d)", i);
826                         return (error);
827                 }
828         }
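            /*
             * Publish the ring grant references to the XenStore: a single
             * "ring-ref" node when one ring page is in use, or one
             * "ring-ref<N>" node per page when a multi-page ring was
             * negotiated.
             */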
829         if (sc->ring_pages == 1) {
830                 error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
831                                   "ring-ref", "%u", sc->ring_ref[0]);
832                 if (error) {
833                         xenbus_dev_fatal(sc->xb_dev, error,
834                                          "writing %s/ring-ref",
835                                          xenbus_get_node(sc->xb_dev));
836                         return (error);
837                 }
838         } else {
839                 for (i = 0; i < sc->ring_pages; i++) {
840                         char ring_ref_name[]= "ring_refXX";
841
842                         snprintf(ring_ref_name, sizeof(ring_ref_name),
843                                  "ring-ref%u", i);
844                         error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
845                                          ring_ref_name, "%u", sc->ring_ref[i]);
846                         if (error) {
847                                 xenbus_dev_fatal(sc->xb_dev, error,
848                                                  "writing %s/%s",
849                                                  xenbus_get_node(sc->xb_dev),
850                                                  ring_ref_name);
851                                 return (error);
852                         }
853                 }
854         }
855
856         error = bind_listening_port_to_irqhandler(
857             xenbus_get_otherend_id(sc->xb_dev),
858             "xbd", (driver_intr_t *)blkif_int, sc,
859             INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq);
860         if (error) {
861                 xenbus_dev_fatal(sc->xb_dev, error,
862                     "bind_evtchn_to_irqhandler failed");
863                 return (error);
864         }
865
866         return (0);
867 }
868
869 /**
870  * Callback received when the backend's state changes.
871  */
872 static void
873 blkfront_backend_changed(device_t dev, XenbusState backend_state)
874 {
875         struct xb_softc *sc = device_get_softc(dev);
876
877         DPRINTK("backend_state=%d\n", backend_state);
878
879         switch (backend_state) {
880         case XenbusStateUnknown:
881         case XenbusStateInitialising:
882         case XenbusStateReconfigured:
883         case XenbusStateReconfiguring:
884         case XenbusStateClosed:
885                 break;
886
887         case XenbusStateInitWait:
888         case XenbusStateInitialised:
889                 blkfront_initialize(sc);
890                 break;
891
892         case XenbusStateConnected:
893                 blkfront_initialize(sc);
894                 blkfront_connect(sc);
895                 break;
896
897         case XenbusStateClosing:
898                 if (sc->users > 0)
899                         xenbus_dev_error(dev, -EBUSY,
900                                          "Device in use; refusing to close");
901                 else
902                         blkfront_closing(dev);
903                 break;  
904         }
905 }
906
907 /* 
908 ** Invoked when the backend is finally 'ready' (and has published
909 ** the details about the physical device - #sectors, size, etc). 
910 */
911 static void 
912 blkfront_connect(struct xb_softc *sc)
913 {
914         device_t dev = sc->xb_dev;
915         unsigned long sectors, sector_size;
916         unsigned int binfo;
917         int err, feature_barrier;
918
919         if( (sc->connected == BLKIF_STATE_CONNECTED) || 
920             (sc->connected == BLKIF_STATE_SUSPENDED) )
921                 return;
922
923         DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));
924
925         err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
926                         "sectors", "%lu", &sectors,
927                         "info", "%u", &binfo,
928                         "sector-size", "%lu", &sector_size,
929                         NULL);
930         if (err) {
931                 xenbus_dev_fatal(dev, err,
932                     "reading backend fields at %s",
933                     xenbus_get_otherend_path(dev));
934                 return;
935         }
936         if ((sectors == 0) || (sector_size == 0)) {
937                 xenbus_dev_fatal(dev, 0,
938                     "invalid parameters from %s:"
939                     " sectors = %lu, sector_size = %lu",
940                     xenbus_get_otherend_path(dev),
941                     sectors, sector_size);
942                 return;
943         }
944         err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
945                         "feature-barrier", "%d", &feature_barrier,
946                         NULL);
947         if (err == 0 && feature_barrier != 0)
948                 sc->xb_flags |= XB_BARRIER;
949
950         if (sc->xb_disk == NULL) {
951                 device_printf(dev, "%juMB <%s> at %s",
952                     (uintmax_t) sectors / (1048576 / sector_size),
953                     device_get_desc(dev),
954                     xenbus_get_node(dev));
955                 bus_print_child_footer(device_get_parent(dev), dev);
956
957                 xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size);
958         }
959
960         (void)xenbus_set_state(dev, XenbusStateConnected); 
961
962         /* Kick pending requests. */
963         mtx_lock(&sc->xb_io_lock);
964         sc->connected = BLKIF_STATE_CONNECTED;
965         xb_startio(sc);
966         sc->xb_flags |= XB_READY;
967         mtx_unlock(&sc->xb_io_lock);
968 }
969
970 /**
971  * Handle the change of state of the backend to Closing.  We must delete our
972  * device-layer structures now, to ensure that writes are flushed through to
973  * the backend.  Once this is done, we can switch to Closed in
974  * acknowledgement.
975  */
976 static void
977 blkfront_closing(device_t dev)
978 {
979         struct xb_softc *sc = device_get_softc(dev);
980
981         xenbus_set_state(dev, XenbusStateClosing);
982
983         DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));
984
985         if (sc->xb_disk != NULL) {
986                 disk_destroy(sc->xb_disk);
987                 sc->xb_disk = NULL;
988         }
989
990         xenbus_set_state(dev, XenbusStateClosed); 
991 }
992
993
994 static int
995 blkfront_detach(device_t dev)
996 {
997         struct xb_softc *sc = device_get_softc(dev);
998
999         DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));
1000
1001         blkif_free(sc);
1002         mtx_destroy(&sc->xb_io_lock);
1003
1004         return 0;
1005 }
1006
1007
1008 static inline void 
1009 flush_requests(struct xb_softc *sc)
1010 {
1011         int notify;
1012
1013         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify);
1014
1015         if (notify)
1016                 notify_remote_via_irq(sc->irq);
1017 }
1018
1019 static void
1020 blkif_restart_queue_callback(void *arg)
1021 {
1022         struct xb_softc *sc = arg;
1023
1024         mtx_lock(&sc->xb_io_lock);
1025
1026         xb_startio(sc);
1027
1028         mtx_unlock(&sc->xb_io_lock);
1029 }
1030
1031 static int
1032 blkif_open(struct disk *dp)
1033 {
1034         struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
1035
1036         if (sc == NULL) {
1037                 printf("blkif_open: disk not found\n");
1038                 return (ENXIO);
1039         }
1040
1041         sc->xb_flags |= XB_OPEN;
1042         sc->users++;
1043         return (0);
1044 }
1045
1046 static int
1047 blkif_close(struct disk *dp)
1048 {
1049         struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
1050
1051         if (sc == NULL)
1052                 return (ENXIO);
1053         sc->xb_flags &= ~XB_OPEN;
1054         if (--(sc->users) == 0) {
1055                 /*
1056                  * Check whether we have been instructed to close.  We will
1057                  * have ignored this request initially, as the device was
1058                  * still mounted.
1059                  */
1060                 if (xenbus_get_otherend_state(sc->xb_dev) == XenbusStateClosing)
1061                         blkfront_closing(sc->xb_dev);
1062         }
1063         return (0);
1064 }
1065
1066 static int
1067 blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
1068 {
1069         struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
1070
1071         if (sc == NULL)
1072                 return (ENXIO);
1073
1074         return (ENOTTY);
1075 }
1076
1077 static void
1078 xb_free_command(struct xb_command *cm)
1079 {
1080
1081         KASSERT((cm->cm_flags & XB_ON_XBQ_MASK) == 0,
1082             ("Freeing command that is still on a queue\n"));
1083
1084         cm->cm_flags = 0;
1085         cm->bp = NULL;
1086         cm->cm_complete = NULL;
1087         xb_enqueue_free(cm);
1088 }
1089
1090 /*
1091  * blkif_queue_request
1092  *
1093  * request block io
1094  * 
1095  * id: for guest use only.
1096  * operation: BLKIF_OP_{READ,WRITE,PROBE}
1097  * buffer: buffer to read/write into. this should be a
1098  *   virtual address in the guest os.
1099  */
1100 static struct xb_command *
1101 xb_bio_command(struct xb_softc *sc)
1102 {
1103         struct xb_command *cm;
1104         struct bio *bp;
1105
1106         if (unlikely(sc->connected != BLKIF_STATE_CONNECTED))
1107                 return (NULL);
1108
1109         bp = xb_dequeue_bio(sc);
1110         if (bp == NULL)
1111                 return (NULL);
1112
1113         if ((cm = xb_dequeue_free(sc)) == NULL) {
1114                 xb_requeue_bio(sc, bp);
1115                 return (NULL);
1116         }
1117
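            /*
             * If there are not enough free grant references for a full-sized
             * request, register a callback to restart the queue once more
             * become available, put the bio back, release the command, and
             * freeze the queue.
             */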
1118         if (gnttab_alloc_grant_references(sc->max_request_segments,
1119             &cm->gref_head) != 0) {
1120                 gnttab_request_free_callback(&sc->callback,
1121                         blkif_restart_queue_callback, sc,
1122                         sc->max_request_segments);
1123                 xb_requeue_bio(sc, bp);
1124                 xb_enqueue_free(cm);
1125                 sc->xb_flags |= XB_FROZEN;
1126                 return (NULL);
1127         }
1128
1129         cm->bp = bp;
1130         cm->data = bp->bio_data;
1131         cm->datalen = bp->bio_bcount;
1132         cm->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
1133             BLKIF_OP_WRITE;
1134         cm->sector_number = (blkif_sector_t)bp->bio_pblkno;
1135
1136         return (cm);
1137 }
1138
1139 static int
1140 blkif_queue_request(struct xb_softc *sc, struct xb_command *cm)
1141 {
1142         int     error;
1143
1144         error = bus_dmamap_load(sc->xb_io_dmat, cm->map, cm->data, cm->datalen,
1145             blkif_queue_cb, cm, 0);
1146         if (error == EINPROGRESS) {
1147                 printf("EINPROGRESS\n");
1148                 sc->xb_flags |= XB_FROZEN;
1149                 cm->cm_flags |= XB_CMD_FROZEN;
1150                 return (0);
1151         }
1152
1153         return (error);
1154 }
1155
1156 static void
1157 blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
1158 {
1159         struct xb_softc *sc;
1160         struct xb_command *cm;
1161         blkif_request_t *ring_req;
1162         struct blkif_request_segment *sg;
1163         struct blkif_request_segment *last_block_sg;
1164         grant_ref_t *sg_ref;
1165         vm_paddr_t buffer_ma;
1166         uint64_t fsect, lsect;
1167         int ref;
1168         int op;
1169         int block_segs;
1170
1171         cm = arg;
1172         sc = cm->cm_sc;
1173
1174 //printf("%s: Start\n", __func__);
1175         if (error) {
1176                 printf("error %d in blkif_queue_cb\n", error);
1177                 cm->bp->bio_error = EIO;
1178                 biodone(cm->bp);
1179                 xb_free_command(cm);
1180                 return;
1181         }
1182
1183         /* Fill out a communications ring structure. */
1184         ring_req = RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
1185         sc->ring.req_prod_pvt++;
1186         ring_req->id = cm->id;
1187         ring_req->operation = cm->operation;
1188         ring_req->sector_number = cm->sector_number;
1189         ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
1190         ring_req->nr_segments = nsegs;
1191         cm->nseg = nsegs;
1192
1193         block_segs    = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK);
1194         sg            = ring_req->seg;
1195         last_block_sg = sg + block_segs;
1196         sg_ref        = cm->sg_refs;
1197
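            /*
             * Fill in the segment descriptors.  The first block_segs entries
             * live in the request header; any remaining segments spill into
             * additional ring slots obtained with BLKRING_GET_SEG_BLOCK().
             * Each entry records the grant reference and the first/last
             * sectors of one DMA segment.
             */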
1198         while (1) {
1199
1200                 while (sg < last_block_sg) {
1201                         buffer_ma = segs->ds_addr;
1202                         fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
1203                         lsect = fsect + (segs->ds_len  >> XBD_SECTOR_SHFT) - 1;
1204
1205                         KASSERT(lsect <= 7, ("XEN disk driver data cannot "
1206                                 "cross a page boundary"));
1207
1208                         /* install a grant reference. */
1209                         ref = gnttab_claim_grant_reference(&cm->gref_head);
1210
1211                         /*
1212                          * GNTTAB_LIST_END == 0xffffffff, but it is private
1213                          * to gnttab.c.
1214                          */
1215                         KASSERT(ref != ~0, ("grant_reference failed"));
1216
1217                         gnttab_grant_foreign_access_ref(
1218                                 ref,
1219                                 xenbus_get_otherend_id(sc->xb_dev),
1220                                 buffer_ma >> PAGE_SHIFT,
1221                                 ring_req->operation == BLKIF_OP_WRITE);
1222
1223                         *sg_ref = ref;
1224                         *sg = (struct blkif_request_segment) {
1225                                 .gref       = ref,
1226                                 .first_sect = fsect, 
1227                                 .last_sect  = lsect };
1228                         sg++;
1229                         sg_ref++;
1230                         segs++;
1231                         nsegs--;
1232                 }
1233                 block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
1234                 if (block_segs == 0)
1235                         break;
1236
1237                 sg = BLKRING_GET_SEG_BLOCK(&sc->ring, sc->ring.req_prod_pvt);
1238                 sc->ring.req_prod_pvt++;
1239                 last_block_sg = sg + block_segs;
1240         }
1241
1242         if (cm->operation == BLKIF_OP_READ)
1243                 op = BUS_DMASYNC_PREREAD;
1244         else if (cm->operation == BLKIF_OP_WRITE)
1245                 op = BUS_DMASYNC_PREWRITE;
1246         else
1247                 op = 0;
1248         bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
1249
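            /* Return any reserved grant references that were never claimed. */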
1250         gnttab_free_grant_references(cm->gref_head);
1251
1252         xb_enqueue_busy(cm);
1253
1254         /*
1255          * This flag means that we're probably executing in the busdma swi
1256          * instead of in the startio context, so an explicit flush is needed.
1257          */
1258         if (cm->cm_flags & XB_CMD_FROZEN)
1259                 flush_requests(sc);
1260
1261 //printf("%s: Done\n", __func__);
1262         return;
1263 }
1264
1265 /*
1266  * Dequeue buffers and place them in the shared communication ring.
1267  * Return when no more requests can be accepted or all buffers have 
1268  * been queued.
1269  *
1270  * Signal XEN once the ring has been filled out.
1271  */
1272 static void
1273 xb_startio(struct xb_softc *sc)
1274 {
1275         struct xb_command *cm;
1276         int error, queued = 0;
1277
1278         mtx_assert(&sc->xb_io_lock, MA_OWNED);
1279
1280         if (sc->connected != BLKIF_STATE_CONNECTED)
1281                 return;
1282
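            /*
             * Only issue a request while enough ring slots remain for a
             * maximally sized (multi-block) request, and stop early if the
             * queue has been frozen while waiting for resources.
             */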
1283         while (RING_FREE_REQUESTS(&sc->ring) >= sc->max_request_blocks) {
1284                 if (sc->xb_flags & XB_FROZEN)
1285                         break;
1286
1287                 cm = xb_dequeue_ready(sc);
1288
1289                 if (cm == NULL)
1290                     cm = xb_bio_command(sc);
1291
1292                 if (cm == NULL)
1293                         break;
1294
1295                 if ((error = blkif_queue_request(sc, cm)) != 0) {
1296                         printf("blkif_queue_request returned %d\n", error);
1297                         break;
1298                 }
1299                 queued++;
1300         }
1301
1302         if (queued != 0) 
1303                 flush_requests(sc);
1304 }
1305
1306 static void
1307 blkif_int(void *xsc)
1308 {
1309         struct xb_softc *sc = xsc;
1310         struct xb_command *cm;
1311         blkif_response_t *bret;
1312         RING_IDX i, rp;
1313         int op;
1314
1315         mtx_lock(&sc->xb_io_lock);
1316
1317         if (unlikely(sc->connected == BLKIF_STATE_DISCONNECTED)) {
1318                 mtx_unlock(&sc->xb_io_lock);
1319                 return;
1320         }
1321
1322  again:
1323         rp = sc->ring.sring->rsp_prod;
1324         rmb(); /* Ensure we see queued responses up to 'rp'. */
1325
1326         for (i = sc->ring.rsp_cons; i != rp;) {
1327                 bret = RING_GET_RESPONSE(&sc->ring, i);
1328                 cm   = &sc->shadow[bret->id];
1329
1330                 xb_remove_busy(cm);
1331                 i += blkif_completion(cm);
1332
1333                 if (cm->operation == BLKIF_OP_READ)
1334                         op = BUS_DMASYNC_POSTREAD;
1335                 else if (cm->operation == BLKIF_OP_WRITE)
1336                         op = BUS_DMASYNC_POSTWRITE;
1337                 else
1338                         op = 0;
1339                 bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
1340                 bus_dmamap_unload(sc->xb_io_dmat, cm->map);
1341
1342                 /*
1343                  * If commands are completing then resources are probably
1344                  * being freed as well.  It's a cheap assumption even when
1345                  * wrong.
1346                  */
1347                 sc->xb_flags &= ~XB_FROZEN;
1348
1349                 /*
1350                  * Directly call the i/o complete routine to save an
1351                  * indirection in the common case.
1352                  */
1353                 cm->status = bret->status;
1354                 if (cm->bp)
1355                         xb_bio_complete(sc, cm);
1356                 else if (cm->cm_complete)
1357                         (cm->cm_complete)(cm);
1358                 else
1359                         xb_free_command(cm);
1360         }
1361
1362         sc->ring.rsp_cons = i;
1363
1364         if (i != sc->ring.req_prod_pvt) {
1365                 int more_to_do;
1366                 RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, more_to_do);
1367                 if (more_to_do)
1368                         goto again;
1369         } else {
1370                 sc->ring.sring->rsp_event = i + 1;
1371         }
1372
1373         xb_startio(sc);
1374
1375         if (unlikely(sc->connected == BLKIF_STATE_SUSPENDED))
1376                 wakeup(&sc->cm_busy);
1377
1378         mtx_unlock(&sc->xb_io_lock);
1379 }
1380
1381 static void 
1382 blkif_free(struct xb_softc *sc)
1383 {
1384         uint8_t *sring_page_ptr;
1385         int i;
1386         
1387         /* Prevent new requests being issued until we fix things up. */
1388         mtx_lock(&sc->xb_io_lock);
1389         sc->connected = BLKIF_STATE_DISCONNECTED; 
1390         mtx_unlock(&sc->xb_io_lock);
1391
1392         /* Free resources associated with old device channel. */
1393         if (sc->ring.sring != NULL) {
1394                 sring_page_ptr = (uint8_t *)sc->ring.sring;
1395                 for (i = 0; i < sc->ring_pages; i++) {
1396                         if (sc->ring_ref[i] != GRANT_INVALID_REF) {
1397                                 gnttab_end_foreign_access_ref(sc->ring_ref[i]);
1398                                 sc->ring_ref[i] = GRANT_INVALID_REF;
1399                         }
1400                         sring_page_ptr += PAGE_SIZE;
1401                 }
1402                 free(sc->ring.sring, M_XENBLOCKFRONT);
1403                 sc->ring.sring = NULL;
1404         }
1405
1406         if (sc->shadow) {
1407
1408                 for (i = 0; i < sc->max_requests; i++) {
1409                         struct xb_command *cm;
1410
1411                         cm = &sc->shadow[i];
1412                         if (cm->sg_refs != NULL) {
1413                                 free(cm->sg_refs, M_XENBLOCKFRONT);
1414                                 cm->sg_refs = NULL;
1415                         }
1416
1417                         bus_dmamap_destroy(sc->xb_io_dmat, cm->map);
1418                 }
1419                 free(sc->shadow, M_XENBLOCKFRONT);
1420                 sc->shadow = NULL;
1421
1422                 bus_dma_tag_destroy(sc->xb_io_dmat);
1423                 
1424                 xb_initq_free(sc);
1425                 xb_initq_ready(sc);
1426                 xb_initq_complete(sc);
1427         }
1428                 
1429         if (sc->irq) {
1430                 unbind_from_irqhandler(sc->irq);
1431                 sc->irq = 0;
1432         }
1433 }
1434
1435 static int
1436 blkif_completion(struct xb_command *s)
1437 {
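            /*
             * Revoke the foreign access grants used by this request and
             * report how many ring slots (header plus extra segment blocks)
             * it occupied, so the caller can advance the response consumer
             * index past all of them.
             */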
1438 //printf("%s: Req %p(%d)\n", __func__, s, s->nseg);
1439         gnttab_end_foreign_access_references(s->nseg, s->sg_refs);
1440         return (BLKIF_SEGS_TO_BLOCKS(s->nseg));
1441 }
1442
1443 /* ** Driver registration ** */
1444 static device_method_t blkfront_methods[] = { 
1445         /* Device interface */ 
1446         DEVMETHOD(device_probe,         blkfront_probe), 
1447         DEVMETHOD(device_attach,        blkfront_attach), 
1448         DEVMETHOD(device_detach,        blkfront_detach), 
1449         DEVMETHOD(device_shutdown,      bus_generic_shutdown), 
1450         DEVMETHOD(device_suspend,       blkfront_suspend), 
1451         DEVMETHOD(device_resume,        blkfront_resume), 
1452  
1453         /* Xenbus interface */
1454         DEVMETHOD(xenbus_otherend_changed, blkfront_backend_changed),
1455
1456         { 0, 0 } 
1457 }; 
1458
1459 static driver_t blkfront_driver = { 
1460         "xbd", 
1461         blkfront_methods, 
1462         sizeof(struct xb_softc),                      
1463 }; 
1464 devclass_t blkfront_devclass; 
1465  
1466 DRIVER_MODULE(xbd, xenbusb_front, blkfront_driver, blkfront_devclass, 0, 0);