/*
 * XenBSD block device driver
 *
 * Copyright (c) 2009 Scott Long, Yahoo!
 * Copyright (c) 2009 Frank Suchomel, Citrix
 * Copyright (c) 2009 Doug F. Rabson, Citrix
 * Copyright (c) 2005 Kip Macy
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>
#include <sys/sysctl.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>
#include <sys/bus_dma.h>

#include <machine/_inttypes.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>

#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus/xenbusvar.h>

#include <geom/geom_disk.h>

#include <dev/xen/blkfront/block.h>

#include "xenbus_if.h"

/* prototypes */
static void xb_free_command(struct xb_command *cm);
static void xb_startio(struct xb_softc *sc);
static void blkfront_connect(struct xb_softc *);
static void blkfront_closing(device_t);
static int blkfront_detach(device_t);
static int setup_blkring(struct xb_softc *);
static void blkif_int(void *);
static void blkfront_initialize(struct xb_softc *);
static int blkif_completion(struct xb_command *);
static void blkif_free(struct xb_softc *);
static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int);

static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");

#define GRANT_INVALID_REF 0

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif

#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#ifdef notyet
static char *blkif_state_name[] = {
        [BLKIF_STATE_DISCONNECTED] = "disconnected",
        [BLKIF_STATE_CONNECTED]    = "connected",
        [BLKIF_STATE_SUSPENDED]    = "closed",
};

static char * blkif_status_name[] = {
        [BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
        [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
        [BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
        [BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif

#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
static int blkif_queue_request(struct xb_softc *sc, struct xb_command *cm);
static void xb_strategy(struct bio *bp);

/*
 * In order to quiesce the device during kernel dumps, outstanding requests to
 * DOM0 for disk reads/writes need to be accounted for.
 */
static  int     xb_dump(void *, void *, vm_offset_t, off_t, size_t);

/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64

#define XBD_SECTOR_SIZE         512     /* XXX: assume for now */
#define XBD_SECTOR_SHFT         9

/*
 * Translate Linux major/minor to an appropriate name and unit
 * number. For HVM guests, this allows us to use the same drive names
 * with blkfront as the emulated drives, easing transition slightly.
 */
static void
blkfront_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name)
{
        static struct vdev_info {
                int major;
                int shift;
                int base;
                const char *name;
        } info[] = {
                {3,     6,      0,      "ada"}, /* ide0 */
                {22,    6,      2,      "ada"}, /* ide1 */
                {33,    6,      4,      "ada"}, /* ide2 */
                {34,    6,      6,      "ada"}, /* ide3 */
                {56,    6,      8,      "ada"}, /* ide4 */
                {57,    6,      10,     "ada"}, /* ide5 */
                {88,    6,      12,     "ada"}, /* ide6 */
                {89,    6,      14,     "ada"}, /* ide7 */
                {90,    6,      16,     "ada"}, /* ide8 */
                {91,    6,      18,     "ada"}, /* ide9 */

                {8,     4,      0,      "da"},  /* scsi disk0 */
                {65,    4,      16,     "da"},  /* scsi disk1 */
                {66,    4,      32,     "da"},  /* scsi disk2 */
                {67,    4,      48,     "da"},  /* scsi disk3 */
                {68,    4,      64,     "da"},  /* scsi disk4 */
                {69,    4,      80,     "da"},  /* scsi disk5 */
                {70,    4,      96,     "da"},  /* scsi disk6 */
                {71,    4,      112,    "da"},  /* scsi disk7 */
                {128,   4,      128,    "da"},  /* scsi disk8 */
                {129,   4,      144,    "da"},  /* scsi disk9 */
                {130,   4,      160,    "da"},  /* scsi disk10 */
                {131,   4,      176,    "da"},  /* scsi disk11 */
                {132,   4,      192,    "da"},  /* scsi disk12 */
                {133,   4,      208,    "da"},  /* scsi disk13 */
                {134,   4,      224,    "da"},  /* scsi disk14 */
                {135,   4,      240,    "da"},  /* scsi disk15 */

                {202,   4,      0,      "xbd"}, /* xbd */

                {0,     0,      0,      NULL},
        };
        int major = vdevice >> 8;
        int minor = vdevice & 0xff;
        int i;

        if (vdevice & (1 << 28)) {
                *unit = (vdevice & ((1 << 28) - 1)) >> 8;
                *name = "xbd";
                return;
        }

        for (i = 0; info[i].major; i++) {
                if (info[i].major == major) {
                        *unit = info[i].base + (minor >> info[i].shift);
                        *name = info[i].name;
                        return;
                }
        }

        *unit = minor >> 4;
        *name = "xbd";
}
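
/*
 * Two worked examples of the translation above (illustrative only; this
 * block is not compiled into the driver):
 */
#if 0
static void
blkfront_vdevice_to_unit_example(void)
{
        int unit;
        const char *name;

        /* Linux "sd" encoding: major 8, minor 65 => name "da", unit 4. */
        blkfront_vdevice_to_unit((8 << 8) | 65, &unit, &name);

        /* Extended encoding: bit 28 set, unit in bits 8-27 => "xbd", unit 3. */
        blkfront_vdevice_to_unit((1 << 28) | (3 << 8), &unit, &name);
}
#endif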

int
xlvbd_add(struct xb_softc *sc, blkif_sector_t sectors,
    int vdevice, uint16_t vdisk_info, unsigned long sector_size)
{
        int     unit, error = 0;
        const char *name;

        blkfront_vdevice_to_unit(vdevice, &unit, &name);

        sc->xb_unit = unit;

        if (strcmp(name, "xbd"))
                device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit);

        sc->xb_disk = disk_alloc();
        sc->xb_disk->d_unit = sc->xb_unit;
        sc->xb_disk->d_open = blkif_open;
        sc->xb_disk->d_close = blkif_close;
        sc->xb_disk->d_ioctl = blkif_ioctl;
        sc->xb_disk->d_strategy = xb_strategy;
        sc->xb_disk->d_dump = xb_dump;
        sc->xb_disk->d_name = name;
        sc->xb_disk->d_drv1 = sc;
        sc->xb_disk->d_sectorsize = sector_size;

        sc->xb_disk->d_mediasize = sectors * sector_size;
        sc->xb_disk->d_maxsize = sc->max_request_size;
        sc->xb_disk->d_flags = 0;
        disk_create(sc->xb_disk, DISK_VERSION);

        return (error);
}

/************************ end VBD support *****************/

/*
 * Read/write routine for a buffer.  Finds the proper unit, places it on
 * the sort queue, and kicks the controller.
 */
static void
xb_strategy(struct bio *bp)
{
        struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;

        /* bogus disk? */
        if (sc == NULL) {
                bp->bio_error = EINVAL;
                bp->bio_flags |= BIO_ERROR;
                bp->bio_resid = bp->bio_bcount;
                biodone(bp);
                return;
        }

        /*
         * Place it in the queue of disk activities for this disk
         */
        mtx_lock(&sc->xb_io_lock);

        xb_enqueue_bio(sc, bp);
        xb_startio(sc);

        mtx_unlock(&sc->xb_io_lock);
        return;
}

static void
xb_bio_complete(struct xb_softc *sc, struct xb_command *cm)
{
        struct bio *bp;

        bp = cm->bp;

        if (unlikely(cm->status != BLKIF_RSP_OKAY)) {
                disk_err(bp, "disk error", -1, 0);
                printf(" status: %x\n", cm->status);
                bp->bio_flags |= BIO_ERROR;
        }

        if (bp->bio_flags & BIO_ERROR)
                bp->bio_error = EIO;
        else
                bp->bio_resid = 0;

        xb_free_command(cm);
        biodone(bp);
}

/* Quiesce the disk writes for a dump file before allowing the next buffer. */
static void
xb_quiesce(struct xb_softc *sc)
{
        int             mtd;

        /* While there are outstanding requests */
        while (!TAILQ_EMPTY(&sc->cm_busy)) {
                RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, mtd);
                if (mtd) {
                        /* Received request completions, update queue. */
                        blkif_int(sc);
                }
                if (!TAILQ_EMPTY(&sc->cm_busy)) {
                        /*
                         * Still pending requests, wait for the disk i/o
                         * to complete.
                         */
                        HYPERVISOR_yield();
                }
        }
}
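
/*
 * Note: xb_quiesce() deliberately polls and yields to the hypervisor
 * rather than sleeping.  A kernel dump runs after a panic, when the
 * scheduler can no longer be relied upon, so msleep()-style waiting is
 * not an option at this point.
 */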

/* Kernel dump function for a paravirtualized disk device */
static void
xb_dump_complete(struct xb_command *cm)
{

        xb_enqueue_complete(cm);
}

static int
xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
        size_t length)
{
        struct  disk    *dp = arg;
        struct xb_softc *sc = (struct xb_softc *) dp->d_drv1;
        struct xb_command *cm;
        size_t          chunk;
        int             sbp;
        int             rc = 0;

        if (length <= 0)
                return (rc);

        xb_quiesce(sc); /* All quiet on the western front. */

        /*
         * If this lock is held, then this module is failing, and a
         * successful kernel dump is highly unlikely anyway.
         */
        mtx_lock(&sc->xb_io_lock);

        /* Split the 64KB block as needed */
        for (sbp=0; length > 0; sbp++) {
                cm = xb_dequeue_free(sc);
                if (cm == NULL) {
                        mtx_unlock(&sc->xb_io_lock);
                        device_printf(sc->xb_dev, "dump: no more commands?\n");
                        return (EBUSY);
                }

                if (gnttab_alloc_grant_references(sc->max_request_segments,
                                                  &cm->gref_head) != 0) {
                        xb_free_command(cm);
                        mtx_unlock(&sc->xb_io_lock);
                        device_printf(sc->xb_dev, "no more grant allocs?\n");
                        return (EBUSY);
                }

                chunk = length > sc->max_request_size
                      ? sc->max_request_size : length;
                cm->data = virtual;
                cm->datalen = chunk;
                cm->operation = BLKIF_OP_WRITE;
                cm->sector_number = offset / dp->d_sectorsize;
                cm->cm_complete = xb_dump_complete;

                xb_enqueue_ready(cm);

                length -= chunk;
                offset += chunk;
                virtual = (char *) virtual + chunk;
        }

        /* Tell DOM0 to do the I/O */
        xb_startio(sc);
        mtx_unlock(&sc->xb_io_lock);

        /* Poll for the completion. */
        xb_quiesce(sc); /* All quiet on the eastern front */

        /* If there were any errors, bail out... */
        while ((cm = xb_dequeue_complete(sc)) != NULL) {
                if (cm->status != BLKIF_RSP_OKAY) {
                        device_printf(sc->xb_dev,
                            "Dump I/O failed at sector %jd\n",
                            cm->sector_number);
                        rc = EIO;
                }
                xb_free_command(cm);
        }

        return (rc);
}


static int
blkfront_probe(device_t dev)
{

        if (!strcmp(xenbus_get_type(dev), "vbd")) {
                device_set_desc(dev, "Virtual Block Device");
                device_quiet(dev);
                return (0);
        }

        return (ENXIO);
}

static void
xb_setup_sysctl(struct xb_softc *xb)
{
        struct sysctl_ctx_list *sysctl_ctx = NULL;
        struct sysctl_oid      *sysctl_tree = NULL;

        sysctl_ctx = device_get_sysctl_ctx(xb->xb_dev);
        if (sysctl_ctx == NULL)
                return;

        sysctl_tree = device_get_sysctl_tree(xb->xb_dev);
        if (sysctl_tree == NULL)
                return;

        SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
                        "max_requests", CTLFLAG_RD, &xb->max_requests, -1,
                        "maximum outstanding requests (negotiated)");

        SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
                        "max_request_segments", CTLFLAG_RD,
                        &xb->max_request_segments, 0,
                        "maximum number of pages per request (negotiated)");

        SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
                        "max_request_size", CTLFLAG_RD,
                        &xb->max_request_size, 0,
                        "maximum size in bytes of a request (negotiated)");

        SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
                        "ring_pages", CTLFLAG_RD,
                        &xb->ring_pages, 0,
                        "communication channel pages (negotiated)");
}
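
/*
 * The negotiated limits exported above can be inspected from userland,
 * e.g. (assuming the first device attaches as xbd0):
 *
 *      # sysctl dev.xbd.0.max_requests dev.xbd.0.ring_pages
 */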

/*
 * Setup supplies the backend directory and virtual device.  We place an
 * event channel and shared frame entries, and watch the backend state
 * to wait until it is ready.
 */
static int
blkfront_attach(device_t dev)
{
        struct xb_softc *sc;
        const char *name;
        uint32_t vdevice;
        int error;
        int i;
        int unit;

        /* FIXME: Use dynamic device id if this is not set. */
        error = xs_scanf(XST_NIL, xenbus_get_node(dev),
            "virtual-device", NULL, "%" PRIu32, &vdevice);
        if (error) {
                xenbus_dev_fatal(dev, error, "reading virtual-device");
                device_printf(dev, "Couldn't determine virtual device.\n");
                return (error);
        }

        blkfront_vdevice_to_unit(vdevice, &unit, &name);
        if (!strcmp(name, "xbd"))
                device_set_unit(dev, unit);

        sc = device_get_softc(dev);
        mtx_init(&sc->xb_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
        xb_initq_free(sc);
        xb_initq_busy(sc);
        xb_initq_ready(sc);
        xb_initq_complete(sc);
        xb_initq_bio(sc);
        for (i = 0; i < XBF_MAX_RING_PAGES; i++)
                sc->ring_ref[i] = GRANT_INVALID_REF;

        sc->xb_dev = dev;
        sc->vdevice = vdevice;
        sc->connected = BLKIF_STATE_DISCONNECTED;

        xb_setup_sysctl(sc);

        /* Wait for backend device to publish its protocol capabilities. */
        xenbus_set_state(dev, XenbusStateInitialising);

        return (0);
}

static int
blkfront_suspend(device_t dev)
{
        struct xb_softc *sc = device_get_softc(dev);
        int retval;
        int saved_state;

        /* Prevent new requests being issued until we fix things up. */
        mtx_lock(&sc->xb_io_lock);
        saved_state = sc->connected;
        sc->connected = BLKIF_STATE_SUSPENDED;

        /* Wait for outstanding I/O to drain. */
        retval = 0;
        while (TAILQ_EMPTY(&sc->cm_busy) == 0) {
                if (msleep(&sc->cm_busy, &sc->xb_io_lock,
                           PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
                        retval = EBUSY;
                        break;
                }
        }
        mtx_unlock(&sc->xb_io_lock);

        if (retval != 0)
                sc->connected = saved_state;

        return (retval);
}

static int
blkfront_resume(device_t dev)
{
        struct xb_softc *sc = device_get_softc(dev);

        DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));

        blkif_free(sc);
        blkfront_initialize(sc);
        return (0);
}

static void
blkfront_initialize(struct xb_softc *sc)
{
        const char *otherend_path;
        const char *node_path;
        uint32_t max_ring_page_order;
        int error;
        int i;

        if (xenbus_get_state(sc->xb_dev) != XenbusStateInitialising) {
                /* Initialization has already been performed. */
                return;
        }

        /*
         * Protocol defaults valid even if negotiation for a
         * setting fails.
         */
        max_ring_page_order = 0;
        sc->ring_pages = 1;
        sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
        sc->max_request_size = XBF_SEGS_TO_SIZE(sc->max_request_segments);
        sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);

        /*
         * Protocol negotiation.
         *
         * \note xs_gather() returns on the first encountered error, so
         *       we must use independent calls in order to guarantee
         *       we don't miss information in a sparsely populated back-end
         *       tree.
         *
         * \note xs_scanf() does not update variables for unmatched
         *       fields.
         */
        otherend_path = xenbus_get_otherend_path(sc->xb_dev);
        node_path = xenbus_get_node(sc->xb_dev);

        /* Support both backend schemes for relaying ring page limits. */
        (void)xs_scanf(XST_NIL, otherend_path,
                       "max-ring-page-order", NULL, "%" PRIu32,
                       &max_ring_page_order);
        sc->ring_pages = 1 << max_ring_page_order;
        (void)xs_scanf(XST_NIL, otherend_path,
                       "max-ring-pages", NULL, "%" PRIu32,
                       &sc->ring_pages);
        if (sc->ring_pages < 1)
                sc->ring_pages = 1;

        sc->max_requests = BLKIF_MAX_RING_REQUESTS(sc->ring_pages * PAGE_SIZE);
        (void)xs_scanf(XST_NIL, otherend_path,
                       "max-requests", NULL, "%" PRIu32,
                       &sc->max_requests);

        (void)xs_scanf(XST_NIL, otherend_path,
                       "max-request-segments", NULL, "%" PRIu32,
                       &sc->max_request_segments);

        (void)xs_scanf(XST_NIL, otherend_path,
                       "max-request-size", NULL, "%" PRIu32,
                       &sc->max_request_size);
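
        /*
         * Worked example of the negotiation above (values illustrative):
         * a backend advertising "max-ring-page-order" = 2 yields
         * ring_pages = 1 << 2 = 4.  A standard blkif ring holds roughly
         * 32 requests per 4KB page, so the default max_requests computed
         * above via BLKIF_MAX_RING_REQUESTS() scales with the ring size
         * before any explicit "max-requests" node from the backend
         * overrides it.
         */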

        if (sc->ring_pages > XBF_MAX_RING_PAGES) {
                device_printf(sc->xb_dev, "Back-end specified ring-pages of "
                              "%u limited to front-end limit of %zu.\n",
                              sc->ring_pages, XBF_MAX_RING_PAGES);
                sc->ring_pages = XBF_MAX_RING_PAGES;
        }

        if (powerof2(sc->ring_pages) == 0) {
                uint32_t new_page_limit;

                new_page_limit = 0x01 << (fls(sc->ring_pages) - 1);
                device_printf(sc->xb_dev, "Back-end specified ring-pages of "
                              "%u is not a power of 2. Limited to %u.\n",
                              sc->ring_pages, new_page_limit);
                sc->ring_pages = new_page_limit;
        }

        if (sc->max_requests > XBF_MAX_REQUESTS) {
                device_printf(sc->xb_dev, "Back-end specified max_requests of "
                              "%u limited to front-end limit of %u.\n",
                              sc->max_requests, XBF_MAX_REQUESTS);
                sc->max_requests = XBF_MAX_REQUESTS;
        }

        if (sc->max_request_segments > XBF_MAX_SEGMENTS_PER_REQUEST) {
                device_printf(sc->xb_dev, "Back-end specified "
                              "max_request_segments of %u limited to "
                              "front-end limit of %u.\n",
                              sc->max_request_segments,
                              XBF_MAX_SEGMENTS_PER_REQUEST);
                sc->max_request_segments = XBF_MAX_SEGMENTS_PER_REQUEST;
        }

        if (sc->max_request_size > XBF_MAX_REQUEST_SIZE) {
                device_printf(sc->xb_dev, "Back-end specified "
                              "max_request_size of %u limited to front-end "
                              "limit of %u.\n", sc->max_request_size,
                              XBF_MAX_REQUEST_SIZE);
                sc->max_request_size = XBF_MAX_REQUEST_SIZE;
        }

        if (sc->max_request_size > XBF_SEGS_TO_SIZE(sc->max_request_segments)) {
                device_printf(sc->xb_dev, "Back-end specified "
                              "max_request_size of %u limited to front-end "
                              "limit of %u.  (Too few segments.)\n",
                              sc->max_request_size,
                              XBF_SEGS_TO_SIZE(sc->max_request_segments));
                sc->max_request_size =
                    XBF_SEGS_TO_SIZE(sc->max_request_segments);
        }

        sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);
        /* Allocate data structures based on negotiated values. */
        error = bus_dma_tag_create(bus_get_dma_tag(sc->xb_dev), /* parent */
                                   512, PAGE_SIZE,      /* algnmnt, boundary */
                                   BUS_SPACE_MAXADDR,   /* lowaddr */
                                   BUS_SPACE_MAXADDR,   /* highaddr */
                                   NULL, NULL,          /* filter, filterarg */
                                   sc->max_request_size,
                                   sc->max_request_segments,
                                   PAGE_SIZE,           /* maxsegsize */
                                   BUS_DMA_ALLOCNOW,    /* flags */
                                   busdma_lock_mutex,   /* lockfunc */
                                   &sc->xb_io_lock,     /* lockarg */
                                   &sc->xb_io_dmat);
        if (error != 0) {
                xenbus_dev_fatal(sc->xb_dev, error,
                                 "Cannot allocate parent DMA tag\n");
                return;
        }

        /* Per-transaction data allocation. */
        sc->shadow = malloc(sizeof(*sc->shadow) * sc->max_requests,
                            M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
        if (sc->shadow == NULL) {
                bus_dma_tag_destroy(sc->xb_io_dmat);
                xenbus_dev_fatal(sc->xb_dev, error,
                                 "Cannot allocate request structures\n");
                return;
        }

        for (i = 0; i < sc->max_requests; i++) {
                struct xb_command *cm;

                cm = &sc->shadow[i];
                cm->sg_refs = malloc(sizeof(grant_ref_t)
                                   * sc->max_request_segments,
                                     M_XENBLOCKFRONT, M_NOWAIT);
                if (cm->sg_refs == NULL)
                        break;
                cm->id = i;
                cm->cm_sc = sc;
                if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0)
                        break;
                xb_free_command(cm);
        }

        if (setup_blkring(sc) != 0)
                return;

        /* Support both backend schemes for relaying ring page limits. */
        if (sc->ring_pages > 1) {
                error = xs_printf(XST_NIL, node_path,
                                 "num-ring-pages","%u", sc->ring_pages);
                if (error) {
                        xenbus_dev_fatal(sc->xb_dev, error,
                                         "writing %s/num-ring-pages",
                                         node_path);
                        return;
                }

                error = xs_printf(XST_NIL, node_path,
                                 "ring-page-order", "%u",
                                 fls(sc->ring_pages) - 1);
                if (error) {
                        xenbus_dev_fatal(sc->xb_dev, error,
                                         "writing %s/ring-page-order",
                                         node_path);
                        return;
                }
        }

        error = xs_printf(XST_NIL, node_path,
                         "max-requests","%u", sc->max_requests);
        if (error) {
                xenbus_dev_fatal(sc->xb_dev, error,
                                 "writing %s/max-requests",
                                 node_path);
                return;
        }

        error = xs_printf(XST_NIL, node_path,
                         "max-request-segments","%u", sc->max_request_segments);
        if (error) {
                xenbus_dev_fatal(sc->xb_dev, error,
                                 "writing %s/max-request-segments",
                                 node_path);
                return;
        }

        error = xs_printf(XST_NIL, node_path,
                         "max-request-size","%u", sc->max_request_size);
        if (error) {
                xenbus_dev_fatal(sc->xb_dev, error,
                                 "writing %s/max-request-size",
                                 node_path);
                return;
        }

        error = xs_printf(XST_NIL, node_path, "event-channel",
                          "%u", irq_to_evtchn_port(sc->irq));
        if (error) {
                xenbus_dev_fatal(sc->xb_dev, error,
                                 "writing %s/event-channel",
                                 node_path);
                return;
        }

        error = xs_printf(XST_NIL, node_path,
                          "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
        if (error) {
                xenbus_dev_fatal(sc->xb_dev, error,
                                 "writing %s/protocol",
                                 node_path);
                return;
        }

        xenbus_set_state(sc->xb_dev, XenbusStateInitialised);
}

static int
setup_blkring(struct xb_softc *sc)
{
        blkif_sring_t *sring;
        uintptr_t sring_page_addr;
        int error;
        int i;

        sring = malloc(sc->ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
                       M_NOWAIT|M_ZERO);
        if (sring == NULL) {
                xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring");
                return (ENOMEM);
        }
        SHARED_RING_INIT(sring);
        FRONT_RING_INIT(&sc->ring, sring, sc->ring_pages * PAGE_SIZE);

        for (i = 0, sring_page_addr = (uintptr_t)sring;
             i < sc->ring_pages;
             i++, sring_page_addr += PAGE_SIZE) {

                error = xenbus_grant_ring(sc->xb_dev,
                    (vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->ring_ref[i]);
                if (error) {
                        xenbus_dev_fatal(sc->xb_dev, error,
                                         "granting ring_ref(%d)", i);
                        return (error);
                }
        }
        if (sc->ring_pages == 1) {
                error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
                                  "ring-ref", "%u", sc->ring_ref[0]);
                if (error) {
                        xenbus_dev_fatal(sc->xb_dev, error,
                                         "writing %s/ring-ref",
                                         xenbus_get_node(sc->xb_dev));
                        return (error);
                }
        } else {
                for (i = 0; i < sc->ring_pages; i++) {
                        char ring_ref_name[] = "ring_refXX";

                        snprintf(ring_ref_name, sizeof(ring_ref_name),
                                 "ring-ref%u", i);
                        error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
                                         ring_ref_name, "%u", sc->ring_ref[i]);
                        if (error) {
                                xenbus_dev_fatal(sc->xb_dev, error,
                                                 "writing %s/%s",
                                                 xenbus_get_node(sc->xb_dev),
                                                 ring_ref_name);
                                return (error);
                        }
                }
        }

        error = bind_listening_port_to_irqhandler(
            xenbus_get_otherend_id(sc->xb_dev),
            "xbd", (driver_intr_t *)blkif_int, sc,
            INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq);
        if (error) {
                xenbus_dev_fatal(sc->xb_dev, error,
                    "bind_evtchn_to_irqhandler failed");
                return (error);
        }

        return (0);
}
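
/*
 * For illustration, the xenstore nodes published by setup_blkring() for
 * a two-page ring look like this (paths relative to the device node,
 * grant reference values made up):
 *
 *      ring-ref0 = "8"
 *      ring-ref1 = "9"
 *
 * A single-page ring publishes one "ring-ref" node instead, matching
 * the original blkif protocol.
 */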

/**
 * Callback received when the backend's state changes.
 */
static void
blkfront_backend_changed(device_t dev, XenbusState backend_state)
{
        struct xb_softc *sc = device_get_softc(dev);

        DPRINTK("backend_state=%d\n", backend_state);

        switch (backend_state) {
        case XenbusStateUnknown:
        case XenbusStateInitialising:
        case XenbusStateReconfigured:
        case XenbusStateReconfiguring:
        case XenbusStateClosed:
                break;

        case XenbusStateInitWait:
        case XenbusStateInitialised:
                blkfront_initialize(sc);
                break;

        case XenbusStateConnected:
                blkfront_initialize(sc);
                blkfront_connect(sc);
                break;

        case XenbusStateClosing:
                if (sc->users > 0)
                        xenbus_dev_error(dev, -EBUSY,
                                         "Device in use; refusing to close");
                else
                        blkfront_closing(dev);
                break;
        }
}

/*
 * Invoked when the backend is finally 'ready' (and has published
 * the details about the physical device - #sectors, size, etc).
 */
static void
blkfront_connect(struct xb_softc *sc)
{
        device_t dev = sc->xb_dev;
        unsigned long sectors, sector_size;
        unsigned int binfo;
        int err, feature_barrier;

        if ((sc->connected == BLKIF_STATE_CONNECTED) ||
            (sc->connected == BLKIF_STATE_SUSPENDED))
                return;

        DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));

        err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
                        "sectors", "%lu", &sectors,
                        "info", "%u", &binfo,
                        "sector-size", "%lu", &sector_size,
                        NULL);
        if (err) {
                xenbus_dev_fatal(dev, err,
                    "reading backend fields at %s",
                    xenbus_get_otherend_path(dev));
                return;
        }
        err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
                        "feature-barrier", "%d", &feature_barrier,
                        NULL);
        if (err == 0 && feature_barrier != 0)
                sc->xb_flags |= XB_BARRIER;

        if (sc->xb_disk == NULL) {
                device_printf(dev, "%juMB <%s> at %s",
                    (uintmax_t) sectors / (1048576 / sector_size),
                    device_get_desc(dev),
                    xenbus_get_node(dev));
                bus_print_child_footer(device_get_parent(dev), dev);

                xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size);
        }

        (void)xenbus_set_state(dev, XenbusStateConnected);

        /* Kick pending requests. */
        mtx_lock(&sc->xb_io_lock);
        sc->connected = BLKIF_STATE_CONNECTED;
        xb_startio(sc);
        sc->xb_flags |= XB_READY;
        mtx_unlock(&sc->xb_io_lock);
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void
blkfront_closing(device_t dev)
{
        struct xb_softc *sc = device_get_softc(dev);

        xenbus_set_state(dev, XenbusStateClosing);

        DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));

        if (sc->xb_disk != NULL) {
                disk_destroy(sc->xb_disk);
                sc->xb_disk = NULL;
        }

        xenbus_set_state(dev, XenbusStateClosed);
}


static int
blkfront_detach(device_t dev)
{
        struct xb_softc *sc = device_get_softc(dev);

        DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));

        blkif_free(sc);
        mtx_destroy(&sc->xb_io_lock);

        return (0);
}


static inline void
flush_requests(struct xb_softc *sc)
{
        int notify;

        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify);

        if (notify)
                notify_remote_via_irq(sc->irq);
}
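
/*
 * RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() sets "notify" only when the
 * backend has asked to be evented for the producer range just pushed,
 * so flush_requests() avoids a redundant event-channel kick while the
 * backend is still actively processing the ring.
 */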

static void
blkif_restart_queue_callback(void *arg)
{
        struct xb_softc *sc = arg;

        mtx_lock(&sc->xb_io_lock);

        xb_startio(sc);

        mtx_unlock(&sc->xb_io_lock);
}

static int
blkif_open(struct disk *dp)
{
        struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

        if (sc == NULL) {
                printf("xbd%d: not found\n", dp->d_unit);
                return (ENXIO);
        }

        sc->xb_flags |= XB_OPEN;
        sc->users++;
        return (0);
}

static int
blkif_close(struct disk *dp)
{
        struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

        if (sc == NULL)
                return (ENXIO);
        sc->xb_flags &= ~XB_OPEN;
        if (--(sc->users) == 0) {
                /*
                 * Check whether we have been instructed to close.  We will
                 * have ignored this request initially, as the device was
                 * still mounted.
                 */
                if (xenbus_get_otherend_state(sc->xb_dev) == XenbusStateClosing)
                        blkfront_closing(sc->xb_dev);
        }
        return (0);
}

static int
blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
{
        struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

        if (sc == NULL)
                return (ENXIO);

        return (ENOTTY);
}

static void
xb_free_command(struct xb_command *cm)
{

        KASSERT((cm->cm_flags & XB_ON_XBQ_MASK) == 0,
            ("Freeing command that is still on a queue\n"));

        cm->cm_flags = 0;
        cm->bp = NULL;
        cm->cm_complete = NULL;
        xb_enqueue_free(cm);
}

/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
static struct xb_command *
xb_bio_command(struct xb_softc *sc)
{
        struct xb_command *cm;
        struct bio *bp;

        if (unlikely(sc->connected != BLKIF_STATE_CONNECTED))
                return (NULL);

        bp = xb_dequeue_bio(sc);
        if (bp == NULL)
                return (NULL);

        if ((cm = xb_dequeue_free(sc)) == NULL) {
                xb_requeue_bio(sc, bp);
                return (NULL);
        }

        if (gnttab_alloc_grant_references(sc->max_request_segments,
            &cm->gref_head) != 0) {
                gnttab_request_free_callback(&sc->callback,
                        blkif_restart_queue_callback, sc,
                        sc->max_request_segments);
                xb_requeue_bio(sc, bp);
                xb_enqueue_free(cm);
                sc->xb_flags |= XB_FROZEN;
                return (NULL);
        }

        cm->bp = bp;
        cm->data = bp->bio_data;
        cm->datalen = bp->bio_bcount;
        cm->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
            BLKIF_OP_WRITE;
        cm->sector_number = (blkif_sector_t)bp->bio_pblkno;

        return (cm);
}

static int
blkif_queue_request(struct xb_softc *sc, struct xb_command *cm)
{
        int     error;

        error = bus_dmamap_load(sc->xb_io_dmat, cm->map, cm->data, cm->datalen,
            blkif_queue_cb, cm, 0);
        if (error == EINPROGRESS) {
                printf("EINPROGRESS\n");
                sc->xb_flags |= XB_FROZEN;
                cm->cm_flags |= XB_CMD_FROZEN;
                return (0);
        }

        return (error);
}

static void
blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
        struct xb_softc *sc;
        struct xb_command *cm;
        blkif_request_t *ring_req;
        struct blkif_request_segment *sg;
        struct blkif_request_segment *last_block_sg;
        grant_ref_t *sg_ref;
        vm_paddr_t buffer_ma;
        uint64_t fsect, lsect;
        int ref;
        int op;
        int block_segs;

        cm = arg;
        sc = cm->cm_sc;

        if (error) {
                printf("error %d in blkif_queue_cb\n", error);
                cm->bp->bio_error = EIO;
                biodone(cm->bp);
                xb_free_command(cm);
                return;
        }

        /* Fill out a communications ring structure. */
        ring_req = RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
        sc->ring.req_prod_pvt++;
        ring_req->id = cm->id;
        ring_req->operation = cm->operation;
        ring_req->sector_number = cm->sector_number;
        ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
        ring_req->nr_segments = nsegs;
        cm->nseg = nsegs;

        block_segs    = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK);
        sg            = ring_req->seg;
        last_block_sg = sg + block_segs;
        sg_ref        = cm->sg_refs;

        while (1) {

                while (sg < last_block_sg) {
                        buffer_ma = segs->ds_addr;
                        fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
                        lsect = fsect + (segs->ds_len  >> XBD_SECTOR_SHFT) - 1;

                        KASSERT(lsect <= 7, ("XEN disk driver data cannot "
                                "cross a page boundary"));

                        /* install a grant reference. */
                        ref = gnttab_claim_grant_reference(&cm->gref_head);

                        /*
                         * GNTTAB_LIST_END == 0xffffffff, but it is private
                         * to gnttab.c.
                         */
                        KASSERT(ref != ~0, ("grant_reference failed"));

                        gnttab_grant_foreign_access_ref(
                                ref,
                                xenbus_get_otherend_id(sc->xb_dev),
                                buffer_ma >> PAGE_SHIFT,
                                ring_req->operation == BLKIF_OP_WRITE);

                        *sg_ref = ref;
                        *sg = (struct blkif_request_segment) {
                                .gref       = ref,
                                .first_sect = fsect,
                                .last_sect  = lsect };
                        sg++;
                        sg_ref++;
                        segs++;
                        nsegs--;
                }
                block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
                if (block_segs == 0)
                        break;

                sg = BLKRING_GET_SEG_BLOCK(&sc->ring, sc->ring.req_prod_pvt);
                sc->ring.req_prod_pvt++;
                last_block_sg = sg + block_segs;
        }

        if (cm->operation == BLKIF_OP_READ)
                op = BUS_DMASYNC_PREREAD;
        else if (cm->operation == BLKIF_OP_WRITE)
                op = BUS_DMASYNC_PREWRITE;
        else
                op = 0;
        bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);

        gnttab_free_grant_references(cm->gref_head);

        xb_enqueue_busy(cm);

        /*
         * This flag means that we're probably executing in the busdma swi
         * instead of in the startio context, so an explicit flush is needed.
         */
        if (cm->cm_flags & XB_CMD_FROZEN)
                flush_requests(sc);

        return;
}
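
/*
 * A note on the ring-slot accounting above: a request's header block
 * carries up to BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK segments and each
 * continuation block up to BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK more,
 * so a single large command can occupy several ring slots.  This is the
 * same arithmetic BLKIF_SEGS_TO_BLOCKS() performs when xb_startio()
 * below checks RING_FREE_REQUESTS() against max_request_blocks.
 */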

/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
static void
xb_startio(struct xb_softc *sc)
{
        struct xb_command *cm;
        int error, queued = 0;

        mtx_assert(&sc->xb_io_lock, MA_OWNED);

        if (sc->connected != BLKIF_STATE_CONNECTED)
                return;

        while (RING_FREE_REQUESTS(&sc->ring) >= sc->max_request_blocks) {
                if (sc->xb_flags & XB_FROZEN)
                        break;

                cm = xb_dequeue_ready(sc);

                if (cm == NULL)
                        cm = xb_bio_command(sc);

                if (cm == NULL)
                        break;

                if ((error = blkif_queue_request(sc, cm)) != 0) {
                        printf("blkif_queue_request returned %d\n", error);
                        break;
                }
                queued++;
        }

        if (queued != 0)
                flush_requests(sc);
}

static void
blkif_int(void *xsc)
{
        struct xb_softc *sc = xsc;
        struct xb_command *cm;
        blkif_response_t *bret;
        RING_IDX i, rp;
        int op;

        mtx_lock(&sc->xb_io_lock);

        if (unlikely(sc->connected == BLKIF_STATE_DISCONNECTED)) {
                mtx_unlock(&sc->xb_io_lock);
                return;
        }

 again:
        rp = sc->ring.sring->rsp_prod;
        rmb(); /* Ensure we see queued responses up to 'rp'. */

        for (i = sc->ring.rsp_cons; i != rp;) {
                bret = RING_GET_RESPONSE(&sc->ring, i);
                cm   = &sc->shadow[bret->id];

                xb_remove_busy(cm);
                i += blkif_completion(cm);

                if (cm->operation == BLKIF_OP_READ)
                        op = BUS_DMASYNC_POSTREAD;
                else if (cm->operation == BLKIF_OP_WRITE)
                        op = BUS_DMASYNC_POSTWRITE;
                else
                        op = 0;
                bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
                bus_dmamap_unload(sc->xb_io_dmat, cm->map);

                /*
                 * If commands are completing then resources are probably
                 * being freed as well.  It's a cheap assumption even when
                 * wrong.
                 */
                sc->xb_flags &= ~XB_FROZEN;

                /*
                 * Directly call the i/o complete routine to save an
                 * indirection in the common case.
                 */
                cm->status = bret->status;
                if (cm->bp)
                        xb_bio_complete(sc, cm);
                else if (cm->cm_complete)
                        (cm->cm_complete)(cm);
                else
                        xb_free_command(cm);
        }

        sc->ring.rsp_cons = i;

        if (i != sc->ring.req_prod_pvt) {
                int more_to_do;
                RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, more_to_do);
                if (more_to_do)
                        goto again;
        } else {
                sc->ring.sring->rsp_event = i + 1;
        }

        xb_startio(sc);

        if (unlikely(sc->connected == BLKIF_STATE_SUSPENDED))
                wakeup(&sc->cm_busy);

        mtx_unlock(&sc->xb_io_lock);
}

static void
blkif_free(struct xb_softc *sc)
{
        uint8_t *sring_page_ptr;
        int i;

        /* Prevent new requests being issued until we fix things up. */
        mtx_lock(&sc->xb_io_lock);
        sc->connected = BLKIF_STATE_DISCONNECTED;
        mtx_unlock(&sc->xb_io_lock);

        /* Free resources associated with old device channel. */
        if (sc->ring.sring != NULL) {
                sring_page_ptr = (uint8_t *)sc->ring.sring;
                for (i = 0; i < sc->ring_pages; i++) {
                        if (sc->ring_ref[i] != GRANT_INVALID_REF) {
                                gnttab_end_foreign_access_ref(sc->ring_ref[i]);
                                sc->ring_ref[i] = GRANT_INVALID_REF;
                        }
                        sring_page_ptr += PAGE_SIZE;
                }
                free(sc->ring.sring, M_XENBLOCKFRONT);
                sc->ring.sring = NULL;
        }

        if (sc->shadow) {

                for (i = 0; i < sc->max_requests; i++) {
                        struct xb_command *cm;

                        cm = &sc->shadow[i];
                        if (cm->sg_refs != NULL) {
                                free(cm->sg_refs, M_XENBLOCKFRONT);
                                cm->sg_refs = NULL;
                        }

                        bus_dmamap_destroy(sc->xb_io_dmat, cm->map);
                }
                free(sc->shadow, M_XENBLOCKFRONT);
                sc->shadow = NULL;

                bus_dma_tag_destroy(sc->xb_io_dmat);

                xb_initq_free(sc);
                xb_initq_ready(sc);
                xb_initq_complete(sc);
        }

        if (sc->irq) {
                unbind_from_irqhandler(sc->irq);
                sc->irq = 0;
        }
}

static int
blkif_completion(struct xb_command *s)
{
//printf("%s: Req %p(%d)\n", __func__, s, s->nseg);
        gnttab_end_foreign_access_references(s->nseg, s->sg_refs);
        return (BLKIF_SEGS_TO_BLOCKS(s->nseg));
}

/* ** Driver registration ** */
static device_method_t blkfront_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe,         blkfront_probe),
        DEVMETHOD(device_attach,        blkfront_attach),
        DEVMETHOD(device_detach,        blkfront_detach),
        DEVMETHOD(device_shutdown,      bus_generic_shutdown),
        DEVMETHOD(device_suspend,       blkfront_suspend),
        DEVMETHOD(device_resume,        blkfront_resume),

        /* Xenbus interface */
        DEVMETHOD(xenbus_otherend_changed, blkfront_backend_changed),

        { 0, 0 }
};

static driver_t blkfront_driver = {
        "xbd",
        blkfront_methods,
        sizeof(struct xb_softc),
};
devclass_t blkfront_devclass;

DRIVER_MODULE(xbd, xenbusb_front, blkfront_driver, blkfront_devclass, 0, 0);