Fix virtio_blk(4) failing to attach on some hypervisors.

sys/dev/virtio/block/virtio_blk.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>

#include <geom/geom.h>
#include <geom/geom_disk.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"

struct vtblk_request {
        struct virtio_blk_outhdr         vbr_hdr;
        struct bio                      *vbr_bp;
        uint8_t                          vbr_ack;
        TAILQ_ENTRY(vtblk_request)       vbr_link;
};

enum vtblk_cache_mode {
        VTBLK_CACHE_WRITETHROUGH,
        VTBLK_CACHE_WRITEBACK,
        VTBLK_CACHE_MAX
};

struct vtblk_softc {
        device_t                 vtblk_dev;
        struct mtx               vtblk_mtx;
        uint64_t                 vtblk_features;
        uint32_t                 vtblk_flags;
#define VTBLK_FLAG_INDIRECT     0x0001
#define VTBLK_FLAG_DETACH       0x0002
#define VTBLK_FLAG_SUSPEND      0x0004
#define VTBLK_FLAG_BARRIER      0x0008
#define VTBLK_FLAG_WCE_CONFIG   0x0010

        struct virtqueue        *vtblk_vq;
        struct sglist           *vtblk_sglist;
        struct disk             *vtblk_disk;

        struct bio_queue_head    vtblk_bioq;
        TAILQ_HEAD(, vtblk_request)
                                 vtblk_req_free;
        TAILQ_HEAD(, vtblk_request)
                                 vtblk_req_ready;
        struct vtblk_request    *vtblk_req_ordered;

        int                      vtblk_max_nsegs;
        int                      vtblk_request_count;
        enum vtblk_cache_mode    vtblk_write_cache;

        struct bio_queue         vtblk_dump_queue;
        struct vtblk_request     vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
        { VIRTIO_BLK_F_BARRIER,         "HostBarrier"   },
        { VIRTIO_BLK_F_SIZE_MAX,        "MaxSegSize"    },
        { VIRTIO_BLK_F_SEG_MAX,         "MaxNumSegs"    },
        { VIRTIO_BLK_F_GEOMETRY,        "DiskGeometry"  },
        { VIRTIO_BLK_F_RO,              "ReadOnly"      },
        { VIRTIO_BLK_F_BLK_SIZE,        "BlockSize"     },
        { VIRTIO_BLK_F_SCSI,            "SCSICmds"      },
        { VIRTIO_BLK_F_FLUSH,           "FlushCmd"      },
        { VIRTIO_BLK_F_TOPOLOGY,        "Topology"      },
        { VIRTIO_BLK_F_CONFIG_WCE,      "ConfigWCE"     },
        { VIRTIO_BLK_F_MQ,              "Multiqueue"    },
        { VIRTIO_BLK_F_DISCARD,         "Discard"       },
        { VIRTIO_BLK_F_WRITE_ZEROES,    "WriteZeros"    },

        { 0, NULL }
};

static int      vtblk_modevent(module_t, int, void *);

static int      vtblk_probe(device_t);
static int      vtblk_attach(device_t);
static int      vtblk_detach(device_t);
static int      vtblk_suspend(device_t);
static int      vtblk_resume(device_t);
static int      vtblk_shutdown(device_t);
static int      vtblk_attach_completed(device_t);
static int      vtblk_config_change(device_t);

static int      vtblk_open(struct disk *);
static int      vtblk_close(struct disk *);
static int      vtblk_ioctl(struct disk *, u_long, void *, int,
                    struct thread *);
static int      vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
static void     vtblk_strategy(struct bio *);

static int      vtblk_negotiate_features(struct vtblk_softc *);
static int      vtblk_setup_features(struct vtblk_softc *);
static int      vtblk_maximum_segments(struct vtblk_softc *,
                    struct virtio_blk_config *);
static int      vtblk_alloc_virtqueue(struct vtblk_softc *);
static void     vtblk_resize_disk(struct vtblk_softc *, uint64_t);
static void     vtblk_alloc_disk(struct vtblk_softc *,
                    struct virtio_blk_config *);
static void     vtblk_create_disk(struct vtblk_softc *);

static int      vtblk_request_prealloc(struct vtblk_softc *);
static void     vtblk_request_free(struct vtblk_softc *);
static struct vtblk_request *
                vtblk_request_dequeue(struct vtblk_softc *);
static void     vtblk_request_enqueue(struct vtblk_softc *,
                    struct vtblk_request *);
static struct vtblk_request *
                vtblk_request_next_ready(struct vtblk_softc *);
static void     vtblk_request_requeue_ready(struct vtblk_softc *,
                    struct vtblk_request *);
static struct vtblk_request *
                vtblk_request_next(struct vtblk_softc *);
static struct vtblk_request *
                vtblk_request_bio(struct vtblk_softc *);
static int      vtblk_request_execute(struct vtblk_softc *,
                    struct vtblk_request *);
static int      vtblk_request_error(struct vtblk_request *);

static void     vtblk_queue_completed(struct vtblk_softc *,
                    struct bio_queue *);
static void     vtblk_done_completed(struct vtblk_softc *,
                    struct bio_queue *);
static void     vtblk_drain_vq(struct vtblk_softc *);
static void     vtblk_drain(struct vtblk_softc *);

static void     vtblk_startio(struct vtblk_softc *);
static void     vtblk_bio_done(struct vtblk_softc *, struct bio *, int);

static void     vtblk_read_config(struct vtblk_softc *,
                    struct virtio_blk_config *);
static void     vtblk_ident(struct vtblk_softc *);
static int      vtblk_poll_request(struct vtblk_softc *,
                    struct vtblk_request *);
static int      vtblk_quiesce(struct vtblk_softc *);
static void     vtblk_vq_intr(void *);
static void     vtblk_stop(struct vtblk_softc *);

static void     vtblk_dump_quiesce(struct vtblk_softc *);
static int      vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
static int      vtblk_dump_flush(struct vtblk_softc *);
static void     vtblk_dump_complete(struct vtblk_softc *);

static void     vtblk_set_write_cache(struct vtblk_softc *, int);
static int      vtblk_write_cache_enabled(struct vtblk_softc *sc,
                    struct virtio_blk_config *);
static int      vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);

static void     vtblk_setup_sysctl(struct vtblk_softc *);
static int      vtblk_tunable_int(struct vtblk_softc *, const char *, int);

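/*
 * Byte-order helpers: legacy VirtIO devices use guest-native byte order
 * for the ring and request headers, while modern (VIRTIO_F_VERSION_1)
 * devices always use little-endian, so each conversion is keyed off the
 * negotiated VIRTIO_F_VERSION_1 feature.
 */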
#define vtblk_modern(_sc) (((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0)
#define vtblk_htog16(_sc, _val) virtio_htog16(vtblk_modern(_sc), _val)
#define vtblk_htog32(_sc, _val) virtio_htog32(vtblk_modern(_sc), _val)
#define vtblk_htog64(_sc, _val) virtio_htog64(vtblk_modern(_sc), _val)
#define vtblk_gtoh16(_sc, _val) virtio_gtoh16(vtblk_modern(_sc), _val)
#define vtblk_gtoh32(_sc, _val) virtio_gtoh32(vtblk_modern(_sc), _val)
#define vtblk_gtoh64(_sc, _val) virtio_gtoh64(vtblk_modern(_sc), _val)

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);

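/*
 * Both tunables above are fetched from the kernel environment, so they can
 * be set before boot, e.g. in loader.conf(5):
 *
 *      hw.vtblk.no_ident=1
 *      hw.vtblk.writecache_mode=0
 *
 * Each may also be overridden for a single device as hw.vtblk.<unit>.<knob>;
 * see vtblk_tunable_int() at the end of this file.
 */
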
#define VTBLK_COMMON_FEATURES \
    (VIRTIO_BLK_F_SIZE_MAX              | \
     VIRTIO_BLK_F_SEG_MAX               | \
     VIRTIO_BLK_F_GEOMETRY              | \
     VIRTIO_BLK_F_RO                    | \
     VIRTIO_BLK_F_BLK_SIZE              | \
     VIRTIO_BLK_F_FLUSH                 | \
     VIRTIO_BLK_F_TOPOLOGY              | \
     VIRTIO_BLK_F_CONFIG_WCE            | \
     VIRTIO_BLK_F_DISCARD               | \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MODERN_FEATURES   (VTBLK_COMMON_FEATURES)
#define VTBLK_LEGACY_FEATURES   (VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES)

#define VTBLK_MTX(_sc)          &(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
                                mtx_init(VTBLK_MTX((_sc)), (_name), \
                                    "VirtIO Block Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)         mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)       mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc) mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)  mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
                                mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_DISK_NAME         "vtbd"
#define VTBLK_QUIESCE_TIMEOUT   (30 * hz)
#define VTBLK_BSIZE             512

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS      2

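/*
 * A minimal sketch of how one request maps onto virtqueue segments (see
 * vtblk_request_execute()): the header segment is device-readable, the
 * status (ack) byte is device-writable, and any data segments in between
 * are device-readable for writes and device-writable for reads:
 *
 *      [vbr_hdr][data segment(s) ...][vbr_ack]
 */
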
static device_method_t vtblk_methods[] = {
        /* Device methods. */
        DEVMETHOD(device_probe,         vtblk_probe),
        DEVMETHOD(device_attach,        vtblk_attach),
        DEVMETHOD(device_detach,        vtblk_detach),
        DEVMETHOD(device_suspend,       vtblk_suspend),
        DEVMETHOD(device_resume,        vtblk_resume),
        DEVMETHOD(device_shutdown,      vtblk_shutdown),

        /* VirtIO methods. */
        DEVMETHOD(virtio_attach_completed, vtblk_attach_completed),
        DEVMETHOD(virtio_config_change, vtblk_config_change),

        DEVMETHOD_END
};

static driver_t vtblk_driver = {
        "vtblk",
        vtblk_methods,
        sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

VIRTIO_DRIVER_MODULE(virtio_blk, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

VIRTIO_SIMPLE_PNPINFO(virtio_blk, VIRTIO_ID_BLOCK, "VirtIO Block Adapter");

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
        int error;

        error = 0;

        switch (type) {
        case MOD_LOAD:
        case MOD_QUIESCE:
        case MOD_UNLOAD:
        case MOD_SHUTDOWN:
                break;
        default:
                error = EOPNOTSUPP;
                break;
        }

        return (error);
}

static int
vtblk_probe(device_t dev)
{
        return (VIRTIO_SIMPLE_PROBE(dev, virtio_blk));
}

static int
vtblk_attach(device_t dev)
{
        struct vtblk_softc *sc;
        struct virtio_blk_config blkcfg;
        int error;

        sc = device_get_softc(dev);
        sc->vtblk_dev = dev;
        virtio_set_feature_desc(dev, vtblk_feature_desc);

        VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
        bioq_init(&sc->vtblk_bioq);
        TAILQ_INIT(&sc->vtblk_dump_queue);
        TAILQ_INIT(&sc->vtblk_req_free);
        TAILQ_INIT(&sc->vtblk_req_ready);

        vtblk_setup_sysctl(sc);

        error = vtblk_setup_features(sc);
        if (error) {
                device_printf(dev, "cannot setup features\n");
                goto fail;
        }

        vtblk_read_config(sc, &blkcfg);

        /*
         * With the current sglist(9) implementation, it is not easy
         * for us to support a maximum segment size as adjacent
         * segments are coalesced. For now, just make sure it's larger
         * than the maximum supported transfer size.
         */
        if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
                if (blkcfg.size_max < maxphys) {
                        error = ENOTSUP;
                        device_printf(dev, "host requires unsupported "
                            "maximum segment size feature\n");
                        goto fail;
                }
        }

        sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
        if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
                error = EINVAL;
                device_printf(dev, "fewer than minimum number of segments "
                    "allowed: %d\n", sc->vtblk_max_nsegs);
                goto fail;
        }

        sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
        if (sc->vtblk_sglist == NULL) {
                error = ENOMEM;
                device_printf(dev, "cannot allocate sglist\n");
                goto fail;
        }

        error = vtblk_alloc_virtqueue(sc);
        if (error) {
                device_printf(dev, "cannot allocate virtqueue\n");
                goto fail;
        }

        error = vtblk_request_prealloc(sc);
        if (error) {
                device_printf(dev, "cannot preallocate requests\n");
                goto fail;
        }

        vtblk_alloc_disk(sc, &blkcfg);

        error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
        if (error) {
                device_printf(dev, "cannot setup virtqueue interrupt\n");
                goto fail;
        }

        virtqueue_enable_intr(sc->vtblk_vq);

fail:
        if (error)
                vtblk_detach(dev);

        return (error);
}

static int
vtblk_detach(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags |= VTBLK_FLAG_DETACH;
        if (device_is_attached(dev))
                vtblk_stop(sc);
        VTBLK_UNLOCK(sc);

        vtblk_drain(sc);

        if (sc->vtblk_disk != NULL) {
                disk_destroy(sc->vtblk_disk);
                sc->vtblk_disk = NULL;
        }

        if (sc->vtblk_sglist != NULL) {
                sglist_free(sc->vtblk_sglist);
                sc->vtblk_sglist = NULL;
        }

        VTBLK_LOCK_DESTROY(sc);

        return (0);
}

static int
vtblk_suspend(device_t dev)
{
        struct vtblk_softc *sc;
        int error;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
        /* XXX BMV: virtio_stop(), etc needed here? */
        error = vtblk_quiesce(sc);
        if (error)
                sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
        VTBLK_UNLOCK(sc);

        return (error);
}

static int
vtblk_resume(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        /* XXX BMV: virtio_reinit(), etc needed here? */
        sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
        vtblk_startio(sc);
        VTBLK_UNLOCK(sc);

        return (0);
}

static int
vtblk_shutdown(device_t dev)
{

        return (0);
}

static int
vtblk_attach_completed(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        /*
         * Create disk after attach as VIRTIO_BLK_T_GET_ID can only be
         * processed after the device acknowledged
         * VIRTIO_CONFIG_STATUS_DRIVER_OK.
         */
        vtblk_create_disk(sc);
        return (0);
}

static int
vtblk_config_change(device_t dev)
{
        struct vtblk_softc *sc;
        struct virtio_blk_config blkcfg;
        uint64_t capacity;

        sc = device_get_softc(dev);

        vtblk_read_config(sc, &blkcfg);

        /* Capacity is always in 512-byte units. */
        capacity = blkcfg.capacity * VTBLK_BSIZE;

        if (sc->vtblk_disk->d_mediasize != capacity)
                vtblk_resize_disk(sc, capacity);

        return (0);
}

static int
vtblk_open(struct disk *dp)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (ENOTTY);
}

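/*
 * dumper_t callback used for kernel crash dumps. By convention the dump
 * routine is invoked repeatedly with successive chunks and then one final
 * time with a NULL virtual address and zero offset, which is why that case
 * completes the dump below.
 */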
static int
vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
        struct disk *dp;
        struct vtblk_softc *sc;
        int error;

        dp = arg;
        error = 0;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        VTBLK_LOCK(sc);

        vtblk_dump_quiesce(sc);

        if (length > 0)
                error = vtblk_dump_write(sc, virtual, offset, length);
        if (error || (virtual == NULL && offset == 0))
                vtblk_dump_complete(sc);

        VTBLK_UNLOCK(sc);

        return (error);
}

static void
vtblk_strategy(struct bio *bp)
{
        struct vtblk_softc *sc;

        if ((sc = bp->bio_disk->d_drv1) == NULL) {
                vtblk_bio_done(NULL, bp, EINVAL);
                return;
        }

        if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
            (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
                vtblk_bio_done(sc, bp, EOPNOTSUPP);
                return;
        }

        VTBLK_LOCK(sc);

        if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
                VTBLK_UNLOCK(sc);
                vtblk_bio_done(sc, bp, ENXIO);
                return;
        }

        bioq_insert_tail(&sc->vtblk_bioq, bp);
        vtblk_startio(sc);

        VTBLK_UNLOCK(sc);
}

static int
vtblk_negotiate_features(struct vtblk_softc *sc)
{
        device_t dev;
        uint64_t features;

        dev = sc->vtblk_dev;
        features = virtio_bus_is_modern(dev) ? VTBLK_MODERN_FEATURES :
            VTBLK_LEGACY_FEATURES;

        sc->vtblk_features = virtio_negotiate_features(dev, features);
        return (virtio_finalize_features(dev));
}

static int
vtblk_setup_features(struct vtblk_softc *sc)
{
        device_t dev;
        int error;

        dev = sc->vtblk_dev;

        error = vtblk_negotiate_features(sc);
        if (error)
                return (error);

        if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
                sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
        if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
                sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG;

        /* Legacy. */
        if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
                sc->vtblk_flags |= VTBLK_FLAG_BARRIER;

        return (0);
}

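/*
 * Size the per-request segment budget: header + status plus the data
 * segments. As a worked example with hypothetical values (4K pages and a
 * 128K maxphys), a host seg_max of 126 yields
 * 2 + MIN(126, 32 + 1) = 35 segments, further capped at
 * VIRTIO_MAX_INDIRECT when indirect descriptors are in use.
 */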
static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
        device_t dev;
        int nsegs;

        dev = sc->vtblk_dev;
        nsegs = VTBLK_MIN_SEGMENTS;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
                nsegs += MIN(blkcfg->seg_max, maxphys / PAGE_SIZE + 1);
                if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
                        nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
        } else
                nsegs += 1;

        return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
        device_t dev;
        struct vq_alloc_info vq_info;

        dev = sc->vtblk_dev;

        VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
            vtblk_vq_intr, sc, &sc->vtblk_vq,
            "%s request", device_get_nameunit(dev));

        return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
{
        device_t dev;
        struct disk *dp;
        int error;

        dev = sc->vtblk_dev;
        dp = sc->vtblk_disk;

        dp->d_mediasize = new_capacity;
        if (bootverbose) {
                device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
                    (uintmax_t) dp->d_mediasize >> 20,
                    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
                    dp->d_sectorsize);
        }

        error = disk_resize(dp, M_NOWAIT);
        if (error) {
                device_printf(dev,
                    "disk_resize(9) failed, error: %d\n", error);
        }
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
        device_t dev;
        struct disk *dp;

        dev = sc->vtblk_dev;

        sc->vtblk_disk = dp = disk_alloc();
        dp->d_open = vtblk_open;
        dp->d_close = vtblk_close;
        dp->d_ioctl = vtblk_ioctl;
        dp->d_strategy = vtblk_strategy;
        dp->d_name = VTBLK_DISK_NAME;
        dp->d_unit = device_get_unit(dev);
        dp->d_drv1 = sc;
        dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION;
        dp->d_hba_vendor = virtio_get_vendor(dev);
        dp->d_hba_device = virtio_get_device(dev);
        dp->d_hba_subvendor = virtio_get_subvendor(dev);
        dp->d_hba_subdevice = virtio_get_subdevice(dev);

        if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
                dp->d_flags |= DISKFLAG_WRITE_PROTECT;
        else {
                if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
                        dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
                dp->d_dump = vtblk_dump;
        }

        /* Capacity is always in 512-byte units. */
        dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
                dp->d_sectorsize = blkcfg->blk_size;
        else
                dp->d_sectorsize = VTBLK_BSIZE;

        /*
         * The VirtIO maximum I/O size is given in terms of segments.
         * However, FreeBSD limits I/O size by logical buffer size, not
         * by physically contiguous pages. Therefore, we have to assume
         * no pages are contiguous. This may impose an artificially low
         * maximum I/O size. But in practice, since QEMU advertises 128
         * segments, this gives us a maximum I/O size of 125 * PAGE_SIZE,
         * which is typically greater than maxphys. Eventually we should
         * just advertise maxphys and split buffers that are too big.
         *
         * Note we must subtract one additional segment in case of
         * non-page-aligned buffers.
         */
        dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
            PAGE_SIZE;
        if (dp->d_maxsize < PAGE_SIZE)
                dp->d_maxsize = PAGE_SIZE; /* XXX */

        if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
                dp->d_fwsectors = blkcfg->geometry.sectors;
                dp->d_fwheads = blkcfg->geometry.heads;
        }

        if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) &&
            blkcfg->topology.physical_block_exp > 0) {
                dp->d_stripesize = dp->d_sectorsize *
                    (1 << blkcfg->topology.physical_block_exp);
                dp->d_stripeoffset = (dp->d_stripesize -
                    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
                    dp->d_stripesize;
        }

        if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
                dp->d_flags |= DISKFLAG_CANDELETE;
                dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
        }

        if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
                sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
        else
                sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
        struct disk *dp;

        dp = sc->vtblk_disk;

        vtblk_ident(sc);

        device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
            (uintmax_t) dp->d_mediasize >> 20,
            (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
            dp->d_sectorsize);

        disk_create(dp, DISK_VERSION);
}

static int
vtblk_request_prealloc(struct vtblk_softc *sc)
{
        struct vtblk_request *req;
        int i, nreqs;

        nreqs = virtqueue_size(sc->vtblk_vq);

        /*
         * Preallocate sufficient requests to keep the virtqueue full. Each
         * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
         * the number allocated when indirect descriptors are not available.
         */
        if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
                nreqs /= VTBLK_MIN_SEGMENTS;

        for (i = 0; i < nreqs; i++) {
                req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
                if (req == NULL)
                        return (ENOMEM);

                MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
                MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);

                sc->vtblk_request_count++;
                vtblk_request_enqueue(sc, req);
        }

        return (0);
}

static void
vtblk_request_free(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));

        while ((req = vtblk_request_dequeue(sc)) != NULL) {
                sc->vtblk_request_count--;
                free(req, M_DEVBUF);
        }

        KASSERT(sc->vtblk_request_count == 0,
            ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
}

static struct vtblk_request *
vtblk_request_dequeue(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_free);
        if (req != NULL) {
                TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
                bzero(req, sizeof(struct vtblk_request));
        }

        return (req);
}

static void
vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
{

        TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next_ready(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_ready);
        if (req != NULL)
                TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

        return (req);
}

static void
vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

        /* NOTE: Currently, there will be at most one request in the queue. */
        TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = vtblk_request_next_ready(sc);
        if (req != NULL)
                return (req);

        return (vtblk_request_bio(sc));
}

static struct vtblk_request *
vtblk_request_bio(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bp;

        bioq = &sc->vtblk_bioq;

        if (bioq_first(bioq) == NULL)
                return (NULL);

        req = vtblk_request_dequeue(sc);
        if (req == NULL)
                return (NULL);

        bp = bioq_takefirst(bioq);
        req->vbr_bp = bp;
        req->vbr_ack = -1;
        req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);

        switch (bp->bio_cmd) {
        case BIO_FLUSH:
                req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
                req->vbr_hdr.sector = 0;
                break;
        case BIO_READ:
                req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN);
                req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
                break;
        case BIO_WRITE:
                req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
                req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
                break;
        case BIO_DELETE:
                req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_DISCARD);
                req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
                break;
        default:
                panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
        }

        if (bp->bio_flags & BIO_ORDERED)
                req->vbr_hdr.type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER);

        return (req);
}

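/*
 * Worked example of the readable/writable accounting below: a BIO_WRITE
 * whose buffer maps to N segments is enqueued with readable = 1 + N
 * (header plus data) and writable = 1 (the ack byte), while a BIO_READ is
 * readable = 1 and writable = N + 1 since the host fills the data buffer.
 */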
static int
vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
{
        struct virtqueue *vq;
        struct sglist *sg;
        struct bio *bp;
        int ordered, readable, writable, error;

        vq = sc->vtblk_vq;
        sg = sc->vtblk_sglist;
        bp = req->vbr_bp;
        ordered = 0;
        writable = 0;

        /*
         * Some hosts (such as bhyve) do not implement the barrier feature,
         * so we emulate it in the driver by allowing the barrier request
         * to be the only one in flight.
         */
        if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
                if (sc->vtblk_req_ordered != NULL)
                        return (EBUSY);
                if (bp->bio_flags & BIO_ORDERED) {
                        if (!virtqueue_empty(vq))
                                return (EBUSY);
                        ordered = 1;
                        req->vbr_hdr.type &= vtblk_gtoh32(sc,
                                ~VIRTIO_BLK_T_BARRIER);
                }
        }

        sglist_reset(sg);
        sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));

        if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
                error = sglist_append_bio(sg, bp);
                if (error || sg->sg_nseg == sg->sg_maxseg) {
                        panic("%s: bio %p data buffer too big %d",
                            __func__, bp, error);
                }

                /* BIO_READ means the host writes into our buffer. */
                if (bp->bio_cmd == BIO_READ)
                        writable = sg->sg_nseg - 1;
        } else if (bp->bio_cmd == BIO_DELETE) {
                struct virtio_blk_discard_write_zeroes *discard;

                discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
                if (discard == NULL)
                        return (ENOMEM);

                bp->bio_driver1 = discard;
                discard->sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
                discard->num_sectors = vtblk_gtoh32(sc, bp->bio_bcount / VTBLK_BSIZE);
                error = sglist_append(sg, discard, sizeof(*discard));
                if (error || sg->sg_nseg == sg->sg_maxseg) {
                        panic("%s: bio %p data buffer too big %d",
                            __func__, bp, error);
                }
        }

        writable++;
        sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
        readable = sg->sg_nseg - writable;

        error = virtqueue_enqueue(vq, req, sg, readable, writable);
        if (error == 0 && ordered)
                sc->vtblk_req_ordered = req;

        return (error);
}

static int
vtblk_request_error(struct vtblk_request *req)
{
        int error;

        switch (req->vbr_ack) {
        case VIRTIO_BLK_S_OK:
                error = 0;
                break;
        case VIRTIO_BLK_S_UNSUPP:
                error = ENOTSUP;
                break;
        default:
                error = EIO;
                break;
        }

        return (error);
}

static void
vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
        struct vtblk_request *req;
        struct bio *bp;

        while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
                if (sc->vtblk_req_ordered != NULL) {
                        MPASS(sc->vtblk_req_ordered == req);
                        sc->vtblk_req_ordered = NULL;
                }

                bp = req->vbr_bp;
                bp->bio_error = vtblk_request_error(req);
                TAILQ_INSERT_TAIL(queue, bp, bio_queue);

                vtblk_request_enqueue(sc, req);
        }
}

static void
vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
        struct bio *bp, *tmp;

        TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
                if (bp->bio_error != 0)
                        disk_err(bp, "hard error", -1, 1);
                vtblk_bio_done(sc, bp, bp->bio_error);
        }
}

static void
vtblk_drain_vq(struct vtblk_softc *sc)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int last;

        vq = sc->vtblk_vq;
        last = 0;

        while ((req = virtqueue_drain(vq, &last)) != NULL) {
                vtblk_bio_done(sc, req->vbr_bp, ENXIO);
                vtblk_request_enqueue(sc, req);
        }

        sc->vtblk_req_ordered = NULL;
        KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bp;

        bioq = &sc->vtblk_bioq;

        if (sc->vtblk_vq != NULL) {
                struct bio_queue queue;

                TAILQ_INIT(&queue);
                vtblk_queue_completed(sc, &queue);
                vtblk_done_completed(sc, &queue);

                vtblk_drain_vq(sc);
        }

        while ((req = vtblk_request_next_ready(sc)) != NULL) {
                vtblk_bio_done(sc, req->vbr_bp, ENXIO);
                vtblk_request_enqueue(sc, req);
        }

        while (bioq_first(bioq) != NULL) {
                bp = bioq_takefirst(bioq);
                vtblk_bio_done(sc, bp, ENXIO);
        }

        vtblk_request_free(sc);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int enq;

        VTBLK_LOCK_ASSERT(sc);
        vq = sc->vtblk_vq;
        enq = 0;

        if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
                return;

        while (!virtqueue_full(vq)) {
                req = vtblk_request_next(sc);
                if (req == NULL)
                        break;

                if (vtblk_request_execute(sc, req) != 0) {
                        vtblk_request_requeue_ready(sc, req);
                        break;
                }

                enq++;
        }

        if (enq > 0)
                virtqueue_notify(vq);
}

static void
vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
{

        /* Because of GEOM direct dispatch, we cannot hold any locks. */
        if (sc != NULL)
                VTBLK_LOCK_ASSERT_NOTOWNED(sc);

        if (error) {
                bp->bio_resid = bp->bio_bcount;
                bp->bio_error = error;
                bp->bio_flags |= BIO_ERROR;
        }

        if (bp->bio_driver1 != NULL) {
                free(bp->bio_driver1, M_DEVBUF);
                bp->bio_driver1 = NULL;
        }

        biodone(bp);
}

#define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)                  \
        if (virtio_with_feature(_dev, _feature)) {                      \
                virtio_read_device_config(_dev,                         \
                    offsetof(struct virtio_blk_config, _field),         \
                    &(_cfg)->_field, sizeof((_cfg)->_field));           \
        }

static void
vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
        device_t dev;

        dev = sc->vtblk_dev;

        bzero(blkcfg, sizeof(struct virtio_blk_config));

        /* The capacity is always available. */
        virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
            capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));

        /* Read the configuration if the feature was negotiated. */
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
            geometry.cylinders, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
            geometry.heads, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
            geometry.sectors, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
            topology.physical_block_exp, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
            topology.alignment_offset, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
            topology.min_io_size, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
            topology.opt_io_size, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
            blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
            blkcfg);
}

#undef VTBLK_GET_CONFIG

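/*
 * Fetch the device identifier string into d_ident with a polled
 * VIRTIO_BLK_T_GET_ID request, unless disabled by the hw.vtblk.no_ident
 * tunable. This runs from vtblk_attach_completed(), after DRIVER_OK has
 * been set, since hosts need not service GET_ID before then.
 */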
static void
vtblk_ident(struct vtblk_softc *sc)
{
        struct bio buf;
        struct disk *dp;
        struct vtblk_request *req;
        int len, error;

        dp = sc->vtblk_disk;
        len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

        if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
                return;

        req = vtblk_request_dequeue(sc);
        if (req == NULL)
                return;

        req->vbr_ack = -1;
        req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID);
        req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
        req->vbr_hdr.sector = 0;

        req->vbr_bp = &buf;
        g_reset_bio(&buf);

        buf.bio_cmd = BIO_READ;
        buf.bio_data = dp->d_ident;
        buf.bio_bcount = len;

        VTBLK_LOCK(sc);
        error = vtblk_poll_request(sc, req);
        VTBLK_UNLOCK(sc);

        vtblk_request_enqueue(sc, req);

        if (error) {
                device_printf(sc->vtblk_dev,
                    "error getting device identifier: %d\n", error);
        }
}

static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        struct virtqueue *vq;
        int error;

        vq = sc->vtblk_vq;

        if (!virtqueue_empty(vq))
                return (EBUSY);

        error = vtblk_request_execute(sc, req);
        if (error)
                return (error);

        virtqueue_notify(vq);
        virtqueue_poll(vq, NULL);

        error = vtblk_request_error(req);
        if (error && bootverbose) {
                device_printf(sc->vtblk_dev,
                    "%s: IO error: %d\n", __func__, error);
        }

        return (error);
}

static int
vtblk_quiesce(struct vtblk_softc *sc)
{
        int error;

        VTBLK_LOCK_ASSERT(sc);
        error = 0;

        while (!virtqueue_empty(sc->vtblk_vq)) {
                if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
                    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
                        error = EBUSY;
                        break;
                }
        }

        return (error);
}

static void
vtblk_vq_intr(void *xsc)
{
        struct vtblk_softc *sc;
        struct virtqueue *vq;
        struct bio_queue queue;

        sc = xsc;
        vq = sc->vtblk_vq;
        TAILQ_INIT(&queue);

        VTBLK_LOCK(sc);

again:
        if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
                goto out;

        vtblk_queue_completed(sc, &queue);
        vtblk_startio(sc);

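        /*
         * Re-enable interrupts and rescan if completions raced in while
         * they were disabled; virtqueue_enable_intr() returning non-zero
         * indicates more used buffers are already pending.
         */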
        if (virtqueue_enable_intr(vq) != 0) {
                virtqueue_disable_intr(vq);
                goto again;
        }

        if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
                wakeup(&sc->vtblk_vq);

out:
        VTBLK_UNLOCK(sc);
        vtblk_done_completed(sc, &queue);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

        virtqueue_disable_intr(sc->vtblk_vq);
        virtio_stop(sc->vtblk_dev);
}

static void
vtblk_dump_quiesce(struct vtblk_softc *sc)
{

        /*
         * Spin here until all the requests in-flight at the time of the
         * dump are completed and queued. The queued requests will be
         * biodone'd once the dump is finished.
         */
        while (!virtqueue_empty(sc->vtblk_vq))
                vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
}

static int
vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
        struct bio buf;
        struct vtblk_request *req;

        req = &sc->vtblk_dump_request;
        req->vbr_ack = -1;
        req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
        req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
        req->vbr_hdr.sector = vtblk_gtoh64(sc, offset / VTBLK_BSIZE);

        req->vbr_bp = &buf;
        g_reset_bio(&buf);

        buf.bio_cmd = BIO_WRITE;
        buf.bio_data = virtual;
        buf.bio_bcount = length;

        return (vtblk_poll_request(sc, req));
}

static int
vtblk_dump_flush(struct vtblk_softc *sc)
{
        struct bio buf;
        struct vtblk_request *req;

        req = &sc->vtblk_dump_request;
        req->vbr_ack = -1;
        req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
        req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
        req->vbr_hdr.sector = 0;

        req->vbr_bp = &buf;
        g_reset_bio(&buf);

        buf.bio_cmd = BIO_FLUSH;

        return (vtblk_poll_request(sc, req));
}

static void
vtblk_dump_complete(struct vtblk_softc *sc)
{

        vtblk_dump_flush(sc);

        VTBLK_UNLOCK(sc);
        vtblk_done_completed(sc, &sc->vtblk_dump_queue);
        VTBLK_LOCK(sc);
}

static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

        /* Set either writeback (1) or writethrough (0) mode. */
        virtio_write_dev_config_1(sc->vtblk_dev,
            offsetof(struct virtio_blk_config, wce), wc);
}

static int
vtblk_write_cache_enabled(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
        int wc;

        if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) {
                wc = vtblk_tunable_int(sc, "writecache_mode",
                    vtblk_writecache_mode);
                if (wc >= 0 && wc < VTBLK_CACHE_MAX)
                        vtblk_set_write_cache(sc, wc);
                else
                        wc = blkcfg->wce;
        } else
                wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH);

        return (wc);
}

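/*
 * Sysctl handler for the per-device write cache mode, exposed as
 * dev.vtblk.<unit>.writecache_mode, e.g.:
 *
 *      sysctl dev.vtblk.0.writecache_mode=1
 *
 * Changing it is only permitted when VIRTIO_BLK_F_CONFIG_WCE was
 * negotiated.
 */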
static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
        struct vtblk_softc *sc;
        int wc, error;

        sc = oidp->oid_arg1;
        wc = sc->vtblk_write_cache;

        error = sysctl_handle_int(oidp, &wc, 0, req);
        if (error || req->newptr == NULL)
                return (error);
        if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0)
                return (EPERM);
        if (wc < 0 || wc >= VTBLK_CACHE_MAX)
                return (EINVAL);

        VTBLK_LOCK(sc);
        sc->vtblk_write_cache = wc;
        vtblk_set_write_cache(sc, sc->vtblk_write_cache);
        VTBLK_UNLOCK(sc);

        return (0);
}

static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
        device_t dev;
        struct sysctl_ctx_list *ctx;
        struct sysctl_oid *tree;
        struct sysctl_oid_list *child;

        dev = sc->vtblk_dev;
        ctx = device_get_sysctl_ctx(dev);
        tree = device_get_sysctl_tree(dev);
        child = SYSCTL_CHILDREN(tree);

        SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
            CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
            vtblk_write_cache_sysctl, "I",
            "Write cache mode (writethrough (0) or writeback (1))");
}

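/*
 * Look up a per-device tunable, letting hw.vtblk.<unit>.<knob> (set in
 * loader.conf(5)) override the global hw.vtblk.<knob> default passed in
 * as 'def'.
 */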
static int
vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
{
        char path[64];

        snprintf(path, sizeof(path),
            "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
        TUNABLE_INT_FETCH(path, &def);

        return (def);
}