]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/cam/ctl/ctl_backend_block.c
zfs: merge openzfs/zfs@ad0a55461
[FreeBSD/FreeBSD.git] / sys / cam / ctl / ctl_backend_block.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2003 Silicon Graphics International Corp.
5  * Copyright (c) 2009-2011 Spectra Logic Corporation
6  * Copyright (c) 2012,2021 The FreeBSD Foundation
7  * Copyright (c) 2014-2021 Alexander Motin <mav@FreeBSD.org>
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Edward Tomasz Napierala
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Portions of this software were developed by Ka Ho Ng <khng@FreeBSD.org>
14  * under sponsorship from the FreeBSD Foundation.
15  *
16  * Redistribution and use in source and binary forms, with or without
17  * modification, are permitted provided that the following conditions
18  * are met:
19  * 1. Redistributions of source code must retain the above copyright
20  *    notice, this list of conditions, and the following disclaimer,
21  *    without modification.
22  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
23  *    substantially similar to the "NO WARRANTY" disclaimer below
24  *    ("Disclaimer") and any redistribution must be conditioned upon
25  *    including a substantially similar Disclaimer requirement for further
26  *    binary redistribution.
27  *
28  * NO WARRANTY
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
37  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
38  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39  * POSSIBILITY OF SUCH DAMAGES.
40  *
41  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
42  */
43 /*
44  * CAM Target Layer driver backend for block devices.
45  *
46  * Author: Ken Merry <ken@FreeBSD.org>
47  */
48 #include <sys/cdefs.h>
49 __FBSDID("$FreeBSD$");
50
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/kernel.h>
54 #include <sys/types.h>
55 #include <sys/kthread.h>
56 #include <sys/bio.h>
57 #include <sys/fcntl.h>
58 #include <sys/limits.h>
59 #include <sys/lock.h>
60 #include <sys/mutex.h>
61 #include <sys/condvar.h>
62 #include <sys/malloc.h>
63 #include <sys/conf.h>
64 #include <sys/ioccom.h>
65 #include <sys/queue.h>
66 #include <sys/sbuf.h>
67 #include <sys/endian.h>
68 #include <sys/uio.h>
69 #include <sys/buf.h>
70 #include <sys/taskqueue.h>
71 #include <sys/vnode.h>
72 #include <sys/namei.h>
73 #include <sys/mount.h>
74 #include <sys/disk.h>
75 #include <sys/fcntl.h>
76 #include <sys/filedesc.h>
77 #include <sys/filio.h>
78 #include <sys/proc.h>
79 #include <sys/pcpu.h>
80 #include <sys/module.h>
81 #include <sys/sdt.h>
82 #include <sys/devicestat.h>
83 #include <sys/sysctl.h>
84 #include <sys/nv.h>
85 #include <sys/dnv.h>
86 #include <sys/sx.h>
87 #include <sys/unistd.h>
88
89 #include <geom/geom.h>
90
91 #include <cam/cam.h>
92 #include <cam/scsi/scsi_all.h>
93 #include <cam/scsi/scsi_da.h>
94 #include <cam/ctl/ctl_io.h>
95 #include <cam/ctl/ctl.h>
96 #include <cam/ctl/ctl_backend.h>
97 #include <cam/ctl/ctl_ioctl.h>
98 #include <cam/ctl/ctl_ha.h>
99 #include <cam/ctl/ctl_scsi_all.h>
100 #include <cam/ctl/ctl_private.h>
101 #include <cam/ctl/ctl_error.h>
102
/*
 * Scatter/gather sizing.  Enough S/G space is reserved to carry I/Os of at
 * least 1MB: with a small maxphys that is 8 segments of 128KB each, with a
 * large maxphys it is up to 8 segments of 1MB each.  Larger I/Os are split.
 *
 * CTLBLK_HALF_SEGS is the index at which the second S/G list starts when a
 * request carries two lists of equal shape.
 */
#define	CTLBLK_MAX_SEGS		8
#define	CTLBLK_HALF_SEGS	(CTLBLK_MAX_SEGS / 2)
#define	CTLBLK_MIN_SEG		(128 * 1024)
#define	CTLBLK_MAX_SEG		MIN(1024 * 1024, MAX(CTLBLK_MIN_SEG, maxphys))
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_MAX_SEG * CTLBLK_MAX_SEGS)
113
/*
 * Debug tracing.  With CTLBLK_DEBUG defined, DPRINTF() prints the message
 * prefixed with the calling function name and line number; otherwise it
 * expands to an empty statement.
 */
#ifdef CTLBLK_DEBUG
#define	DPRINTF(fmt, ...) \
    printf("cbb(%s:%d): " fmt, __func__, __LINE__, ##__VA_ARGS__)
#else
#define	DPRINTF(fmt, ...) do {} while (0)
#endif
120
/*
 * Accessors for the per-I/O private areas stored in the CTL I/O header:
 * PRIV() views the CTL_PRIV_BACKEND slot as a ctl_ptr_len_flags (its ptr
 * member holds the backend's struct ctl_be_block_io), and ARGS() views the
 * CTL_PRIV_LBA_LEN slot as a ctl_lba_len_flags.
 */
#define	PRIV(io)							\
    ((struct ctl_ptr_len_flags *)					\
    &(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
#define	ARGS(io)							\
    ((struct ctl_lba_len_flags *)					\
    &(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])

/* DTrace SDT provider used by the probes defined in this file. */
SDT_PROVIDER_DEFINE(cbb);
127
/*
 * Bit flags kept in ctl_be_block_lun.flags.
 */
typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= (1 << 0),	/* 0x01 */
	CTL_BE_BLOCK_LUN_WAITING	= (1 << 2),	/* 0x04 */
} ctl_be_block_lun_flags;
132
/*
 * Kind of backing store behind a LUN (stored in ctl_be_block_lun.dev_type).
 */
typedef enum {
	CTL_BE_BLOCK_NONE = 0,
	CTL_BE_BLOCK_DEV  = 1,
	CTL_BE_BLOCK_FILE = 2
} ctl_be_block_type;
138
/*
 * State specific to file-backed LUNs.
 */
struct ctl_be_block_filedata {
	struct ucred	*cred;	/* credentials passed to VOP_READ/VOP_WRITE */
};

/*
 * Backing-store specific state; currently only the file variant exists.
 */
union ctl_be_block_bedata {
	struct ctl_be_block_filedata	file;
};
146
147 struct ctl_be_block_io;
148 struct ctl_be_block_lun;
149
150 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
151                                struct ctl_be_block_io *beio);
152 typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
153                                   const char *attrname);
154
155 /*
156  * Backend LUN structure.  There is a 1:1 mapping between a block device
157  * and a backend block LUN, and between a backend block LUN and a CTL LUN.
158  */
159 struct ctl_be_block_lun {
160         struct ctl_be_lun cbe_lun;              /* Must be first element. */
161         struct ctl_lun_create_params params;
162         char *dev_path;
163         ctl_be_block_type dev_type;
164         struct vnode *vn;
165         union ctl_be_block_bedata backend;
166         cbb_dispatch_t dispatch;
167         cbb_dispatch_t lun_flush;
168         cbb_dispatch_t unmap;
169         cbb_dispatch_t get_lba_status;
170         cbb_getattr_t getattr;
171         uint64_t size_blocks;
172         uint64_t size_bytes;
173         struct ctl_be_block_softc *softc;
174         struct devstat *disk_stats;
175         ctl_be_block_lun_flags flags;
176         SLIST_ENTRY(ctl_be_block_lun) links;
177         struct taskqueue *io_taskqueue;
178         struct task io_task;
179         int num_threads;
180         STAILQ_HEAD(, ctl_io_hdr) input_queue;
181         STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
182         STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
183         STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
184         struct mtx_padalign io_lock;
185         struct mtx_padalign queue_lock;
186 };
187
188 /*
189  * Overall softc structure for the block backend module.
190  */
191 struct ctl_be_block_softc {
192         struct sx                        modify_lock;
193         struct mtx                       lock;
194         int                              num_luns;
195         SLIST_HEAD(, ctl_be_block_lun)   lun_list;
196         uma_zone_t                       beio_zone;
197         uma_zone_t                       bufmin_zone;
198         uma_zone_t                       bufmax_zone;
199 };
200
201 static struct ctl_be_block_softc backend_block_softc;
202
203 /*
204  * Per-I/O information.
205  */
206 struct ctl_be_block_io {
207         union ctl_io                    *io;
208         struct ctl_sg_entry             sg_segs[CTLBLK_MAX_SEGS];
209         struct iovec                    xiovecs[CTLBLK_MAX_SEGS];
210         int                             refcnt;
211         int                             bio_cmd;
212         int                             two_sglists;
213         int                             num_segs;
214         int                             num_bios_sent;
215         int                             num_bios_done;
216         int                             send_complete;
217         int                             first_error;
218         uint64_t                        first_error_offset;
219         struct bintime                  ds_t0;
220         devstat_tag_type                ds_tag_type;
221         devstat_trans_flags             ds_trans_type;
222         uint64_t                        io_len;
223         uint64_t                        io_offset;
224         int                             io_arg;
225         struct ctl_be_block_softc       *softc;
226         struct ctl_be_block_lun         *lun;
227         void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
228 };
229
230 extern struct ctl_softc *control_softc;
231
232 static int cbb_num_threads = 32;
233 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
234             "CAM Target Layer Block Backend");
235 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
236            &cbb_num_threads, 0, "Number of threads per backing file");
237
238 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
239 static void ctl_free_beio(struct ctl_be_block_io *beio);
240 static void ctl_complete_beio(struct ctl_be_block_io *beio);
241 static int ctl_be_block_move_done(union ctl_io *io, bool samethr);
242 static void ctl_be_block_biodone(struct bio *bio);
243 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
244                                     struct ctl_be_block_io *beio);
245 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
246                                        struct ctl_be_block_io *beio);
247 static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
248                                   struct ctl_be_block_io *beio);
249 static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
250                                          const char *attrname);
251 static void ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
252                                     struct ctl_be_block_io *beio);
253 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
254                                    struct ctl_be_block_io *beio);
255 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
256                                    struct ctl_be_block_io *beio);
257 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
258                                       struct ctl_be_block_io *beio);
259 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
260                                          const char *attrname);
261 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
262                                     union ctl_io *io);
263 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
264                                     union ctl_io *io);
265 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
266                                   union ctl_io *io);
267 static void ctl_be_block_worker(void *context, int pending);
268 static int ctl_be_block_submit(union ctl_io *io);
269 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
270                                    int flag, struct thread *td);
271 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
272                                   struct ctl_lun_req *req);
273 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
274                                  struct ctl_lun_req *req);
275 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
276 static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
277                              struct ctl_lun_req *req);
278 static int ctl_be_block_create(struct ctl_be_block_softc *softc,
279                                struct ctl_lun_req *req);
280 static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
281                            struct ctl_lun_req *req);
282 static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
283                            struct ctl_lun_req *req);
284 static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun);
285 static int ctl_be_block_config_write(union ctl_io *io);
286 static int ctl_be_block_config_read(union ctl_io *io);
287 static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb);
288 static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
289 static int ctl_be_block_init(void);
290 static int ctl_be_block_shutdown(void);
291
292 static struct ctl_backend_driver ctl_be_block_driver = 
293 {
294         .name = "block",
295         .flags = CTL_BE_FLAG_HAS_CONFIG,
296         .init = ctl_be_block_init,
297         .shutdown = ctl_be_block_shutdown,
298         .data_submit = ctl_be_block_submit,
299         .config_read = ctl_be_block_config_read,
300         .config_write = ctl_be_block_config_write,
301         .ioctl = ctl_be_block_ioctl,
302         .lun_info = ctl_be_block_lun_info,
303         .lun_attr = ctl_be_block_lun_attr
304 };
305
306 MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend");
307 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
308
309 static void
310 ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg,
311     size_t len)
312 {
313
314         if (len <= CTLBLK_MIN_SEG) {
315                 sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK);
316         } else {
317                 KASSERT(len <= CTLBLK_MAX_SEG,
318                     ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG));
319                 sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK);
320         }
321         sg->len = len;
322 }
323
324 static void
325 ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg)
326 {
327
328         if (sg->len <= CTLBLK_MIN_SEG) {
329                 uma_zfree(softc->bufmin_zone, sg->addr);
330         } else {
331                 KASSERT(sg->len <= CTLBLK_MAX_SEG,
332                     ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG));
333                 uma_zfree(softc->bufmax_zone, sg->addr);
334         }
335 }
336
337 static struct ctl_be_block_io *
338 ctl_alloc_beio(struct ctl_be_block_softc *softc)
339 {
340         struct ctl_be_block_io *beio;
341
342         beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
343         beio->softc = softc;
344         beio->refcnt = 1;
345         return (beio);
346 }
347
348 static void
349 ctl_real_free_beio(struct ctl_be_block_io *beio)
350 {
351         struct ctl_be_block_softc *softc = beio->softc;
352         int i;
353
354         for (i = 0; i < beio->num_segs; i++) {
355                 ctl_free_seg(softc, &beio->sg_segs[i]);
356
357                 /* For compare we had two equal S/G lists. */
358                 if (beio->two_sglists) {
359                         ctl_free_seg(softc,
360                             &beio->sg_segs[i + CTLBLK_HALF_SEGS]);
361                 }
362         }
363
364         uma_zfree(softc->beio_zone, beio);
365 }
366
367 static void
368 ctl_refcnt_beio(void *arg, int diff)
369 {
370         struct ctl_be_block_io *beio = arg;
371
372         if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0)
373                 ctl_real_free_beio(beio);
374 }
375
/*
 * Drop one reference on a per-I/O context; the context is destroyed once
 * the last reference is gone.
 */
static void
ctl_free_beio(struct ctl_be_block_io *beio)
{
	const int drop_one = -1;

	ctl_refcnt_beio(beio, drop_one);
}
382
383 static void
384 ctl_complete_beio(struct ctl_be_block_io *beio)
385 {
386         union ctl_io *io = beio->io;
387
388         if (beio->beio_cont != NULL) {
389                 beio->beio_cont(beio);
390         } else {
391                 ctl_free_beio(beio);
392                 ctl_data_submit_done(io);
393         }
394 }
395
/*
 * Compare two byte buffers.
 *
 * a, b:  buffers to compare; neither is modified.
 * size:  number of bytes to compare.
 *
 * Returns the offset of the first byte that differs, or 'size' when the
 * first 'size' bytes of both buffers are equal (including size == 0).
 */
static size_t
cmp(const uint8_t *a, const uint8_t *b, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (a[i] != b[i])
			break;
	}
	return (i);
}
407
408 static void
409 ctl_be_block_compare(union ctl_io *io)
410 {
411         struct ctl_be_block_io *beio;
412         uint64_t off, res;
413         int i;
414         uint8_t info[8];
415
416         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
417         off = 0;
418         for (i = 0; i < beio->num_segs; i++) {
419                 res = cmp(beio->sg_segs[i].addr,
420                     beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
421                     beio->sg_segs[i].len);
422                 off += res;
423                 if (res < beio->sg_segs[i].len)
424                         break;
425         }
426         if (i < beio->num_segs) {
427                 scsi_u64to8b(off, info);
428                 ctl_set_sense(&io->scsiio, /*current_error*/ 1,
429                     /*sense_key*/ SSD_KEY_MISCOMPARE,
430                     /*asc*/ 0x1D, /*ascq*/ 0x00,
431                     /*type*/ SSD_ELEM_INFO,
432                     /*size*/ sizeof(info), /*data*/ &info,
433                     /*type*/ SSD_ELEM_NONE);
434         } else
435                 ctl_set_success(&io->scsiio);
436 }
437
438 static int
439 ctl_be_block_move_done(union ctl_io *io, bool samethr)
440 {
441         struct ctl_be_block_io *beio;
442         struct ctl_be_block_lun *be_lun;
443         struct ctl_lba_len_flags *lbalen;
444
445         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
446
447         DPRINTF("entered\n");
448         io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
449
450         /*
451          * We set status at this point for read and compare commands.
452          */
453         if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
454             (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
455                 lbalen = ARGS(io);
456                 if (lbalen->flags & CTL_LLF_READ) {
457                         ctl_set_success(&io->scsiio);
458                 } else if (lbalen->flags & CTL_LLF_COMPARE) {
459                         /* We have two data blocks ready for comparison. */
460                         ctl_be_block_compare(io);
461                 }
462         }
463
464         /*
465          * If this is a read, or a write with errors, it is done.
466          */
467         if ((beio->bio_cmd == BIO_READ)
468          || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
469          || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
470                 ctl_complete_beio(beio);
471                 return (0);
472         }
473
474         /*
475          * At this point, we have a write and the DMA completed successfully.
476          * If we were called synchronously in the original thread then just
477          * dispatch, otherwise we now have to queue it to the task queue to
478          * execute the backend I/O.  That is because we do blocking
479          * memory allocations, and in the file backing case, blocking I/O.
480          * This move done routine is generally called in the SIM's
481          * interrupt context, and therefore we cannot block.
482          */
483         be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
484         if (samethr) {
485                 be_lun->dispatch(be_lun, beio);
486         } else {
487                 mtx_lock(&be_lun->queue_lock);
488                 STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
489                 mtx_unlock(&be_lun->queue_lock);
490                 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
491         }
492         return (0);
493 }
494
495 static void
496 ctl_be_block_biodone(struct bio *bio)
497 {
498         struct ctl_be_block_io *beio = bio->bio_caller1;
499         struct ctl_be_block_lun *be_lun = beio->lun;
500         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
501         union ctl_io *io;
502         int error;
503
504         io = beio->io;
505
506         DPRINTF("entered\n");
507
508         error = bio->bio_error;
509         mtx_lock(&be_lun->io_lock);
510         if (error != 0 &&
511             (beio->first_error == 0 ||
512              bio->bio_offset < beio->first_error_offset)) {
513                 beio->first_error = error;
514                 beio->first_error_offset = bio->bio_offset;
515         }
516
517         beio->num_bios_done++;
518
519         /*
520          * XXX KDM will this cause WITNESS to complain?  Holding a lock
521          * during the free might cause it to complain.
522          */
523         g_destroy_bio(bio);
524
525         /*
526          * If the send complete bit isn't set, or we aren't the last I/O to
527          * complete, then we're done.
528          */
529         if ((beio->send_complete == 0)
530          || (beio->num_bios_done < beio->num_bios_sent)) {
531                 mtx_unlock(&be_lun->io_lock);
532                 return;
533         }
534
535         /*
536          * At this point, we've verified that we are the last I/O to
537          * complete, so it's safe to drop the lock.
538          */
539         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
540             beio->ds_tag_type, beio->ds_trans_type,
541             /*now*/ NULL, /*then*/&beio->ds_t0);
542         mtx_unlock(&be_lun->io_lock);
543
544         /*
545          * If there are any errors from the backing device, we fail the
546          * entire I/O with a medium error.
547          */
548         error = beio->first_error;
549         if (error != 0) {
550                 if (error == EOPNOTSUPP) {
551                         ctl_set_invalid_opcode(&io->scsiio);
552                 } else if (error == ENOSPC || error == EDQUOT) {
553                         ctl_set_space_alloc_fail(&io->scsiio);
554                 } else if (error == EROFS || error == EACCES) {
555                         ctl_set_hw_write_protected(&io->scsiio);
556                 } else if (beio->bio_cmd == BIO_FLUSH) {
557                         /* XXX KDM is there is a better error here? */
558                         ctl_set_internal_failure(&io->scsiio,
559                                                  /*sks_valid*/ 1,
560                                                  /*retry_count*/ 0xbad2);
561                 } else {
562                         ctl_set_medium_error(&io->scsiio,
563                             beio->bio_cmd == BIO_READ);
564                 }
565                 ctl_complete_beio(beio);
566                 return;
567         }
568
569         /*
570          * If this is a write, a flush, a delete or verify, we're all done.
571          * If this is a read, we can now send the data to the user.
572          */
573         if ((beio->bio_cmd == BIO_WRITE)
574          || (beio->bio_cmd == BIO_FLUSH)
575          || (beio->bio_cmd == BIO_DELETE)
576          || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
577                 ctl_set_success(&io->scsiio);
578                 ctl_complete_beio(beio);
579         } else {
580                 if ((ARGS(io)->flags & CTL_LLF_READ) &&
581                     beio->beio_cont == NULL) {
582                         ctl_set_success(&io->scsiio);
583                         if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT)
584                                 ctl_serseq_done(io);
585                 }
586                 ctl_datamove(io);
587         }
588 }
589
590 static void
591 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
592                         struct ctl_be_block_io *beio)
593 {
594         union ctl_io *io = beio->io;
595         struct mount *mountpoint;
596         int error;
597
598         DPRINTF("entered\n");
599
600         binuptime(&beio->ds_t0);
601         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
602
603         (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
604
605         vn_lock(be_lun->vn, vn_lktype_write(mountpoint, be_lun->vn) |
606             LK_RETRY);
607         error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
608             curthread);
609         VOP_UNLOCK(be_lun->vn);
610
611         vn_finished_write(mountpoint);
612
613         mtx_lock(&be_lun->io_lock);
614         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
615             beio->ds_tag_type, beio->ds_trans_type,
616             /*now*/ NULL, /*then*/&beio->ds_t0);
617         mtx_unlock(&be_lun->io_lock);
618
619         if (error == 0)
620                 ctl_set_success(&io->scsiio);
621         else {
622                 /* XXX KDM is there is a better error here? */
623                 ctl_set_internal_failure(&io->scsiio,
624                                          /*sks_valid*/ 1,
625                                          /*retry_count*/ 0xbad1);
626         }
627
628         ctl_complete_beio(beio);
629 }
630
631 SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
632 SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
633 SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t");
634 SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");
635
636 static void
637 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
638                            struct ctl_be_block_io *beio)
639 {
640         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
641         struct ctl_be_block_filedata *file_data;
642         union ctl_io *io;
643         struct uio xuio;
644         struct iovec *xiovec;
645         size_t s;
646         int error, flags, i;
647
648         DPRINTF("entered\n");
649
650         file_data = &be_lun->backend.file;
651         io = beio->io;
652         flags = 0;
653         if (ARGS(io)->flags & CTL_LLF_DPO)
654                 flags |= IO_DIRECT;
655         if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
656                 flags |= IO_SYNC;
657
658         bzero(&xuio, sizeof(xuio));
659         if (beio->bio_cmd == BIO_READ) {
660                 SDT_PROBE0(cbb, , read, file_start);
661                 xuio.uio_rw = UIO_READ;
662         } else {
663                 SDT_PROBE0(cbb, , write, file_start);
664                 xuio.uio_rw = UIO_WRITE;
665         }
666         xuio.uio_offset = beio->io_offset;
667         xuio.uio_resid = beio->io_len;
668         xuio.uio_segflg = UIO_SYSSPACE;
669         xuio.uio_iov = beio->xiovecs;
670         xuio.uio_iovcnt = beio->num_segs;
671         xuio.uio_td = curthread;
672
673         for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
674                 xiovec->iov_base = beio->sg_segs[i].addr;
675                 xiovec->iov_len = beio->sg_segs[i].len;
676         }
677
678         binuptime(&beio->ds_t0);
679         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
680
681         if (beio->bio_cmd == BIO_READ) {
682                 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
683
684                 if (beio->beio_cont == NULL &&
685                     cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
686                         ctl_serseq_done(io);
687                 /*
688                  * UFS pays attention to IO_DIRECT for reads.  If the
689                  * DIRECTIO option is configured into the kernel, it calls
690                  * ffs_rawread().  But that only works for single-segment
691                  * uios with user space addresses.  In our case, with a
692                  * kernel uio, it still reads into the buffer cache, but it
693                  * will just try to release the buffer from the cache later
694                  * on in ffs_read().
695                  *
696                  * ZFS does not pay attention to IO_DIRECT for reads.
697                  *
698                  * UFS does not pay attention to IO_SYNC for reads.
699                  *
700                  * ZFS pays attention to IO_SYNC (which translates into the
701                  * Solaris define FRSYNC for zfs_read()) for reads.  It
702                  * attempts to sync the file before reading.
703                  */
704                 error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);
705
706                 VOP_UNLOCK(be_lun->vn);
707                 SDT_PROBE0(cbb, , read, file_done);
708                 if (error == 0 && xuio.uio_resid > 0) {
709                         /*
710                          * If we red less then requested (EOF), then
711                          * we should clean the rest of the buffer.
712                          */
713                         s = beio->io_len - xuio.uio_resid;
714                         for (i = 0; i < beio->num_segs; i++) {
715                                 if (s >= beio->sg_segs[i].len) {
716                                         s -= beio->sg_segs[i].len;
717                                         continue;
718                                 }
719                                 bzero((uint8_t *)beio->sg_segs[i].addr + s,
720                                     beio->sg_segs[i].len - s);
721                                 s = 0;
722                         }
723                 }
724         } else {
725                 struct mount *mountpoint;
726
727                 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
728                 vn_lock(be_lun->vn, vn_lktype_write(mountpoint,
729                     be_lun->vn) | LK_RETRY);
730
731                 /*
732                  * UFS pays attention to IO_DIRECT for writes.  The write
733                  * is done asynchronously.  (Normally the write would just
734                  * get put into cache.
735                  *
736                  * UFS pays attention to IO_SYNC for writes.  It will
737                  * attempt to write the buffer out synchronously if that
738                  * flag is set.
739                  *
740                  * ZFS does not pay attention to IO_DIRECT for writes.
741                  *
742                  * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
743                  * for writes.  It will flush the transaction from the
744                  * cache before returning.
745                  */
746                 error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
747                 VOP_UNLOCK(be_lun->vn);
748
749                 vn_finished_write(mountpoint);
750                 SDT_PROBE0(cbb, , write, file_done);
751         }
752
753         mtx_lock(&be_lun->io_lock);
754         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
755             beio->ds_tag_type, beio->ds_trans_type,
756             /*now*/ NULL, /*then*/&beio->ds_t0);
757         mtx_unlock(&be_lun->io_lock);
758
759         /*
760          * If we got an error, set the sense data to "MEDIUM ERROR" and
761          * return the I/O to the user.
762          */
763         if (error != 0) {
764                 if (error == ENOSPC || error == EDQUOT) {
765                         ctl_set_space_alloc_fail(&io->scsiio);
766                 } else if (error == EROFS || error == EACCES) {
767                         ctl_set_hw_write_protected(&io->scsiio);
768                 } else {
769                         ctl_set_medium_error(&io->scsiio,
770                             beio->bio_cmd == BIO_READ);
771                 }
772                 ctl_complete_beio(beio);
773                 return;
774         }
775
776         /*
777          * If this is a write or a verify, we're all done.
778          * If this is a read, we can now send the data to the user.
779          */
780         if ((beio->bio_cmd == BIO_WRITE) ||
781             (ARGS(io)->flags & CTL_LLF_VERIFY)) {
782                 ctl_set_success(&io->scsiio);
783                 ctl_complete_beio(beio);
784         } else {
785                 if ((ARGS(io)->flags & CTL_LLF_READ) &&
786                     beio->beio_cont == NULL) {
787                         ctl_set_success(&io->scsiio);
788                         if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
789                                 ctl_serseq_done(io);
790                 }
791                 ctl_datamove(io);
792         }
793 }
794
795 static void
796 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
797                         struct ctl_be_block_io *beio)
798 {
799         union ctl_io *io = beio->io;
800         struct ctl_lba_len_flags *lbalen = ARGS(io);
801         struct scsi_get_lba_status_data *data;
802         off_t roff, off;
803         int error, status;
804
805         DPRINTF("entered\n");
806
807         off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
808         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
809         error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
810             0, curthread->td_ucred, curthread);
811         if (error == 0 && off > roff)
812                 status = 0;     /* mapped up to off */
813         else {
814                 error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
815                     0, curthread->td_ucred, curthread);
816                 if (error == 0 && off > roff)
817                         status = 1;     /* deallocated up to off */
818                 else {
819                         status = 0;     /* unknown up to the end */
820                         off = be_lun->size_bytes;
821                 }
822         }
823         VOP_UNLOCK(be_lun->vn);
824
825         data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
826         scsi_u64to8b(lbalen->lba, data->descr[0].addr);
827         scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
828             lbalen->lba), data->descr[0].length);
829         data->descr[0].status = status;
830
831         ctl_complete_beio(beio);
832 }
833
834 static uint64_t
835 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
836 {
837         struct vattr            vattr;
838         struct statfs           statfs;
839         uint64_t                val;
840         int                     error;
841
842         val = UINT64_MAX;
843         if (be_lun->vn == NULL)
844                 return (val);
845         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
846         if (strcmp(attrname, "blocksused") == 0) {
847                 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
848                 if (error == 0)
849                         val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
850         }
851         if (strcmp(attrname, "blocksavail") == 0 &&
852             !VN_IS_DOOMED(be_lun->vn)) {
853                 error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
854                 if (error == 0)
855                         val = statfs.f_bavail * statfs.f_bsize /
856                             be_lun->cbe_lun.blocksize;
857         }
858         VOP_UNLOCK(be_lun->vn);
859         return (val);
860 }
861
862 static void
863 ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
864                         struct ctl_be_block_io *beio)
865 {
866         struct ctl_be_block_filedata *file_data;
867         union ctl_io *io;
868         struct ctl_ptr_len_flags *ptrlen;
869         struct scsi_unmap_desc *buf, *end;
870         struct mount *mp;
871         off_t off, len;
872         int error;
873
874         io = beio->io;
875         file_data = &be_lun->backend.file;
876         mp = NULL;
877         error = 0;
878
879         binuptime(&beio->ds_t0);
880         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
881
882         (void)vn_start_write(be_lun->vn, &mp, V_WAIT);
883         vn_lock(be_lun->vn, vn_lktype_write(mp, be_lun->vn) | LK_RETRY);
884         if (beio->io_offset == -1) {
885                 beio->io_len = 0;
886                 ptrlen = (struct ctl_ptr_len_flags *)
887                     &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
888                 buf = (struct scsi_unmap_desc *)ptrlen->ptr;
889                 end = buf + ptrlen->len / sizeof(*buf);
890                 for (; buf < end; buf++) {
891                         off = (off_t)scsi_8btou64(buf->lba) *
892                             be_lun->cbe_lun.blocksize;
893                         len = (off_t)scsi_4btoul(buf->length) *
894                             be_lun->cbe_lun.blocksize;
895                         beio->io_len += len;
896                         error = vn_deallocate(be_lun->vn, &off, &len,
897                             0, IO_NOMACCHECK | IO_NODELOCKED, file_data->cred,
898                             NOCRED);
899                         if (error != 0)
900                                 break;
901                 }
902         } else {
903                 /* WRITE_SAME */
904                 off = beio->io_offset;
905                 len = beio->io_len;
906                 error = vn_deallocate(be_lun->vn, &off, &len, 0,
907                     IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, NOCRED);
908         }
909         VOP_UNLOCK(be_lun->vn);
910         vn_finished_write(mp);
911
912         mtx_lock(&be_lun->io_lock);
913         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
914             beio->ds_tag_type, beio->ds_trans_type,
915             /*now*/ NULL, /*then*/&beio->ds_t0);
916         mtx_unlock(&be_lun->io_lock);
917
918         /*
919          * If we got an error, set the sense data to "MEDIUM ERROR" and
920          * return the I/O to the user.
921          */
922         switch (error) {
923         case 0:
924                 ctl_set_success(&io->scsiio);
925                 break;
926         case ENOSPC:
927         case EDQUOT:
928                 ctl_set_space_alloc_fail(&io->scsiio);
929                 break;
930         case EROFS:
931         case EACCES:
932                 ctl_set_hw_write_protected(&io->scsiio);
933                 break;
934         default:
935                 ctl_set_medium_error(&io->scsiio, false);
936         }
937         ctl_complete_beio(beio);
938 }
939
940 static void
941 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
942                            struct ctl_be_block_io *beio)
943 {
944         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
945         union ctl_io *io;
946         struct cdevsw *csw;
947         struct cdev *dev;
948         struct uio xuio;
949         struct iovec *xiovec;
950         int error, flags, i, ref;
951
952         DPRINTF("entered\n");
953
954         io = beio->io;
955         flags = 0;
956         if (ARGS(io)->flags & CTL_LLF_DPO)
957                 flags |= IO_DIRECT;
958         if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
959                 flags |= IO_SYNC;
960
961         bzero(&xuio, sizeof(xuio));
962         if (beio->bio_cmd == BIO_READ) {
963                 SDT_PROBE0(cbb, , read, file_start);
964                 xuio.uio_rw = UIO_READ;
965         } else {
966                 SDT_PROBE0(cbb, , write, file_start);
967                 xuio.uio_rw = UIO_WRITE;
968         }
969         xuio.uio_offset = beio->io_offset;
970         xuio.uio_resid = beio->io_len;
971         xuio.uio_segflg = UIO_SYSSPACE;
972         xuio.uio_iov = beio->xiovecs;
973         xuio.uio_iovcnt = beio->num_segs;
974         xuio.uio_td = curthread;
975
976         for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
977                 xiovec->iov_base = beio->sg_segs[i].addr;
978                 xiovec->iov_len = beio->sg_segs[i].len;
979         }
980
981         binuptime(&beio->ds_t0);
982         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
983
984         csw = devvn_refthread(be_lun->vn, &dev, &ref);
985         if (csw) {
986                 if (beio->bio_cmd == BIO_READ) {
987                         if (beio->beio_cont == NULL &&
988                             cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
989                                 ctl_serseq_done(io);
990                         error = csw->d_read(dev, &xuio, flags);
991                 } else
992                         error = csw->d_write(dev, &xuio, flags);
993                 dev_relthread(dev, ref);
994         } else
995                 error = ENXIO;
996
997         if (beio->bio_cmd == BIO_READ)
998                 SDT_PROBE0(cbb, , read, file_done);
999         else
1000                 SDT_PROBE0(cbb, , write, file_done);
1001
1002         mtx_lock(&be_lun->io_lock);
1003         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
1004             beio->ds_tag_type, beio->ds_trans_type,
1005             /*now*/ NULL, /*then*/&beio->ds_t0);
1006         mtx_unlock(&be_lun->io_lock);
1007
1008         /*
1009          * If we got an error, set the sense data to "MEDIUM ERROR" and
1010          * return the I/O to the user.
1011          */
1012         if (error != 0) {
1013                 if (error == ENOSPC || error == EDQUOT) {
1014                         ctl_set_space_alloc_fail(&io->scsiio);
1015                 } else if (error == EROFS || error == EACCES) {
1016                         ctl_set_hw_write_protected(&io->scsiio);
1017                 } else {
1018                         ctl_set_medium_error(&io->scsiio,
1019                             beio->bio_cmd == BIO_READ);
1020                 }
1021                 ctl_complete_beio(beio);
1022                 return;
1023         }
1024
1025         /*
1026          * If this is a write or a verify, we're all done.
1027          * If this is a read, we can now send the data to the user.
1028          */
1029         if ((beio->bio_cmd == BIO_WRITE) ||
1030             (ARGS(io)->flags & CTL_LLF_VERIFY)) {
1031                 ctl_set_success(&io->scsiio);
1032                 ctl_complete_beio(beio);
1033         } else {
1034                 if ((ARGS(io)->flags & CTL_LLF_READ) &&
1035                     beio->beio_cont == NULL) {
1036                         ctl_set_success(&io->scsiio);
1037                         if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
1038                                 ctl_serseq_done(io);
1039                 }
1040                 ctl_datamove(io);
1041         }
1042 }
1043
1044 static void
1045 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
1046                         struct ctl_be_block_io *beio)
1047 {
1048         union ctl_io *io = beio->io;
1049         struct cdevsw *csw;
1050         struct cdev *dev;
1051         struct ctl_lba_len_flags *lbalen = ARGS(io);
1052         struct scsi_get_lba_status_data *data;
1053         off_t roff, off;
1054         int error, ref, status;
1055
1056         DPRINTF("entered\n");
1057
1058         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1059         if (csw == NULL) {
1060                 status = 0;     /* unknown up to the end */
1061                 off = be_lun->size_bytes;
1062                 goto done;
1063         }
1064         off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
1065         error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
1066             curthread);
1067         if (error == 0 && off > roff)
1068                 status = 0;     /* mapped up to off */
1069         else {
1070                 error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
1071                     curthread);
1072                 if (error == 0 && off > roff)
1073                         status = 1;     /* deallocated up to off */
1074                 else {
1075                         status = 0;     /* unknown up to the end */
1076                         off = be_lun->size_bytes;
1077                 }
1078         }
1079         dev_relthread(dev, ref);
1080
1081 done:
1082         data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
1083         scsi_u64to8b(lbalen->lba, data->descr[0].addr);
1084         scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
1085             lbalen->lba), data->descr[0].length);
1086         data->descr[0].status = status;
1087
1088         ctl_complete_beio(beio);
1089 }
1090
1091 static void
1092 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
1093                        struct ctl_be_block_io *beio)
1094 {
1095         struct bio *bio;
1096         struct cdevsw *csw;
1097         struct cdev *dev;
1098         int ref;
1099
1100         DPRINTF("entered\n");
1101
1102         /* This can't fail, it's a blocking allocation. */
1103         bio = g_alloc_bio();
1104
1105         bio->bio_cmd        = BIO_FLUSH;
1106         bio->bio_offset     = 0;
1107         bio->bio_data       = 0;
1108         bio->bio_done       = ctl_be_block_biodone;
1109         bio->bio_caller1    = beio;
1110         bio->bio_pblkno     = 0;
1111
1112         /*
1113          * We don't need to acquire the LUN lock here, because we are only
1114          * sending one bio, and so there is no other context to synchronize
1115          * with.
1116          */
1117         beio->num_bios_sent = 1;
1118         beio->send_complete = 1;
1119
1120         binuptime(&beio->ds_t0);
1121         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1122
1123         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1124         if (csw) {
1125                 bio->bio_dev = dev;
1126                 csw->d_strategy(bio);
1127                 dev_relthread(dev, ref);
1128         } else {
1129                 bio->bio_error = ENXIO;
1130                 ctl_be_block_biodone(bio);
1131         }
1132 }
1133
1134 static void
1135 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
1136                        struct ctl_be_block_io *beio,
1137                        uint64_t off, uint64_t len, int last)
1138 {
1139         struct bio *bio;
1140         uint64_t maxlen;
1141         struct cdevsw *csw;
1142         struct cdev *dev;
1143         int ref;
1144
1145         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1146         maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
1147         while (len > 0) {
1148                 bio = g_alloc_bio();
1149                 bio->bio_cmd        = BIO_DELETE;
1150                 bio->bio_dev        = dev;
1151                 bio->bio_offset     = off;
1152                 bio->bio_length     = MIN(len, maxlen);
1153                 bio->bio_data       = 0;
1154                 bio->bio_done       = ctl_be_block_biodone;
1155                 bio->bio_caller1    = beio;
1156                 bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;
1157
1158                 off += bio->bio_length;
1159                 len -= bio->bio_length;
1160
1161                 mtx_lock(&be_lun->io_lock);
1162                 beio->num_bios_sent++;
1163                 if (last && len == 0)
1164                         beio->send_complete = 1;
1165                 mtx_unlock(&be_lun->io_lock);
1166
1167                 if (csw) {
1168                         csw->d_strategy(bio);
1169                 } else {
1170                         bio->bio_error = ENXIO;
1171                         ctl_be_block_biodone(bio);
1172                 }
1173         }
1174         if (csw)
1175                 dev_relthread(dev, ref);
1176 }
1177
1178 static void
1179 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
1180                        struct ctl_be_block_io *beio)
1181 {
1182         union ctl_io *io;
1183         struct ctl_ptr_len_flags *ptrlen;
1184         struct scsi_unmap_desc *buf, *end;
1185         uint64_t len;
1186
1187         io = beio->io;
1188
1189         DPRINTF("entered\n");
1190
1191         binuptime(&beio->ds_t0);
1192         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1193
1194         if (beio->io_offset == -1) {
1195                 beio->io_len = 0;
1196                 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1197                 buf = (struct scsi_unmap_desc *)ptrlen->ptr;
1198                 end = buf + ptrlen->len / sizeof(*buf);
1199                 for (; buf < end; buf++) {
1200                         len = (uint64_t)scsi_4btoul(buf->length) *
1201                             be_lun->cbe_lun.blocksize;
1202                         beio->io_len += len;
1203                         ctl_be_block_unmap_dev_range(be_lun, beio,
1204                             scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
1205                             len, (end - buf < 2) ? TRUE : FALSE);
1206                 }
1207         } else
1208                 ctl_be_block_unmap_dev_range(be_lun, beio,
1209                     beio->io_offset, beio->io_len, TRUE);
1210 }
1211
1212 static void
1213 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
1214                           struct ctl_be_block_io *beio)
1215 {
1216         TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
1217         struct bio *bio;
1218         struct cdevsw *csw;
1219         struct cdev *dev;
1220         off_t cur_offset;
1221         int i, max_iosize, ref;
1222
1223         DPRINTF("entered\n");
1224         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1225
1226         /*
1227          * We have to limit our I/O size to the maximum supported by the
1228          * backend device.
1229          */
1230         if (csw) {
1231                 max_iosize = dev->si_iosize_max;
1232                 if (max_iosize <= 0)
1233                         max_iosize = DFLTPHYS;
1234         } else
1235                 max_iosize = maxphys;
1236
1237         cur_offset = beio->io_offset;
1238         for (i = 0; i < beio->num_segs; i++) {
1239                 size_t cur_size;
1240                 uint8_t *cur_ptr;
1241
1242                 cur_size = beio->sg_segs[i].len;
1243                 cur_ptr = beio->sg_segs[i].addr;
1244
1245                 while (cur_size > 0) {
1246                         /* This can't fail, it's a blocking allocation. */
1247                         bio = g_alloc_bio();
1248
1249                         KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
1250
1251                         bio->bio_cmd = beio->bio_cmd;
1252                         bio->bio_dev = dev;
1253                         bio->bio_caller1 = beio;
1254                         bio->bio_length = min(cur_size, max_iosize);
1255                         bio->bio_offset = cur_offset;
1256                         bio->bio_data = cur_ptr;
1257                         bio->bio_done = ctl_be_block_biodone;
1258                         bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;
1259
1260                         cur_offset += bio->bio_length;
1261                         cur_ptr += bio->bio_length;
1262                         cur_size -= bio->bio_length;
1263
1264                         TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
1265                         beio->num_bios_sent++;
1266                 }
1267         }
1268         beio->send_complete = 1;
1269         binuptime(&beio->ds_t0);
1270         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1271
1272         /*
1273          * Fire off all allocated requests!
1274          */
1275         while ((bio = TAILQ_FIRST(&queue)) != NULL) {
1276                 TAILQ_REMOVE(&queue, bio, bio_queue);
1277                 if (csw)
1278                         csw->d_strategy(bio);
1279                 else {
1280                         bio->bio_error = ENXIO;
1281                         ctl_be_block_biodone(bio);
1282                 }
1283         }
1284         if (csw)
1285                 dev_relthread(dev, ref);
1286 }
1287
1288 static uint64_t
1289 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1290 {
1291         struct diocgattr_arg    arg;
1292         struct cdevsw *csw;
1293         struct cdev *dev;
1294         int error, ref;
1295
1296         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1297         if (csw == NULL)
1298                 return (UINT64_MAX);
1299         strlcpy(arg.name, attrname, sizeof(arg.name));
1300         arg.len = sizeof(arg.value.off);
1301         if (csw->d_ioctl) {
1302                 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1303                     curthread);
1304         } else
1305                 error = ENODEV;
1306         dev_relthread(dev, ref);
1307         if (error != 0)
1308                 return (UINT64_MAX);
1309         return (arg.value.off);
1310 }
1311
1312 static void
1313 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1314                             union ctl_io *io)
1315 {
1316         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1317         struct ctl_be_block_io *beio;
1318         struct ctl_lba_len_flags *lbalen;
1319
1320         DPRINTF("entered\n");
1321         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1322         lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1323
1324         beio->io_len = lbalen->len * cbe_lun->blocksize;
1325         beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1326         beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1327         beio->bio_cmd = BIO_FLUSH;
1328         beio->ds_trans_type = DEVSTAT_NO_DATA;
1329         DPRINTF("SYNC\n");
1330         be_lun->lun_flush(be_lun, beio);
1331 }
1332
1333 static void
1334 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1335 {
1336         union ctl_io *io;
1337
1338         io = beio->io;
1339         ctl_free_beio(beio);
1340         if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1341             ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1342              (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1343                 ctl_config_write_done(io);
1344                 return;
1345         }
1346
1347         ctl_be_block_config_write(io);
1348 }
1349
1350 static void
1351 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
1352                             union ctl_io *io)
1353 {
1354         struct ctl_be_block_softc *softc = be_lun->softc;
1355         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1356         struct ctl_be_block_io *beio;
1357         struct ctl_lba_len_flags *lbalen;
1358         uint64_t len_left, lba;
1359         uint32_t pb, pbo, adj;
1360         int i, seglen;
1361         uint8_t *buf, *end;
1362
1363         DPRINTF("entered\n");
1364
1365         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1366         lbalen = ARGS(io);
1367
1368         if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
1369             (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
1370                 ctl_free_beio(beio);
1371                 ctl_set_invalid_field(&io->scsiio,
1372                                       /*sks_valid*/ 1,
1373                                       /*command*/ 1,
1374                                       /*field*/ 1,
1375                                       /*bit_valid*/ 0,
1376                                       /*bit*/ 0);
1377                 ctl_config_write_done(io);
1378                 return;
1379         }
1380
1381         if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
1382                 beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1383                 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
1384                 beio->bio_cmd = BIO_DELETE;
1385                 beio->ds_trans_type = DEVSTAT_FREE;
1386
1387                 be_lun->unmap(be_lun, beio);
1388                 return;
1389         }
1390
1391         beio->bio_cmd = BIO_WRITE;
1392         beio->ds_trans_type = DEVSTAT_WRITE;
1393
1394         DPRINTF("WRITE SAME at LBA %jx len %u\n",
1395                (uintmax_t)lbalen->lba, lbalen->len);
1396
1397         pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
1398         if (be_lun->cbe_lun.pblockoff > 0)
1399                 pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
1400         else
1401                 pbo = 0;
1402         len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
1403         for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
1404                 /*
1405                  * Setup the S/G entry for this chunk.
1406                  */
1407                 seglen = MIN(CTLBLK_MAX_SEG, len_left);
1408                 if (pb > cbe_lun->blocksize) {
1409                         adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
1410                             seglen - pbo) % pb;
1411                         if (seglen > adj)
1412                                 seglen -= adj;
1413                         else
1414                                 seglen -= seglen % cbe_lun->blocksize;
1415                 } else
1416                         seglen -= seglen % cbe_lun->blocksize;
1417                 ctl_alloc_seg(softc, &beio->sg_segs[i], seglen);
1418
1419                 DPRINTF("segment %d addr %p len %zd\n", i,
1420                         beio->sg_segs[i].addr, beio->sg_segs[i].len);
1421
1422                 beio->num_segs++;
1423                 len_left -= seglen;
1424
1425                 buf = beio->sg_segs[i].addr;
1426                 end = buf + seglen;
1427                 for (; buf < end; buf += cbe_lun->blocksize) {
1428                         if (lbalen->flags & SWS_NDOB) {
1429                                 memset(buf, 0, cbe_lun->blocksize);
1430                         } else {
1431                                 memcpy(buf, io->scsiio.kern_data_ptr,
1432                                     cbe_lun->blocksize);
1433                         }
1434                         if (lbalen->flags & SWS_LBDATA)
1435                                 scsi_ulto4b(lbalen->lba + lba, buf);
1436                         lba++;
1437                 }
1438         }
1439
1440         beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1441         beio->io_len = lba * cbe_lun->blocksize;
1442
1443         /* We can not do all in one run. Correct and schedule rerun. */
1444         if (len_left > 0) {
1445                 lbalen->lba += lba;
1446                 lbalen->len -= lba;
1447                 beio->beio_cont = ctl_be_block_cw_done_ws;
1448         }
1449
1450         be_lun->dispatch(be_lun, beio);
1451 }
1452
1453 static void
1454 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1455                             union ctl_io *io)
1456 {
1457         struct ctl_be_block_io *beio;
1458         struct ctl_ptr_len_flags *ptrlen;
1459
1460         DPRINTF("entered\n");
1461
1462         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1463         ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1464
1465         if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1466                 ctl_free_beio(beio);
1467                 ctl_set_invalid_field(&io->scsiio,
1468                                       /*sks_valid*/ 0,
1469                                       /*command*/ 1,
1470                                       /*field*/ 0,
1471                                       /*bit_valid*/ 0,
1472                                       /*bit*/ 0);
1473                 ctl_config_write_done(io);
1474                 return;
1475         }
1476
1477         beio->io_len = 0;
1478         beio->io_offset = -1;
1479         beio->bio_cmd = BIO_DELETE;
1480         beio->ds_trans_type = DEVSTAT_FREE;
1481         DPRINTF("UNMAP\n");
1482         be_lun->unmap(be_lun, beio);
1483 }
1484
1485 static void
1486 ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1487 {
1488         union ctl_io *io;
1489
1490         io = beio->io;
1491         ctl_free_beio(beio);
1492         ctl_config_read_done(io);
1493 }
1494
1495 static void
1496 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1497                          union ctl_io *io)
1498 {
1499         struct ctl_be_block_io *beio;
1500         struct ctl_be_block_softc *softc;
1501
1502         DPRINTF("entered\n");
1503
1504         softc = be_lun->softc;
1505         beio = ctl_alloc_beio(softc);
1506         beio->io = io;
1507         beio->lun = be_lun;
1508         beio->beio_cont = ctl_be_block_cr_done;
1509         PRIV(io)->ptr = (void *)beio;
1510
1511         switch (io->scsiio.cdb[0]) {
1512         case SERVICE_ACTION_IN:         /* GET LBA STATUS */
1513                 beio->bio_cmd = -1;
1514                 beio->ds_trans_type = DEVSTAT_NO_DATA;
1515                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1516                 beio->io_len = 0;
1517                 if (be_lun->get_lba_status)
1518                         be_lun->get_lba_status(be_lun, beio);
1519                 else
1520                         ctl_be_block_cr_done(beio);
1521                 break;
1522         default:
1523                 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1524                 break;
1525         }
1526 }
1527
1528 static void
1529 ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1530 {
1531         union ctl_io *io;
1532
1533         io = beio->io;
1534         ctl_free_beio(beio);
1535         ctl_config_write_done(io);
1536 }
1537
1538 static void
1539 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1540                          union ctl_io *io)
1541 {
1542         struct ctl_be_block_io *beio;
1543         struct ctl_be_block_softc *softc;
1544
1545         DPRINTF("entered\n");
1546
1547         softc = be_lun->softc;
1548         beio = ctl_alloc_beio(softc);
1549         beio->io = io;
1550         beio->lun = be_lun;
1551         beio->beio_cont = ctl_be_block_cw_done;
1552         switch (io->scsiio.tag_type) {
1553         case CTL_TAG_ORDERED:
1554                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1555                 break;
1556         case CTL_TAG_HEAD_OF_QUEUE:
1557                 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1558                 break;
1559         case CTL_TAG_UNTAGGED:
1560         case CTL_TAG_SIMPLE:
1561         case CTL_TAG_ACA:
1562         default:
1563                 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1564                 break;
1565         }
1566         PRIV(io)->ptr = (void *)beio;
1567
1568         switch (io->scsiio.cdb[0]) {
1569         case SYNCHRONIZE_CACHE:
1570         case SYNCHRONIZE_CACHE_16:
1571                 ctl_be_block_cw_dispatch_sync(be_lun, io);
1572                 break;
1573         case WRITE_SAME_10:
1574         case WRITE_SAME_16:
1575                 ctl_be_block_cw_dispatch_ws(be_lun, io);
1576                 break;
1577         case UNMAP:
1578                 ctl_be_block_cw_dispatch_unmap(be_lun, io);
1579                 break;
1580         default:
1581                 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1582                 break;
1583         }
1584 }
1585
1586 SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
1587 SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
1588 SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
1589 SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");
1590
1591 static void
1592 ctl_be_block_next(struct ctl_be_block_io *beio)
1593 {
1594         struct ctl_be_block_lun *be_lun;
1595         union ctl_io *io;
1596
1597         io = beio->io;
1598         be_lun = beio->lun;
1599         ctl_free_beio(beio);
1600         if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1601             ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1602              (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1603                 ctl_data_submit_done(io);
1604                 return;
1605         }
1606
1607         io->io_hdr.status &= ~CTL_STATUS_MASK;
1608         io->io_hdr.status |= CTL_STATUS_NONE;
1609
1610         mtx_lock(&be_lun->queue_lock);
1611         STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1612         mtx_unlock(&be_lun->queue_lock);
1613         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1614 }
1615
1616 static void
1617 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
1618                            union ctl_io *io)
1619 {
1620         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1621         struct ctl_be_block_io *beio;
1622         struct ctl_be_block_softc *softc;
1623         struct ctl_lba_len_flags *lbalen;
1624         struct ctl_ptr_len_flags *bptrlen;
1625         uint64_t len_left, lbas;
1626         int i;
1627
1628         softc = be_lun->softc;
1629
1630         DPRINTF("entered\n");
1631
1632         lbalen = ARGS(io);
1633         if (lbalen->flags & CTL_LLF_WRITE) {
1634                 SDT_PROBE0(cbb, , write, start);
1635         } else {
1636                 SDT_PROBE0(cbb, , read, start);
1637         }
1638
1639         beio = ctl_alloc_beio(softc);
1640         beio->io = io;
1641         beio->lun = be_lun;
1642         bptrlen = PRIV(io);
1643         bptrlen->ptr = (void *)beio;
1644
1645         switch (io->scsiio.tag_type) {
1646         case CTL_TAG_ORDERED:
1647                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1648                 break;
1649         case CTL_TAG_HEAD_OF_QUEUE:
1650                 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1651                 break;
1652         case CTL_TAG_UNTAGGED:
1653         case CTL_TAG_SIMPLE:
1654         case CTL_TAG_ACA:
1655         default:
1656                 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1657                 break;
1658         }
1659
1660         if (lbalen->flags & CTL_LLF_WRITE) {
1661                 beio->bio_cmd = BIO_WRITE;
1662                 beio->ds_trans_type = DEVSTAT_WRITE;
1663         } else {
1664                 beio->bio_cmd = BIO_READ;
1665                 beio->ds_trans_type = DEVSTAT_READ;
1666         }
1667
1668         DPRINTF("%s at LBA %jx len %u @%ju\n",
1669                (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
1670                (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
1671         lbas = CTLBLK_MAX_IO_SIZE;
1672         if (lbalen->flags & CTL_LLF_COMPARE) {
1673                 beio->two_sglists = 1;
1674                 lbas /= 2;
1675         }
1676         lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
1677         beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
1678         beio->io_len = lbas * cbe_lun->blocksize;
1679         bptrlen->len += lbas;
1680
1681         for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
1682                 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
1683                     i, CTLBLK_MAX_SEGS));
1684
1685                 /*
1686                  * Setup the S/G entry for this chunk.
1687                  */
1688                 ctl_alloc_seg(softc, &beio->sg_segs[i],
1689                     MIN(CTLBLK_MAX_SEG, len_left));
1690
1691                 DPRINTF("segment %d addr %p len %zd\n", i,
1692                         beio->sg_segs[i].addr, beio->sg_segs[i].len);
1693
1694                 /* Set up second segment for compare operation. */
1695                 if (beio->two_sglists) {
1696                         ctl_alloc_seg(softc,
1697                             &beio->sg_segs[i + CTLBLK_HALF_SEGS],
1698                             beio->sg_segs[i].len);
1699                 }
1700
1701                 beio->num_segs++;
1702                 len_left -= beio->sg_segs[i].len;
1703         }
1704         if (bptrlen->len < lbalen->len)
1705                 beio->beio_cont = ctl_be_block_next;
1706         io->scsiio.be_move_done = ctl_be_block_move_done;
1707         /* For compare we have separate S/G lists for read and datamove. */
1708         if (beio->two_sglists)
1709                 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
1710         else
1711                 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
1712         io->scsiio.kern_data_len = beio->io_len;
1713         io->scsiio.kern_sg_entries = beio->num_segs;
1714         io->scsiio.kern_data_ref = ctl_refcnt_beio;
1715         io->scsiio.kern_data_arg = beio;
1716         io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
1717
1718         /*
1719          * For the read case, we need to read the data into our buffers and
1720          * then we can send it back to the user.  For the write case, we
1721          * need to get the data from the user first.
1722          */
1723         if (beio->bio_cmd == BIO_READ) {
1724                 SDT_PROBE0(cbb, , read, alloc_done);
1725                 be_lun->dispatch(be_lun, beio);
1726         } else {
1727                 SDT_PROBE0(cbb, , write, alloc_done);
1728                 ctl_datamove(io);
1729         }
1730 }
1731
1732 static void
1733 ctl_be_block_worker(void *context, int pending)
1734 {
1735         struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
1736         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1737         union ctl_io *io;
1738         struct ctl_be_block_io *beio;
1739
1740         DPRINTF("entered\n");
1741         /*
1742          * Fetch and process I/Os from all queues.  If we detect LUN
1743          * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race,
1744          * so make response maximally opaque to not confuse initiator.
1745          */
1746         for (;;) {
1747                 mtx_lock(&be_lun->queue_lock);
1748                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1749                 if (io != NULL) {
1750                         DPRINTF("datamove queue\n");
1751                         STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links);
1752                         mtx_unlock(&be_lun->queue_lock);
1753                         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1754                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1755                                 ctl_set_busy(&io->scsiio);
1756                                 ctl_complete_beio(beio);
1757                                 continue;
1758                         }
1759                         be_lun->dispatch(be_lun, beio);
1760                         continue;
1761                 }
1762                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1763                 if (io != NULL) {
1764                         DPRINTF("config write queue\n");
1765                         STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links);
1766                         mtx_unlock(&be_lun->queue_lock);
1767                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1768                                 ctl_set_busy(&io->scsiio);
1769                                 ctl_config_write_done(io);
1770                                 continue;
1771                         }
1772                         ctl_be_block_cw_dispatch(be_lun, io);
1773                         continue;
1774                 }
1775                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
1776                 if (io != NULL) {
1777                         DPRINTF("config read queue\n");
1778                         STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links);
1779                         mtx_unlock(&be_lun->queue_lock);
1780                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1781                                 ctl_set_busy(&io->scsiio);
1782                                 ctl_config_read_done(io);
1783                                 continue;
1784                         }
1785                         ctl_be_block_cr_dispatch(be_lun, io);
1786                         continue;
1787                 }
1788                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1789                 if (io != NULL) {
1790                         DPRINTF("input queue\n");
1791                         STAILQ_REMOVE_HEAD(&be_lun->input_queue, links);
1792                         mtx_unlock(&be_lun->queue_lock);
1793                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1794                                 ctl_set_busy(&io->scsiio);
1795                                 ctl_data_submit_done(io);
1796                                 continue;
1797                         }
1798                         ctl_be_block_dispatch(be_lun, io);
1799                         continue;
1800                 }
1801
1802                 /*
1803                  * If we get here, there is no work left in the queues, so
1804                  * just break out and let the task queue go to sleep.
1805                  */
1806                 mtx_unlock(&be_lun->queue_lock);
1807                 break;
1808         }
1809 }
1810
1811 /*
1812  * Entry point from CTL to the backend for I/O.  We queue everything to a
1813  * work thread, so this just puts the I/O on a queue and wakes up the
1814  * thread.
1815  */
1816 static int
1817 ctl_be_block_submit(union ctl_io *io)
1818 {
1819         struct ctl_be_block_lun *be_lun;
1820
1821         DPRINTF("entered\n");
1822
1823         be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
1824
1825         KASSERT(io->io_hdr.io_type == CTL_IO_SCSI,
1826             ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type));
1827
1828         PRIV(io)->len = 0;
1829
1830         mtx_lock(&be_lun->queue_lock);
1831         STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1832         mtx_unlock(&be_lun->queue_lock);
1833         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1834
1835         return (CTL_RETVAL_COMPLETE);
1836 }
1837
1838 static int
1839 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1840                         int flag, struct thread *td)
1841 {
1842         struct ctl_be_block_softc *softc = &backend_block_softc;
1843         int error;
1844
1845         error = 0;
1846         switch (cmd) {
1847         case CTL_LUN_REQ: {
1848                 struct ctl_lun_req *lun_req;
1849
1850                 lun_req = (struct ctl_lun_req *)addr;
1851
1852                 switch (lun_req->reqtype) {
1853                 case CTL_LUNREQ_CREATE:
1854                         error = ctl_be_block_create(softc, lun_req);
1855                         break;
1856                 case CTL_LUNREQ_RM:
1857                         error = ctl_be_block_rm(softc, lun_req);
1858                         break;
1859                 case CTL_LUNREQ_MODIFY:
1860                         error = ctl_be_block_modify(softc, lun_req);
1861                         break;
1862                 default:
1863                         lun_req->status = CTL_LUN_ERROR;
1864                         snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1865                                  "invalid LUN request type %d",
1866                                  lun_req->reqtype);
1867                         break;
1868                 }
1869                 break;
1870         }
1871         default:
1872                 error = ENOTTY;
1873                 break;
1874         }
1875
1876         return (error);
1877 }
1878
1879 static int
1880 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1881 {
1882         struct ctl_be_lun *cbe_lun;
1883         struct ctl_be_block_filedata *file_data;
1884         struct ctl_lun_create_params *params;
1885         const char                   *value;
1886         struct vattr                  vattr;
1887         off_t                         ps, pss, po, pos, us, uss, uo, uos;
1888         int                           error;
1889         long                          pconf;
1890
1891         cbe_lun = &be_lun->cbe_lun;
1892         file_data = &be_lun->backend.file;
1893         params = &be_lun->params;
1894
1895         be_lun->dev_type = CTL_BE_BLOCK_FILE;
1896         be_lun->dispatch = ctl_be_block_dispatch_file;
1897         be_lun->lun_flush = ctl_be_block_flush_file;
1898         be_lun->get_lba_status = ctl_be_block_gls_file;
1899         be_lun->getattr = ctl_be_block_getattr_file;
1900         be_lun->unmap = ctl_be_block_unmap_file;
1901         cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
1902
1903         error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1904         if (error != 0) {
1905                 snprintf(req->error_str, sizeof(req->error_str),
1906                          "error calling VOP_GETATTR() for file %s",
1907                          be_lun->dev_path);
1908                 return (error);
1909         }
1910
1911         error = VOP_PATHCONF(be_lun->vn, _PC_DEALLOC_PRESENT, &pconf);
1912         if (error != 0) {
1913                 snprintf(req->error_str, sizeof(req->error_str),
1914                     "error calling VOP_PATHCONF() for file %s",
1915                     be_lun->dev_path);
1916                 return (error);
1917         }
1918         if (pconf == 1)
1919                 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
1920
1921         file_data->cred = crhold(curthread->td_ucred);
1922         if (params->lun_size_bytes != 0)
1923                 be_lun->size_bytes = params->lun_size_bytes;
1924         else
1925                 be_lun->size_bytes = vattr.va_size;
1926
1927         /*
1928          * For files we can use any logical block size.  Prefer 512 bytes
1929          * for compatibility reasons.  If file's vattr.va_blocksize
1930          * (preferred I/O block size) is bigger and multiple to chosen
1931          * logical block size -- report it as physical block size.
1932          */
1933         if (params->blocksize_bytes != 0)
1934                 cbe_lun->blocksize = params->blocksize_bytes;
1935         else if (cbe_lun->lun_type == T_CDROM)
1936                 cbe_lun->blocksize = 2048;
1937         else
1938                 cbe_lun->blocksize = 512;
1939         be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
1940         cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
1941             0 : (be_lun->size_blocks - 1);
1942
1943         us = ps = vattr.va_blocksize;
1944         uo = po = 0;
1945
1946         value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
1947         if (value != NULL)
1948                 ctl_expand_number(value, &ps);
1949         value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
1950         if (value != NULL)
1951                 ctl_expand_number(value, &po);
1952         pss = ps / cbe_lun->blocksize;
1953         pos = po / cbe_lun->blocksize;
1954         if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
1955             ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
1956                 cbe_lun->pblockexp = fls(pss) - 1;
1957                 cbe_lun->pblockoff = (pss - pos) % pss;
1958         }
1959
1960         value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
1961         if (value != NULL)
1962                 ctl_expand_number(value, &us);
1963         value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
1964         if (value != NULL)
1965                 ctl_expand_number(value, &uo);
1966         uss = us / cbe_lun->blocksize;
1967         uos = uo / cbe_lun->blocksize;
1968         if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
1969             ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
1970                 cbe_lun->ublockexp = fls(uss) - 1;
1971                 cbe_lun->ublockoff = (uss - uos) % uss;
1972         }
1973
1974         /*
1975          * Sanity check.  The media size has to be at least one
1976          * sector long.
1977          */
1978         if (be_lun->size_bytes < cbe_lun->blocksize) {
1979                 error = EINVAL;
1980                 snprintf(req->error_str, sizeof(req->error_str),
1981                          "file %s size %ju < block size %u", be_lun->dev_path,
1982                          (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
1983         }
1984
1985         cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
1986         return (error);
1987 }
1988
1989 static int
1990 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1991 {
1992         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1993         struct ctl_lun_create_params *params;
1994         struct cdevsw                *csw;
1995         struct cdev                  *dev;
1996         const char                   *value;
1997         int                           error, atomic, maxio, ref, unmap, tmp;
1998         off_t                         ps, pss, po, pos, us, uss, uo, uos, otmp;
1999
2000         params = &be_lun->params;
2001
2002         be_lun->dev_type = CTL_BE_BLOCK_DEV;
2003         csw = devvn_refthread(be_lun->vn, &dev, &ref);
2004         if (csw == NULL)
2005                 return (ENXIO);
2006         if (strcmp(csw->d_name, "zvol") == 0) {
2007                 be_lun->dispatch = ctl_be_block_dispatch_zvol;
2008                 be_lun->get_lba_status = ctl_be_block_gls_zvol;
2009                 atomic = maxio = CTLBLK_MAX_IO_SIZE;
2010         } else {
2011                 be_lun->dispatch = ctl_be_block_dispatch_dev;
2012                 be_lun->get_lba_status = NULL;
2013                 atomic = 0;
2014                 maxio = dev->si_iosize_max;
2015                 if (maxio <= 0)
2016                         maxio = DFLTPHYS;
2017                 if (maxio > CTLBLK_MAX_SEG)
2018                         maxio = CTLBLK_MAX_SEG;
2019         }
2020         be_lun->lun_flush = ctl_be_block_flush_dev;
2021         be_lun->getattr = ctl_be_block_getattr_dev;
2022         be_lun->unmap = ctl_be_block_unmap_dev;
2023
2024         if (!csw->d_ioctl) {
2025                 dev_relthread(dev, ref);
2026                 snprintf(req->error_str, sizeof(req->error_str),
2027                          "no d_ioctl for device %s!", be_lun->dev_path);
2028                 return (ENODEV);
2029         }
2030
2031         error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
2032                                curthread);
2033         if (error) {
2034                 dev_relthread(dev, ref);
2035                 snprintf(req->error_str, sizeof(req->error_str),
2036                          "error %d returned for DIOCGSECTORSIZE ioctl "
2037                          "on %s!", error, be_lun->dev_path);
2038                 return (error);
2039         }
2040
2041         /*
2042          * If the user has asked for a blocksize that is greater than the
2043          * backing device's blocksize, we can do it only if the blocksize
2044          * the user is asking for is an even multiple of the underlying 
2045          * device's blocksize.
2046          */
2047         if ((params->blocksize_bytes != 0) &&
2048             (params->blocksize_bytes >= tmp)) {
2049                 if (params->blocksize_bytes % tmp == 0) {
2050                         cbe_lun->blocksize = params->blocksize_bytes;
2051                 } else {
2052                         dev_relthread(dev, ref);
2053                         snprintf(req->error_str, sizeof(req->error_str),
2054                                  "requested blocksize %u is not an even "
2055                                  "multiple of backing device blocksize %u",
2056                                  params->blocksize_bytes, tmp);
2057                         return (EINVAL);
2058                 }
2059         } else if (params->blocksize_bytes != 0) {
2060                 dev_relthread(dev, ref);
2061                 snprintf(req->error_str, sizeof(req->error_str),
2062                          "requested blocksize %u < backing device "
2063                          "blocksize %u", params->blocksize_bytes, tmp);
2064                 return (EINVAL);
2065         } else if (cbe_lun->lun_type == T_CDROM)
2066                 cbe_lun->blocksize = MAX(tmp, 2048);
2067         else
2068                 cbe_lun->blocksize = tmp;
2069
2070         error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
2071                              curthread);
2072         if (error) {
2073                 dev_relthread(dev, ref);
2074                 snprintf(req->error_str, sizeof(req->error_str),
2075                          "error %d returned for DIOCGMEDIASIZE "
2076                          " ioctl on %s!", error,
2077                          be_lun->dev_path);
2078                 return (error);
2079         }
2080
2081         if (params->lun_size_bytes != 0) {
2082                 if (params->lun_size_bytes > otmp) {
2083                         dev_relthread(dev, ref);
2084                         snprintf(req->error_str, sizeof(req->error_str),
2085                                  "requested LUN size %ju > backing device "
2086                                  "size %ju",
2087                                  (uintmax_t)params->lun_size_bytes,
2088                                  (uintmax_t)otmp);
2089                         return (EINVAL);
2090                 }
2091
2092                 be_lun->size_bytes = params->lun_size_bytes;
2093         } else
2094                 be_lun->size_bytes = otmp;
2095         be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2096         cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2097             0 : (be_lun->size_blocks - 1);
2098
2099         error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
2100             curthread);
2101         if (error)
2102                 ps = po = 0;
2103         else {
2104                 error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
2105                     FREAD, curthread);
2106                 if (error)
2107                         po = 0;
2108         }
2109         us = ps;
2110         uo = po;
2111
2112         value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
2113         if (value != NULL)
2114                 ctl_expand_number(value, &ps);
2115         value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
2116         if (value != NULL)
2117                 ctl_expand_number(value, &po);
2118         pss = ps / cbe_lun->blocksize;
2119         pos = po / cbe_lun->blocksize;
2120         if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
2121             ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
2122                 cbe_lun->pblockexp = fls(pss) - 1;
2123                 cbe_lun->pblockoff = (pss - pos) % pss;
2124         }
2125
2126         value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
2127         if (value != NULL)
2128                 ctl_expand_number(value, &us);
2129         value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
2130         if (value != NULL)
2131                 ctl_expand_number(value, &uo);
2132         uss = us / cbe_lun->blocksize;
2133         uos = uo / cbe_lun->blocksize;
2134         if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
2135             ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
2136                 cbe_lun->ublockexp = fls(uss) - 1;
2137                 cbe_lun->ublockoff = (uss - uos) % uss;
2138         }
2139
2140         cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
2141         cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
2142
2143         if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
2144                 unmap = 1;
2145         } else {
2146                 struct diocgattr_arg    arg;
2147
2148                 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
2149                 arg.len = sizeof(arg.value.i);
2150                 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
2151                     curthread);
2152                 unmap = (error == 0) ? arg.value.i : 0;
2153         }
2154         value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
2155         if (value != NULL)
2156                 unmap = (strcmp(value, "on") == 0);
2157         if (unmap)
2158                 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
2159         else
2160                 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
2161
2162         dev_relthread(dev, ref);
2163         return (0);
2164 }
2165
2166 static int
2167 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2168 {
2169         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2170         int flags;
2171
2172         if (be_lun->vn) {
2173                 flags = FREAD;
2174                 if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2175                         flags |= FWRITE;
2176                 (void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2177                 be_lun->vn = NULL;
2178
2179                 switch (be_lun->dev_type) {
2180                 case CTL_BE_BLOCK_DEV:
2181                         break;
2182                 case CTL_BE_BLOCK_FILE:
2183                         if (be_lun->backend.file.cred != NULL) {
2184                                 crfree(be_lun->backend.file.cred);
2185                                 be_lun->backend.file.cred = NULL;
2186                         }
2187                         break;
2188                 case CTL_BE_BLOCK_NONE:
2189                         break;
2190                 default:
2191                         panic("Unexpected backend type %d", be_lun->dev_type);
2192                         break;
2193                 }
2194                 be_lun->dev_type = CTL_BE_BLOCK_NONE;
2195         }
2196         return (0);
2197 }
2198
2199 static int
2200 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
2201 {
2202         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2203         struct nameidata nd;
2204         const char      *value;
2205         int              error, flags;
2206
2207         error = 0;
2208         if (rootvnode == NULL) {
2209                 snprintf(req->error_str, sizeof(req->error_str),
2210                          "Root filesystem is not mounted");
2211                 return (1);
2212         }
2213         pwd_ensure_dirs();
2214
2215         value = dnvlist_get_string(cbe_lun->options, "file", NULL);
2216         if (value == NULL) {
2217                 snprintf(req->error_str, sizeof(req->error_str),
2218                          "no file argument specified");
2219                 return (1);
2220         }
2221         free(be_lun->dev_path, M_CTLBLK);
2222         be_lun->dev_path = strdup(value, M_CTLBLK);
2223
2224         flags = FREAD;
2225         value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
2226         if (value != NULL) {
2227                 if (strcmp(value, "on") != 0)
2228                         flags |= FWRITE;
2229         } else if (cbe_lun->lun_type == T_DIRECT)
2230                 flags |= FWRITE;
2231
2232 again:
2233         NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path);
2234         error = vn_open(&nd, &flags, 0, NULL);
2235         if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
2236                 flags &= ~FWRITE;
2237                 goto again;
2238         }
2239         if (error) {
2240                 /*
2241                  * This is the only reasonable guess we can make as far as
2242                  * path if the user doesn't give us a fully qualified path.
2243                  * If they want to specify a file, they need to specify the
2244                  * full path.
2245                  */
2246                 if (be_lun->dev_path[0] != '/') {
2247                         char *dev_name;
2248
2249                         asprintf(&dev_name, M_CTLBLK, "/dev/%s",
2250                                 be_lun->dev_path);
2251                         free(be_lun->dev_path, M_CTLBLK);
2252                         be_lun->dev_path = dev_name;
2253                         goto again;
2254                 }
2255                 snprintf(req->error_str, sizeof(req->error_str),
2256                     "error opening %s: %d", be_lun->dev_path, error);
2257                 return (error);
2258         }
2259         if (flags & FWRITE)
2260                 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
2261         else
2262                 cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
2263
2264         NDFREE_PNBUF(&nd);
2265         be_lun->vn = nd.ni_vp;
2266
2267         /* We only support disks and files. */
2268         if (vn_isdisk_error(be_lun->vn, &error)) {
2269                 error = ctl_be_block_open_dev(be_lun, req);
2270         } else if (be_lun->vn->v_type == VREG) {
2271                 error = ctl_be_block_open_file(be_lun, req);
2272         } else {
2273                 error = EINVAL;
2274                 snprintf(req->error_str, sizeof(req->error_str),
2275                          "%s is not a disk or plain file", be_lun->dev_path);
2276         }
2277         VOP_UNLOCK(be_lun->vn);
2278
2279         if (error != 0)
2280                 ctl_be_block_close(be_lun);
2281         cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2282         if (be_lun->dispatch != ctl_be_block_dispatch_dev)
2283                 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
2284         value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
2285         if (value != NULL && strcmp(value, "on") == 0)
2286                 cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
2287         else if (value != NULL && strcmp(value, "read") == 0)
2288                 cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2289         else if (value != NULL && strcmp(value, "soft") == 0)
2290                 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
2291         else if (value != NULL && strcmp(value, "off") == 0)
2292                 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2293         return (0);
2294 }
2295
2296 static int
2297 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2298 {
2299         struct ctl_be_lun *cbe_lun;
2300         struct ctl_be_block_lun *be_lun;
2301         struct ctl_lun_create_params *params;
2302         char num_thread_str[16];
2303         char tmpstr[32];
2304         const char *value;
2305         int retval, num_threads;
2306         int tmp_num_threads;
2307
2308         params = &req->reqdata.create;
2309         retval = 0;
2310         req->status = CTL_LUN_OK;
2311
2312         be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2313         cbe_lun = &be_lun->cbe_lun;
2314         be_lun->params = req->reqdata.create;
2315         be_lun->softc = softc;
2316         STAILQ_INIT(&be_lun->input_queue);
2317         STAILQ_INIT(&be_lun->config_read_queue);
2318         STAILQ_INIT(&be_lun->config_write_queue);
2319         STAILQ_INIT(&be_lun->datamove_queue);
2320         mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF);
2321         mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF);
2322         cbe_lun->options = nvlist_clone(req->args_nvl);
2323
2324         if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2325                 cbe_lun->lun_type = params->device_type;
2326         else
2327                 cbe_lun->lun_type = T_DIRECT;
2328         be_lun->flags = 0;
2329         cbe_lun->flags = 0;
2330         value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2331         if (value != NULL) {
2332                 if (strcmp(value, "primary") == 0)
2333                         cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2334         } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2335                 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2336
2337         if (cbe_lun->lun_type == T_DIRECT ||
2338             cbe_lun->lun_type == T_CDROM) {
2339                 be_lun->size_bytes = params->lun_size_bytes;
2340                 if (params->blocksize_bytes != 0)
2341                         cbe_lun->blocksize = params->blocksize_bytes;
2342                 else if (cbe_lun->lun_type == T_CDROM)
2343                         cbe_lun->blocksize = 2048;
2344                 else
2345                         cbe_lun->blocksize = 512;
2346                 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2347                 cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2348                     0 : (be_lun->size_blocks - 1);
2349
2350                 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2351                     control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2352                         retval = ctl_be_block_open(be_lun, req);
2353                         if (retval != 0) {
2354                                 retval = 0;
2355                                 req->status = CTL_LUN_WARNING;
2356                         }
2357                 }
2358                 num_threads = cbb_num_threads;
2359         } else {
2360                 num_threads = 1;
2361         }
2362
2363         value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL);
2364         if (value != NULL) {
2365                 tmp_num_threads = strtol(value, NULL, 0);
2366
2367                 /*
2368                  * We don't let the user specify less than one
2369                  * thread, but hope he's clueful enough not to
2370                  * specify 1000 threads.
2371                  */
2372                 if (tmp_num_threads < 1) {
2373                         snprintf(req->error_str, sizeof(req->error_str),
2374                                  "invalid number of threads %s",
2375                                  num_thread_str);
2376                         goto bailout_error;
2377                 }
2378                 num_threads = tmp_num_threads;
2379         }
2380
2381         if (be_lun->vn == NULL)
2382                 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2383         /* Tell the user the blocksize we ended up using */
2384         params->lun_size_bytes = be_lun->size_bytes;
2385         params->blocksize_bytes = cbe_lun->blocksize;
2386         if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2387                 cbe_lun->req_lun_id = params->req_lun_id;
2388                 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2389         } else
2390                 cbe_lun->req_lun_id = 0;
2391
2392         cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2393         cbe_lun->be = &ctl_be_block_driver;
2394
2395         if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2396                 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
2397                          softc->num_luns);
2398                 strncpy((char *)cbe_lun->serial_num, tmpstr,
2399                         MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2400
2401                 /* Tell the user what we used for a serial number */
2402                 strncpy((char *)params->serial_num, tmpstr,
2403                         MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2404         } else { 
2405                 strncpy((char *)cbe_lun->serial_num, params->serial_num,
2406                         MIN(sizeof(cbe_lun->serial_num),
2407                         sizeof(params->serial_num)));
2408         }
2409         if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2410                 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
2411                 strncpy((char *)cbe_lun->device_id, tmpstr,
2412                         MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2413
2414                 /* Tell the user what we used for a device ID */
2415                 strncpy((char *)params->device_id, tmpstr,
2416                         MIN(sizeof(params->device_id), sizeof(tmpstr)));
2417         } else {
2418                 strncpy((char *)cbe_lun->device_id, params->device_id,
2419                         MIN(sizeof(cbe_lun->device_id),
2420                             sizeof(params->device_id)));
2421         }
2422
2423         TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2424
2425         be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK,
2426             taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2427
2428         if (be_lun->io_taskqueue == NULL) {
2429                 snprintf(req->error_str, sizeof(req->error_str),
2430                          "unable to create taskqueue");
2431                 goto bailout_error;
2432         }
2433
2434         /*
2435          * Note that we start the same number of threads by default for
2436          * both the file case and the block device case.  For the file
2437          * case, we need multiple threads to allow concurrency, because the
2438          * vnode interface is designed to be a blocking interface.  For the
2439          * block device case, ZFS zvols at least will block the caller's
2440          * context in many instances, and so we need multiple threads to
2441          * overcome that problem.  Other block devices don't need as many
2442          * threads, but they shouldn't cause too many problems.
2443          *
2444          * If the user wants to just have a single thread for a block
2445          * device, he can specify that when the LUN is created, or change
2446          * the tunable/sysctl to alter the default number of threads.
2447          */
2448         retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue,
2449                                          /*num threads*/num_threads,
2450                                          /*priority*/PUSER,
2451                                          /*proc*/control_softc->ctl_proc,
2452                                          /*thread name*/"block");
2453
2454         if (retval != 0)
2455                 goto bailout_error;
2456
2457         be_lun->num_threads = num_threads;
2458
2459         retval = ctl_add_lun(&be_lun->cbe_lun);
2460         if (retval != 0) {
2461                 snprintf(req->error_str, sizeof(req->error_str),
2462                          "ctl_add_lun() returned error %d, see dmesg for "
2463                          "details", retval);
2464                 retval = 0;
2465                 goto bailout_error;
2466         }
2467
2468         be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id,
2469                                                cbe_lun->blocksize,
2470                                                DEVSTAT_ALL_SUPPORTED,
2471                                                cbe_lun->lun_type
2472                                                | DEVSTAT_TYPE_IF_OTHER,
2473                                                DEVSTAT_PRIORITY_OTHER);
2474
2475         mtx_lock(&softc->lock);
2476         softc->num_luns++;
2477         SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links);
2478         mtx_unlock(&softc->lock);
2479
2480         params->req_lun_id = cbe_lun->lun_id;
2481
2482         return (retval);
2483
2484 bailout_error:
2485         req->status = CTL_LUN_ERROR;
2486
2487         if (be_lun->io_taskqueue != NULL)
2488                 taskqueue_free(be_lun->io_taskqueue);
2489         ctl_be_block_close(be_lun);
2490         if (be_lun->dev_path != NULL)
2491                 free(be_lun->dev_path, M_CTLBLK);
2492         nvlist_destroy(cbe_lun->options);
2493         mtx_destroy(&be_lun->queue_lock);
2494         mtx_destroy(&be_lun->io_lock);
2495         free(be_lun, M_CTLBLK);
2496
2497         return (retval);
2498 }
2499
2500 static int
2501 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2502 {
2503         struct ctl_lun_rm_params *params;
2504         struct ctl_be_block_lun *be_lun;
2505         struct ctl_be_lun *cbe_lun;
2506         int retval;
2507
2508         params = &req->reqdata.rm;
2509
2510         sx_xlock(&softc->modify_lock);
2511         mtx_lock(&softc->lock);
2512         SLIST_FOREACH(be_lun, &softc->lun_list, links) {
2513                 if (be_lun->cbe_lun.lun_id == params->lun_id) {
2514                         SLIST_REMOVE(&softc->lun_list, be_lun,
2515                             ctl_be_block_lun, links);
2516                         softc->num_luns--;
2517                         break;
2518                 }
2519         }
2520         mtx_unlock(&softc->lock);
2521         sx_xunlock(&softc->modify_lock);
2522         if (be_lun == NULL) {
2523                 snprintf(req->error_str, sizeof(req->error_str),
2524                          "LUN %u is not managed by the block backend",
2525                          params->lun_id);
2526                 goto bailout_error;
2527         }
2528         cbe_lun = &be_lun->cbe_lun;
2529
2530         if (be_lun->vn != NULL) {
2531                 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2532                 ctl_lun_no_media(cbe_lun);
2533                 taskqueue_drain_all(be_lun->io_taskqueue);
2534                 ctl_be_block_close(be_lun);
2535         }
2536
2537         mtx_lock(&softc->lock);
2538         be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2539         mtx_unlock(&softc->lock);
2540
2541         retval = ctl_remove_lun(cbe_lun);
2542         if (retval != 0) {
2543                 snprintf(req->error_str, sizeof(req->error_str),
2544                          "error %d returned from ctl_remove_lun() for "
2545                          "LUN %d", retval, params->lun_id);
2546                 mtx_lock(&softc->lock);
2547                 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2548                 mtx_unlock(&softc->lock);
2549                 goto bailout_error;
2550         }
2551
2552         mtx_lock(&softc->lock);
2553         while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2554                 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0);
2555                 if (retval == EINTR)
2556                         break;
2557         }
2558         be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2559         if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2560                 mtx_unlock(&softc->lock);
2561                 free(be_lun, M_CTLBLK);
2562         } else {
2563                 mtx_unlock(&softc->lock);
2564                 return (EINTR);
2565         }
2566
2567         req->status = CTL_LUN_OK;
2568         return (0);
2569
2570 bailout_error:
2571         req->status = CTL_LUN_ERROR;
2572         return (0);
2573 }
2574
2575 static int
2576 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2577 {
2578         struct ctl_lun_modify_params *params;
2579         struct ctl_be_block_lun *be_lun;
2580         struct ctl_be_lun *cbe_lun;
2581         const char *value;
2582         uint64_t oldsize;
2583         int error, wasprim;
2584
2585         params = &req->reqdata.modify;
2586
2587         sx_xlock(&softc->modify_lock);
2588         mtx_lock(&softc->lock);
2589         SLIST_FOREACH(be_lun, &softc->lun_list, links) {
2590                 if (be_lun->cbe_lun.lun_id == params->lun_id)
2591                         break;
2592         }
2593         mtx_unlock(&softc->lock);
2594         if (be_lun == NULL) {
2595                 snprintf(req->error_str, sizeof(req->error_str),
2596                          "LUN %u is not managed by the block backend",
2597                          params->lun_id);
2598                 goto bailout_error;
2599         }
2600         cbe_lun = &be_lun->cbe_lun;
2601
2602         if (params->lun_size_bytes != 0)
2603                 be_lun->params.lun_size_bytes = params->lun_size_bytes;
2604
2605         if (req->args_nvl != NULL) {
2606                 nvlist_destroy(cbe_lun->options);
2607                 cbe_lun->options = nvlist_clone(req->args_nvl);
2608         }
2609
2610         wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
2611         value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2612         if (value != NULL) {
2613                 if (strcmp(value, "primary") == 0)
2614                         cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2615                 else
2616                         cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2617         } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2618                 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2619         else
2620                 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2621         if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
2622                 if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
2623                         ctl_lun_primary(cbe_lun);
2624                 else
2625                         ctl_lun_secondary(cbe_lun);
2626         }
2627
2628         oldsize = be_lun->size_blocks;
2629         if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2630             control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2631                 if (be_lun->vn == NULL)
2632                         error = ctl_be_block_open(be_lun, req);
2633                 else if (vn_isdisk_error(be_lun->vn, &error))
2634                         error = ctl_be_block_open_dev(be_lun, req);
2635                 else if (be_lun->vn->v_type == VREG) {
2636                         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2637                         error = ctl_be_block_open_file(be_lun, req);
2638                         VOP_UNLOCK(be_lun->vn);
2639                 } else
2640                         error = EINVAL;
2641                 if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
2642                     be_lun->vn != NULL) {
2643                         cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2644                         ctl_lun_has_media(cbe_lun);
2645                 } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
2646                     be_lun->vn == NULL) {
2647                         cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2648                         ctl_lun_no_media(cbe_lun);
2649                 }
2650                 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2651         } else {
2652                 if (be_lun->vn != NULL) {
2653                         cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2654                         ctl_lun_no_media(cbe_lun);
2655                         taskqueue_drain_all(be_lun->io_taskqueue);
2656                         error = ctl_be_block_close(be_lun);
2657                 } else
2658                         error = 0;
2659         }
2660         if (be_lun->size_blocks != oldsize)
2661                 ctl_lun_capacity_changed(cbe_lun);
2662
2663         /* Tell the user the exact size we ended up using */
2664         params->lun_size_bytes = be_lun->size_bytes;
2665
2666         sx_xunlock(&softc->modify_lock);
2667         req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
2668         return (0);
2669
2670 bailout_error:
2671         sx_xunlock(&softc->modify_lock);
2672         req->status = CTL_LUN_ERROR;
2673         return (0);
2674 }
2675
2676 static void
2677 ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun)
2678 {
2679         struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun;
2680         struct ctl_be_block_softc *softc = be_lun->softc;
2681
2682         taskqueue_drain_all(be_lun->io_taskqueue);
2683         taskqueue_free(be_lun->io_taskqueue);
2684         if (be_lun->disk_stats != NULL)
2685                 devstat_remove_entry(be_lun->disk_stats);
2686         nvlist_destroy(be_lun->cbe_lun.options);
2687         free(be_lun->dev_path, M_CTLBLK);
2688         mtx_destroy(&be_lun->queue_lock);
2689         mtx_destroy(&be_lun->io_lock);
2690
2691         mtx_lock(&softc->lock);
2692         be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2693         if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2694                 wakeup(be_lun);
2695         else
2696                 free(be_lun, M_CTLBLK);
2697         mtx_unlock(&softc->lock);
2698 }
2699
2700 static int
2701 ctl_be_block_config_write(union ctl_io *io)
2702 {
2703         struct ctl_be_block_lun *be_lun;
2704         struct ctl_be_lun *cbe_lun;
2705         int retval;
2706
2707         DPRINTF("entered\n");
2708
2709         cbe_lun = CTL_BACKEND_LUN(io);
2710         be_lun = (struct ctl_be_block_lun *)cbe_lun;
2711
2712         retval = 0;
2713         switch (io->scsiio.cdb[0]) {
2714         case SYNCHRONIZE_CACHE:
2715         case SYNCHRONIZE_CACHE_16:
2716         case WRITE_SAME_10:
2717         case WRITE_SAME_16:
2718         case UNMAP:
2719                 /*
2720                  * The upper level CTL code will filter out any CDBs with
2721                  * the immediate bit set and return the proper error.
2722                  *
2723                  * We don't really need to worry about what LBA range the
2724                  * user asked to be synced out.  When they issue a sync
2725                  * cache command, we'll sync out the whole thing.
2726                  */
2727                 mtx_lock(&be_lun->queue_lock);
2728                 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2729                                    links);
2730                 mtx_unlock(&be_lun->queue_lock);
2731                 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2732                 break;
2733         case START_STOP_UNIT: {
2734                 struct scsi_start_stop_unit *cdb;
2735                 struct ctl_lun_req req;
2736
2737                 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2738                 if ((cdb->how & SSS_PC_MASK) != 0) {
2739                         ctl_set_success(&io->scsiio);
2740                         ctl_config_write_done(io);
2741                         break;
2742                 }
2743                 if (cdb->how & SSS_START) {
2744                         if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
2745                                 retval = ctl_be_block_open(be_lun, &req);
2746                                 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2747                                 if (retval == 0) {
2748                                         cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2749                                         ctl_lun_has_media(cbe_lun);
2750                                 } else {
2751                                         cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2752                                         ctl_lun_no_media(cbe_lun);
2753                                 }
2754                         }
2755                         ctl_start_lun(cbe_lun);
2756                 } else {
2757                         ctl_stop_lun(cbe_lun);
2758                         if (cdb->how & SSS_LOEJ) {
2759                                 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2760                                 cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
2761                                 ctl_lun_ejected(cbe_lun);
2762                                 if (be_lun->vn != NULL)
2763                                         ctl_be_block_close(be_lun);
2764                         }
2765                 }
2766
2767                 ctl_set_success(&io->scsiio);
2768                 ctl_config_write_done(io);
2769                 break;
2770         }
2771         case PREVENT_ALLOW:
2772                 ctl_set_success(&io->scsiio);
2773                 ctl_config_write_done(io);
2774                 break;
2775         default:
2776                 ctl_set_invalid_opcode(&io->scsiio);
2777                 ctl_config_write_done(io);
2778                 retval = CTL_RETVAL_COMPLETE;
2779                 break;
2780         }
2781
2782         return (retval);
2783 }
2784
2785 static int
2786 ctl_be_block_config_read(union ctl_io *io)
2787 {
2788         struct ctl_be_block_lun *be_lun;
2789         int retval = 0;
2790
2791         DPRINTF("entered\n");
2792
2793         be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
2794
2795         switch (io->scsiio.cdb[0]) {
2796         case SERVICE_ACTION_IN:
2797                 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2798                         mtx_lock(&be_lun->queue_lock);
2799                         STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2800                             &io->io_hdr, links);
2801                         mtx_unlock(&be_lun->queue_lock);
2802                         taskqueue_enqueue(be_lun->io_taskqueue,
2803                             &be_lun->io_task);
2804                         retval = CTL_RETVAL_QUEUED;
2805                         break;
2806                 }
2807                 ctl_set_invalid_field(&io->scsiio,
2808                                       /*sks_valid*/ 1,
2809                                       /*command*/ 1,
2810                                       /*field*/ 1,
2811                                       /*bit_valid*/ 1,
2812                                       /*bit*/ 4);
2813                 ctl_config_read_done(io);
2814                 retval = CTL_RETVAL_COMPLETE;
2815                 break;
2816         default:
2817                 ctl_set_invalid_opcode(&io->scsiio);
2818                 ctl_config_read_done(io);
2819                 retval = CTL_RETVAL_COMPLETE;
2820                 break;
2821         }
2822
2823         return (retval);
2824 }
2825
2826 static int
2827 ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb)
2828 {
2829         struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
2830         int retval;
2831
2832         retval = sbuf_printf(sb, "\t<num_threads>");
2833         if (retval != 0)
2834                 goto bailout;
2835         retval = sbuf_printf(sb, "%d", lun->num_threads);
2836         if (retval != 0)
2837                 goto bailout;
2838         retval = sbuf_printf(sb, "</num_threads>\n");
2839
2840 bailout:
2841         return (retval);
2842 }
2843
2844 static uint64_t
2845 ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
2846 {
2847         struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
2848
2849         if (lun->getattr == NULL)
2850                 return (UINT64_MAX);
2851         return (lun->getattr(lun, attrname));
2852 }
2853
2854 static int
2855 ctl_be_block_init(void)
2856 {
2857         struct ctl_be_block_softc *softc = &backend_block_softc;
2858
2859         sx_init(&softc->modify_lock, "ctlblock modify");
2860         mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2861         softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2862             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2863         softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG,
2864             NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2865         if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
2866                 softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG,
2867                     NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2868         SLIST_INIT(&softc->lun_list);
2869         return (0);
2870 }
2871
2872 static int
2873 ctl_be_block_shutdown(void)
2874 {
2875         struct ctl_be_block_softc *softc = &backend_block_softc;
2876         struct ctl_be_block_lun *lun;
2877
2878         mtx_lock(&softc->lock);
2879         while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
2880                 SLIST_REMOVE_HEAD(&softc->lun_list, links);
2881                 softc->num_luns--;
2882                 /*
2883                  * Drop our lock here.  Since ctl_remove_lun() can call
2884                  * back into us, this could potentially lead to a recursive
2885                  * lock of the same mutex, which would cause a hang.
2886                  */
2887                 mtx_unlock(&softc->lock);
2888                 ctl_remove_lun(&lun->cbe_lun);
2889                 mtx_lock(&softc->lock);
2890         }
2891         mtx_unlock(&softc->lock);
2892         uma_zdestroy(softc->bufmin_zone);
2893         if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
2894                 uma_zdestroy(softc->bufmax_zone);
2895         uma_zdestroy(softc->beio_zone);
2896         mtx_destroy(&softc->lock);
2897         sx_destroy(&softc->modify_lock);
2898         return (0);
2899 }