1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2003 Silicon Graphics International Corp.
5  * Copyright (c) 2009-2011 Spectra Logic Corporation
6  * Copyright (c) 2012 The FreeBSD Foundation
7  * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Edward Tomasz Napierala
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions, and the following disclaimer,
18  *    without modification.
19  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
20  *    substantially similar to the "NO WARRANTY" disclaimer below
21  *    ("Disclaimer") and any redistribution must be conditioned upon
22  *    including a substantially similar Disclaimer requirement for further
23  *    binary redistribution.
24  *
25  * NO WARRANTY
26  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
29  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGES.
37  *
38  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
39  */
40 /*
41  * CAM Target Layer driver backend for block devices.
42  *
43  * Author: Ken Merry <ken@FreeBSD.org>
44  */
45 #include <sys/cdefs.h>
46 __FBSDID("$FreeBSD$");
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/types.h>
52 #include <sys/kthread.h>
53 #include <sys/bio.h>
54 #include <sys/fcntl.h>
55 #include <sys/limits.h>
56 #include <sys/lock.h>
57 #include <sys/mutex.h>
58 #include <sys/condvar.h>
59 #include <sys/malloc.h>
60 #include <sys/conf.h>
61 #include <sys/ioccom.h>
62 #include <sys/queue.h>
63 #include <sys/sbuf.h>
64 #include <sys/endian.h>
65 #include <sys/uio.h>
66 #include <sys/buf.h>
67 #include <sys/taskqueue.h>
68 #include <sys/vnode.h>
69 #include <sys/namei.h>
70 #include <sys/mount.h>
71 #include <sys/disk.h>
72 #include <sys/fcntl.h>
73 #include <sys/filedesc.h>
74 #include <sys/filio.h>
75 #include <sys/proc.h>
76 #include <sys/pcpu.h>
77 #include <sys/module.h>
78 #include <sys/sdt.h>
79 #include <sys/devicestat.h>
80 #include <sys/sysctl.h>
81 #include <sys/nv.h>
82 #include <sys/dnv.h>
83
84 #include <geom/geom.h>
85
86 #include <cam/cam.h>
87 #include <cam/scsi/scsi_all.h>
88 #include <cam/scsi/scsi_da.h>
89 #include <cam/ctl/ctl_io.h>
90 #include <cam/ctl/ctl.h>
91 #include <cam/ctl/ctl_backend.h>
92 #include <cam/ctl/ctl_ioctl.h>
93 #include <cam/ctl/ctl_ha.h>
94 #include <cam/ctl/ctl_scsi_all.h>
95 #include <cam/ctl/ctl_private.h>
96 #include <cam/ctl/ctl_error.h>
97
98 /*
99  * The idea here is that we'll allocate enough S/G space to hold a 1MB
100  * I/O.  If we get an I/O larger than that, we'll split it.
101  */
102 #define CTLBLK_HALF_IO_SIZE     (512 * 1024)
103 #define CTLBLK_MAX_IO_SIZE      (CTLBLK_HALF_IO_SIZE * 2)
104 #define CTLBLK_MAX_SEG          MAXPHYS
105 #define CTLBLK_HALF_SEGS        MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
106 #define CTLBLK_MAX_SEGS         (CTLBLK_HALF_SEGS * 2)
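/*
 * Illustrative arithmetic (assuming the historical default MAXPHYS of
 * 128 KiB): CTLBLK_HALF_SEGS = 512 KiB / 128 KiB = 4 and CTLBLK_MAX_SEGS = 8,
 * so a full 1MB I/O is carried in eight 128 KiB S/G segments.
 */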
107
108 #ifdef CTLBLK_DEBUG
109 #define DPRINTF(fmt, args...) \
110     printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
111 #else
112 #define DPRINTF(fmt, args...) do {} while(0)
113 #endif
114
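/*
 * Accessors for per-I/O state stashed in the CTL I/O header: PRIV() returns
 * the backend's private pointer/length/flags area, ARGS() the decoded
 * LBA/length/flags of the command.
 */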
115 #define PRIV(io)        \
116     ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
117 #define ARGS(io)        \
118     ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
119
120 SDT_PROVIDER_DEFINE(cbb);
121
122 typedef enum {
123         CTL_BE_BLOCK_LUN_UNCONFIGURED   = 0x01,
124         CTL_BE_BLOCK_LUN_CONFIG_ERR     = 0x02,
125         CTL_BE_BLOCK_LUN_WAITING        = 0x04,
126 } ctl_be_block_lun_flags;
127
128 typedef enum {
129         CTL_BE_BLOCK_NONE,
130         CTL_BE_BLOCK_DEV,
131         CTL_BE_BLOCK_FILE
132 } ctl_be_block_type;
133
134 struct ctl_be_block_filedata {
135         struct ucred *cred;
136 };
137
138 union ctl_be_block_bedata {
139         struct ctl_be_block_filedata file;
140 };
141
142 struct ctl_be_block_io;
143 struct ctl_be_block_lun;
144
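/*
 * Backend method types: cbb_dispatch_t handlers perform the actual
 * read/write, flush, unmap and GET LBA STATUS work for a beio;
 * cbb_getattr_t returns the value of a named LUN attribute.
 */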
145 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
146                                struct ctl_be_block_io *beio);
147 typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
148                                   const char *attrname);
149
150 /*
151  * Backend LUN structure.  There is a 1:1 mapping between a block device
152  * and a backend block LUN, and between a backend block LUN and a CTL LUN.
153  */
154 struct ctl_be_block_lun {
155         struct ctl_lun_create_params params;
156         char lunname[32];
157         char *dev_path;
158         ctl_be_block_type dev_type;
159         struct vnode *vn;
160         union ctl_be_block_bedata backend;
161         cbb_dispatch_t dispatch;
162         cbb_dispatch_t lun_flush;
163         cbb_dispatch_t unmap;
164         cbb_dispatch_t get_lba_status;
165         cbb_getattr_t getattr;
166         uma_zone_t lun_zone;
167         uint64_t size_blocks;
168         uint64_t size_bytes;
169         struct ctl_be_block_softc *softc;
170         struct devstat *disk_stats;
171         ctl_be_block_lun_flags flags;
172         STAILQ_ENTRY(ctl_be_block_lun) links;
173         struct ctl_be_lun cbe_lun;
174         struct taskqueue *io_taskqueue;
175         struct task io_task;
176         int num_threads;
177         STAILQ_HEAD(, ctl_io_hdr) input_queue;
178         STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
179         STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
180         STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
181         struct mtx_padalign io_lock;
182         struct mtx_padalign queue_lock;
183 };
184
185 /*
186  * Overall softc structure for the block backend module.
187  */
188 struct ctl_be_block_softc {
189         struct mtx                       lock;
190         uma_zone_t                       beio_zone;
191         int                              num_luns;
192         STAILQ_HEAD(, ctl_be_block_lun)  lun_list;
193 };
194
195 static struct ctl_be_block_softc backend_block_softc;
196
197 /*
198  * Per-I/O information.
199  */
200 struct ctl_be_block_io {
201         union ctl_io                    *io;
202         struct ctl_sg_entry             sg_segs[CTLBLK_MAX_SEGS];
203         struct iovec                    xiovecs[CTLBLK_MAX_SEGS];
204         int                             bio_cmd;
205         int                             num_segs;
206         int                             num_bios_sent;
207         int                             num_bios_done;
208         int                             send_complete;
209         int                             first_error;
210         uint64_t                        first_error_offset;
211         struct bintime                  ds_t0;
212         devstat_tag_type                ds_tag_type;
213         devstat_trans_flags             ds_trans_type;
214         uint64_t                        io_len;
215         uint64_t                        io_offset;
216         int                             io_arg;
217         struct ctl_be_block_softc       *softc;
218         struct ctl_be_block_lun         *lun;
219         void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
220 };
221
222 extern struct ctl_softc *control_softc;
223
224 static int cbb_num_threads = 14;
225 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
226             "CAM Target Layer Block Backend");
227 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
228            &cbb_num_threads, 0, "Number of threads per backing file");
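/*
 * Illustrative usage: since this sysctl is RWTUN it can be set at runtime or
 * from loader.conf, e.g. `sysctl kern.cam.ctl.block.num_threads=32`.
 */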
229
230 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
231 static void ctl_free_beio(struct ctl_be_block_io *beio);
232 static void ctl_complete_beio(struct ctl_be_block_io *beio);
233 static int ctl_be_block_move_done(union ctl_io *io);
234 static void ctl_be_block_biodone(struct bio *bio);
235 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
236                                     struct ctl_be_block_io *beio);
237 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
238                                        struct ctl_be_block_io *beio);
239 static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
240                                   struct ctl_be_block_io *beio);
241 static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
242                                          const char *attrname);
243 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
244                                    struct ctl_be_block_io *beio);
245 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
246                                    struct ctl_be_block_io *beio);
247 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
248                                       struct ctl_be_block_io *beio);
249 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
250                                          const char *attrname);
251 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
252                                     union ctl_io *io);
253 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
254                                     union ctl_io *io);
255 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
256                                   union ctl_io *io);
257 static void ctl_be_block_worker(void *context, int pending);
258 static int ctl_be_block_submit(union ctl_io *io);
259 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
260                                    int flag, struct thread *td);
261 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
262                                   struct ctl_lun_req *req);
263 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
264                                  struct ctl_lun_req *req);
265 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
266 static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
267                              struct ctl_lun_req *req);
268 static int ctl_be_block_create(struct ctl_be_block_softc *softc,
269                                struct ctl_lun_req *req);
270 static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
271                            struct ctl_lun_req *req);
272 static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
273                            struct ctl_lun_req *req);
274 static void ctl_be_block_lun_shutdown(void *be_lun);
275 static void ctl_be_block_lun_config_status(void *be_lun,
276                                            ctl_lun_config_status status);
277 static int ctl_be_block_config_write(union ctl_io *io);
278 static int ctl_be_block_config_read(union ctl_io *io);
279 static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
280 static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname);
281 static int ctl_be_block_init(void);
282 static int ctl_be_block_shutdown(void);
283
284 static struct ctl_backend_driver ctl_be_block_driver = 
285 {
286         .name = "block",
287         .flags = CTL_BE_FLAG_HAS_CONFIG,
288         .init = ctl_be_block_init,
289         .shutdown = ctl_be_block_shutdown,
290         .data_submit = ctl_be_block_submit,
291         .data_move_done = ctl_be_block_move_done,
292         .config_read = ctl_be_block_config_read,
293         .config_write = ctl_be_block_config_write,
294         .ioctl = ctl_be_block_ioctl,
295         .lun_info = ctl_be_block_lun_info,
296         .lun_attr = ctl_be_block_lun_attr
297 };
298
299 MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
300 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
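/*
 * Illustrative usage: LUNs for this backend are normally created with
 * ctladm(8), e.g. `ctladm create -b block -o file=/dev/zvol/tank/vol0`,
 * where -b selects the "block" backend declared above and file= names the
 * backing device or file (the zvol path here is just an example).
 */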
301
302 static struct ctl_be_block_io *
303 ctl_alloc_beio(struct ctl_be_block_softc *softc)
304 {
305         struct ctl_be_block_io *beio;
306
307         beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
308         beio->softc = softc;
309         return (beio);
310 }
311
312 static void
313 ctl_free_beio(struct ctl_be_block_io *beio)
314 {
315         int duplicate_free;
316         int i;
317
318         duplicate_free = 0;
319
320         for (i = 0; i < beio->num_segs; i++) {
321                 if (beio->sg_segs[i].addr == NULL)
322                         duplicate_free++;
323
324                 uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
325                 beio->sg_segs[i].addr = NULL;
326
327                 /* For a compare we allocated two equal-sized S/G lists. */
328                 if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) {
329                         uma_zfree(beio->lun->lun_zone,
330                             beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
331                         beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL;
332                 }
333         }
334
335         if (duplicate_free > 0) {
336                 printf("%s: %d duplicate frees out of %d segments\n", __func__,
337                        duplicate_free, beio->num_segs);
338         }
339
340         uma_zfree(beio->softc->beio_zone, beio);
341 }
342
343 static void
344 ctl_complete_beio(struct ctl_be_block_io *beio)
345 {
346         union ctl_io *io = beio->io;
347
348         if (beio->beio_cont != NULL) {
349                 beio->beio_cont(beio);
350         } else {
351                 ctl_free_beio(beio);
352                 ctl_data_submit_done(io);
353         }
354 }
355
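/*
 * Compare two buffers byte by byte; returns the offset of the first
 * mismatching byte, or size if the buffers are identical.
 */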
356 static size_t
357 cmp(uint8_t *a, uint8_t *b, size_t size)
358 {
359         size_t i;
360
361         for (i = 0; i < size; i++) {
362                 if (a[i] != b[i])
363                         break;
364         }
365         return (i);
366 }
367
368 static void
369 ctl_be_block_compare(union ctl_io *io)
370 {
371         struct ctl_be_block_io *beio;
372         uint64_t off, res;
373         int i;
374         uint8_t info[8];
375
376         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
377         off = 0;
378         for (i = 0; i < beio->num_segs; i++) {
379                 res = cmp(beio->sg_segs[i].addr,
380                     beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
381                     beio->sg_segs[i].len);
382                 off += res;
383                 if (res < beio->sg_segs[i].len)
384                         break;
385         }
386         if (i < beio->num_segs) {
387                 scsi_u64to8b(off, info);
388                 ctl_set_sense(&io->scsiio, /*current_error*/ 1,
389                     /*sense_key*/ SSD_KEY_MISCOMPARE,
390                     /*asc*/ 0x1D, /*ascq*/ 0x00,
391                     /*type*/ SSD_ELEM_INFO,
392                     /*size*/ sizeof(info), /*data*/ &info,
393                     /*type*/ SSD_ELEM_NONE);
394         } else
395                 ctl_set_success(&io->scsiio);
396 }
397
398 static int
399 ctl_be_block_move_done(union ctl_io *io)
400 {
401         struct ctl_be_block_io *beio;
402         struct ctl_be_block_lun *be_lun;
403         struct ctl_lba_len_flags *lbalen;
404 #ifdef CTL_TIME_IO
405         struct bintime cur_bt;
406 #endif
407
408         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
409         be_lun = beio->lun;
410
411         DPRINTF("entered\n");
412
413 #ifdef CTL_TIME_IO
414         getbinuptime(&cur_bt);
415         bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
416         bintime_add(&io->io_hdr.dma_bt, &cur_bt);
417 #endif
418         io->io_hdr.num_dmas++;
419         io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
420
421         /*
422          * We set status at this point for read commands, and write
423          * commands with errors.
424          */
425         if (io->io_hdr.flags & CTL_FLAG_ABORT) {
426                 ;
427         } else if ((io->io_hdr.port_status != 0) &&
428             ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
429              (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
430                 ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1,
431                     /*retry_count*/ io->io_hdr.port_status);
432         } else if (io->scsiio.kern_data_resid != 0 &&
433             (io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT &&
434             ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
435              (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
436                 ctl_set_invalid_field_ciu(&io->scsiio);
437         } else if ((io->io_hdr.port_status == 0) &&
438             ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
439                 lbalen = ARGS(beio->io);
440                 if (lbalen->flags & CTL_LLF_READ) {
441                         ctl_set_success(&io->scsiio);
442                 } else if (lbalen->flags & CTL_LLF_COMPARE) {
443                         /* We have two data blocks ready for comparison. */
444                         ctl_be_block_compare(io);
445                 }
446         }
447
448         /*
449          * If this is a read, or a write with errors, it is done.
450          */
451         if ((beio->bio_cmd == BIO_READ)
452          || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
453          || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
454                 ctl_complete_beio(beio);
455                 return (0);
456         }
457
458         /*
459          * At this point, we have a write and the DMA completed
460          * successfully.  We now have to queue it to the task queue to
461          * execute the backend I/O.  That is because we do blocking
462          * memory allocations, and in the file backing case, blocking I/O.
463          * This move done routine is generally called in the SIM's
464          * interrupt context, and therefore we cannot block.
465          */
466         mtx_lock(&be_lun->queue_lock);
467         STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
468         mtx_unlock(&be_lun->queue_lock);
469         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
470
471         return (0);
472 }
473
474 static void
475 ctl_be_block_biodone(struct bio *bio)
476 {
477         struct ctl_be_block_io *beio;
478         struct ctl_be_block_lun *be_lun;
479         union ctl_io *io;
480         int error;
481
482         beio = bio->bio_caller1;
483         be_lun = beio->lun;
484         io = beio->io;
485
486         DPRINTF("entered\n");
487
488         error = bio->bio_error;
489         mtx_lock(&be_lun->io_lock);
490         if (error != 0 &&
491             (beio->first_error == 0 ||
492              bio->bio_offset < beio->first_error_offset)) {
493                 beio->first_error = error;
494                 beio->first_error_offset = bio->bio_offset;
495         }
496
497         beio->num_bios_done++;
498
499         /*
500          * XXX KDM will this cause WITNESS to complain?  Holding a lock
501          * during the free might cause it to complain.
502          */
503         g_destroy_bio(bio);
504
505         /*
506          * If the send complete bit isn't set, or we aren't the last I/O to
507          * complete, then we're done.
508          */
509         if ((beio->send_complete == 0)
510          || (beio->num_bios_done < beio->num_bios_sent)) {
511                 mtx_unlock(&be_lun->io_lock);
512                 return;
513         }
514
515         /*
516          * At this point, we've verified that we are the last I/O to
517          * complete, so it's safe to drop the lock.
518          */
519         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
520             beio->ds_tag_type, beio->ds_trans_type,
521             /*now*/ NULL, /*then*/&beio->ds_t0);
522         mtx_unlock(&be_lun->io_lock);
523
524         /*
525          * If there are any errors from the backing device, we fail the
526          * entire I/O with a medium error.
527          */
528         error = beio->first_error;
529         if (error != 0) {
530                 if (error == EOPNOTSUPP) {
531                         ctl_set_invalid_opcode(&io->scsiio);
532                 } else if (error == ENOSPC || error == EDQUOT) {
533                         ctl_set_space_alloc_fail(&io->scsiio);
534                 } else if (error == EROFS || error == EACCES) {
535                         ctl_set_hw_write_protected(&io->scsiio);
536                 } else if (beio->bio_cmd == BIO_FLUSH) {
537                         /* XXX KDM is there a better error here? */
538                         ctl_set_internal_failure(&io->scsiio,
539                                                  /*sks_valid*/ 1,
540                                                  /*retry_count*/ 0xbad2);
541                 } else {
542                         ctl_set_medium_error(&io->scsiio,
543                             beio->bio_cmd == BIO_READ);
544                 }
545                 ctl_complete_beio(beio);
546                 return;
547         }
548
549         /*
550          * If this is a write, a flush, a delete or verify, we're all done.
551          * If this is a read, we can now send the data to the user.
552          */
553         if ((beio->bio_cmd == BIO_WRITE)
554          || (beio->bio_cmd == BIO_FLUSH)
555          || (beio->bio_cmd == BIO_DELETE)
556          || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
557                 ctl_set_success(&io->scsiio);
558                 ctl_complete_beio(beio);
559         } else {
560                 if ((ARGS(io)->flags & CTL_LLF_READ) &&
561                     beio->beio_cont == NULL) {
562                         ctl_set_success(&io->scsiio);
563                         ctl_serseq_done(io);
564                 }
565 #ifdef CTL_TIME_IO
566                 getbinuptime(&io->io_hdr.dma_start_bt);
567 #endif
568                 ctl_datamove(io);
569         }
570 }
571
572 static void
573 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
574                         struct ctl_be_block_io *beio)
575 {
576         union ctl_io *io = beio->io;
577         struct mount *mountpoint;
578         int error, lock_flags;
579
580         DPRINTF("entered\n");
581
582         binuptime(&beio->ds_t0);
583         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
584
585         (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
586
587         if (MNT_SHARED_WRITES(mountpoint) ||
588             ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
589                 lock_flags = LK_SHARED;
590         else
591                 lock_flags = LK_EXCLUSIVE;
592         vn_lock(be_lun->vn, lock_flags | LK_RETRY);
593         error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
594             curthread);
595         VOP_UNLOCK(be_lun->vn);
596
597         vn_finished_write(mountpoint);
598
599         mtx_lock(&be_lun->io_lock);
600         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
601             beio->ds_tag_type, beio->ds_trans_type,
602             /*now*/ NULL, /*then*/&beio->ds_t0);
603         mtx_unlock(&be_lun->io_lock);
604
605         if (error == 0)
606                 ctl_set_success(&io->scsiio);
607         else {
608                 /* XXX KDM is there a better error here? */
609                 ctl_set_internal_failure(&io->scsiio,
610                                          /*sks_valid*/ 1,
611                                          /*retry_count*/ 0xbad1);
612         }
613
614         ctl_complete_beio(beio);
615 }
616
617 SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
618 SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
619 SDT_PROBE_DEFINE1(cbb, , read, file_done, "uint64_t");
620 SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");
621
622 static void
623 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
624                            struct ctl_be_block_io *beio)
625 {
626         struct ctl_be_block_filedata *file_data;
627         union ctl_io *io;
628         struct uio xuio;
629         struct iovec *xiovec;
630         size_t s;
631         int error, flags, i;
632
633         DPRINTF("entered\n");
634
635         file_data = &be_lun->backend.file;
636         io = beio->io;
637         flags = 0;
638         if (ARGS(io)->flags & CTL_LLF_DPO)
639                 flags |= IO_DIRECT;
640         if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
641                 flags |= IO_SYNC;
642
643         bzero(&xuio, sizeof(xuio));
644         if (beio->bio_cmd == BIO_READ) {
645                 SDT_PROBE0(cbb, , read, file_start);
646                 xuio.uio_rw = UIO_READ;
647         } else {
648                 SDT_PROBE0(cbb, , write, file_start);
649                 xuio.uio_rw = UIO_WRITE;
650         }
651         xuio.uio_offset = beio->io_offset;
652         xuio.uio_resid = beio->io_len;
653         xuio.uio_segflg = UIO_SYSSPACE;
654         xuio.uio_iov = beio->xiovecs;
655         xuio.uio_iovcnt = beio->num_segs;
656         xuio.uio_td = curthread;
657
658         for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
659                 xiovec->iov_base = beio->sg_segs[i].addr;
660                 xiovec->iov_len = beio->sg_segs[i].len;
661         }
662
663         binuptime(&beio->ds_t0);
664         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
665
666         if (beio->bio_cmd == BIO_READ) {
667                 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
668
669                 /*
670                  * UFS pays attention to IO_DIRECT for reads.  If the
671                  * DIRECTIO option is configured into the kernel, it calls
672                  * ffs_rawread().  But that only works for single-segment
673                  * uios with user space addresses.  In our case, with a
674                  * kernel uio, it still reads into the buffer cache, but it
675                  * will just try to release the buffer from the cache later
676                  * on in ffs_read().
677                  *
678                  * ZFS does not pay attention to IO_DIRECT for reads.
679                  *
680                  * UFS does not pay attention to IO_SYNC for reads.
681                  *
682                  * ZFS pays attention to IO_SYNC (which translates into the
683                  * Solaris define FRSYNC for zfs_read()) for reads.  It
684                  * attempts to sync the file before reading.
685                  */
686                 error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);
687
688                 VOP_UNLOCK(be_lun->vn);
689                 SDT_PROBE0(cbb, , read, file_done);
690                 if (error == 0 && xuio.uio_resid > 0) {
691                         /*
692                          * If we read less than requested (EOF), then
693                          * we should zero the rest of the buffer.
694                          */
695                         s = beio->io_len - xuio.uio_resid;
696                         for (i = 0; i < beio->num_segs; i++) {
697                                 if (s >= beio->sg_segs[i].len) {
698                                         s -= beio->sg_segs[i].len;
699                                         continue;
700                                 }
701                                 bzero((uint8_t *)beio->sg_segs[i].addr + s,
702                                     beio->sg_segs[i].len - s);
703                                 s = 0;
704                         }
705                 }
706         } else {
707                 struct mount *mountpoint;
708                 int lock_flags;
709
710                 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
711
712                 if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL)
713                   && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
714                         lock_flags = LK_SHARED;
715                 else
716                         lock_flags = LK_EXCLUSIVE;
717                 vn_lock(be_lun->vn, lock_flags | LK_RETRY);
718
719                 /*
720                  * UFS pays attention to IO_DIRECT for writes.  The write
721                  * is done asynchronously.  (Normally the write would just
722                  * get put into the cache.)
723                  *
724                  * UFS pays attention to IO_SYNC for writes.  It will
725                  * attempt to write the buffer out synchronously if that
726                  * flag is set.
727                  *
728                  * ZFS does not pay attention to IO_DIRECT for writes.
729                  *
730                  * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
731                  * for writes.  It will flush the transaction from the
732                  * cache before returning.
733                  */
734                 error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
735                 VOP_UNLOCK(be_lun->vn);
736
737                 vn_finished_write(mountpoint);
738                 SDT_PROBE0(cbb, , write, file_done);
739         }
740
741         mtx_lock(&be_lun->io_lock);
742         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
743             beio->ds_tag_type, beio->ds_trans_type,
744             /*now*/ NULL, /*then*/&beio->ds_t0);
745         mtx_unlock(&be_lun->io_lock);
746
747         /*
748          * If we got an error, set the sense data to "MEDIUM ERROR" and
749          * return the I/O to the user.
750          */
751         if (error != 0) {
752                 if (error == ENOSPC || error == EDQUOT) {
753                         ctl_set_space_alloc_fail(&io->scsiio);
754                 } else if (error == EROFS || error == EACCES) {
755                         ctl_set_hw_write_protected(&io->scsiio);
756                 } else {
757                         ctl_set_medium_error(&io->scsiio,
758                             beio->bio_cmd == BIO_READ);
759                 }
760                 ctl_complete_beio(beio);
761                 return;
762         }
763
764         /*
765          * If this is a write or a verify, we're all done.
766          * If this is a read, we can now send the data to the user.
767          */
768         if ((beio->bio_cmd == BIO_WRITE) ||
769             (ARGS(io)->flags & CTL_LLF_VERIFY)) {
770                 ctl_set_success(&io->scsiio);
771                 ctl_complete_beio(beio);
772         } else {
773                 if ((ARGS(io)->flags & CTL_LLF_READ) &&
774                     beio->beio_cont == NULL) {
775                         ctl_set_success(&io->scsiio);
776                         ctl_serseq_done(io);
777                 }
778 #ifdef CTL_TIME_IO
779                 getbinuptime(&io->io_hdr.dma_start_bt);
780 #endif
781                 ctl_datamove(io);
782         }
783 }
784
785 static void
786 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
787                         struct ctl_be_block_io *beio)
788 {
789         union ctl_io *io = beio->io;
790         struct ctl_lba_len_flags *lbalen = ARGS(io);
791         struct scsi_get_lba_status_data *data;
792         off_t roff, off;
793         int error, status;
794
795         DPRINTF("entered\n");
796
797         off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
798         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
799         error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
800             0, curthread->td_ucred, curthread);
801         if (error == 0 && off > roff)
802                 status = 0;     /* mapped up to off */
803         else {
804                 error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
805                     0, curthread->td_ucred, curthread);
806                 if (error == 0 && off > roff)
807                         status = 1;     /* deallocated up to off */
808                 else {
809                         status = 0;     /* unknown up to the end */
810                         off = be_lun->size_bytes;
811                 }
812         }
813         VOP_UNLOCK(be_lun->vn);
814
815         data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
816         scsi_u64to8b(lbalen->lba, data->descr[0].addr);
817         scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
818             lbalen->lba), data->descr[0].length);
819         data->descr[0].status = status;
820
821         ctl_complete_beio(beio);
822 }
823
824 static uint64_t
825 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
826 {
827         struct vattr            vattr;
828         struct statfs           statfs;
829         uint64_t                val;
830         int                     error;
831
832         val = UINT64_MAX;
833         if (be_lun->vn == NULL)
834                 return (val);
835         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
836         if (strcmp(attrname, "blocksused") == 0) {
837                 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
838                 if (error == 0)
839                         val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
840         }
841         if (strcmp(attrname, "blocksavail") == 0 &&
842             !VN_IS_DOOMED(be_lun->vn)) {
843                 error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
844                 if (error == 0)
845                         val = statfs.f_bavail * statfs.f_bsize /
846                             be_lun->cbe_lun.blocksize;
847         }
848         VOP_UNLOCK(be_lun->vn);
849         return (val);
850 }
851
852 static void
853 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
854                            struct ctl_be_block_io *beio)
855 {
856         union ctl_io *io;
857         struct cdevsw *csw;
858         struct cdev *dev;
859         struct uio xuio;
860         struct iovec *xiovec;
861         int error, flags, i, ref;
862
863         DPRINTF("entered\n");
864
865         io = beio->io;
866         flags = 0;
867         if (ARGS(io)->flags & CTL_LLF_DPO)
868                 flags |= IO_DIRECT;
869         if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
870                 flags |= IO_SYNC;
871
872         bzero(&xuio, sizeof(xuio));
873         if (beio->bio_cmd == BIO_READ) {
874                 SDT_PROBE0(cbb, , read, file_start);
875                 xuio.uio_rw = UIO_READ;
876         } else {
877                 SDT_PROBE0(cbb, , write, file_start);
878                 xuio.uio_rw = UIO_WRITE;
879         }
880         xuio.uio_offset = beio->io_offset;
881         xuio.uio_resid = beio->io_len;
882         xuio.uio_segflg = UIO_SYSSPACE;
883         xuio.uio_iov = beio->xiovecs;
884         xuio.uio_iovcnt = beio->num_segs;
885         xuio.uio_td = curthread;
886
887         for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
888                 xiovec->iov_base = beio->sg_segs[i].addr;
889                 xiovec->iov_len = beio->sg_segs[i].len;
890         }
891
892         binuptime(&beio->ds_t0);
893         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
894
895         csw = devvn_refthread(be_lun->vn, &dev, &ref);
896         if (csw) {
897                 if (beio->bio_cmd == BIO_READ)
898                         error = csw->d_read(dev, &xuio, flags);
899                 else
900                         error = csw->d_write(dev, &xuio, flags);
901                 dev_relthread(dev, ref);
902         } else
903                 error = ENXIO;
904
905         if (beio->bio_cmd == BIO_READ)
906                 SDT_PROBE0(cbb, , read, file_done);
907         else
908                 SDT_PROBE0(cbb, , write, file_done);
909
910         mtx_lock(&be_lun->io_lock);
911         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
912             beio->ds_tag_type, beio->ds_trans_type,
913             /*now*/ NULL, /*then*/&beio->ds_t0);
914         mtx_unlock(&be_lun->io_lock);
915
916         /*
917          * If we got an error, set the sense data to "MEDIUM ERROR" and
918          * return the I/O to the user.
919          */
920         if (error != 0) {
921                 if (error == ENOSPC || error == EDQUOT) {
922                         ctl_set_space_alloc_fail(&io->scsiio);
923                 } else if (error == EROFS || error == EACCES) {
924                         ctl_set_hw_write_protected(&io->scsiio);
925                 } else {
926                         ctl_set_medium_error(&io->scsiio,
927                             beio->bio_cmd == BIO_READ);
928                 }
929                 ctl_complete_beio(beio);
930                 return;
931         }
932
933         /*
934          * If this is a write or a verify, we're all done.
935          * If this is a read, we can now send the data to the user.
936          */
937         if ((beio->bio_cmd == BIO_WRITE) ||
938             (ARGS(io)->flags & CTL_LLF_VERIFY)) {
939                 ctl_set_success(&io->scsiio);
940                 ctl_complete_beio(beio);
941         } else {
942                 if ((ARGS(io)->flags & CTL_LLF_READ) &&
943                     beio->beio_cont == NULL) {
944                         ctl_set_success(&io->scsiio);
945                         ctl_serseq_done(io);
946                 }
947 #ifdef CTL_TIME_IO
948                 getbinuptime(&io->io_hdr.dma_start_bt);
949 #endif
950                 ctl_datamove(io);
951         }
952 }
953
954 static void
955 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
956                         struct ctl_be_block_io *beio)
957 {
958         union ctl_io *io = beio->io;
959         struct cdevsw *csw;
960         struct cdev *dev;
961         struct ctl_lba_len_flags *lbalen = ARGS(io);
962         struct scsi_get_lba_status_data *data;
963         off_t roff, off;
964         int error, ref, status;
965
966         DPRINTF("entered\n");
967
968         csw = devvn_refthread(be_lun->vn, &dev, &ref);
969         if (csw == NULL) {
970                 status = 0;     /* unknown up to the end */
971                 off = be_lun->size_bytes;
972                 goto done;
973         }
974         off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
975         error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
976             curthread);
977         if (error == 0 && off > roff)
978                 status = 0;     /* mapped up to off */
979         else {
980                 error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
981                     curthread);
982                 if (error == 0 && off > roff)
983                         status = 1;     /* deallocated up to off */
984                 else {
985                         status = 0;     /* unknown up to the end */
986                         off = be_lun->size_bytes;
987                 }
988         }
989         dev_relthread(dev, ref);
990
991 done:
992         data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
993         scsi_u64to8b(lbalen->lba, data->descr[0].addr);
994         scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
995             lbalen->lba), data->descr[0].length);
996         data->descr[0].status = status;
997
998         ctl_complete_beio(beio);
999 }
1000
1001 static void
1002 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
1003                        struct ctl_be_block_io *beio)
1004 {
1005         struct bio *bio;
1006         struct cdevsw *csw;
1007         struct cdev *dev;
1008         int ref;
1009
1010         DPRINTF("entered\n");
1011
1012         /* This can't fail, it's a blocking allocation. */
1013         bio = g_alloc_bio();
1014
1015         bio->bio_cmd        = BIO_FLUSH;
1016         bio->bio_offset     = 0;
1017         bio->bio_data       = 0;
1018         bio->bio_done       = ctl_be_block_biodone;
1019         bio->bio_caller1    = beio;
1020         bio->bio_pblkno     = 0;
1021
1022         /*
1023          * We don't need to acquire the LUN lock here, because we are only
1024          * sending one bio, and so there is no other context to synchronize
1025          * with.
1026          */
1027         beio->num_bios_sent = 1;
1028         beio->send_complete = 1;
1029
1030         binuptime(&beio->ds_t0);
1031         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1032
1033         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1034         if (csw) {
1035                 bio->bio_dev = dev;
1036                 csw->d_strategy(bio);
1037                 dev_relthread(dev, ref);
1038         } else {
1039                 bio->bio_error = ENXIO;
1040                 ctl_be_block_biodone(bio);
1041         }
1042 }
1043
1044 static void
1045 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
1046                        struct ctl_be_block_io *beio,
1047                        uint64_t off, uint64_t len, int last)
1048 {
1049         struct bio *bio;
1050         uint64_t maxlen;
1051         struct cdevsw *csw;
1052         struct cdev *dev;
1053         int ref;
1054
1055         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1056         maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
1057         while (len > 0) {
1058                 bio = g_alloc_bio();
1059                 bio->bio_cmd        = BIO_DELETE;
1060                 bio->bio_dev        = dev;
1061                 bio->bio_offset     = off;
1062                 bio->bio_length     = MIN(len, maxlen);
1063                 bio->bio_data       = 0;
1064                 bio->bio_done       = ctl_be_block_biodone;
1065                 bio->bio_caller1    = beio;
1066                 bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;
1067
1068                 off += bio->bio_length;
1069                 len -= bio->bio_length;
1070
1071                 mtx_lock(&be_lun->io_lock);
1072                 beio->num_bios_sent++;
1073                 if (last && len == 0)
1074                         beio->send_complete = 1;
1075                 mtx_unlock(&be_lun->io_lock);
1076
1077                 if (csw) {
1078                         csw->d_strategy(bio);
1079                 } else {
1080                         bio->bio_error = ENXIO;
1081                         ctl_be_block_biodone(bio);
1082                 }
1083         }
1084         if (csw)
1085                 dev_relthread(dev, ref);
1086 }
1087
1088 static void
1089 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
1090                        struct ctl_be_block_io *beio)
1091 {
1092         union ctl_io *io;
1093         struct ctl_ptr_len_flags *ptrlen;
1094         struct scsi_unmap_desc *buf, *end;
1095         uint64_t len;
1096
1097         io = beio->io;
1098
1099         DPRINTF("entered\n");
1100
1101         binuptime(&beio->ds_t0);
1102         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1103
1104         if (beio->io_offset == -1) {
1105                 beio->io_len = 0;
1106                 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1107                 buf = (struct scsi_unmap_desc *)ptrlen->ptr;
1108                 end = buf + ptrlen->len / sizeof(*buf);
1109                 for (; buf < end; buf++) {
1110                         len = (uint64_t)scsi_4btoul(buf->length) *
1111                             be_lun->cbe_lun.blocksize;
1112                         beio->io_len += len;
1113                         ctl_be_block_unmap_dev_range(be_lun, beio,
1114                             scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
1115                             len, (end - buf < 2) ? TRUE : FALSE);
1116                 }
1117         } else
1118                 ctl_be_block_unmap_dev_range(be_lun, beio,
1119                     beio->io_offset, beio->io_len, TRUE);
1120 }
1121
1122 static void
1123 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
1124                           struct ctl_be_block_io *beio)
1125 {
1126         TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
1127         struct bio *bio;
1128         struct cdevsw *csw;
1129         struct cdev *dev;
1130         off_t cur_offset;
1131         int i, max_iosize, ref;
1132
1133         DPRINTF("entered\n");
1134         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1135
1136         /*
1137          * We have to limit our I/O size to the maximum supported by the
1138          * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
1139          * set it properly, use DFLTPHYS.
1140          */
1141         if (csw) {
1142                 max_iosize = dev->si_iosize_max;
1143                 if (max_iosize < PAGE_SIZE)
1144                         max_iosize = DFLTPHYS;
1145         } else
1146                 max_iosize = DFLTPHYS;
1147
1148         cur_offset = beio->io_offset;
1149         for (i = 0; i < beio->num_segs; i++) {
1150                 size_t cur_size;
1151                 uint8_t *cur_ptr;
1152
1153                 cur_size = beio->sg_segs[i].len;
1154                 cur_ptr = beio->sg_segs[i].addr;
1155
1156                 while (cur_size > 0) {
1157                         /* This can't fail, it's a blocking allocation. */
1158                         bio = g_alloc_bio();
1159
1160                         KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
1161
1162                         bio->bio_cmd = beio->bio_cmd;
1163                         bio->bio_dev = dev;
1164                         bio->bio_caller1 = beio;
1165                         bio->bio_length = min(cur_size, max_iosize);
1166                         bio->bio_offset = cur_offset;
1167                         bio->bio_data = cur_ptr;
1168                         bio->bio_done = ctl_be_block_biodone;
1169                         bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;
1170
1171                         cur_offset += bio->bio_length;
1172                         cur_ptr += bio->bio_length;
1173                         cur_size -= bio->bio_length;
1174
1175                         TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
1176                         beio->num_bios_sent++;
1177                 }
1178         }
1179         beio->send_complete = 1;
1180         binuptime(&beio->ds_t0);
1181         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1182
1183         /*
1184          * Fire off all allocated requests!
1185          */
1186         while ((bio = TAILQ_FIRST(&queue)) != NULL) {
1187                 TAILQ_REMOVE(&queue, bio, bio_queue);
1188                 if (csw)
1189                         csw->d_strategy(bio);
1190                 else {
1191                         bio->bio_error = ENXIO;
1192                         ctl_be_block_biodone(bio);
1193                 }
1194         }
1195         if (csw)
1196                 dev_relthread(dev, ref);
1197 }
1198
1199 static uint64_t
1200 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1201 {
1202         struct diocgattr_arg    arg;
1203         struct cdevsw *csw;
1204         struct cdev *dev;
1205         int error, ref;
1206
1207         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1208         if (csw == NULL)
1209                 return (UINT64_MAX);
1210         strlcpy(arg.name, attrname, sizeof(arg.name));
1211         arg.len = sizeof(arg.value.off);
1212         if (csw->d_ioctl) {
1213                 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1214                     curthread);
1215         } else
1216                 error = ENODEV;
1217         dev_relthread(dev, ref);
1218         if (error != 0)
1219                 return (UINT64_MAX);
1220         return (arg.value.off);
1221 }
1222
1223 static void
1224 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1225                             union ctl_io *io)
1226 {
1227         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1228         struct ctl_be_block_io *beio;
1229         struct ctl_lba_len_flags *lbalen;
1230
1231         DPRINTF("entered\n");
1232         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1233         lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1234
1235         beio->io_len = lbalen->len * cbe_lun->blocksize;
1236         beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1237         beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1238         beio->bio_cmd = BIO_FLUSH;
1239         beio->ds_trans_type = DEVSTAT_NO_DATA;
1240         DPRINTF("SYNC\n");
1241         be_lun->lun_flush(be_lun, beio);
1242 }
1243
1244 static void
1245 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1246 {
1247         union ctl_io *io;
1248
1249         io = beio->io;
1250         ctl_free_beio(beio);
1251         if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1252             ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1253              (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1254                 ctl_config_write_done(io);
1255                 return;
1256         }
1257
1258         ctl_be_block_config_write(io);
1259 }
1260
1261 static void
1262 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
1263                             union ctl_io *io)
1264 {
1265         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1266         struct ctl_be_block_io *beio;
1267         struct ctl_lba_len_flags *lbalen;
1268         uint64_t len_left, lba;
1269         uint32_t pb, pbo, adj;
1270         int i, seglen;
1271         uint8_t *buf, *end;
1272
1273         DPRINTF("entered\n");
1274
1275         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1276         lbalen = ARGS(beio->io);
1277
1278         if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
1279             (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
1280                 ctl_free_beio(beio);
1281                 ctl_set_invalid_field(&io->scsiio,
1282                                       /*sks_valid*/ 1,
1283                                       /*command*/ 1,
1284                                       /*field*/ 1,
1285                                       /*bit_valid*/ 0,
1286                                       /*bit*/ 0);
1287                 ctl_config_write_done(io);
1288                 return;
1289         }
1290
1291         if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
1292                 beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1293                 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
1294                 beio->bio_cmd = BIO_DELETE;
1295                 beio->ds_trans_type = DEVSTAT_FREE;
1296
1297                 be_lun->unmap(be_lun, beio);
1298                 return;
1299         }
1300
1301         beio->bio_cmd = BIO_WRITE;
1302         beio->ds_trans_type = DEVSTAT_WRITE;
1303
1304         DPRINTF("WRITE SAME at LBA %jx len %u\n",
1305                (uintmax_t)lbalen->lba, lbalen->len);
1306
1307         pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
1308         if (be_lun->cbe_lun.pblockoff > 0)
1309                 pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
1310         else
1311                 pbo = 0;
1312         len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
1313         for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
1314
1315                 /*
1316                  * Setup the S/G entry for this chunk.
1317                  */
1318                 seglen = MIN(CTLBLK_MAX_SEG, len_left);
1319                 if (pb > cbe_lun->blocksize) {
1320                         adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
1321                             seglen - pbo) % pb;
1322                         if (seglen > adj)
1323                                 seglen -= adj;
1324                         else
1325                                 seglen -= seglen % cbe_lun->blocksize;
1326                 } else
1327                         seglen -= seglen % cbe_lun->blocksize;
1328                 beio->sg_segs[i].len = seglen;
1329                 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
1330
1331                 DPRINTF("segment %d addr %p len %zd\n", i,
1332                         beio->sg_segs[i].addr, beio->sg_segs[i].len);
1333
1334                 beio->num_segs++;
1335                 len_left -= seglen;
1336
1337                 buf = beio->sg_segs[i].addr;
1338                 end = buf + seglen;
1339                 for (; buf < end; buf += cbe_lun->blocksize) {
1340                         if (lbalen->flags & SWS_NDOB) {
1341                                 memset(buf, 0, cbe_lun->blocksize);
1342                         } else {
1343                                 memcpy(buf, io->scsiio.kern_data_ptr,
1344                                     cbe_lun->blocksize);
1345                         }
1346                         if (lbalen->flags & SWS_LBDATA)
1347                                 scsi_ulto4b(lbalen->lba + lba, buf);
1348                         lba++;
1349                 }
1350         }
1351
1352         beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1353         beio->io_len = lba * cbe_lun->blocksize;
1354
1355         /* We cannot do it all in one run.  Correct the request and schedule a rerun. */
1356         if (len_left > 0) {
1357                 lbalen->lba += lba;
1358                 lbalen->len -= lba;
1359                 beio->beio_cont = ctl_be_block_cw_done_ws;
1360         }
1361
1362         be_lun->dispatch(be_lun, beio);
1363 }
1364
1365 static void
1366 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1367                             union ctl_io *io)
1368 {
1369         struct ctl_be_block_io *beio;
1370         struct ctl_ptr_len_flags *ptrlen;
1371
1372         DPRINTF("entered\n");
1373
1374         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1375         ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1376
1377         if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1378                 ctl_free_beio(beio);
1379                 ctl_set_invalid_field(&io->scsiio,
1380                                       /*sks_valid*/ 0,
1381                                       /*command*/ 1,
1382                                       /*field*/ 0,
1383                                       /*bit_valid*/ 0,
1384                                       /*bit*/ 0);
1385                 ctl_config_write_done(io);
1386                 return;
1387         }
1388
1389         beio->io_len = 0;
1390         beio->io_offset = -1;
1391         beio->bio_cmd = BIO_DELETE;
1392         beio->ds_trans_type = DEVSTAT_FREE;
1393         DPRINTF("UNMAP\n");
1394         be_lun->unmap(be_lun, beio);
1395 }
1396
1397 static void
1398 ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1399 {
1400         union ctl_io *io;
1401
1402         io = beio->io;
1403         ctl_free_beio(beio);
1404         ctl_config_read_done(io);
1405 }
1406
1407 static void
1408 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1409                          union ctl_io *io)
1410 {
1411         struct ctl_be_block_io *beio;
1412         struct ctl_be_block_softc *softc;
1413
1414         DPRINTF("entered\n");
1415
1416         softc = be_lun->softc;
1417         beio = ctl_alloc_beio(softc);
1418         beio->io = io;
1419         beio->lun = be_lun;
1420         beio->beio_cont = ctl_be_block_cr_done;
1421         PRIV(io)->ptr = (void *)beio;
1422
1423         switch (io->scsiio.cdb[0]) {
1424         case SERVICE_ACTION_IN:         /* GET LBA STATUS */
1425                 beio->bio_cmd = -1;
1426                 beio->ds_trans_type = DEVSTAT_NO_DATA;
1427                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1428                 beio->io_len = 0;
1429                 if (be_lun->get_lba_status)
1430                         be_lun->get_lba_status(be_lun, beio);
1431                 else
1432                         ctl_be_block_cr_done(beio);
1433                 break;
1434         default:
1435                 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1436                 break;
1437         }
1438 }
1439
1440 static void
1441 ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1442 {
1443         union ctl_io *io;
1444
1445         io = beio->io;
1446         ctl_free_beio(beio);
1447         ctl_config_write_done(io);
1448 }
1449
1450 static void
1451 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1452                          union ctl_io *io)
1453 {
1454         struct ctl_be_block_io *beio;
1455         struct ctl_be_block_softc *softc;
1456
1457         DPRINTF("entered\n");
1458
1459         softc = be_lun->softc;
1460         beio = ctl_alloc_beio(softc);
1461         beio->io = io;
1462         beio->lun = be_lun;
1463         beio->beio_cont = ctl_be_block_cw_done;
1464         switch (io->scsiio.tag_type) {
1465         case CTL_TAG_ORDERED:
1466                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1467                 break;
1468         case CTL_TAG_HEAD_OF_QUEUE:
1469                 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1470                 break;
1471         case CTL_TAG_UNTAGGED:
1472         case CTL_TAG_SIMPLE:
1473         case CTL_TAG_ACA:
1474         default:
1475                 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1476                 break;
1477         }
1478         PRIV(io)->ptr = (void *)beio;
1479
1480         switch (io->scsiio.cdb[0]) {
1481         case SYNCHRONIZE_CACHE:
1482         case SYNCHRONIZE_CACHE_16:
1483                 ctl_be_block_cw_dispatch_sync(be_lun, io);
1484                 break;
1485         case WRITE_SAME_10:
1486         case WRITE_SAME_16:
1487                 ctl_be_block_cw_dispatch_ws(be_lun, io);
1488                 break;
1489         case UNMAP:
1490                 ctl_be_block_cw_dispatch_unmap(be_lun, io);
1491                 break;
1492         default:
1493                 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1494                 break;
1495         }
1496 }
1497
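/*
 * DTrace SDT probes marking the start of backend READ/WRITE dispatch and
 * the point at which the S/G buffers for a request have been allocated.
 */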
1498 SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
1499 SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
1500 SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
1501 SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");
1502
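/*
 * Continuation for requests too large to fit in a single beio: free the
 * completed beio and, unless the I/O was aborted or has already failed,
 * clear its status and requeue it on the input queue so the worker thread
 * dispatches the next chunk.
 */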
1503 static void
1504 ctl_be_block_next(struct ctl_be_block_io *beio)
1505 {
1506         struct ctl_be_block_lun *be_lun;
1507         union ctl_io *io;
1508
1509         io = beio->io;
1510         be_lun = beio->lun;
1511         ctl_free_beio(beio);
1512         if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1513             ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1514              (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1515                 ctl_data_submit_done(io);
1516                 return;
1517         }
1518
1519         io->io_hdr.status &= ~CTL_STATUS_MASK;
1520         io->io_hdr.status |= CTL_STATUS_NONE;
1521
1522         mtx_lock(&be_lun->queue_lock);
1523         STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1524         mtx_unlock(&be_lun->queue_lock);
1525         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1526 }
1527
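/*
 * Build a beio for an ordinary READ/WRITE (or COMPARE) request: split the
 * transfer into CTLBLK_MAX_SEG-sized S/G segments, limit a single pass to
 * CTLBLK_MAX_IO_SIZE (half of that for compare, which needs a second set
 * of buffers), then either dispatch the read directly or start the
 * datamove to fetch the data to be written.
 */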
1528 static void
1529 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
1530                            union ctl_io *io)
1531 {
1532         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1533         struct ctl_be_block_io *beio;
1534         struct ctl_be_block_softc *softc;
1535         struct ctl_lba_len_flags *lbalen;
1536         struct ctl_ptr_len_flags *bptrlen;
1537         uint64_t len_left, lbas;
1538         int i;
1539
1540         softc = be_lun->softc;
1541
1542         DPRINTF("entered\n");
1543
1544         lbalen = ARGS(io);
1545         if (lbalen->flags & CTL_LLF_WRITE) {
1546                 SDT_PROBE0(cbb, , write, start);
1547         } else {
1548                 SDT_PROBE0(cbb, , read, start);
1549         }
1550
1551         beio = ctl_alloc_beio(softc);
1552         beio->io = io;
1553         beio->lun = be_lun;
1554         bptrlen = PRIV(io);
1555         bptrlen->ptr = (void *)beio;
1556
1557         switch (io->scsiio.tag_type) {
1558         case CTL_TAG_ORDERED:
1559                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1560                 break;
1561         case CTL_TAG_HEAD_OF_QUEUE:
1562                 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1563                 break;
1564         case CTL_TAG_UNTAGGED:
1565         case CTL_TAG_SIMPLE:
1566         case CTL_TAG_ACA:
1567         default:
1568                 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1569                 break;
1570         }
1571
1572         if (lbalen->flags & CTL_LLF_WRITE) {
1573                 beio->bio_cmd = BIO_WRITE;
1574                 beio->ds_trans_type = DEVSTAT_WRITE;
1575         } else {
1576                 beio->bio_cmd = BIO_READ;
1577                 beio->ds_trans_type = DEVSTAT_READ;
1578         }
1579
1580         DPRINTF("%s at LBA %jx len %u @%ju\n",
1581                (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
1582                (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
1583         if (lbalen->flags & CTL_LLF_COMPARE)
1584                 lbas = CTLBLK_HALF_IO_SIZE;
1585         else
1586                 lbas = CTLBLK_MAX_IO_SIZE;
1587         lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
1588         beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
1589         beio->io_len = lbas * cbe_lun->blocksize;
1590         bptrlen->len += lbas;
1591
1592         for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
1593                 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
1594                     i, CTLBLK_MAX_SEGS));
1595
1596                 /*
1597                  * Setup the S/G entry for this chunk.
1598                  */
1599                 beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
1600                 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
1601
1602                 DPRINTF("segment %d addr %p len %zd\n", i,
1603                         beio->sg_segs[i].addr, beio->sg_segs[i].len);
1604
1605                 /* Set up second segment for compare operation. */
1606                 if (lbalen->flags & CTL_LLF_COMPARE) {
1607                         beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
1608                             beio->sg_segs[i].len;
1609                         beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
1610                             uma_zalloc(be_lun->lun_zone, M_WAITOK);
1611                 }
1612
1613                 beio->num_segs++;
1614                 len_left -= beio->sg_segs[i].len;
1615         }
1616         if (bptrlen->len < lbalen->len)
1617                 beio->beio_cont = ctl_be_block_next;
1618         io->scsiio.be_move_done = ctl_be_block_move_done;
1619         /* For compare we have separate S/G lists for read and datamove. */
1620         if (lbalen->flags & CTL_LLF_COMPARE)
1621                 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
1622         else
1623                 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
1624         io->scsiio.kern_data_len = beio->io_len;
1625         io->scsiio.kern_sg_entries = beio->num_segs;
1626         io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
1627
1628         /*
1629          * For the read case, we need to read the data into our buffers and
1630          * then we can send it back to the user.  For the write case, we
1631          * need to get the data from the user first.
1632          */
1633         if (beio->bio_cmd == BIO_READ) {
1634                 SDT_PROBE0(cbb, , read, alloc_done);
1635                 be_lun->dispatch(be_lun, beio);
1636         } else {
1637                 SDT_PROBE0(cbb, , write, alloc_done);
1638 #ifdef CTL_TIME_IO
1639                 getbinuptime(&io->io_hdr.dma_start_bt);
1640 #endif
1641                 ctl_datamove(io);
1642         }
1643 }
1644
1645 static void
1646 ctl_be_block_worker(void *context, int pending)
1647 {
1648         struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
1649         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1650         union ctl_io *io;
1651         struct ctl_be_block_io *beio;
1652
1653         DPRINTF("entered\n");
1654         /*
1655          * Fetch and process I/Os from all queues.  If we detect the
1656          * CTL_LUN_FLAG_NO_MEDIA flag here, it is the result of a race,
1657          * so make the response maximally opaque to avoid confusing the initiator.
1658          */
1659         for (;;) {
1660                 mtx_lock(&be_lun->queue_lock);
1661                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1662                 if (io != NULL) {
1663                         DPRINTF("datamove queue\n");
1664                         STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
1665                                       ctl_io_hdr, links);
1666                         mtx_unlock(&be_lun->queue_lock);
1667                         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1668                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1669                                 ctl_set_busy(&io->scsiio);
1670                                 ctl_complete_beio(beio);
1671                                 return;
1672                         }
1673                         be_lun->dispatch(be_lun, beio);
1674                         continue;
1675                 }
1676                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1677                 if (io != NULL) {
1678                         DPRINTF("config write queue\n");
1679                         STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
1680                                       ctl_io_hdr, links);
1681                         mtx_unlock(&be_lun->queue_lock);
1682                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1683                                 ctl_set_busy(&io->scsiio);
1684                                 ctl_config_write_done(io);
1685                                 return;
1686                         }
1687                         ctl_be_block_cw_dispatch(be_lun, io);
1688                         continue;
1689                 }
1690                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
1691                 if (io != NULL) {
1692                         DPRINTF("config read queue\n");
1693                         STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr,
1694                                       ctl_io_hdr, links);
1695                         mtx_unlock(&be_lun->queue_lock);
1696                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1697                                 ctl_set_busy(&io->scsiio);
1698                                 ctl_config_read_done(io);
1699                                 return;
1700                         }
1701                         ctl_be_block_cr_dispatch(be_lun, io);
1702                         continue;
1703                 }
1704                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1705                 if (io != NULL) {
1706                         DPRINTF("input queue\n");
1707                         STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
1708                                       ctl_io_hdr, links);
1709                         mtx_unlock(&be_lun->queue_lock);
1710                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1711                                 ctl_set_busy(&io->scsiio);
1712                                 ctl_data_submit_done(io);
1713                                 return;
1714                         }
1715                         ctl_be_block_dispatch(be_lun, io);
1716                         continue;
1717                 }
1718
1719                 /*
1720                  * If we get here, there is no work left in the queues, so
1721                  * just break out and let the task queue go to sleep.
1722                  */
1723                 mtx_unlock(&be_lun->queue_lock);
1724                 break;
1725         }
1726 }
1727
1728 /*
1729  * Entry point from CTL to the backend for I/O.  We queue everything to a
1730  * work thread, so this just puts the I/O on a queue and wakes up the
1731  * thread.
1732  */
1733 static int
1734 ctl_be_block_submit(union ctl_io *io)
1735 {
1736         struct ctl_be_block_lun *be_lun;
1737         struct ctl_be_lun *cbe_lun;
1738
1739         DPRINTF("entered\n");
1740
1741         cbe_lun = CTL_BACKEND_LUN(io);
1742         be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
1743
1744         /*
1745          * Make sure we only get SCSI I/O.
1746          */
1747         KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
1748                 "%#x) encountered", io->io_hdr.io_type));
1749
1750         PRIV(io)->len = 0;
1751
1752         mtx_lock(&be_lun->queue_lock);
1753         STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1754         mtx_unlock(&be_lun->queue_lock);
1755         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1756
1757         return (CTL_RETVAL_COMPLETE);
1758 }
1759
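/*
 * ioctl entry point for LUN management requests (CTL_LUN_REQ): dispatch
 * create, remove and modify requests to the corresponding handlers.
 */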
1760 static int
1761 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1762                         int flag, struct thread *td)
1763 {
1764         struct ctl_be_block_softc *softc;
1765         int error;
1766
1767         softc = &backend_block_softc;
1768
1769         error = 0;
1770
1771         switch (cmd) {
1772         case CTL_LUN_REQ: {
1773                 struct ctl_lun_req *lun_req;
1774
1775                 lun_req = (struct ctl_lun_req *)addr;
1776
1777                 switch (lun_req->reqtype) {
1778                 case CTL_LUNREQ_CREATE:
1779                         error = ctl_be_block_create(softc, lun_req);
1780                         break;
1781                 case CTL_LUNREQ_RM:
1782                         error = ctl_be_block_rm(softc, lun_req);
1783                         break;
1784                 case CTL_LUNREQ_MODIFY:
1785                         error = ctl_be_block_modify(softc, lun_req);
1786                         break;
1787                 default:
1788                         lun_req->status = CTL_LUN_ERROR;
1789                         snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1790                                  "invalid LUN request type %d",
1791                                  lun_req->reqtype);
1792                         break;
1793                 }
1794                 break;
1795         }
1796         default:
1797                 error = ENOTTY;
1798                 break;
1799         }
1800
1801         return (error);
1802 }
1803
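/*
 * Finish opening a plain-file backing store: take the vnode attributes and
 * a credential reference, derive the logical/physical block geometry from
 * the file attributes and the "pblocksize"/"pblockoffset" and
 * "ublocksize"/"ublockoffset" options, and sanity check the media size.
 */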
1804 static int
1805 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1806 {
1807         struct ctl_be_lun *cbe_lun;
1808         struct ctl_be_block_filedata *file_data;
1809         struct ctl_lun_create_params *params;
1810         const char                   *value;
1811         struct vattr                  vattr;
1812         off_t                         ps, pss, po, pos, us, uss, uo, uos;
1813         int                           error;
1814
1815         cbe_lun = &be_lun->cbe_lun;
1816         file_data = &be_lun->backend.file;
1817         params = &be_lun->params;
1818
1819         be_lun->dev_type = CTL_BE_BLOCK_FILE;
1820         be_lun->dispatch = ctl_be_block_dispatch_file;
1821         be_lun->lun_flush = ctl_be_block_flush_file;
1822         be_lun->get_lba_status = ctl_be_block_gls_file;
1823         be_lun->getattr = ctl_be_block_getattr_file;
1824         be_lun->unmap = NULL;
1825         cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
1826
1827         error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1828         if (error != 0) {
1829                 snprintf(req->error_str, sizeof(req->error_str),
1830                          "error calling VOP_GETATTR() for file %s",
1831                          be_lun->dev_path);
1832                 return (error);
1833         }
1834
1835         file_data->cred = crhold(curthread->td_ucred);
1836         if (params->lun_size_bytes != 0)
1837                 be_lun->size_bytes = params->lun_size_bytes;
1838         else
1839                 be_lun->size_bytes = vattr.va_size;
1840
1841         /*
1842          * For files we can use any logical block size.  Prefer 512 bytes
1843          * for compatibility reasons.  If the file's vattr.va_blocksize
1844          * (preferred I/O block size) is larger than and a multiple of the
1845          * chosen logical block size, report it as the physical block size.
1846          */
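        /*
         * Worked example (hypothetical numbers): with 512-byte logical
         * blocks and a vattr.va_blocksize of 32768, pss below becomes 64,
         * a power of two, so the LUN advertises pblockexp = fls(64) - 1 = 6,
         * i.e. a 32 KB physical block size.
         */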
1847         if (params->blocksize_bytes != 0)
1848                 cbe_lun->blocksize = params->blocksize_bytes;
1849         else if (cbe_lun->lun_type == T_CDROM)
1850                 cbe_lun->blocksize = 2048;
1851         else
1852                 cbe_lun->blocksize = 512;
1853         be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
1854         cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
1855             0 : (be_lun->size_blocks - 1);
1856
1857         us = ps = vattr.va_blocksize;
1858         uo = po = 0;
1859
1860         value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
1861         if (value != NULL)
1862                 ctl_expand_number(value, &ps);
1863         value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
1864         if (value != NULL)
1865                 ctl_expand_number(value, &po);
1866         pss = ps / cbe_lun->blocksize;
1867         pos = po / cbe_lun->blocksize;
1868         if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
1869             ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
1870                 cbe_lun->pblockexp = fls(pss) - 1;
1871                 cbe_lun->pblockoff = (pss - pos) % pss;
1872         }
1873
1874         value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
1875         if (value != NULL)
1876                 ctl_expand_number(value, &us);
1877         value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
1878         if (value != NULL)
1879                 ctl_expand_number(value, &uo);
1880         uss = us / cbe_lun->blocksize;
1881         uos = uo / cbe_lun->blocksize;
1882         if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
1883             ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
1884                 cbe_lun->ublockexp = fls(uss) - 1;
1885                 cbe_lun->ublockoff = (uss - uos) % uss;
1886         }
1887
1888         /*
1889          * Sanity check.  The media size has to be at least one
1890          * sector long.
1891          */
1892         if (be_lun->size_bytes < cbe_lun->blocksize) {
1893                 error = EINVAL;
1894                 snprintf(req->error_str, sizeof(req->error_str),
1895                          "file %s size %ju < block size %u", be_lun->dev_path,
1896                          (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
1897         }
1898
1899         cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
1900         return (error);
1901 }
1902
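/*
 * Finish opening a device backing store: pick the dispatch and LBA-status
 * routines (ZFS zvols get their own), validate the requested block size
 * against the device sector size, obtain the media size, stripe geometry
 * and BIO_DELETE support through the device ioctls, and fill in the
 * advertised transfer limits.
 */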
1903 static int
1904 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1905 {
1906         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1907         struct ctl_lun_create_params *params;
1908         struct cdevsw                *csw;
1909         struct cdev                  *dev;
1910         const char                   *value;
1911         int                           error, atomic, maxio, ref, unmap, tmp;
1912         off_t                         ps, pss, po, pos, us, uss, uo, uos, otmp;
1913
1914         params = &be_lun->params;
1915
1916         be_lun->dev_type = CTL_BE_BLOCK_DEV;
1917         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1918         if (csw == NULL)
1919                 return (ENXIO);
1920         if (strcmp(csw->d_name, "zvol") == 0) {
1921                 be_lun->dispatch = ctl_be_block_dispatch_zvol;
1922                 be_lun->get_lba_status = ctl_be_block_gls_zvol;
1923                 atomic = maxio = CTLBLK_MAX_IO_SIZE;
1924         } else {
1925                 be_lun->dispatch = ctl_be_block_dispatch_dev;
1926                 be_lun->get_lba_status = NULL;
1927                 atomic = 0;
1928                 maxio = dev->si_iosize_max;
1929                 if (maxio <= 0)
1930                         maxio = DFLTPHYS;
1931                 if (maxio > CTLBLK_MAX_IO_SIZE)
1932                         maxio = CTLBLK_MAX_IO_SIZE;
1933         }
1934         be_lun->lun_flush = ctl_be_block_flush_dev;
1935         be_lun->getattr = ctl_be_block_getattr_dev;
1936         be_lun->unmap = ctl_be_block_unmap_dev;
1937
1938         if (!csw->d_ioctl) {
1939                 dev_relthread(dev, ref);
1940                 snprintf(req->error_str, sizeof(req->error_str),
1941                          "no d_ioctl for device %s!", be_lun->dev_path);
1942                 return (ENODEV);
1943         }
1944
1945         error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
1946                                curthread);
1947         if (error) {
1948                 dev_relthread(dev, ref);
1949                 snprintf(req->error_str, sizeof(req->error_str),
1950                          "error %d returned for DIOCGSECTORSIZE ioctl "
1951                          "on %s!", error, be_lun->dev_path);
1952                 return (error);
1953         }
1954
1955         /*
1956          * If the user has asked for a blocksize that is greater than the
1957          * backing device's blocksize, we can do it only if the blocksize
1958          * the user is asking for is an even multiple of the underlying 
1959          * device's blocksize.
1960          */
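        /*
         * For example (hypothetical numbers): exporting 4096-byte logical
         * blocks on top of a device with 512-byte sectors is accepted
         * (4096 % 512 == 0), while a request for 520-byte blocks on the
         * same device is rejected below.
         */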
1961         if ((params->blocksize_bytes != 0) &&
1962             (params->blocksize_bytes >= tmp)) {
1963                 if (params->blocksize_bytes % tmp == 0) {
1964                         cbe_lun->blocksize = params->blocksize_bytes;
1965                 } else {
1966                         dev_relthread(dev, ref);
1967                         snprintf(req->error_str, sizeof(req->error_str),
1968                                  "requested blocksize %u is not an even "
1969                                  "multiple of backing device blocksize %u",
1970                                  params->blocksize_bytes, tmp);
1971                         return (EINVAL);
1972                 }
1973         } else if (params->blocksize_bytes != 0) {
1974                 dev_relthread(dev, ref);
1975                 snprintf(req->error_str, sizeof(req->error_str),
1976                          "requested blocksize %u < backing device "
1977                          "blocksize %u", params->blocksize_bytes, tmp);
1978                 return (EINVAL);
1979         } else if (cbe_lun->lun_type == T_CDROM)
1980                 cbe_lun->blocksize = MAX(tmp, 2048);
1981         else
1982                 cbe_lun->blocksize = tmp;
1983
1984         error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
1985                              curthread);
1986         if (error) {
1987                 dev_relthread(dev, ref);
1988                 snprintf(req->error_str, sizeof(req->error_str),
1989                          "error %d returned for DIOCGMEDIASIZE "
1990                          "ioctl on %s!", error,
1991                          be_lun->dev_path);
1992                 return (error);
1993         }
1994
1995         if (params->lun_size_bytes != 0) {
1996                 if (params->lun_size_bytes > otmp) {
1997                         dev_relthread(dev, ref);
1998                         snprintf(req->error_str, sizeof(req->error_str),
1999                                  "requested LUN size %ju > backing device "
2000                                  "size %ju",
2001                                  (uintmax_t)params->lun_size_bytes,
2002                                  (uintmax_t)otmp);
2003                         return (EINVAL);
2004                 }
2005
2006                 be_lun->size_bytes = params->lun_size_bytes;
2007         } else
2008                 be_lun->size_bytes = otmp;
2009         be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2010         cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2011             0 : (be_lun->size_blocks - 1);
2012
2013         error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
2014             curthread);
2015         if (error)
2016                 ps = po = 0;
2017         else {
2018                 error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
2019                     FREAD, curthread);
2020                 if (error)
2021                         po = 0;
2022         }
2023         us = ps;
2024         uo = po;
2025
2026         value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
2027         if (value != NULL)
2028                 ctl_expand_number(value, &ps);
2029         value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
2030         if (value != NULL)
2031                 ctl_expand_number(value, &po);
2032         pss = ps / cbe_lun->blocksize;
2033         pos = po / cbe_lun->blocksize;
2034         if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
2035             ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
2036                 cbe_lun->pblockexp = fls(pss) - 1;
2037                 cbe_lun->pblockoff = (pss - pos) % pss;
2038         }
2039
2040         value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
2041         if (value != NULL)
2042                 ctl_expand_number(value, &us);
2043         value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
2044         if (value != NULL)
2045                 ctl_expand_number(value, &uo);
2046         uss = us / cbe_lun->blocksize;
2047         uos = uo / cbe_lun->blocksize;
2048         if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
2049             ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
2050                 cbe_lun->ublockexp = fls(uss) - 1;
2051                 cbe_lun->ublockoff = (uss - uos) % uss;
2052         }
2053
2054         cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
2055         cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
2056
2057         if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
2058                 unmap = 1;
2059         } else {
2060                 struct diocgattr_arg    arg;
2061
2062                 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
2063                 arg.len = sizeof(arg.value.i);
2064                 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
2065                     curthread);
2066                 unmap = (error == 0) ? arg.value.i : 0;
2067         }
2068         value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
2069         if (value != NULL)
2070                 unmap = (strcmp(value, "on") == 0);
2071         if (unmap)
2072                 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
2073         else
2074                 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
2075
2076         dev_relthread(dev, ref);
2077         return (0);
2078 }
2079
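/*
 * Close the backing vnode, if any, releasing the file credentials held by
 * the file backend and resetting the backend type to none.
 */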
2080 static int
2081 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2082 {
2083         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2084         int flags;
2085
2086         if (be_lun->vn) {
2087                 flags = FREAD;
2088                 if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2089                         flags |= FWRITE;
2090                 (void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2091                 be_lun->vn = NULL;
2092
2093                 switch (be_lun->dev_type) {
2094                 case CTL_BE_BLOCK_DEV:
2095                         break;
2096                 case CTL_BE_BLOCK_FILE:
2097                         if (be_lun->backend.file.cred != NULL) {
2098                                 crfree(be_lun->backend.file.cred);
2099                                 be_lun->backend.file.cred = NULL;
2100                         }
2101                         break;
2102                 case CTL_BE_BLOCK_NONE:
2103                         break;
2104                 default:
2105                         panic("Unexpected backend type %d", be_lun->dev_type);
2106                         break;
2107                 }
2108                 be_lun->dev_type = CTL_BE_BLOCK_NONE;
2109         }
2110         return (0);
2111 }
2112
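/*
 * Open the backing store named by the "file" option: resolve the path
 * (retrying relative names under /dev/ and falling back to read-only on
 * EROFS/EACCES), then hand off to the device or plain-file open routine
 * and set the serialization policy.
 */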
2113 static int
2114 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
2115 {
2116         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2117         struct nameidata nd;
2118         const char      *value;
2119         int              error, flags;
2120
2121         error = 0;
2122         if (rootvnode == NULL) {
2123                 snprintf(req->error_str, sizeof(req->error_str),
2124                          "Root filesystem is not mounted");
2125                 return (1);
2126         }
2127         pwd_ensure_dirs();
2128
2129         value = dnvlist_get_string(cbe_lun->options, "file", NULL);
2130         if (value == NULL) {
2131                 snprintf(req->error_str, sizeof(req->error_str),
2132                          "no file argument specified");
2133                 return (1);
2134         }
2135         free(be_lun->dev_path, M_CTLBLK);
2136         be_lun->dev_path = strdup(value, M_CTLBLK);
2137
2138         flags = FREAD;
2139         value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
2140         if (value != NULL) {
2141                 if (strcmp(value, "on") != 0)
2142                         flags |= FWRITE;
2143         } else if (cbe_lun->lun_type == T_DIRECT)
2144                 flags |= FWRITE;
2145
2146 again:
2147         NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
2148         error = vn_open(&nd, &flags, 0, NULL);
2149         if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
2150                 flags &= ~FWRITE;
2151                 goto again;
2152         }
2153         if (error) {
2154                 /*
2155                  * This is the only reasonable guess we can make about the
2156                  * path if the user doesn't give us a fully qualified one.
2157                  * If they want to specify a file, they need to specify the
2158                  * full path.
2159                  */
2160                 if (be_lun->dev_path[0] != '/') {
2161                         char *dev_name;
2162
2163                         asprintf(&dev_name, M_CTLBLK, "/dev/%s",
2164                                 be_lun->dev_path);
2165                         free(be_lun->dev_path, M_CTLBLK);
2166                         be_lun->dev_path = dev_name;
2167                         goto again;
2168                 }
2169                 snprintf(req->error_str, sizeof(req->error_str),
2170                     "error opening %s: %d", be_lun->dev_path, error);
2171                 return (error);
2172         }
2173         if (flags & FWRITE)
2174                 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
2175         else
2176                 cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
2177
2178         NDFREE(&nd, NDF_ONLY_PNBUF);
2179         be_lun->vn = nd.ni_vp;
2180
2181         /* We only support disks and files. */
2182         if (vn_isdisk(be_lun->vn, &error)) {
2183                 error = ctl_be_block_open_dev(be_lun, req);
2184         } else if (be_lun->vn->v_type == VREG) {
2185                 error = ctl_be_block_open_file(be_lun, req);
2186         } else {
2187                 error = EINVAL;
2188                 snprintf(req->error_str, sizeof(req->error_str),
2189                          "%s is not a disk or plain file", be_lun->dev_path);
2190         }
2191         VOP_UNLOCK(be_lun->vn);
2192
2193         if (error != 0)
2194                 ctl_be_block_close(be_lun);
2195         cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2196         if (be_lun->dispatch != ctl_be_block_dispatch_dev)
2197                 cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2198         value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
2199         if (value != NULL && strcmp(value, "on") == 0)
2200                 cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
2201         else if (value != NULL && strcmp(value, "read") == 0)
2202                 cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2203         else if (value != NULL && strcmp(value, "off") == 0)
2204                 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2205         return (0);
2206 }
2207
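/*
 * Create a new block backend LUN from a CTL_LUNREQ_CREATE request: allocate
 * the per-LUN state and UMA zone, open the backing store (when this node is
 * primary or HA mode is serialize-only), start the worker taskqueue, register
 * the LUN with CTL and wait for its configuration to complete.
 */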
2208 static int
2209 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2210 {
2211         struct ctl_be_lun *cbe_lun;
2212         struct ctl_be_block_lun *be_lun;
2213         struct ctl_lun_create_params *params;
2215         char tmpstr[32];
2216         const char *value;
2217         int retval, num_threads;
2218         int tmp_num_threads;
2219
2220         params = &req->reqdata.create;
2221         retval = 0;
2222         req->status = CTL_LUN_OK;
2223
2224         be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2225         cbe_lun = &be_lun->cbe_lun;
2226         cbe_lun->be_lun = be_lun;
2227         be_lun->params = req->reqdata.create;
2228         be_lun->softc = softc;
2229         STAILQ_INIT(&be_lun->input_queue);
2230         STAILQ_INIT(&be_lun->config_read_queue);
2231         STAILQ_INIT(&be_lun->config_write_queue);
2232         STAILQ_INIT(&be_lun->datamove_queue);
2233         sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
2234         mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF);
2235         mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF);
2236         cbe_lun->options = nvlist_clone(req->args_nvl);
2237         be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
2238             NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2239         if (be_lun->lun_zone == NULL) {
2240                 snprintf(req->error_str, sizeof(req->error_str),
2241                          "error allocating UMA zone");
2242                 goto bailout_error;
2243         }
2244
2245         if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2246                 cbe_lun->lun_type = params->device_type;
2247         else
2248                 cbe_lun->lun_type = T_DIRECT;
2249         be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
2250         cbe_lun->flags = 0;
2251         value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2252         if (value != NULL) {
2253                 if (strcmp(value, "primary") == 0)
2254                         cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2255         } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2256                 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2257
2258         if (cbe_lun->lun_type == T_DIRECT ||
2259             cbe_lun->lun_type == T_CDROM) {
2260                 be_lun->size_bytes = params->lun_size_bytes;
2261                 if (params->blocksize_bytes != 0)
2262                         cbe_lun->blocksize = params->blocksize_bytes;
2263                 else if (cbe_lun->lun_type == T_CDROM)
2264                         cbe_lun->blocksize = 2048;
2265                 else
2266                         cbe_lun->blocksize = 512;
2267                 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2268                 cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2269                     0 : (be_lun->size_blocks - 1);
2270
2271                 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2272                     control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2273                         retval = ctl_be_block_open(be_lun, req);
2274                         if (retval != 0) {
2275                                 retval = 0;
2276                                 req->status = CTL_LUN_WARNING;
2277                         }
2278                 }
2279                 num_threads = cbb_num_threads;
2280         } else {
2281                 num_threads = 1;
2282         }
2283
2284         value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL);
2285         if (value != NULL) {
2286                 tmp_num_threads = strtol(value, NULL, 0);
2287
2288                 /*
2289                  * We don't let the user specify less than one
2290                  * thread, but hope he's clueful enough not to
2291                  * specify 1000 threads.
2292                  */
2293                 if (tmp_num_threads < 1) {
2294                         snprintf(req->error_str, sizeof(req->error_str),
2295                                  "invalid number of threads %s",
2296                                  value);
2297                         goto bailout_error;
2298                 }
2299                 num_threads = tmp_num_threads;
2300         }
2301
2302         if (be_lun->vn == NULL)
2303                 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2304         /* Tell the user the blocksize we ended up using */
2305         params->lun_size_bytes = be_lun->size_bytes;
2306         params->blocksize_bytes = cbe_lun->blocksize;
2307         if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2308                 cbe_lun->req_lun_id = params->req_lun_id;
2309                 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2310         } else
2311                 cbe_lun->req_lun_id = 0;
2312
2313         cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2314         cbe_lun->lun_config_status = ctl_be_block_lun_config_status;
2315         cbe_lun->be = &ctl_be_block_driver;
2316
2317         if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2318                 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
2319                          softc->num_luns);
2320                 strncpy((char *)cbe_lun->serial_num, tmpstr,
2321                         MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2322
2323                 /* Tell the user what we used for a serial number */
2324                 strncpy((char *)params->serial_num, tmpstr,
2325                         MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2326         } else { 
2327                 strncpy((char *)cbe_lun->serial_num, params->serial_num,
2328                         MIN(sizeof(cbe_lun->serial_num),
2329                         sizeof(params->serial_num)));
2330         }
2331         if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2332                 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
2333                 strncpy((char *)cbe_lun->device_id, tmpstr,
2334                         MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2335
2336                 /* Tell the user what we used for a device ID */
2337                 strncpy((char *)params->device_id, tmpstr,
2338                         MIN(sizeof(params->device_id), sizeof(tmpstr)));
2339         } else {
2340                 strncpy((char *)cbe_lun->device_id, params->device_id,
2341                         MIN(sizeof(cbe_lun->device_id),
2342                             sizeof(params->device_id)));
2343         }
2344
2345         TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2346
2347         be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
2348             taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2349
2350         if (be_lun->io_taskqueue == NULL) {
2351                 snprintf(req->error_str, sizeof(req->error_str),
2352                          "unable to create taskqueue");
2353                 goto bailout_error;
2354         }
2355
2356         /*
2357          * Note that we start the same number of threads by default for
2358          * both the file case and the block device case.  For the file
2359          * case, we need multiple threads to allow concurrency, because the
2360          * vnode interface is designed to be a blocking interface.  For the
2361          * block device case, ZFS zvols at least will block the caller's
2362          * context in many instances, and so we need multiple threads to
2363          * overcome that problem.  Other block devices don't need as many
2364          * threads, but they shouldn't cause too many problems.
2365          *
2366          * If the user wants to just have a single thread for a block
2367          * device, he can specify that when the LUN is created, or change
2368          * the tunable/sysctl to alter the default number of threads.
2369          */
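        /*
         * Illustrative example (hypothetical paths and values): the per-LUN
         * thread count parsed from the "num_threads" option above might be
         * set at creation time with something like
         *
         *     ctladm create -b block -o file=/dev/zvol/tank/vol0 -o num_threads=32
         *
         * while the default comes from the cbb_num_threads tunable.
         */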
2370         retval = taskqueue_start_threads(&be_lun->io_taskqueue,
2371                                          /*num threads*/num_threads,
2372                                          /*priority*/PUSER,
2373                                          /*thread name*/
2374                                          "%s taskq", be_lun->lunname);
2375
2376         if (retval != 0)
2377                 goto bailout_error;
2378
2379         be_lun->num_threads = num_threads;
2380
2381         mtx_lock(&softc->lock);
2382         softc->num_luns++;
2383         STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2384
2385         mtx_unlock(&softc->lock);
2386
2387         retval = ctl_add_lun(&be_lun->cbe_lun);
2388         if (retval != 0) {
2389                 mtx_lock(&softc->lock);
2390                 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2391                               links);
2392                 softc->num_luns--;
2393                 mtx_unlock(&softc->lock);
2394                 snprintf(req->error_str, sizeof(req->error_str),
2395                          "ctl_add_lun() returned error %d, see dmesg for "
2396                          "details", retval);
2397                 retval = 0;
2398                 goto bailout_error;
2399         }
2400
2401         mtx_lock(&softc->lock);
2402
2403         /*
2404          * Tell the config_status routine that we're waiting so it won't
2405          * clean up the LUN in the event of an error.
2406          */
2407         be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2408
2409         while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2410                 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2411                 if (retval == EINTR)
2412                         break;
2413         }
2414         be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2415
2416         if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2417                 snprintf(req->error_str, sizeof(req->error_str),
2418                          "LUN configuration error, see dmesg for details");
2419                 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2420                               links);
2421                 softc->num_luns--;
2422                 mtx_unlock(&softc->lock);
2423                 goto bailout_error;
2424         } else {
2425                 params->req_lun_id = cbe_lun->lun_id;
2426         }
2427
2428         mtx_unlock(&softc->lock);
2429
2430         be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2431                                                cbe_lun->blocksize,
2432                                                DEVSTAT_ALL_SUPPORTED,
2433                                                cbe_lun->lun_type
2434                                                | DEVSTAT_TYPE_IF_OTHER,
2435                                                DEVSTAT_PRIORITY_OTHER);
2436
2437         return (retval);
2438
2439 bailout_error:
2440         req->status = CTL_LUN_ERROR;
2441
2442         if (be_lun->io_taskqueue != NULL)
2443                 taskqueue_free(be_lun->io_taskqueue);
2444         ctl_be_block_close(be_lun);
2445         if (be_lun->dev_path != NULL)
2446                 free(be_lun->dev_path, M_CTLBLK);
2447         if (be_lun->lun_zone != NULL)
2448                 uma_zdestroy(be_lun->lun_zone);
2449         nvlist_destroy(cbe_lun->options);
2450         mtx_destroy(&be_lun->queue_lock);
2451         mtx_destroy(&be_lun->io_lock);
2452         free(be_lun, M_CTLBLK);
2453
2454         return (retval);
2455 }
2456
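/*
 * Remove a block backend LUN: disable it, drain outstanding work and close
 * the backing store, invalidate the LUN in CTL, wait for it to be released,
 * and free all per-LUN resources.
 */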
2457 static int
2458 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2459 {
2460         struct ctl_lun_rm_params *params;
2461         struct ctl_be_block_lun *be_lun;
2462         struct ctl_be_lun *cbe_lun;
2463         int retval;
2464
2465         params = &req->reqdata.rm;
2466
2467         mtx_lock(&softc->lock);
2468         STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2469                 if (be_lun->cbe_lun.lun_id == params->lun_id)
2470                         break;
2471         }
2472         mtx_unlock(&softc->lock);
2473         if (be_lun == NULL) {
2474                 snprintf(req->error_str, sizeof(req->error_str),
2475                          "LUN %u is not managed by the block backend",
2476                          params->lun_id);
2477                 goto bailout_error;
2478         }
2479         cbe_lun = &be_lun->cbe_lun;
2480
2481         retval = ctl_disable_lun(cbe_lun);
2482         if (retval != 0) {
2483                 snprintf(req->error_str, sizeof(req->error_str),
2484                          "error %d returned from ctl_disable_lun() for "
2485                          "LUN %d", retval, params->lun_id);
2486                 goto bailout_error;
2487         }
2488
2489         if (be_lun->vn != NULL) {
2490                 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2491                 ctl_lun_no_media(cbe_lun);
2492                 taskqueue_drain_all(be_lun->io_taskqueue);
2493                 ctl_be_block_close(be_lun);
2494         }
2495
2496         retval = ctl_invalidate_lun(cbe_lun);
2497         if (retval != 0) {
2498                 snprintf(req->error_str, sizeof(req->error_str),
2499                          "error %d returned from ctl_invalidate_lun() for "
2500                          "LUN %d", retval, params->lun_id);
2501                 goto bailout_error;
2502         }
2503
2504         mtx_lock(&softc->lock);
2505         be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2506         while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2507                 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2508                 if (retval == EINTR)
2509                         break;
2510         }
2511         be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2512
2513         if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2514                 snprintf(req->error_str, sizeof(req->error_str),
2515                          "interrupted waiting for LUN to be freed");
2516                 mtx_unlock(&softc->lock);
2517                 goto bailout_error;
2518         }
2519
2520         STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);
2521
2522         softc->num_luns--;
2523         mtx_unlock(&softc->lock);
2524
2525         taskqueue_drain_all(be_lun->io_taskqueue);
2526         taskqueue_free(be_lun->io_taskqueue);
2527
2528         if (be_lun->disk_stats != NULL)
2529                 devstat_remove_entry(be_lun->disk_stats);
2530
2531         uma_zdestroy(be_lun->lun_zone);
2532
2533         nvlist_destroy(cbe_lun->options);
2534         free(be_lun->dev_path, M_CTLBLK);
2535         mtx_destroy(&be_lun->queue_lock);
2536         mtx_destroy(&be_lun->io_lock);
2537         free(be_lun, M_CTLBLK);
2538
2539         req->status = CTL_LUN_OK;
2540         return (0);
2541
2542 bailout_error:
2543         req->status = CTL_LUN_ERROR;
2544         return (0);
2545 }
2546
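/*
 * Modify an existing LUN: refresh its options, adjust the HA role, reopen
 * or close the backing store as the new role requires, and notify CTL if
 * the capacity changed.
 */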
2547 static int
2548 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2549 {
2550         struct ctl_lun_modify_params *params;
2551         struct ctl_be_block_lun *be_lun;
2552         struct ctl_be_lun *cbe_lun;
2553         const char *value;
2554         uint64_t oldsize;
2555         int error, wasprim;
2556
2557         params = &req->reqdata.modify;
2558
2559         mtx_lock(&softc->lock);
2560         STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2561                 if (be_lun->cbe_lun.lun_id == params->lun_id)
2562                         break;
2563         }
2564         mtx_unlock(&softc->lock);
2565         if (be_lun == NULL) {
2566                 snprintf(req->error_str, sizeof(req->error_str),
2567                          "LUN %u is not managed by the block backend",
2568                          params->lun_id);
2569                 goto bailout_error;
2570         }
2571         cbe_lun = &be_lun->cbe_lun;
2572
2573         if (params->lun_size_bytes != 0)
2574                 be_lun->params.lun_size_bytes = params->lun_size_bytes;
2575
2576         nvlist_destroy(cbe_lun->options);
2577         cbe_lun->options = nvlist_clone(req->args_nvl);
2578
2579         wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
2580         value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2581         if (value != NULL) {
2582                 if (strcmp(value, "primary") == 0)
2583                         cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2584                 else
2585                         cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2586         } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2587                 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2588         else
2589                 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2590         if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
2591                 if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
2592                         ctl_lun_primary(cbe_lun);
2593                 else
2594                         ctl_lun_secondary(cbe_lun);
2595         }
2596
2597         oldsize = be_lun->size_blocks;
2598         if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2599             control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2600                 if (be_lun->vn == NULL)
2601                         error = ctl_be_block_open(be_lun, req);
2602                 else if (vn_isdisk(be_lun->vn, &error))
2603                         error = ctl_be_block_open_dev(be_lun, req);
2604                 else if (be_lun->vn->v_type == VREG) {
2605                         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2606                         error = ctl_be_block_open_file(be_lun, req);
2607                         VOP_UNLOCK(be_lun->vn);
2608                 } else
2609                         error = EINVAL;
2610                 if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
2611                     be_lun->vn != NULL) {
2612                         cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2613                         ctl_lun_has_media(cbe_lun);
2614                 } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
2615                     be_lun->vn == NULL) {
2616                         cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2617                         ctl_lun_no_media(cbe_lun);
2618                 }
2619                 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2620         } else {
2621                 if (be_lun->vn != NULL) {
2622                         cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2623                         ctl_lun_no_media(cbe_lun);
2624                         taskqueue_drain_all(be_lun->io_taskqueue);
2625                         error = ctl_be_block_close(be_lun);
2626                 } else
2627                         error = 0;
2628         }
2629         if (be_lun->size_blocks != oldsize)
2630                 ctl_lun_capacity_changed(cbe_lun);
2631
2632         /* Tell the user the exact size we ended up using */
2633         params->lun_size_bytes = be_lun->size_bytes;
2634
2635         req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
2636         return (0);
2637
2638 bailout_error:
2639         req->status = CTL_LUN_ERROR;
2640         return (0);
2641 }
2642
2643 static void
2644 ctl_be_block_lun_shutdown(void *be_lun)
2645 {
2646         struct ctl_be_block_lun *lun = be_lun;
2647         struct ctl_be_block_softc *softc = lun->softc;
2648
2649         mtx_lock(&softc->lock);
2650         lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2651         if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2652                 wakeup(lun);
2653         mtx_unlock(&softc->lock);
2654 }
2655
2656 static void
2657 ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2658 {
2659         struct ctl_be_block_lun *lun;
2660         struct ctl_be_block_softc *softc;
2661
2662         lun = (struct ctl_be_block_lun *)be_lun;
2663         softc = lun->softc;
2664
2665         if (status == CTL_LUN_CONFIG_OK) {
2666                 mtx_lock(&softc->lock);
2667                 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2668                 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2669                         wakeup(lun);
2670                 mtx_unlock(&softc->lock);
2671
2672                 /*
2673                  * We successfully added the LUN, attempt to enable it.
2674                  */
2675                 if (ctl_enable_lun(&lun->cbe_lun) != 0) {
2676                         printf("%s: ctl_enable_lun() failed!\n", __func__);
2677                         if (ctl_invalidate_lun(&lun->cbe_lun) != 0) {
2678                                 printf("%s: ctl_invalidate_lun() failed!\n",
2679                                        __func__);
2680                         }
2681                 }
2682
2683                 return;
2684         }
2685
2686
2687         mtx_lock(&softc->lock);
2688         lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2689         lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2690         wakeup(lun);
2691         mtx_unlock(&softc->lock);
2692 }
2693
2694
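/*
 * Handle configuration (non-data) writes: cache sync, WRITE SAME and UNMAP
 * are queued to the worker thread, while START STOP UNIT and PREVENT ALLOW
 * are completed inline.
 */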
2695 static int
2696 ctl_be_block_config_write(union ctl_io *io)
2697 {
2698         struct ctl_be_block_lun *be_lun;
2699         struct ctl_be_lun *cbe_lun;
2700         int retval;
2701
2702         DPRINTF("entered\n");
2703
2704         cbe_lun = CTL_BACKEND_LUN(io);
2705         be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2706
2707         retval = 0;
2708         switch (io->scsiio.cdb[0]) {
2709         case SYNCHRONIZE_CACHE:
2710         case SYNCHRONIZE_CACHE_16:
2711         case WRITE_SAME_10:
2712         case WRITE_SAME_16:
2713         case UNMAP:
2714                 /*
2715                  * The upper level CTL code will filter out any CDBs with
2716                  * the immediate bit set and return the proper error.
2717                  *
2718                  * We don't really need to worry about what LBA range the
2719                  * user asked to be synced out.  When they issue a sync
2720                  * cache command, we'll sync out the whole thing.
2721                  */
2722                 mtx_lock(&be_lun->queue_lock);
2723                 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2724                                    links);
2725                 mtx_unlock(&be_lun->queue_lock);
2726                 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2727                 break;
2728         case START_STOP_UNIT: {
2729                 struct scsi_start_stop_unit *cdb;
2730                 struct ctl_lun_req req;
2731
2732                 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2733                 if ((cdb->how & SSS_PC_MASK) != 0) {
2734                         ctl_set_success(&io->scsiio);
2735                         ctl_config_write_done(io);
2736                         break;
2737                 }
2738                 if (cdb->how & SSS_START) {
2739                         if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
2740                                 retval = ctl_be_block_open(be_lun, &req);
2741                                 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2742                                 if (retval == 0) {
2743                                         cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2744                                         ctl_lun_has_media(cbe_lun);
2745                                 } else {
2746                                         cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2747                                         ctl_lun_no_media(cbe_lun);
2748                                 }
2749                         }
2750                         ctl_start_lun(cbe_lun);
2751                 } else {
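                     /*
                      * Stop request.  Stop the LUN and, if ejection was
                      * requested, drop the media and close the backing store.
                      */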
2752                         ctl_stop_lun(cbe_lun);
2753                         if (cdb->how & SSS_LOEJ) {
2754                                 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2755                                 cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
2756                                 ctl_lun_ejected(cbe_lun);
2757                                 if (be_lun->vn != NULL)
2758                                         ctl_be_block_close(be_lun);
2759                         }
2760                 }
2761
2762                 ctl_set_success(&io->scsiio);
2763                 ctl_config_write_done(io);
2764                 break;
2765         }
2766         case PREVENT_ALLOW:
2767                 ctl_set_success(&io->scsiio);
2768                 ctl_config_write_done(io);
2769                 break;
2770         default:
2771                 ctl_set_invalid_opcode(&io->scsiio);
2772                 ctl_config_write_done(io);
2773                 retval = CTL_RETVAL_COMPLETE;
2774                 break;
2775         }
2776
2777         return (retval);
2778 }
2779
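     /*
      * Handle configuration reads for this LUN.  Only SERVICE ACTION IN
      * with the GET LBA STATUS service action is supported; it is placed
      * on config_read_queue and handed to the backend task queue.  Any
      * other service action or opcode is completed with an error.
      */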
2780 static int
2781 ctl_be_block_config_read(union ctl_io *io)
2782 {
2783         struct ctl_be_block_lun *be_lun;
2784         struct ctl_be_lun *cbe_lun;
2785         int retval = 0;
2786
2787         DPRINTF("entered\n");
2788
2789         cbe_lun = CTL_BACKEND_LUN(io);
2790         be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2791
2792         switch (io->scsiio.cdb[0]) {
2793         case SERVICE_ACTION_IN:
2794                 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2795                         mtx_lock(&be_lun->queue_lock);
2796                         STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2797                             &io->io_hdr, links);
2798                         mtx_unlock(&be_lun->queue_lock);
2799                         taskqueue_enqueue(be_lun->io_taskqueue,
2800                             &be_lun->io_task);
2801                         retval = CTL_RETVAL_QUEUED;
2802                         break;
2803                 }
2804                 ctl_set_invalid_field(&io->scsiio,
2805                                       /*sks_valid*/ 1,
2806                                       /*command*/ 1,
2807                                       /*field*/ 1,
2808                                       /*bit_valid*/ 1,
2809                                       /*bit*/ 4);
2810                 ctl_config_read_done(io);
2811                 retval = CTL_RETVAL_COMPLETE;
2812                 break;
2813         default:
2814                 ctl_set_invalid_opcode(&io->scsiio);
2815                 ctl_config_read_done(io);
2816                 retval = CTL_RETVAL_COMPLETE;
2817                 break;
2818         }
2819
2820         return (retval);
2821 }
2822
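     /*
      * Append backend-specific LUN details as XML to the supplied sbuf;
      * currently just the number of threads serving this LUN.  Returns
      * the first sbuf_printf() error, if any.
      */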
2823 static int
2824 ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2825 {
2826         struct ctl_be_block_lun *lun;
2827         int retval;
2828
2829         lun = (struct ctl_be_block_lun *)be_lun;
2830
2831         retval = sbuf_printf(sb, "\t<num_threads>");
2832         if (retval != 0)
2833                 goto bailout;
2834         retval = sbuf_printf(sb, "%d", lun->num_threads);
2835         if (retval != 0)
2836                 goto bailout;
2837         retval = sbuf_printf(sb, "</num_threads>\n");
2838
2839 bailout:
2840         return (retval);
2841 }
2842
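     /*
      * Return the value of the named attribute for this LUN, or
      * UINT64_MAX if the backing store does not provide a getattr method.
      */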
2843 static uint64_t
2844 ctl_be_block_lun_attr(void *be_lun, const char *attrname)
2845 {
2846         struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun;
2847
2848         if (lun->getattr == NULL)
2849                 return (UINT64_MAX);
2850         return (lun->getattr(lun, attrname));
2851 }
2852
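     /*
      * Backend initialization: set up the softc mutex, create the UMA
      * zone used for ctl_be_block_io descriptors and initialize the
      * LUN list.
      */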
2853 static int
2854 ctl_be_block_init(void)
2855 {
2856         struct ctl_be_block_softc *softc = &backend_block_softc;
2857
2858         mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2859         softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2860             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2861         STAILQ_INIT(&softc->lun_list);
2862         return (0);
2863 }
2864
2865
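     /*
      * Backend shutdown: disable and invalidate every LUN still on the
      * list, then destroy the beio UMA zone and the softc mutex.
      */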
2866 static int
2867 ctl_be_block_shutdown(void)
2868 {
2869         struct ctl_be_block_softc *softc = &backend_block_softc;
2870         struct ctl_be_block_lun *lun, *next_lun;
2871
2872         mtx_lock(&softc->lock);
2873         STAILQ_FOREACH_SAFE(lun, &softc->lun_list, links, next_lun) {
2874                 /*
2875                  * Drop our lock here.  Since ctl_invalidate_lun() can call
2876                  * back into us, this could lead to a recursive lock of the
2877                  * same mutex, which would cause a hang.
2878                  */
2879                 mtx_unlock(&softc->lock);
2880                 ctl_disable_lun(&lun->cbe_lun);
2881                 ctl_invalidate_lun(&lun->cbe_lun);
2882                 mtx_lock(&softc->lock);
2883         }
2884         mtx_unlock(&softc->lock);
2885
2886         uma_zdestroy(softc->beio_zone);
2887         mtx_destroy(&softc->lock);
2888         return (0);
2889 }