1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2003 Silicon Graphics International Corp.
5  * Copyright (c) 2009-2011 Spectra Logic Corporation
6  * Copyright (c) 2012 The FreeBSD Foundation
7  * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Edward Tomasz Napierala
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions, and the following disclaimer,
18  *    without modification.
19  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
20  *    substantially similar to the "NO WARRANTY" disclaimer below
21  *    ("Disclaimer") and any redistribution must be conditioned upon
22  *    including a substantially similar Disclaimer requirement for further
23  *    binary redistribution.
24  *
25  * NO WARRANTY
26  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
29  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGES.
37  *
38  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
39  */
40 /*
41  * CAM Target Layer driver backend for block devices.
42  *
43  * Author: Ken Merry <ken@FreeBSD.org>
44  */
45 #include <sys/cdefs.h>
46 __FBSDID("$FreeBSD$");
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/types.h>
52 #include <sys/kthread.h>
53 #include <sys/bio.h>
54 #include <sys/fcntl.h>
55 #include <sys/limits.h>
56 #include <sys/lock.h>
57 #include <sys/mutex.h>
58 #include <sys/condvar.h>
59 #include <sys/malloc.h>
60 #include <sys/conf.h>
61 #include <sys/ioccom.h>
62 #include <sys/queue.h>
63 #include <sys/sbuf.h>
64 #include <sys/endian.h>
65 #include <sys/uio.h>
66 #include <sys/buf.h>
67 #include <sys/taskqueue.h>
68 #include <sys/vnode.h>
69 #include <sys/namei.h>
70 #include <sys/mount.h>
71 #include <sys/disk.h>
72 #include <sys/fcntl.h>
73 #include <sys/filedesc.h>
74 #include <sys/filio.h>
75 #include <sys/proc.h>
76 #include <sys/pcpu.h>
77 #include <sys/module.h>
78 #include <sys/sdt.h>
79 #include <sys/devicestat.h>
80 #include <sys/sysctl.h>
81 #include <sys/nv.h>
82 #include <sys/dnv.h>
83
84 #include <geom/geom.h>
85
86 #include <cam/cam.h>
87 #include <cam/scsi/scsi_all.h>
88 #include <cam/scsi/scsi_da.h>
89 #include <cam/ctl/ctl_io.h>
90 #include <cam/ctl/ctl.h>
91 #include <cam/ctl/ctl_backend.h>
92 #include <cam/ctl/ctl_ioctl.h>
93 #include <cam/ctl/ctl_ha.h>
94 #include <cam/ctl/ctl_scsi_all.h>
95 #include <cam/ctl/ctl_private.h>
96 #include <cam/ctl/ctl_error.h>
97
98 /*
99  * The idea here is that we'll allocate enough S/G space to hold a 1MB
100  * I/O.  If we get an I/O larger than that, we'll split it.
101  */
102 #define CTLBLK_HALF_IO_SIZE     (512 * 1024)
103 #define CTLBLK_MAX_IO_SIZE      (CTLBLK_HALF_IO_SIZE * 2)
104 #define CTLBLK_MAX_SEG          MAXPHYS
105 #define CTLBLK_HALF_SEGS        MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
106 #define CTLBLK_MAX_SEGS         (CTLBLK_HALF_SEGS * 2)
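/*
 * Worked example, assuming the common MAXPHYS of 128KB: each S/G segment
 * is then at most 128KB, so CTLBLK_HALF_SEGS = 512KB / 128KB = 4 and
 * CTLBLK_MAX_SEGS = 8, which is enough to describe a full 1MB I/O.
 */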
107
108 #ifdef CTLBLK_DEBUG
109 #define DPRINTF(fmt, args...) \
110     printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
111 #else
112 #define DPRINTF(fmt, args...) do {} while(0)
113 #endif
114
115 #define PRIV(io)        \
116     ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
117 #define ARGS(io)        \
118     ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
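/*
 * PRIV() and ARGS() overlay backend-private and LBA/length/flags views on
 * the ctl_private[] scratch space in the I/O header; PRIV(io)->ptr is used
 * throughout this file to point back at the associated ctl_be_block_io.
 */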
119
120 SDT_PROVIDER_DEFINE(cbb);
121
122 typedef enum {
123         CTL_BE_BLOCK_LUN_UNCONFIGURED   = 0x01,
124         CTL_BE_BLOCK_LUN_CONFIG_ERR     = 0x02,
125         CTL_BE_BLOCK_LUN_WAITING        = 0x04,
126 } ctl_be_block_lun_flags;
127
128 typedef enum {
129         CTL_BE_BLOCK_NONE,
130         CTL_BE_BLOCK_DEV,
131         CTL_BE_BLOCK_FILE
132 } ctl_be_block_type;
133
134 struct ctl_be_block_filedata {
135         struct ucred *cred;
136 };
137
138 union ctl_be_block_bedata {
139         struct ctl_be_block_filedata file;
140 };
141
142 struct ctl_be_block_io;
143 struct ctl_be_block_lun;
144
145 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
146                                struct ctl_be_block_io *beio);
147 typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
148                                   const char *attrname);
149
150 /*
151  * Backend LUN structure.  There is a 1:1 mapping between a block device
152  * and a backend block LUN, and between a backend block LUN and a CTL LUN.
153  */
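/*
 * The dispatch, lun_flush, unmap, get_lba_status and getattr hooks below
 * are what distinguish file-backed from device-backed LUNs; they are
 * pointed at the ctl_be_block_*_file or ctl_be_block_*_dev variants
 * (declared later in this file) depending on the backing object.
 */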
154 struct ctl_be_block_lun {
155         struct ctl_lun_create_params params;
156         char lunname[32];
157         char *dev_path;
158         ctl_be_block_type dev_type;
159         struct vnode *vn;
160         union ctl_be_block_bedata backend;
161         cbb_dispatch_t dispatch;
162         cbb_dispatch_t lun_flush;
163         cbb_dispatch_t unmap;
164         cbb_dispatch_t get_lba_status;
165         cbb_getattr_t getattr;
166         uma_zone_t lun_zone;
167         uint64_t size_blocks;
168         uint64_t size_bytes;
169         struct ctl_be_block_softc *softc;
170         struct devstat *disk_stats;
171         ctl_be_block_lun_flags flags;
172         STAILQ_ENTRY(ctl_be_block_lun) links;
173         struct ctl_be_lun cbe_lun;
174         struct taskqueue *io_taskqueue;
175         struct task io_task;
176         int num_threads;
177         STAILQ_HEAD(, ctl_io_hdr) input_queue;
178         STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
179         STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
180         STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
181         struct mtx_padalign io_lock;
182         struct mtx_padalign queue_lock;
183 };
184
185 /*
186  * Overall softc structure for the block backend module.
187  */
188 struct ctl_be_block_softc {
189         struct mtx                       lock;
190         uma_zone_t                       beio_zone;
191         int                              num_luns;
192         STAILQ_HEAD(, ctl_be_block_lun)  lun_list;
193 };
194
195 static struct ctl_be_block_softc backend_block_softc;
196
197 /*
198  * Per-I/O information.
199  */
200 struct ctl_be_block_io {
201         union ctl_io                    *io;
202         struct ctl_sg_entry             sg_segs[CTLBLK_MAX_SEGS];
203         struct iovec                    xiovecs[CTLBLK_MAX_SEGS];
204         int                             bio_cmd;
205         int                             num_segs;
206         int                             num_bios_sent;
207         int                             num_bios_done;
208         int                             send_complete;
209         int                             first_error;
210         uint64_t                        first_error_offset;
211         struct bintime                  ds_t0;
212         devstat_tag_type                ds_tag_type;
213         devstat_trans_flags             ds_trans_type;
214         uint64_t                        io_len;
215         uint64_t                        io_offset;
216         int                             io_arg;
217         struct ctl_be_block_softc       *softc;
218         struct ctl_be_block_lun         *lun;
219         void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
220 };
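/*
 * For COMPARE commands, sg_segs[] is used as two halves: entry i and entry
 * i + CTLBLK_HALF_SEGS hold the two data sets being compared (backing-store
 * data and initiator data), so ctl_be_block_compare() and ctl_free_beio()
 * can walk both lists in parallel.
 */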
221
222 extern struct ctl_softc *control_softc;
223
224 static int cbb_num_threads = 14;
225 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
226             "CAM Target Layer Block Backend");
227 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
228            &cbb_num_threads, 0, "Number of threads per backing file");
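/*
 * cbb_num_threads is the worker thread count used for each LUN's taskqueue
 * (see num_threads and io_taskqueue above).  Because of CTLFLAG_RWTUN it
 * can be set as a loader tunable or changed at runtime through the
 * kern.cam.ctl.block.num_threads sysctl.
 */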
229
230 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
231 static void ctl_free_beio(struct ctl_be_block_io *beio);
232 static void ctl_complete_beio(struct ctl_be_block_io *beio);
233 static int ctl_be_block_move_done(union ctl_io *io);
234 static void ctl_be_block_biodone(struct bio *bio);
235 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
236                                     struct ctl_be_block_io *beio);
237 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
238                                        struct ctl_be_block_io *beio);
239 static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
240                                   struct ctl_be_block_io *beio);
241 static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
242                                          const char *attrname);
243 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
244                                    struct ctl_be_block_io *beio);
245 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
246                                    struct ctl_be_block_io *beio);
247 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
248                                       struct ctl_be_block_io *beio);
249 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
250                                          const char *attrname);
251 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
252                                     union ctl_io *io);
253 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
254                                     union ctl_io *io);
255 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
256                                   union ctl_io *io);
257 static void ctl_be_block_worker(void *context, int pending);
258 static int ctl_be_block_submit(union ctl_io *io);
259 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
260                                    int flag, struct thread *td);
261 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
262                                   struct ctl_lun_req *req);
263 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
264                                  struct ctl_lun_req *req);
265 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
266 static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
267                              struct ctl_lun_req *req);
268 static int ctl_be_block_create(struct ctl_be_block_softc *softc,
269                                struct ctl_lun_req *req);
270 static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
271                            struct ctl_lun_req *req);
272 static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
273                            struct ctl_lun_req *req);
274 static void ctl_be_block_lun_shutdown(void *be_lun);
275 static void ctl_be_block_lun_config_status(void *be_lun,
276                                            ctl_lun_config_status status);
277 static int ctl_be_block_config_write(union ctl_io *io);
278 static int ctl_be_block_config_read(union ctl_io *io);
279 static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
280 static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname);
281 static int ctl_be_block_init(void);
282 static int ctl_be_block_shutdown(void);
283
284 static struct ctl_backend_driver ctl_be_block_driver = 
285 {
286         .name = "block",
287         .flags = CTL_BE_FLAG_HAS_CONFIG,
288         .init = ctl_be_block_init,
289         .shutdown = ctl_be_block_shutdown,
290         .data_submit = ctl_be_block_submit,
291         .data_move_done = ctl_be_block_move_done,
292         .config_read = ctl_be_block_config_read,
293         .config_write = ctl_be_block_config_write,
294         .ioctl = ctl_be_block_ioctl,
295         .lun_info = ctl_be_block_lun_info,
296         .lun_attr = ctl_be_block_lun_attr
297 };
298
299 MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
300 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
301
302 static struct ctl_be_block_io *
303 ctl_alloc_beio(struct ctl_be_block_softc *softc)
304 {
305         struct ctl_be_block_io *beio;
306
307         beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
308         beio->softc = softc;
309         return (beio);
310 }
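/*
 * Since uma_zalloc() is called with M_WAITOK, this allocation may sleep;
 * beios are therefore only allocated from thread context (the dispatch
 * paths run by the per-LUN worker), never from a bio completion routine.
 */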
311
312 static void
313 ctl_free_beio(struct ctl_be_block_io *beio)
314 {
315         int duplicate_free;
316         int i;
317
318         duplicate_free = 0;
319
320         for (i = 0; i < beio->num_segs; i++) {
321                 if (beio->sg_segs[i].addr == NULL)
322                         duplicate_free++;
323
324                 uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
325                 beio->sg_segs[i].addr = NULL;
326
327                 /* For compare commands we allocated two equal S/G lists. */
328                 if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) {
329                         uma_zfree(beio->lun->lun_zone,
330                             beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
331                         beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL;
332                 }
333         }
334
335         if (duplicate_free > 0) {
336                 printf("%s: %d duplicate frees out of %d segments\n", __func__,
337                        duplicate_free, beio->num_segs);
338         }
339
340         uma_zfree(beio->softc->beio_zone, beio);
341 }
342
343 static void
344 ctl_complete_beio(struct ctl_be_block_io *beio)
345 {
346         union ctl_io *io = beio->io;
347
348         if (beio->beio_cont != NULL) {
349                 beio->beio_cont(beio);
350         } else {
351                 ctl_free_beio(beio);
352                 ctl_data_submit_done(io);
353         }
354 }
355
356 static size_t
357 cmp(uint8_t *a, uint8_t *b, size_t size)
358 {
359         size_t i;
360
361         for (i = 0; i < size; i++) {
362                 if (a[i] != b[i])
363                         break;
364         }
365         return (i);
366 }
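/*
 * cmp() returns the number of leading bytes that match, i.e. the index of
 * the first miscompare, or 'size' if the buffers are identical.
 * ctl_be_block_compare() accumulates these per-segment results to report
 * the exact offset of the first mismatch in the MISCOMPARE sense
 * information field.
 */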
367
368 static void
369 ctl_be_block_compare(union ctl_io *io)
370 {
371         struct ctl_be_block_io *beio;
372         uint64_t off, res;
373         int i;
374         uint8_t info[8];
375
376         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
377         off = 0;
378         for (i = 0; i < beio->num_segs; i++) {
379                 res = cmp(beio->sg_segs[i].addr,
380                     beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
381                     beio->sg_segs[i].len);
382                 off += res;
383                 if (res < beio->sg_segs[i].len)
384                         break;
385         }
386         if (i < beio->num_segs) {
387                 scsi_u64to8b(off, info);
388                 ctl_set_sense(&io->scsiio, /*current_error*/ 1,
389                     /*sense_key*/ SSD_KEY_MISCOMPARE,
390                     /*asc*/ 0x1D, /*ascq*/ 0x00,
391                     /*type*/ SSD_ELEM_INFO,
392                     /*size*/ sizeof(info), /*data*/ &info,
393                     /*type*/ SSD_ELEM_NONE);
394         } else
395                 ctl_set_success(&io->scsiio);
396 }
397
398 static int
399 ctl_be_block_move_done(union ctl_io *io)
400 {
401         struct ctl_be_block_io *beio;
402         struct ctl_be_block_lun *be_lun;
403         struct ctl_lba_len_flags *lbalen;
404 #ifdef CTL_TIME_IO
405         struct bintime cur_bt;
406 #endif
407
408         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
409         be_lun = beio->lun;
410
411         DPRINTF("entered\n");
412
413 #ifdef CTL_TIME_IO
414         getbinuptime(&cur_bt);
415         bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
416         bintime_add(&io->io_hdr.dma_bt, &cur_bt);
417 #endif
418         io->io_hdr.num_dmas++;
419         io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
420
421         /*
422          * We set status at this point for read commands, and write
423          * commands with errors.
424          */
425         if (io->io_hdr.flags & CTL_FLAG_ABORT) {
426                 ;
427         } else if ((io->io_hdr.port_status != 0) &&
428             ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
429              (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
430                 ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1,
431                     /*retry_count*/ io->io_hdr.port_status);
432         } else if (io->scsiio.kern_data_resid != 0 &&
433             (io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT &&
434             ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
435              (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
436                 ctl_set_invalid_field_ciu(&io->scsiio);
437         } else if ((io->io_hdr.port_status == 0) &&
438             ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
439                 lbalen = ARGS(beio->io);
440                 if (lbalen->flags & CTL_LLF_READ) {
441                         ctl_set_success(&io->scsiio);
442                 } else if (lbalen->flags & CTL_LLF_COMPARE) {
443                         /* We have two data blocks ready for comparison. */
444                         ctl_be_block_compare(io);
445                 }
446         }
447
448         /*
449          * If this is a read, or a write with errors, it is done.
450          */
451         if ((beio->bio_cmd == BIO_READ)
452          || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
453          || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
454                 ctl_complete_beio(beio);
455                 return (0);
456         }
457
458         /*
459          * At this point, we have a write and the DMA completed
460          * successfully.  We now have to queue it to the task queue to
461          * execute the backend I/O.  That is because we do blocking
462          * memory allocations, and, in the file-backed case, blocking I/O.
463          * This move done routine is generally called in the SIM's
464          * interrupt context, and therefore we cannot block.
465          */
466         mtx_lock(&be_lun->queue_lock);
467         STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
468         mtx_unlock(&be_lun->queue_lock);
469         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
470
471         return (0);
472 }
473
474 static void
475 ctl_be_block_biodone(struct bio *bio)
476 {
477         struct ctl_be_block_io *beio;
478         struct ctl_be_block_lun *be_lun;
479         union ctl_io *io;
480         int error;
481
482         beio = bio->bio_caller1;
483         be_lun = beio->lun;
484         io = beio->io;
485
486         DPRINTF("entered\n");
487
488         error = bio->bio_error;
489         mtx_lock(&be_lun->io_lock);
490         if (error != 0 &&
491             (beio->first_error == 0 ||
492              bio->bio_offset < beio->first_error_offset)) {
493                 beio->first_error = error;
494                 beio->first_error_offset = bio->bio_offset;
495         }
496
497         beio->num_bios_done++;
498
499         /*
500          * XXX KDM will this cause WITNESS to complain?  Holding a lock
501          * during the free might cause it to complain.
502          */
503         g_destroy_bio(bio);
504
505         /*
506          * If the send complete bit isn't set, or we aren't the last bio
507          * to complete, more bios are outstanding; just return for now.
508          */
509         if ((beio->send_complete == 0)
510          || (beio->num_bios_done < beio->num_bios_sent)) {
511                 mtx_unlock(&be_lun->io_lock);
512                 return;
513         }
514
515         /*
516          * At this point, we've verified that we are the last I/O to
517          * complete, so it's safe to drop the lock.
518          */
519         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
520             beio->ds_tag_type, beio->ds_trans_type,
521             /*now*/ NULL, /*then*/&beio->ds_t0);
522         mtx_unlock(&be_lun->io_lock);
523
524         /*
525          * If there are any errors from the backing device, we fail the
526          * entire I/O, mapping the errno to a suitable SCSI sense code.
527          */
528         error = beio->first_error;
529         if (error != 0) {
530                 if (error == EOPNOTSUPP) {
531                         ctl_set_invalid_opcode(&io->scsiio);
532                 } else if (error == ENOSPC || error == EDQUOT) {
533                         ctl_set_space_alloc_fail(&io->scsiio);
534                 } else if (error == EROFS || error == EACCES) {
535                         ctl_set_hw_write_protected(&io->scsiio);
536                 } else if (beio->bio_cmd == BIO_FLUSH) {
537                         /* XXX KDM is there a better error here? */
538                         ctl_set_internal_failure(&io->scsiio,
539                                                  /*sks_valid*/ 1,
540                                                  /*retry_count*/ 0xbad2);
541                 } else {
542                         ctl_set_medium_error(&io->scsiio,
543                             beio->bio_cmd == BIO_READ);
544                 }
545                 ctl_complete_beio(beio);
546                 return;
547         }
548
549         /*
550          * If this is a write, a flush, a delete or verify, we're all done.
551          * If this is a read, we can now send the data to the user.
552          */
553         if ((beio->bio_cmd == BIO_WRITE)
554          || (beio->bio_cmd == BIO_FLUSH)
555          || (beio->bio_cmd == BIO_DELETE)
556          || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
557                 ctl_set_success(&io->scsiio);
558                 ctl_complete_beio(beio);
559         } else {
560                 if ((ARGS(io)->flags & CTL_LLF_READ) &&
561                     beio->beio_cont == NULL) {
562                         ctl_set_success(&io->scsiio);
563                         ctl_serseq_done(io);
564                 }
565 #ifdef CTL_TIME_IO
566                 getbinuptime(&io->io_hdr.dma_start_bt);
567 #endif
568                 ctl_datamove(io);
569         }
570 }
571
572 static void
573 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
574                         struct ctl_be_block_io *beio)
575 {
576         union ctl_io *io = beio->io;
577         struct mount *mountpoint;
578         int error, lock_flags;
579
580         DPRINTF("entered\n");
581
582         binuptime(&beio->ds_t0);
583         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
584
585         (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
586
587         if (MNT_SHARED_WRITES(mountpoint) ||
588             ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
589                 lock_flags = LK_SHARED;
590         else
591                 lock_flags = LK_EXCLUSIVE;
592         vn_lock(be_lun->vn, lock_flags | LK_RETRY);
593         error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
594             curthread);
595         VOP_UNLOCK(be_lun->vn);
596
597         vn_finished_write(mountpoint);
598
599         mtx_lock(&be_lun->io_lock);
600         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
601             beio->ds_tag_type, beio->ds_trans_type,
602             /*now*/ NULL, /*then*/&beio->ds_t0);
603         mtx_unlock(&be_lun->io_lock);
604
605         if (error == 0)
606                 ctl_set_success(&io->scsiio);
607         else {
608                 /* XXX KDM is there a better error here? */
609                 ctl_set_internal_failure(&io->scsiio,
610                                          /*sks_valid*/ 1,
611                                          /*retry_count*/ 0xbad1);
612         }
613
614         ctl_complete_beio(beio);
615 }
616
617 SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
618 SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
619 SDT_PROBE_DEFINE1(cbb, , read, file_done, "uint64_t");
620 SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");
621
622 static void
623 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
624                            struct ctl_be_block_io *beio)
625 {
626         struct ctl_be_block_filedata *file_data;
627         union ctl_io *io;
628         struct uio xuio;
629         struct iovec *xiovec;
630         size_t s;
631         int error, flags, i;
632
633         DPRINTF("entered\n");
634
635         file_data = &be_lun->backend.file;
636         io = beio->io;
637         flags = 0;
638         if (ARGS(io)->flags & CTL_LLF_DPO)
639                 flags |= IO_DIRECT;
640         if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
641                 flags |= IO_SYNC;
642
643         bzero(&xuio, sizeof(xuio));
644         if (beio->bio_cmd == BIO_READ) {
645                 SDT_PROBE0(cbb, , read, file_start);
646                 xuio.uio_rw = UIO_READ;
647         } else {
648                 SDT_PROBE0(cbb, , write, file_start);
649                 xuio.uio_rw = UIO_WRITE;
650         }
651         xuio.uio_offset = beio->io_offset;
652         xuio.uio_resid = beio->io_len;
653         xuio.uio_segflg = UIO_SYSSPACE;
654         xuio.uio_iov = beio->xiovecs;
655         xuio.uio_iovcnt = beio->num_segs;
656         xuio.uio_td = curthread;
657
658         for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
659                 xiovec->iov_base = beio->sg_segs[i].addr;
660                 xiovec->iov_len = beio->sg_segs[i].len;
661         }
662
663         binuptime(&beio->ds_t0);
664         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
665
666         if (beio->bio_cmd == BIO_READ) {
667                 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
668
669                 /*
670                  * UFS pays attention to IO_DIRECT for reads.  If the
671                  * DIRECTIO option is configured into the kernel, it calls
672                  * ffs_rawread().  But that only works for single-segment
673                  * uios with user space addresses.  In our case, with a
674                  * kernel uio, it still reads into the buffer cache, but it
675                  * will just try to release the buffer from the cache later
676                  * on in ffs_read().
677                  *
678                  * ZFS does not pay attention to IO_DIRECT for reads.
679                  *
680                  * UFS does not pay attention to IO_SYNC for reads.
681                  *
682                  * ZFS pays attention to IO_SYNC (which translates into the
683                  * Solaris define FRSYNC for zfs_read()) for reads.  It
684                  * attempts to sync the file before reading.
685                  */
686                 error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);
687
688                 VOP_UNLOCK(be_lun->vn);
689                 SDT_PROBE0(cbb, , read, file_done);
690                 if (error == 0 && xuio.uio_resid > 0) {
691                         /*
692                          * If we read less than requested (EOF), then
693                          * we should zero out the rest of the buffer.
694                          */
695                         s = beio->io_len - xuio.uio_resid;
696                         for (i = 0; i < beio->num_segs; i++) {
697                                 if (s >= beio->sg_segs[i].len) {
698                                         s -= beio->sg_segs[i].len;
699                                         continue;
700                                 }
701                                 bzero((uint8_t *)beio->sg_segs[i].addr + s,
702                                     beio->sg_segs[i].len - s);
703                                 s = 0;
704                         }
705                 }
706         } else {
707                 struct mount *mountpoint;
708                 int lock_flags;
709
710                 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
711
712                 if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL)
713                   && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
714                         lock_flags = LK_SHARED;
715                 else
716                         lock_flags = LK_EXCLUSIVE;
717                 vn_lock(be_lun->vn, lock_flags | LK_RETRY);
718
719                 /*
720                  * UFS pays attention to IO_DIRECT for writes.  The write
721                  * is done asynchronously.  (Normally the write would just
722                  * get put into the cache.)
723                  *
724                  * UFS pays attention to IO_SYNC for writes.  It will
725                  * attempt to write the buffer out synchronously if that
726                  * flag is set.
727                  *
728                  * ZFS does not pay attention to IO_DIRECT for writes.
729                  *
730                  * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
731                  * for writes.  It will flush the transaction from the
732                  * cache before returning.
733                  */
734                 error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
735                 VOP_UNLOCK(be_lun->vn);
736
737                 vn_finished_write(mountpoint);
738                 SDT_PROBE0(cbb, , write, file_done);
739         }
740
741         mtx_lock(&be_lun->io_lock);
742         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
743             beio->ds_tag_type, beio->ds_trans_type,
744             /*now*/ NULL, /*then*/&beio->ds_t0);
745         mtx_unlock(&be_lun->io_lock);
746
747         /*
748          * If we got an error, map it to appropriate sense data ("MEDIUM
749          * ERROR" by default) and return the I/O to the user.
750          */
751         if (error != 0) {
752                 if (error == ENOSPC || error == EDQUOT) {
753                         ctl_set_space_alloc_fail(&io->scsiio);
754                 } else if (error == EROFS || error == EACCES) {
755                         ctl_set_hw_write_protected(&io->scsiio);
756                 } else {
757                         ctl_set_medium_error(&io->scsiio,
758                             beio->bio_cmd == BIO_READ);
759                 }
760                 ctl_complete_beio(beio);
761                 return;
762         }
763
764         /*
765          * If this is a write or a verify, we're all done.
766          * If this is a read, we can now send the data to the user.
767          */
768         if ((beio->bio_cmd == BIO_WRITE) ||
769             (ARGS(io)->flags & CTL_LLF_VERIFY)) {
770                 ctl_set_success(&io->scsiio);
771                 ctl_complete_beio(beio);
772         } else {
773                 if ((ARGS(io)->flags & CTL_LLF_READ) &&
774                     beio->beio_cont == NULL) {
775                         ctl_set_success(&io->scsiio);
776                         ctl_serseq_done(io);
777                 }
778 #ifdef CTL_TIME_IO
779                 getbinuptime(&io->io_hdr.dma_start_bt);
780 #endif
781                 ctl_datamove(io);
782         }
783 }
784
785 static void
786 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
787                         struct ctl_be_block_io *beio)
788 {
789         union ctl_io *io = beio->io;
790         struct ctl_lba_len_flags *lbalen = ARGS(io);
791         struct scsi_get_lba_status_data *data;
792         off_t roff, off;
793         int error, status;
794
795         DPRINTF("entered\n");
796
797         off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
798         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
799         error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
800             0, curthread->td_ucred, curthread);
801         if (error == 0 && off > roff)
802                 status = 0;     /* mapped up to off */
803         else {
804                 error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
805                     0, curthread->td_ucred, curthread);
806                 if (error == 0 && off > roff)
807                         status = 1;     /* deallocated up to off */
808                 else {
809                         status = 0;     /* unknown up to the end */
810                         off = be_lun->size_bytes;
811                 }
812         }
813         VOP_UNLOCK(be_lun->vn);
814
815         data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
816         scsi_u64to8b(lbalen->lba, data->descr[0].addr);
817         scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
818             lbalen->lba), data->descr[0].length);
819         data->descr[0].status = status;
820
821         ctl_complete_beio(beio);
822 }
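/*
 * Rough summary of the GET LBA STATUS logic above: if FIOSEEKHOLE moves the
 * offset forward, the range starting at the requested LBA is mapped
 * (status 0); otherwise, if FIOSEEKDATA moves it forward, the range is
 * deallocated (status 1); if neither works, status 0 is reported up to the
 * end of the LUN.
 */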
823
824 static uint64_t
825 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
826 {
827         struct vattr            vattr;
828         struct statfs           statfs;
829         uint64_t                val;
830         int                     error;
831
832         val = UINT64_MAX;
833         if (be_lun->vn == NULL)
834                 return (val);
835         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
836         if (strcmp(attrname, "blocksused") == 0) {
837                 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
838                 if (error == 0)
839                         val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
840         }
841         if (strcmp(attrname, "blocksavail") == 0 &&
842             !VN_IS_DOOMED(be_lun->vn)) {
843                 error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
844                 if (error == 0)
845                         val = statfs.f_bavail * statfs.f_bsize /
846                             be_lun->cbe_lun.blocksize;
847         }
848         VOP_UNLOCK(be_lun->vn);
849         return (val);
850 }
851
852 static void
853 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
854                            struct ctl_be_block_io *beio)
855 {
856         union ctl_io *io;
857         struct cdevsw *csw;
858         struct cdev *dev;
859         struct uio xuio;
860         struct iovec *xiovec;
861         int error, flags, i, ref;
862
863         DPRINTF("entered\n");
864
865         io = beio->io;
866         flags = 0;
867         if (ARGS(io)->flags & CTL_LLF_DPO)
868                 flags |= IO_DIRECT;
869         if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
870                 flags |= IO_SYNC;
871
872         bzero(&xuio, sizeof(xuio));
873         if (beio->bio_cmd == BIO_READ) {
874                 SDT_PROBE0(cbb, , read, file_start);
875                 xuio.uio_rw = UIO_READ;
876         } else {
877                 SDT_PROBE0(cbb, , write, file_start);
878                 xuio.uio_rw = UIO_WRITE;
879         }
880         xuio.uio_offset = beio->io_offset;
881         xuio.uio_resid = beio->io_len;
882         xuio.uio_segflg = UIO_SYSSPACE;
883         xuio.uio_iov = beio->xiovecs;
884         xuio.uio_iovcnt = beio->num_segs;
885         xuio.uio_td = curthread;
886
887         for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
888                 xiovec->iov_base = beio->sg_segs[i].addr;
889                 xiovec->iov_len = beio->sg_segs[i].len;
890         }
891
892         binuptime(&beio->ds_t0);
893         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
894
895         csw = devvn_refthread(be_lun->vn, &dev, &ref);
896         if (csw) {
897                 if (beio->bio_cmd == BIO_READ)
898                         error = csw->d_read(dev, &xuio, flags);
899                 else
900                         error = csw->d_write(dev, &xuio, flags);
901                 dev_relthread(dev, ref);
902         } else
903                 error = ENXIO;
904
905         if (beio->bio_cmd == BIO_READ)
906                 SDT_PROBE0(cbb, , read, file_done);
907         else
908                 SDT_PROBE0(cbb, , write, file_done);
909
910         mtx_lock(&be_lun->io_lock);
911         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
912             beio->ds_tag_type, beio->ds_trans_type,
913             /*now*/ NULL, /*then*/&beio->ds_t0);
914         mtx_unlock(&be_lun->io_lock);
915
916         /*
917          * If we got an error, map it to appropriate sense data ("MEDIUM
918          * ERROR" by default) and return the I/O to the user.
919          */
920         if (error != 0) {
921                 if (error == ENOSPC || error == EDQUOT) {
922                         ctl_set_space_alloc_fail(&io->scsiio);
923                 } else if (error == EROFS || error == EACCES) {
924                         ctl_set_hw_write_protected(&io->scsiio);
925                 } else {
926                         ctl_set_medium_error(&io->scsiio,
927                             beio->bio_cmd == BIO_READ);
928                 }
929                 ctl_complete_beio(beio);
930                 return;
931         }
932
933         /*
934          * If this is a write or a verify, we're all done.
935          * If this is a read, we can now send the data to the user.
936          */
937         if ((beio->bio_cmd == BIO_WRITE) ||
938             (ARGS(io)->flags & CTL_LLF_VERIFY)) {
939                 ctl_set_success(&io->scsiio);
940                 ctl_complete_beio(beio);
941         } else {
942                 if ((ARGS(io)->flags & CTL_LLF_READ) &&
943                     beio->beio_cont == NULL) {
944                         ctl_set_success(&io->scsiio);
945                         ctl_serseq_done(io);
946                 }
947 #ifdef CTL_TIME_IO
948                 getbinuptime(&io->io_hdr.dma_start_bt);
949 #endif
950                 ctl_datamove(io);
951         }
952 }
953
954 static void
955 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
956                         struct ctl_be_block_io *beio)
957 {
958         union ctl_io *io = beio->io;
959         struct cdevsw *csw;
960         struct cdev *dev;
961         struct ctl_lba_len_flags *lbalen = ARGS(io);
962         struct scsi_get_lba_status_data *data;
963         off_t roff, off;
964         int error, ref, status;
965
966         DPRINTF("entered\n");
967
968         csw = devvn_refthread(be_lun->vn, &dev, &ref);
969         if (csw == NULL) {
970                 status = 0;     /* unknown up to the end */
971                 off = be_lun->size_bytes;
972                 goto done;
973         }
974         off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
975         error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
976             curthread);
977         if (error == 0 && off > roff)
978                 status = 0;     /* mapped up to off */
979         else {
980                 error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
981                     curthread);
982                 if (error == 0 && off > roff)
983                         status = 1;     /* deallocated up to off */
984                 else {
985                         status = 0;     /* unknown up to the end */
986                         off = be_lun->size_bytes;
987                 }
988         }
989         dev_relthread(dev, ref);
990
991 done:
992         data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
993         scsi_u64to8b(lbalen->lba, data->descr[0].addr);
994         scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
995             lbalen->lba), data->descr[0].length);
996         data->descr[0].status = status;
997
998         ctl_complete_beio(beio);
999 }
1000
1001 static void
1002 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
1003                        struct ctl_be_block_io *beio)
1004 {
1005         struct bio *bio;
1006         struct cdevsw *csw;
1007         struct cdev *dev;
1008         int ref;
1009
1010         DPRINTF("entered\n");
1011
1012         /* This can't fail, it's a blocking allocation. */
1013         bio = g_alloc_bio();
1014
1015         bio->bio_cmd        = BIO_FLUSH;
1016         bio->bio_offset     = 0;
1017         bio->bio_data       = 0;
1018         bio->bio_done       = ctl_be_block_biodone;
1019         bio->bio_caller1    = beio;
1020         bio->bio_pblkno     = 0;
1021
1022         /*
1023          * We don't need to acquire the LUN lock here, because we are only
1024          * sending one bio, and so there is no other context to synchronize
1025          * with.
1026          */
1027         beio->num_bios_sent = 1;
1028         beio->send_complete = 1;
1029
1030         binuptime(&beio->ds_t0);
1031         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1032
1033         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1034         if (csw) {
1035                 bio->bio_dev = dev;
1036                 csw->d_strategy(bio);
1037                 dev_relthread(dev, ref);
1038         } else {
1039                 bio->bio_error = ENXIO;
1040                 ctl_be_block_biodone(bio);
1041         }
1042 }
1043
1044 static void
1045 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
1046                        struct ctl_be_block_io *beio,
1047                        uint64_t off, uint64_t len, int last)
1048 {
1049         struct bio *bio;
1050         uint64_t maxlen;
1051         struct cdevsw *csw;
1052         struct cdev *dev;
1053         int ref;
1054
1055         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1056         maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
1057         while (len > 0) {
1058                 bio = g_alloc_bio();
1059                 bio->bio_cmd        = BIO_DELETE;
1060                 bio->bio_dev        = dev;
1061                 bio->bio_offset     = off;
1062                 bio->bio_length     = MIN(len, maxlen);
1063                 bio->bio_data       = 0;
1064                 bio->bio_done       = ctl_be_block_biodone;
1065                 bio->bio_caller1    = beio;
1066                 bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;
1067
1068                 off += bio->bio_length;
1069                 len -= bio->bio_length;
1070
1071                 mtx_lock(&be_lun->io_lock);
1072                 beio->num_bios_sent++;
1073                 if (last && len == 0)
1074                         beio->send_complete = 1;
1075                 mtx_unlock(&be_lun->io_lock);
1076
1077                 if (csw) {
1078                         csw->d_strategy(bio);
1079                 } else {
1080                         bio->bio_error = ENXIO;
1081                         ctl_be_block_biodone(bio);
1082                 }
1083         }
1084         if (csw)
1085                 dev_relthread(dev, ref);
1086 }
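/*
 * BIO_DELETE requests are chopped into chunks of at most 'maxlen' bytes,
 * where maxlen is LONG_MAX rounded down to a multiple of the LUN block
 * size, so each chunk stays in range and block-aligned.  Only the final
 * chunk of the final range sets send_complete, which lets
 * ctl_be_block_biodone() detect when the whole request has been issued.
 */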
1087
1088 static void
1089 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
1090                        struct ctl_be_block_io *beio)
1091 {
1092         union ctl_io *io;
1093         struct ctl_ptr_len_flags *ptrlen;
1094         struct scsi_unmap_desc *buf, *end;
1095         uint64_t len;
1096
1097         io = beio->io;
1098
1099         DPRINTF("entered\n");
1100
1101         binuptime(&beio->ds_t0);
1102         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1103
1104         if (beio->io_offset == -1) {
1105                 beio->io_len = 0;
1106                 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1107                 buf = (struct scsi_unmap_desc *)ptrlen->ptr;
1108                 end = buf + ptrlen->len / sizeof(*buf);
1109                 for (; buf < end; buf++) {
1110                         len = (uint64_t)scsi_4btoul(buf->length) *
1111                             be_lun->cbe_lun.blocksize;
1112                         beio->io_len += len;
1113                         ctl_be_block_unmap_dev_range(be_lun, beio,
1114                             scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
1115                             len, (end - buf < 2) ? TRUE : FALSE);
1116                 }
1117         } else
1118                 ctl_be_block_unmap_dev_range(be_lun, beio,
1119                     beio->io_offset, beio->io_len, TRUE);
1120 }
1121
1122 static void
1123 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
1124                           struct ctl_be_block_io *beio)
1125 {
1126         TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
1127         struct bio *bio;
1128         struct cdevsw *csw;
1129         struct cdev *dev;
1130         off_t cur_offset;
1131         int i, max_iosize, ref;
1132
1133         DPRINTF("entered\n");
1134         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1135
1136         /*
1137          * We have to limit our I/O size to the maximum supported by the
1138          * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
1139          * set it properly, use DFLTPHYS.
1140          */
1141         if (csw) {
1142                 max_iosize = dev->si_iosize_max;
1143                 if (max_iosize < PAGE_SIZE)
1144                         max_iosize = DFLTPHYS;
1145         } else
1146                 max_iosize = DFLTPHYS;
1147
1148         cur_offset = beio->io_offset;
1149         for (i = 0; i < beio->num_segs; i++) {
1150                 size_t cur_size;
1151                 uint8_t *cur_ptr;
1152
1153                 cur_size = beio->sg_segs[i].len;
1154                 cur_ptr = beio->sg_segs[i].addr;
1155
1156                 while (cur_size > 0) {
1157                         /* This can't fail, it's a blocking allocation. */
1158                         bio = g_alloc_bio();
1159
1160                         KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
1161
1162                         bio->bio_cmd = beio->bio_cmd;
1163                         bio->bio_dev = dev;
1164                         bio->bio_caller1 = beio;
1165                         bio->bio_length = min(cur_size, max_iosize);
1166                         bio->bio_offset = cur_offset;
1167                         bio->bio_data = cur_ptr;
1168                         bio->bio_done = ctl_be_block_biodone;
1169                         bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;
1170
1171                         cur_offset += bio->bio_length;
1172                         cur_ptr += bio->bio_length;
1173                         cur_size -= bio->bio_length;
1174
1175                         TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
1176                         beio->num_bios_sent++;
1177                 }
1178         }
1179         beio->send_complete = 1;
1180         binuptime(&beio->ds_t0);
1181         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1182
1183         /*
1184          * Fire off all allocated requests!
1185          */
1186         while ((bio = TAILQ_FIRST(&queue)) != NULL) {
1187                 TAILQ_REMOVE(&queue, bio, bio_queue);
1188                 if (csw)
1189                         csw->d_strategy(bio);
1190                 else {
1191                         bio->bio_error = ENXIO;
1192                         ctl_be_block_biodone(bio);
1193                 }
1194         }
1195         if (csw)
1196                 dev_relthread(dev, ref);
1197 }
1198
1199 static uint64_t
1200 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1201 {
1202         struct diocgattr_arg    arg;
1203         struct cdevsw *csw;
1204         struct cdev *dev;
1205         int error, ref;
1206
1207         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1208         if (csw == NULL)
1209                 return (UINT64_MAX);
1210         strlcpy(arg.name, attrname, sizeof(arg.name));
1211         arg.len = sizeof(arg.value.off);
1212         if (csw->d_ioctl) {
1213                 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1214                     curthread);
1215         } else
1216                 error = ENODEV;
1217         dev_relthread(dev, ref);
1218         if (error != 0)
1219                 return (UINT64_MAX);
1220         return (arg.value.off);
1221 }
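/*
 * Attribute queries for device-backed LUNs are simply forwarded to the
 * underlying provider via DIOCGATTR; if the device has no d_ioctl or the
 * ioctl fails, UINT64_MAX is returned (the same "unknown" value the file
 * backend uses).
 */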
1222
1223 static void
1224 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1225                             union ctl_io *io)
1226 {
1227         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1228         struct ctl_be_block_io *beio;
1229         struct ctl_lba_len_flags *lbalen;
1230
1231         DPRINTF("entered\n");
1232         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1233         lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1234
1235         beio->io_len = lbalen->len * cbe_lun->blocksize;
1236         beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1237         beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1238         beio->bio_cmd = BIO_FLUSH;
1239         beio->ds_trans_type = DEVSTAT_NO_DATA;
1240         DPRINTF("SYNC\n");
1241         be_lun->lun_flush(be_lun, beio);
1242 }
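/*
 * io_arg carries the SSC_IMMED bit from SYNCHRONIZE CACHE; the file backend
 * uses it in ctl_be_block_flush_file() above to choose between MNT_NOWAIT
 * and MNT_WAIT for VOP_FSYNC().
 */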
1243
1244 static void
1245 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1246 {
1247         union ctl_io *io;
1248
1249         io = beio->io;
1250         ctl_free_beio(beio);
1251         if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1252             ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1253              (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1254                 ctl_config_write_done(io);
1255                 return;
1256         }
1257
1258         ctl_be_block_config_write(io);
1259 }
1260
1261 static void
1262 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
1263                             union ctl_io *io)
1264 {
1265         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1266         struct ctl_be_block_io *beio;
1267         struct ctl_lba_len_flags *lbalen;
1268         uint64_t len_left, lba;
1269         uint32_t pb, pbo, adj;
1270         int i, seglen;
1271         uint8_t *buf, *end;
1272
1273         DPRINTF("entered\n");
1274
1275         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1276         lbalen = ARGS(beio->io);
1277
1278         if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
1279             (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
1280                 ctl_free_beio(beio);
1281                 ctl_set_invalid_field(&io->scsiio,
1282                                       /*sks_valid*/ 1,
1283                                       /*command*/ 1,
1284                                       /*field*/ 1,
1285                                       /*bit_valid*/ 0,
1286                                       /*bit*/ 0);
1287                 ctl_config_write_done(io);
1288                 return;
1289         }
1290
1291         if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
1292                 beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1293                 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
1294                 beio->bio_cmd = BIO_DELETE;
1295                 beio->ds_trans_type = DEVSTAT_FREE;
1296
1297                 be_lun->unmap(be_lun, beio);
1298                 return;
1299         }
1300
1301         beio->bio_cmd = BIO_WRITE;
1302         beio->ds_trans_type = DEVSTAT_WRITE;
1303
1304         DPRINTF("WRITE SAME at LBA %jx len %u\n",
1305                (uintmax_t)lbalen->lba, lbalen->len);
1306
1307         pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
1308         if (be_lun->cbe_lun.pblockoff > 0)
1309                 pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
1310         else
1311                 pbo = 0;
1312         len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
1313         for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
1314
1315                 /*
1316                  * Setup the S/G entry for this chunk.
1317                  */
1318                 seglen = MIN(CTLBLK_MAX_SEG, len_left);
1319                 if (pb > cbe_lun->blocksize) {
1320                         adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
1321                             seglen - pbo) % pb;
1322                         if (seglen > adj)
1323                                 seglen -= adj;
1324                         else
1325                                 seglen -= seglen % cbe_lun->blocksize;
1326                 } else
1327                         seglen -= seglen % cbe_lun->blocksize;
1328                 beio->sg_segs[i].len = seglen;
1329                 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
1330
1331                 DPRINTF("segment %d addr %p len %zd\n", i,
1332                         beio->sg_segs[i].addr, beio->sg_segs[i].len);
1333
1334                 beio->num_segs++;
1335                 len_left -= seglen;
1336
1337                 buf = beio->sg_segs[i].addr;
1338                 end = buf + seglen;
1339                 for (; buf < end; buf += cbe_lun->blocksize) {
1340                         if (lbalen->flags & SWS_NDOB) {
1341                                 memset(buf, 0, cbe_lun->blocksize);
1342                         } else {
1343                                 memcpy(buf, io->scsiio.kern_data_ptr,
1344                                     cbe_lun->blocksize);
1345                         }
1346                         if (lbalen->flags & SWS_LBDATA)
1347                                 scsi_ulto4b(lbalen->lba + lba, buf);
1348                         lba++;
1349                 }
1350         }
1351
1352         beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1353         beio->io_len = lba * cbe_lun->blocksize;
1354
1355         /* We cannot do it all in one run. Adjust and schedule a rerun. */
1356         if (len_left > 0) {
1357                 lbalen->lba += lba;
1358                 lbalen->len -= lba;
1359                 beio->beio_cont = ctl_be_block_cw_done_ws;
1360         }
1361
1362         be_lun->dispatch(be_lun, beio);
1363 }
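/*
 * Example of the alignment math above, assuming 512-byte logical blocks and
 * pblockexp = 3: pb = 512 << 3 = 4096, so each S/G chunk is trimmed by
 * 'adj' to end on a 4KB physical-block boundary (shifted by pblockoff)
 * whenever possible, keeping the generated WRITE SAME writes physically
 * aligned; any remainder is carried over to the rerun scheduled above.
 */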
1364
1365 static void
1366 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1367                             union ctl_io *io)
1368 {
1369         struct ctl_be_block_io *beio;
1370         struct ctl_ptr_len_flags *ptrlen;
1371
1372         DPRINTF("entered\n");
1373
1374         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1375         ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1376
1377         if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1378                 ctl_free_beio(beio);
1379                 ctl_set_invalid_field(&io->scsiio,
1380                                       /*sks_valid*/ 0,
1381                                       /*command*/ 1,
1382                                       /*field*/ 0,
1383                                       /*bit_valid*/ 0,
1384                                       /*bit*/ 0);
1385                 ctl_config_write_done(io);
1386                 return;
1387         }
1388
1389         beio->io_len = 0;
1390         beio->io_offset = -1;
1391         beio->bio_cmd = BIO_DELETE;
1392         beio->ds_trans_type = DEVSTAT_FREE;
1393         DPRINTF("UNMAP\n");
1394         be_lun->unmap(be_lun, beio);
1395 }
1396
1397 static void
1398 ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1399 {
1400         union ctl_io *io;
1401
1402         io = beio->io;
1403         ctl_free_beio(beio);
1404         ctl_config_read_done(io);
1405 }
1406
1407 static void
1408 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1409                          union ctl_io *io)
1410 {
1411         struct ctl_be_block_io *beio;
1412         struct ctl_be_block_softc *softc;
1413
1414         DPRINTF("entered\n");
1415
1416         softc = be_lun->softc;
1417         beio = ctl_alloc_beio(softc);
1418         beio->io = io;
1419         beio->lun = be_lun;
1420         beio->beio_cont = ctl_be_block_cr_done;
1421         PRIV(io)->ptr = (void *)beio;
1422
1423         switch (io->scsiio.cdb[0]) {
1424         case SERVICE_ACTION_IN:         /* GET LBA STATUS */
1425                 beio->bio_cmd = -1;
1426                 beio->ds_trans_type = DEVSTAT_NO_DATA;
1427                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1428                 beio->io_len = 0;
1429                 if (be_lun->get_lba_status)
1430                         be_lun->get_lba_status(be_lun, beio);
1431                 else
1432                         ctl_be_block_cr_done(beio);
1433                 break;
1434         default:
1435                 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1436                 break;
1437         }
1438 }
1439
1440 static void
1441 ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1442 {
1443         union ctl_io *io;
1444
1445         io = beio->io;
1446         ctl_free_beio(beio);
1447         ctl_config_write_done(io);
1448 }
1449
1450 static void
1451 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1452                          union ctl_io *io)
1453 {
1454         struct ctl_be_block_io *beio;
1455         struct ctl_be_block_softc *softc;
1456
1457         DPRINTF("entered\n");
1458
1459         softc = be_lun->softc;
1460         beio = ctl_alloc_beio(softc);
1461         beio->io = io;
1462         beio->lun = be_lun;
1463         beio->beio_cont = ctl_be_block_cw_done;
1464         switch (io->scsiio.tag_type) {
1465         case CTL_TAG_ORDERED:
1466                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1467                 break;
1468         case CTL_TAG_HEAD_OF_QUEUE:
1469                 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1470                 break;
1471         case CTL_TAG_UNTAGGED:
1472         case CTL_TAG_SIMPLE:
1473         case CTL_TAG_ACA:
1474         default:
1475                 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1476                 break;
1477         }
1478         PRIV(io)->ptr = (void *)beio;
1479
1480         switch (io->scsiio.cdb[0]) {
1481         case SYNCHRONIZE_CACHE:
1482         case SYNCHRONIZE_CACHE_16:
1483                 ctl_be_block_cw_dispatch_sync(be_lun, io);
1484                 break;
1485         case WRITE_SAME_10:
1486         case WRITE_SAME_16:
1487                 ctl_be_block_cw_dispatch_ws(be_lun, io);
1488                 break;
1489         case UNMAP:
1490                 ctl_be_block_cw_dispatch_unmap(be_lun, io);
1491                 break;
1492         default:
1493                 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1494                 break;
1495         }
1496 }
1497
1498 SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
1499 SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
1500 SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
1501 SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");
1502
1503 static void
1504 ctl_be_block_next(struct ctl_be_block_io *beio)
1505 {
1506         struct ctl_be_block_lun *be_lun;
1507         union ctl_io *io;
1508
1509         io = beio->io;
1510         be_lun = beio->lun;
1511         ctl_free_beio(beio);
1512         if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1513             ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1514              (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1515                 ctl_data_submit_done(io);
1516                 return;
1517         }
1518
1519         io->io_hdr.status &= ~CTL_STATUS_MASK;
1520         io->io_hdr.status |= CTL_STATUS_NONE;
1521
1522         mtx_lock(&be_lun->queue_lock);
1523         STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1524         mtx_unlock(&be_lun->queue_lock);
1525         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1526 }
1527
1528 static void
1529 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
1530                            union ctl_io *io)
1531 {
1532         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1533         struct ctl_be_block_io *beio;
1534         struct ctl_be_block_softc *softc;
1535         struct ctl_lba_len_flags *lbalen;
1536         struct ctl_ptr_len_flags *bptrlen;
1537         uint64_t len_left, lbas;
1538         int i;
1539
1540         softc = be_lun->softc;
1541
1542         DPRINTF("entered\n");
1543
1544         lbalen = ARGS(io);
1545         if (lbalen->flags & CTL_LLF_WRITE) {
1546                 SDT_PROBE0(cbb, , write, start);
1547         } else {
1548                 SDT_PROBE0(cbb, , read, start);
1549         }
1550
1551         beio = ctl_alloc_beio(softc);
1552         beio->io = io;
1553         beio->lun = be_lun;
1554         bptrlen = PRIV(io);
1555         bptrlen->ptr = (void *)beio;
1556
1557         switch (io->scsiio.tag_type) {
1558         case CTL_TAG_ORDERED:
1559                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1560                 break;
1561         case CTL_TAG_HEAD_OF_QUEUE:
1562                 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1563                 break;
1564         case CTL_TAG_UNTAGGED:
1565         case CTL_TAG_SIMPLE:
1566         case CTL_TAG_ACA:
1567         default:
1568                 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1569                 break;
1570         }
1571
1572         if (lbalen->flags & CTL_LLF_WRITE) {
1573                 beio->bio_cmd = BIO_WRITE;
1574                 beio->ds_trans_type = DEVSTAT_WRITE;
1575         } else {
1576                 beio->bio_cmd = BIO_READ;
1577                 beio->ds_trans_type = DEVSTAT_READ;
1578         }
1579
1580         DPRINTF("%s at LBA %jx len %u @%ju\n",
1581                (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
1582                (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
1583         if (lbalen->flags & CTL_LLF_COMPARE)
1584                 lbas = CTLBLK_HALF_IO_SIZE;
1585         else
1586                 lbas = CTLBLK_MAX_IO_SIZE;
1587         lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
1588         beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
1589         beio->io_len = lbas * cbe_lun->blocksize;
1590         bptrlen->len += lbas;
1591
1592         for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
1593                 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
1594                     i, CTLBLK_MAX_SEGS));
1595
1596                 /*
1597                  * Set up the S/G entry for this chunk.
1598                  */
1599                 beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
1600                 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
1601
1602                 DPRINTF("segment %d addr %p len %zd\n", i,
1603                         beio->sg_segs[i].addr, beio->sg_segs[i].len);
1604
1605                 /* Set up second segment for compare operation. */
1606                 if (lbalen->flags & CTL_LLF_COMPARE) {
1607                         beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
1608                             beio->sg_segs[i].len;
1609                         beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
1610                             uma_zalloc(be_lun->lun_zone, M_WAITOK);
1611                 }
1612
1613                 beio->num_segs++;
1614                 len_left -= beio->sg_segs[i].len;
1615         }
1616         if (bptrlen->len < lbalen->len)
1617                 beio->beio_cont = ctl_be_block_next;
1618         io->scsiio.be_move_done = ctl_be_block_move_done;
1619         /* For compare we have separate S/G lists for read and datamove. */
1620         if (lbalen->flags & CTL_LLF_COMPARE)
1621                 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
1622         else
1623                 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
1624         io->scsiio.kern_data_len = beio->io_len;
1625         io->scsiio.kern_sg_entries = beio->num_segs;
1626         io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
1627
1628         /*
1629          * For the read case, we need to read the data into our buffers and
1630          * then we can send it back to the user.  For the write case, we
1631          * need to get the data from the user first.
1632          */
1633         if (beio->bio_cmd == BIO_READ) {
1634                 SDT_PROBE0(cbb, , read, alloc_done);
1635                 be_lun->dispatch(be_lun, beio);
1636         } else {
1637                 SDT_PROBE0(cbb, , write, alloc_done);
1638 #ifdef CTL_TIME_IO
1639                 getbinuptime(&io->io_hdr.dma_start_bt);
1640 #endif
1641                 ctl_datamove(io);
1642         }
1643 }
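
/*
 * To illustrate the pass structure above: each pass covers at most
 * CTLBLK_MAX_IO_SIZE bytes (CTLBLK_HALF_IO_SIZE for COMPARE, which needs
 * two buffers), split into S/G segments of up to CTLBLK_MAX_SEG bytes.
 * For COMPARE a second, equally sized set of segments starting at index
 * CTLBLK_HALF_SEGS is allocated and kern_data_ptr points at it, keeping
 * the data moved from the initiator separate from the data the backend
 * transfers itself.  bptrlen->len counts the blocks already covered;
 * while it is still short of lbalen->len, ctl_be_block_next() re-queues
 * the I/O on the input queue for another pass.
 */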
1644
1645 static void
1646 ctl_be_block_worker(void *context, int pending)
1647 {
1648         struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
1649         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1650         union ctl_io *io;
1651         struct ctl_be_block_io *beio;
1652
1653         DPRINTF("entered\n");
1654         /*
1655          * Fetch and process I/Os from all queues.  If we detect the LUN's
1656          * CTL_LUN_FLAG_NO_MEDIA flag here, it is the result of a race, so make
1657          * the response maximally opaque to avoid confusing the initiator.
1658          */
1659         for (;;) {
1660                 mtx_lock(&be_lun->queue_lock);
1661                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1662                 if (io != NULL) {
1663                         DPRINTF("datamove queue\n");
1664                         STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
1665                                       ctl_io_hdr, links);
1666                         mtx_unlock(&be_lun->queue_lock);
1667                         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1668                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1669                                 ctl_set_busy(&io->scsiio);
1670                                 ctl_complete_beio(beio);
1671                                 return;
1672                         }
1673                         be_lun->dispatch(be_lun, beio);
1674                         continue;
1675                 }
1676                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1677                 if (io != NULL) {
1678                         DPRINTF("config write queue\n");
1679                         STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
1680                                       ctl_io_hdr, links);
1681                         mtx_unlock(&be_lun->queue_lock);
1682                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1683                                 ctl_set_busy(&io->scsiio);
1684                                 ctl_config_write_done(io);
1685                                 return;
1686                         }
1687                         ctl_be_block_cw_dispatch(be_lun, io);
1688                         continue;
1689                 }
1690                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
1691                 if (io != NULL) {
1692                         DPRINTF("config read queue\n");
1693                         STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr,
1694                                       ctl_io_hdr, links);
1695                         mtx_unlock(&be_lun->queue_lock);
1696                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1697                                 ctl_set_busy(&io->scsiio);
1698                                 ctl_config_read_done(io);
1699                                 return;
1700                         }
1701                         ctl_be_block_cr_dispatch(be_lun, io);
1702                         continue;
1703                 }
1704                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1705                 if (io != NULL) {
1706                         DPRINTF("input queue\n");
1707                         STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
1708                                       ctl_io_hdr, links);
1709                         mtx_unlock(&be_lun->queue_lock);
1710                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1711                                 ctl_set_busy(&io->scsiio);
1712                                 ctl_data_submit_done(io);
1713                                 return;
1714                         }
1715                         ctl_be_block_dispatch(be_lun, io);
1716                         continue;
1717                 }
1718
1719                 /*
1720                  * If we get here, there is no work left in the queues, so
1721                  * just break out and let the task queue go to sleep.
1722                  */
1723                 mtx_unlock(&be_lun->queue_lock);
1724                 break;
1725         }
1726 }
1727
1728 /*
1729  * Entry point from CTL to the backend for I/O.  We queue everything to a
1730  * work thread, so this just puts the I/O on a queue and wakes up the
1731  * thread.
1732  */
1733 static int
1734 ctl_be_block_submit(union ctl_io *io)
1735 {
1736         struct ctl_be_block_lun *be_lun;
1737         struct ctl_be_lun *cbe_lun;
1738
1739         DPRINTF("entered\n");
1740
1741         cbe_lun = CTL_BACKEND_LUN(io);
1742         be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
1743
1744         /*
1745          * Make sure we only get SCSI I/O.
1746          */
1747         KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
1748                 "%#x) encountered", io->io_hdr.io_type));
1749
1750         PRIV(io)->len = 0;
1751
1752         mtx_lock(&be_lun->queue_lock);
1753         STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1754         mtx_unlock(&be_lun->queue_lock);
1755         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1756
1757         return (CTL_RETVAL_COMPLETE);
1758 }
1759
1760 static int
1761 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1762                         int flag, struct thread *td)
1763 {
1764         struct ctl_be_block_softc *softc;
1765         int error;
1766
1767         softc = &backend_block_softc;
1768
1769         error = 0;
1770
1771         switch (cmd) {
1772         case CTL_LUN_REQ: {
1773                 struct ctl_lun_req *lun_req;
1774
1775                 lun_req = (struct ctl_lun_req *)addr;
1776
1777                 switch (lun_req->reqtype) {
1778                 case CTL_LUNREQ_CREATE:
1779                         error = ctl_be_block_create(softc, lun_req);
1780                         break;
1781                 case CTL_LUNREQ_RM:
1782                         error = ctl_be_block_rm(softc, lun_req);
1783                         break;
1784                 case CTL_LUNREQ_MODIFY:
1785                         error = ctl_be_block_modify(softc, lun_req);
1786                         break;
1787                 default:
1788                         lun_req->status = CTL_LUN_ERROR;
1789                         snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1790                                  "invalid LUN request type %d",
1791                                  lun_req->reqtype);
1792                         break;
1793                 }
1794                 break;
1795         }
1796         default:
1797                 error = ENOTTY;
1798                 break;
1799         }
1800
1801         return (error);
1802 }
1803
1804 static int
1805 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1806 {
1807         struct ctl_be_lun *cbe_lun;
1808         struct ctl_be_block_filedata *file_data;
1809         struct ctl_lun_create_params *params;
1810         const char                   *value;
1811         struct vattr                  vattr;
1812         off_t                         ps, pss, po, pos, us, uss, uo, uos;
1813         int                           error;
1814
1815         cbe_lun = &be_lun->cbe_lun;
1816         file_data = &be_lun->backend.file;
1817         params = &be_lun->params;
1818
1819         be_lun->dev_type = CTL_BE_BLOCK_FILE;
1820         be_lun->dispatch = ctl_be_block_dispatch_file;
1821         be_lun->lun_flush = ctl_be_block_flush_file;
1822         be_lun->get_lba_status = ctl_be_block_gls_file;
1823         be_lun->getattr = ctl_be_block_getattr_file;
1824         be_lun->unmap = NULL;
1825         cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
1826
1827         error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1828         if (error != 0) {
1829                 snprintf(req->error_str, sizeof(req->error_str),
1830                          "error calling VOP_GETATTR() for file %s",
1831                          be_lun->dev_path);
1832                 return (error);
1833         }
1834
1835         file_data->cred = crhold(curthread->td_ucred);
1836         if (params->lun_size_bytes != 0)
1837                 be_lun->size_bytes = params->lun_size_bytes;
1838         else
1839                 be_lun->size_bytes = vattr.va_size;
1840
1841         /*
1842          * For files we can use any logical block size.  Prefer 512 bytes
1843          * for compatibility reasons.  If the file's vattr.va_blocksize
1844          * (preferred I/O block size) is larger than, and a multiple of, the
1845          * chosen logical block size, report it as the physical block size.
1846          */
1847         if (params->blocksize_bytes != 0)
1848                 cbe_lun->blocksize = params->blocksize_bytes;
1849         else if (cbe_lun->lun_type == T_CDROM)
1850                 cbe_lun->blocksize = 2048;
1851         else
1852                 cbe_lun->blocksize = 512;
1853         be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
1854         cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
1855             0 : (be_lun->size_blocks - 1);
1856
1857         us = ps = vattr.va_blocksize;
1858         uo = po = 0;
1859
1860         value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
1861         if (value != NULL)
1862                 ctl_expand_number(value, &ps);
1863         value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
1864         if (value != NULL)
1865                 ctl_expand_number(value, &po);
1866         pss = ps / cbe_lun->blocksize;
1867         pos = po / cbe_lun->blocksize;
1868         if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
1869             ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
1870                 cbe_lun->pblockexp = fls(pss) - 1;
1871                 cbe_lun->pblockoff = (pss - pos) % pss;
1872         }
1873
1874         value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
1875         if (value != NULL)
1876                 ctl_expand_number(value, &us);
1877         value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
1878         if (value != NULL)
1879                 ctl_expand_number(value, &uo);
1880         uss = us / cbe_lun->blocksize;
1881         uos = uo / cbe_lun->blocksize;
1882         if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
1883             ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
1884                 cbe_lun->ublockexp = fls(uss) - 1;
1885                 cbe_lun->ublockoff = (uss - uos) % uss;
1886         }
1887
1888         /*
1889          * Sanity check.  The media size has to be at least one
1890          * sector long.
1891          */
1892         if (be_lun->size_bytes < cbe_lun->blocksize) {
1893                 error = EINVAL;
1894                 snprintf(req->error_str, sizeof(req->error_str),
1895                          "file %s size %ju < block size %u", be_lun->dev_path,
1896                          (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
1897         }
1898
1899         cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
1900         return (error);
1901 }
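
/*
 * Worked example for the physical/UNMAP block math above (numbers
 * hypothetical): with a 512-byte logical block size and pblocksize=4096,
 * pblockoffset=0 (taken from the LUN options, or from va_blocksize by
 * default), pss = 8 and pos = 0; 8 is a power of two, so pblockexp =
 * fls(8) - 1 = 3 and pblockoff = 0, i.e. the LUN advertises 4 KB physical
 * sectors aligned at LBA 0.  The ublocksize/ublockoffset options feed the
 * analogous ublockexp/ublockoff fields in the same way.
 */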
1902
1903 static int
1904 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1905 {
1906         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1907         struct ctl_lun_create_params *params;
1908         struct cdevsw                *csw;
1909         struct cdev                  *dev;
1910         const char                   *value;
1911         int                           error, atomic, maxio, ref, unmap, tmp;
1912         off_t                         ps, pss, po, pos, us, uss, uo, uos, otmp;
1913
1914         params = &be_lun->params;
1915
1916         be_lun->dev_type = CTL_BE_BLOCK_DEV;
1917         csw = devvn_refthread(be_lun->vn, &dev, &ref);
1918         if (csw == NULL)
1919                 return (ENXIO);
1920         if (strcmp(csw->d_name, "zvol") == 0) {
1921                 be_lun->dispatch = ctl_be_block_dispatch_zvol;
1922                 be_lun->get_lba_status = ctl_be_block_gls_zvol;
1923                 atomic = maxio = CTLBLK_MAX_IO_SIZE;
1924         } else {
1925                 be_lun->dispatch = ctl_be_block_dispatch_dev;
1926                 be_lun->get_lba_status = NULL;
1927                 atomic = 0;
1928                 maxio = dev->si_iosize_max;
1929                 if (maxio <= 0)
1930                         maxio = DFLTPHYS;
1931                 if (maxio > CTLBLK_MAX_IO_SIZE)
1932                         maxio = CTLBLK_MAX_IO_SIZE;
1933         }
1934         be_lun->lun_flush = ctl_be_block_flush_dev;
1935         be_lun->getattr = ctl_be_block_getattr_dev;
1936         be_lun->unmap = ctl_be_block_unmap_dev;
1937
1938         if (!csw->d_ioctl) {
1939                 dev_relthread(dev, ref);
1940                 snprintf(req->error_str, sizeof(req->error_str),
1941                          "no d_ioctl for device %s!", be_lun->dev_path);
1942                 return (ENODEV);
1943         }
1944
1945         error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
1946                                curthread);
1947         if (error) {
1948                 dev_relthread(dev, ref);
1949                 snprintf(req->error_str, sizeof(req->error_str),
1950                          "error %d returned for DIOCGSECTORSIZE ioctl "
1951                          "on %s!", error, be_lun->dev_path);
1952                 return (error);
1953         }
1954
1955         /*
1956          * If the user has asked for a blocksize that is greater than the
1957          * backing device's blocksize, we can do it only if the requested
1958          * blocksize is an even multiple of the underlying device's
1959          * blocksize.
1960          */
1961         if ((params->blocksize_bytes != 0) &&
1962             (params->blocksize_bytes >= tmp)) {
1963                 if (params->blocksize_bytes % tmp == 0) {
1964                         cbe_lun->blocksize = params->blocksize_bytes;
1965                 } else {
1966                         dev_relthread(dev, ref);
1967                         snprintf(req->error_str, sizeof(req->error_str),
1968                                  "requested blocksize %u is not an even "
1969                                  "multiple of backing device blocksize %u",
1970                                  params->blocksize_bytes, tmp);
1971                         return (EINVAL);
1972                 }
1973         } else if (params->blocksize_bytes != 0) {
1974                 dev_relthread(dev, ref);
1975                 snprintf(req->error_str, sizeof(req->error_str),
1976                          "requested blocksize %u < backing device "
1977                          "blocksize %u", params->blocksize_bytes, tmp);
1978                 return (EINVAL);
1979         } else if (cbe_lun->lun_type == T_CDROM)
1980                 cbe_lun->blocksize = MAX(tmp, 2048);
1981         else
1982                 cbe_lun->blocksize = tmp;
1983
1984         error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
1985                              curthread);
1986         if (error) {
1987                 dev_relthread(dev, ref);
1988                 snprintf(req->error_str, sizeof(req->error_str),
1989                          "error %d returned for DIOCGMEDIASIZE "
1990                          "ioctl on %s!", error,
1991                          be_lun->dev_path);
1992                 return (error);
1993         }
1994
1995         if (params->lun_size_bytes != 0) {
1996                 if (params->lun_size_bytes > otmp) {
1997                         dev_relthread(dev, ref);
1998                         snprintf(req->error_str, sizeof(req->error_str),
1999                                  "requested LUN size %ju > backing device "
2000                                  "size %ju",
2001                                  (uintmax_t)params->lun_size_bytes,
2002                                  (uintmax_t)otmp);
2003                         return (EINVAL);
2004                 }
2005
2006                 be_lun->size_bytes = params->lun_size_bytes;
2007         } else
2008                 be_lun->size_bytes = otmp;
2009         be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2010         cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2011             0 : (be_lun->size_blocks - 1);
2012
2013         error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
2014             curthread);
2015         if (error)
2016                 ps = po = 0;
2017         else {
2018                 error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
2019                     FREAD, curthread);
2020                 if (error)
2021                         po = 0;
2022         }
2023         us = ps;
2024         uo = po;
2025
2026         value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
2027         if (value != NULL)
2028                 ctl_expand_number(value, &ps);
2029         value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
2030         if (value != NULL)
2031                 ctl_expand_number(value, &po);
2032         pss = ps / cbe_lun->blocksize;
2033         pos = po / cbe_lun->blocksize;
2034         if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
2035             ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
2036                 cbe_lun->pblockexp = fls(pss) - 1;
2037                 cbe_lun->pblockoff = (pss - pos) % pss;
2038         }
2039
2040         value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
2041         if (value != NULL)
2042                 ctl_expand_number(value, &us);
2043         value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
2044         if (value != NULL)
2045                 ctl_expand_number(value, &uo);
2046         uss = us / cbe_lun->blocksize;
2047         uos = uo / cbe_lun->blocksize;
2048         if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
2049             ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
2050                 cbe_lun->ublockexp = fls(uss) - 1;
2051                 cbe_lun->ublockoff = (uss - uos) % uss;
2052         }
2053
2054         cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
2055         cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
2056
2057         if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
2058                 unmap = 1;
2059         } else {
2060                 struct diocgattr_arg    arg;
2061
2062                 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
2063                 arg.len = sizeof(arg.value.i);
2064                 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
2065                     curthread);
2066                 unmap = (error == 0) ? arg.value.i : 0;
2067         }
2068         value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
2069         if (value != NULL)
2070                 unmap = (strcmp(value, "on") == 0);
2071         if (unmap)
2072                 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
2073         else
2074                 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
2075
2076         dev_relthread(dev, ref);
2077         return (0);
2078 }
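
/*
 * Example of the blocksize negotiation above (numbers hypothetical): on a
 * provider reporting 512-byte sectors, a requested blocksize of 4096 is
 * accepted (4096 % 512 == 0), while 512 on a 4096-byte provider is
 * rejected as smaller than the backing sector size and 6144 on a
 * 4096-byte provider is rejected as not a multiple of it.  Whether UNMAP
 * is advertised follows the GEOM::candelete attribute queried above,
 * unless the "unmap" LUN option overrides it ("on" enables it, anything
 * else disables it).
 */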
2079
2080 static int
2081 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2082 {
2083         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2084         int flags;
2085
2086         if (be_lun->vn) {
2087                 flags = FREAD;
2088                 if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2089                         flags |= FWRITE;
2090                 (void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2091                 be_lun->vn = NULL;
2092
2093                 switch (be_lun->dev_type) {
2094                 case CTL_BE_BLOCK_DEV:
2095                         break;
2096                 case CTL_BE_BLOCK_FILE:
2097                         if (be_lun->backend.file.cred != NULL) {
2098                                 crfree(be_lun->backend.file.cred);
2099                                 be_lun->backend.file.cred = NULL;
2100                         }
2101                         break;
2102                 case CTL_BE_BLOCK_NONE:
2103                         break;
2104                 default:
2105                         panic("Unexpected backend type %d", be_lun->dev_type);
2106                         break;
2107                 }
2108                 be_lun->dev_type = CTL_BE_BLOCK_NONE;
2109         }
2110         return (0);
2111 }
2112
2113 static int
2114 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
2115 {
2116         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2117         struct nameidata nd;
2118         const char      *value;
2119         int              error, flags;
2120
2121         error = 0;
2122         if (rootvnode == NULL) {
2123                 snprintf(req->error_str, sizeof(req->error_str),
2124                          "Root filesystem is not mounted");
2125                 return (1);
2126         }
2127         pwd_ensure_dirs();
2128
2129         value = dnvlist_get_string(cbe_lun->options, "file", NULL);
2130         if (value == NULL) {
2131                 snprintf(req->error_str, sizeof(req->error_str),
2132                          "no file argument specified");
2133                 return (1);
2134         }
2135         free(be_lun->dev_path, M_CTLBLK);
2136         be_lun->dev_path = strdup(value, M_CTLBLK);
2137
2138         flags = FREAD;
2139         value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
2140         if (value != NULL) {
2141                 if (strcmp(value, "on") != 0)
2142                         flags |= FWRITE;
2143         } else if (cbe_lun->lun_type == T_DIRECT)
2144                 flags |= FWRITE;
2145
2146 again:
2147         NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
2148         error = vn_open(&nd, &flags, 0, NULL);
2149         if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
2150                 flags &= ~FWRITE;
2151                 goto again;
2152         }
2153         if (error) {
2154                 /*
2155                  * If the user doesn't give us a fully qualified path, prepending
2156                  * /dev/ below is the only reasonable guess we can make.  If they
2157                  * want to specify a plain file, they need to specify the full
2158                  * path.
2159                  */
2160                 if (be_lun->dev_path[0] != '/') {
2161                         char *dev_name;
2162
2163                         asprintf(&dev_name, M_CTLBLK, "/dev/%s",
2164                                 be_lun->dev_path);
2165                         free(be_lun->dev_path, M_CTLBLK);
2166                         be_lun->dev_path = dev_name;
2167                         goto again;
2168                 }
2169                 snprintf(req->error_str, sizeof(req->error_str),
2170                     "error opening %s: %d", be_lun->dev_path, error);
2171                 return (error);
2172         }
2173         if (flags & FWRITE)
2174                 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
2175         else
2176                 cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
2177
2178         NDFREE(&nd, NDF_ONLY_PNBUF);
2179         be_lun->vn = nd.ni_vp;
2180
2181         /* We only support disks and files. */
2182         if (vn_isdisk(be_lun->vn, &error)) {
2183                 error = ctl_be_block_open_dev(be_lun, req);
2184         } else if (be_lun->vn->v_type == VREG) {
2185                 error = ctl_be_block_open_file(be_lun, req);
2186         } else {
2187                 error = EINVAL;
2188                 snprintf(req->error_str, sizeof(req->error_str),
2189                          "%s is not a disk or plain file", be_lun->dev_path);
2190         }
2191         VOP_UNLOCK(be_lun->vn);
2192
2193         if (error != 0)
2194                 ctl_be_block_close(be_lun);
2195         cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2196         if (be_lun->dispatch != ctl_be_block_dispatch_dev)
2197                 cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2198         value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
2199         if (value != NULL && strcmp(value, "on") == 0)
2200                 cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
2201         else if (value != NULL && strcmp(value, "read") == 0)
2202                 cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2203         else if (value != NULL && strcmp(value, "off") == 0)
2204                 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2205         return (0);
2206 }
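
/*
 * Usage sketch (option names are the ones consumed above; exact ctladm
 * syntax may differ):
 *
 *   ctladm create -b block -o file=/dev/zvol/pool/vol0
 *   ctladm create -b block -o file=/data/disk.img -o readonly=on \
 *       -o serseq=off
 *
 * A relative "file" value is retried with a /dev/ prefix if the first
 * open fails, so plain files should be given as absolute paths.
 */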
2207
2208 static int
2209 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2210 {
2211         struct ctl_be_lun *cbe_lun;
2212         struct ctl_be_block_lun *be_lun;
2213         struct ctl_lun_create_params *params;
2215         char tmpstr[32];
2216         const char *value;
2217         int retval, num_threads;
2218         int tmp_num_threads;
2219
2220         params = &req->reqdata.create;
2221         retval = 0;
2222         req->status = CTL_LUN_OK;
2223
2224         be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2225         cbe_lun = &be_lun->cbe_lun;
2226         cbe_lun->be_lun = be_lun;
2227         be_lun->params = req->reqdata.create;
2228         be_lun->softc = softc;
2229         STAILQ_INIT(&be_lun->input_queue);
2230         STAILQ_INIT(&be_lun->config_read_queue);
2231         STAILQ_INIT(&be_lun->config_write_queue);
2232         STAILQ_INIT(&be_lun->datamove_queue);
2233         sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
2234         mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF);
2235         mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF);
2236         cbe_lun->options = nvlist_clone(req->args_nvl);
2237         be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
2238             NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2239         if (be_lun->lun_zone == NULL) {
2240                 snprintf(req->error_str, sizeof(req->error_str),
2241                          "error allocating UMA zone");
2242                 goto bailout_error;
2243         }
2244
2245         if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2246                 cbe_lun->lun_type = params->device_type;
2247         else
2248                 cbe_lun->lun_type = T_DIRECT;
2249         be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
2250         cbe_lun->flags = 0;
2251         value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2252         if (value != NULL) {
2253                 if (strcmp(value, "primary") == 0)
2254                         cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2255         } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2256                 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2257
2258         if (cbe_lun->lun_type == T_DIRECT ||
2259             cbe_lun->lun_type == T_CDROM) {
2260                 be_lun->size_bytes = params->lun_size_bytes;
2261                 if (params->blocksize_bytes != 0)
2262                         cbe_lun->blocksize = params->blocksize_bytes;
2263                 else if (cbe_lun->lun_type == T_CDROM)
2264                         cbe_lun->blocksize = 2048;
2265                 else
2266                         cbe_lun->blocksize = 512;
2267                 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2268                 cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2269                     0 : (be_lun->size_blocks - 1);
2270
2271                 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2272                     control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2273                         retval = ctl_be_block_open(be_lun, req);
2274                         if (retval != 0) {
2275                                 retval = 0;
2276                                 req->status = CTL_LUN_WARNING;
2277                         }
2278                 }
2279                 num_threads = cbb_num_threads;
2280         } else {
2281                 num_threads = 1;
2282         }
2283
2284         value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL);
2285         if (value != NULL) {
2286                 tmp_num_threads = strtol(value, NULL, 0);
2287
2288                 /*
2289                  * We don't let the user specify less than one
2290                  * thread, but hope he's clueful enough not to
2291                  * specify 1000 threads.
2292                  */
2293                 if (tmp_num_threads < 1) {
2294                         snprintf(req->error_str, sizeof(req->error_str),
2295                                  "invalid number of threads %s",
2296                                  value);
2297                         goto bailout_error;
2298                 }
2299                 num_threads = tmp_num_threads;
2300         }
2301
2302         if (be_lun->vn == NULL)
2303                 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2304         /* Tell the user the blocksize we ended up using */
2305         params->lun_size_bytes = be_lun->size_bytes;
2306         params->blocksize_bytes = cbe_lun->blocksize;
2307         if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2308                 cbe_lun->req_lun_id = params->req_lun_id;
2309                 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2310         } else
2311                 cbe_lun->req_lun_id = 0;
2312
2313         cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2314         cbe_lun->lun_config_status = ctl_be_block_lun_config_status;
2315         cbe_lun->be = &ctl_be_block_driver;
2316
2317         if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2318                 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
2319                          softc->num_luns);
2320                 strncpy((char *)cbe_lun->serial_num, tmpstr,
2321                         MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2322
2323                 /* Tell the user what we used for a serial number */
2324                 strncpy((char *)params->serial_num, tmpstr,
2325                         MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2326         } else { 
2327                 strncpy((char *)cbe_lun->serial_num, params->serial_num,
2328                         MIN(sizeof(cbe_lun->serial_num),
2329                         sizeof(params->serial_num)));
2330         }
2331         if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2332                 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
2333                 strncpy((char *)cbe_lun->device_id, tmpstr,
2334                         MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2335
2336                 /* Tell the user what we used for a device ID */
2337                 strncpy((char *)params->device_id, tmpstr,
2338                         MIN(sizeof(params->device_id), sizeof(tmpstr)));
2339         } else {
2340                 strncpy((char *)cbe_lun->device_id, params->device_id,
2341                         MIN(sizeof(cbe_lun->device_id),
2342                             sizeof(params->device_id)));
2343         }
2344
2345         TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2346
2347         be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
2348             taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2349
2350         if (be_lun->io_taskqueue == NULL) {
2351                 snprintf(req->error_str, sizeof(req->error_str),
2352                          "unable to create taskqueue");
2353                 goto bailout_error;
2354         }
2355
2356         /*
2357          * Note that we start the same number of threads by default for
2358          * both the file case and the block device case.  For the file
2359          * case, we need multiple threads to allow concurrency, because the
2360          * vnode interface is designed to be a blocking interface.  For the
2361          * block device case, ZFS zvols at least will block the caller's
2362          * context in many instances, and so we need multiple threads to
2363          * overcome that problem.  Other block devices don't need as many
2364          * threads, but they shouldn't cause too many problems.
2365          *
2366          * If the user wants to just have a single thread for a block
2367          * device, he can specify that when the LUN is created, or change
2368          * the tunable/sysctl to alter the default number of threads.
2369          */
2370         retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue,
2371                                          /*num threads*/num_threads,
2372                                          /*priority*/PUSER,
2373                                          /*proc*/control_softc->ctl_proc,
2374                                          /*thread name*/
2375                                          "%s taskq", be_lun->lunname);
2376
2377         if (retval != 0)
2378                 goto bailout_error;
2379
2380         be_lun->num_threads = num_threads;
2381
2382         mtx_lock(&softc->lock);
2383         softc->num_luns++;
2384         STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2385
2386         mtx_unlock(&softc->lock);
2387
2388         retval = ctl_add_lun(&be_lun->cbe_lun);
2389         if (retval != 0) {
2390                 mtx_lock(&softc->lock);
2391                 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2392                               links);
2393                 softc->num_luns--;
2394                 mtx_unlock(&softc->lock);
2395                 snprintf(req->error_str, sizeof(req->error_str),
2396                          "ctl_add_lun() returned error %d, see dmesg for "
2397                          "details", retval);
2398                 retval = 0;
2399                 goto bailout_error;
2400         }
2401
2402         mtx_lock(&softc->lock);
2403
2404         /*
2405          * Tell the config_status routine that we're waiting so it won't
2406          * clean up the LUN in the event of an error.
2407          */
2408         be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2409
2410         while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2411                 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2412                 if (retval == EINTR)
2413                         break;
2414         }
2415         be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2416
2417         if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2418                 snprintf(req->error_str, sizeof(req->error_str),
2419                          "LUN configuration error, see dmesg for details");
2420                 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2421                               links);
2422                 softc->num_luns--;
2423                 mtx_unlock(&softc->lock);
2424                 goto bailout_error;
2425         } else {
2426                 params->req_lun_id = cbe_lun->lun_id;
2427         }
2428
2429         mtx_unlock(&softc->lock);
2430
2431         be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2432                                                cbe_lun->blocksize,
2433                                                DEVSTAT_ALL_SUPPORTED,
2434                                                cbe_lun->lun_type
2435                                                | DEVSTAT_TYPE_IF_OTHER,
2436                                                DEVSTAT_PRIORITY_OTHER);
2437
2438         return (retval);
2439
2440 bailout_error:
2441         req->status = CTL_LUN_ERROR;
2442
2443         if (be_lun->io_taskqueue != NULL)
2444                 taskqueue_free(be_lun->io_taskqueue);
2445         ctl_be_block_close(be_lun);
2446         if (be_lun->dev_path != NULL)
2447                 free(be_lun->dev_path, M_CTLBLK);
2448         if (be_lun->lun_zone != NULL)
2449                 uma_zdestroy(be_lun->lun_zone);
2450         nvlist_destroy(cbe_lun->options);
2451         mtx_destroy(&be_lun->queue_lock);
2452         mtx_destroy(&be_lun->io_lock);
2453         free(be_lun, M_CTLBLK);
2454
2455         return (retval);
2456 }
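
/*
 * For example (option names as consumed above): "num_threads=8" starts
 * eight worker threads for this LUN's taskqueue instead of the
 * cbb_num_threads default used for T_DIRECT and T_CDROM LUNs (other LUN
 * types default to a single thread), and "ha_role=primary" forces the
 * primary role regardless of the shelf's CTL_FLAG_ACTIVE_SHELF setting.
 */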
2457
2458 static int
2459 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2460 {
2461         struct ctl_lun_rm_params *params;
2462         struct ctl_be_block_lun *be_lun;
2463         struct ctl_be_lun *cbe_lun;
2464         int retval;
2465
2466         params = &req->reqdata.rm;
2467
2468         mtx_lock(&softc->lock);
2469         STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2470                 if (be_lun->cbe_lun.lun_id == params->lun_id)
2471                         break;
2472         }
2473         mtx_unlock(&softc->lock);
2474         if (be_lun == NULL) {
2475                 snprintf(req->error_str, sizeof(req->error_str),
2476                          "LUN %u is not managed by the block backend",
2477                          params->lun_id);
2478                 goto bailout_error;
2479         }
2480         cbe_lun = &be_lun->cbe_lun;
2481
2482         retval = ctl_disable_lun(cbe_lun);
2483         if (retval != 0) {
2484                 snprintf(req->error_str, sizeof(req->error_str),
2485                          "error %d returned from ctl_disable_lun() for "
2486                          "LUN %d", retval, params->lun_id);
2487                 goto bailout_error;
2488         }
2489
2490         if (be_lun->vn != NULL) {
2491                 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2492                 ctl_lun_no_media(cbe_lun);
2493                 taskqueue_drain_all(be_lun->io_taskqueue);
2494                 ctl_be_block_close(be_lun);
2495         }
2496
2497         retval = ctl_invalidate_lun(cbe_lun);
2498         if (retval != 0) {
2499                 snprintf(req->error_str, sizeof(req->error_str),
2500                          "error %d returned from ctl_invalidate_lun() for "
2501                          "LUN %d", retval, params->lun_id);
2502                 goto bailout_error;
2503         }
2504
2505         mtx_lock(&softc->lock);
2506         be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2507         while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2508                 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2509                 if (retval == EINTR)
2510                         break;
2511         }
2512         be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2513
2514         if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2515                 snprintf(req->error_str, sizeof(req->error_str),
2516                          "interrupted waiting for LUN to be freed");
2517                 mtx_unlock(&softc->lock);
2518                 goto bailout_error;
2519         }
2520
2521         STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);
2522
2523         softc->num_luns--;
2524         mtx_unlock(&softc->lock);
2525
2526         taskqueue_drain_all(be_lun->io_taskqueue);
2527         taskqueue_free(be_lun->io_taskqueue);
2528
2529         if (be_lun->disk_stats != NULL)
2530                 devstat_remove_entry(be_lun->disk_stats);
2531
2532         uma_zdestroy(be_lun->lun_zone);
2533
2534         nvlist_destroy(cbe_lun->options);
2535         free(be_lun->dev_path, M_CTLBLK);
2536         mtx_destroy(&be_lun->queue_lock);
2537         mtx_destroy(&be_lun->io_lock);
2538         free(be_lun, M_CTLBLK);
2539
2540         req->status = CTL_LUN_OK;
2541         return (0);
2542
2543 bailout_error:
2544         req->status = CTL_LUN_ERROR;
2545         return (0);
2546 }
2547
2548 static int
2549 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2550 {
2551         struct ctl_lun_modify_params *params;
2552         struct ctl_be_block_lun *be_lun;
2553         struct ctl_be_lun *cbe_lun;
2554         const char *value;
2555         uint64_t oldsize;
2556         int error, wasprim;
2557
2558         params = &req->reqdata.modify;
2559
2560         mtx_lock(&softc->lock);
2561         STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2562                 if (be_lun->cbe_lun.lun_id == params->lun_id)
2563                         break;
2564         }
2565         mtx_unlock(&softc->lock);
2566         if (be_lun == NULL) {
2567                 snprintf(req->error_str, sizeof(req->error_str),
2568                          "LUN %u is not managed by the block backend",
2569                          params->lun_id);
2570                 goto bailout_error;
2571         }
2572         cbe_lun = &be_lun->cbe_lun;
2573
2574         if (params->lun_size_bytes != 0)
2575                 be_lun->params.lun_size_bytes = params->lun_size_bytes;
2576
2577         nvlist_destroy(cbe_lun->options);
2578         cbe_lun->options = nvlist_clone(req->args_nvl);
2579
2580         wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
2581         value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2582         if (value != NULL) {
2583                 if (strcmp(value, "primary") == 0)
2584                         cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2585                 else
2586                         cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2587         } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2588                 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2589         else
2590                 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2591         if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
2592                 if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
2593                         ctl_lun_primary(cbe_lun);
2594                 else
2595                         ctl_lun_secondary(cbe_lun);
2596         }
2597
2598         oldsize = be_lun->size_blocks;
2599         if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2600             control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2601                 if (be_lun->vn == NULL)
2602                         error = ctl_be_block_open(be_lun, req);
2603                 else if (vn_isdisk(be_lun->vn, &error))
2604                         error = ctl_be_block_open_dev(be_lun, req);
2605                 else if (be_lun->vn->v_type == VREG) {
2606                         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2607                         error = ctl_be_block_open_file(be_lun, req);
2608                         VOP_UNLOCK(be_lun->vn);
2609                 } else
2610                         error = EINVAL;
2611                 if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
2612                     be_lun->vn != NULL) {
2613                         cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2614                         ctl_lun_has_media(cbe_lun);
2615                 } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
2616                     be_lun->vn == NULL) {
2617                         cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2618                         ctl_lun_no_media(cbe_lun);
2619                 }
2620                 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2621         } else {
2622                 if (be_lun->vn != NULL) {
2623                         cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2624                         ctl_lun_no_media(cbe_lun);
2625                         taskqueue_drain_all(be_lun->io_taskqueue);
2626                         error = ctl_be_block_close(be_lun);
2627                 } else
2628                         error = 0;
2629         }
2630         if (be_lun->size_blocks != oldsize)
2631                 ctl_lun_capacity_changed(cbe_lun);
2632
2633         /* Tell the user the exact size we ended up using */
2634         params->lun_size_bytes = be_lun->size_bytes;
2635
2636         req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
2637         return (0);
2638
2639 bailout_error:
2640         req->status = CTL_LUN_ERROR;
2641         return (0);
2642 }
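
/*
 * For example, growing a LUN: a modify request with a larger
 * lun_size_bytes updates be_lun->params above, the backing file or device
 * is re-probed by the matching open routine, and if size_blocks actually
 * changed ctl_lun_capacity_changed() is called so the upper layers can
 * notify the initiators.  If the LUN is demoted to secondary (and ha_mode
 * is not CTL_HA_MODE_SER_ONLY), the backing store is closed instead and
 * the LUN reports no media.
 */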
2643
2644 static void
2645 ctl_be_block_lun_shutdown(void *be_lun)
2646 {
2647         struct ctl_be_block_lun *lun = be_lun;
2648         struct ctl_be_block_softc *softc = lun->softc;
2649
2650         mtx_lock(&softc->lock);
2651         lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2652         if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2653                 wakeup(lun);
2654         mtx_unlock(&softc->lock);
2655 }
2656
2657 static void
2658 ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2659 {
2660         struct ctl_be_block_lun *lun;
2661         struct ctl_be_block_softc *softc;
2662
2663         lun = (struct ctl_be_block_lun *)be_lun;
2664         softc = lun->softc;
2665
2666         if (status == CTL_LUN_CONFIG_OK) {
2667                 mtx_lock(&softc->lock);
2668                 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2669                 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2670                         wakeup(lun);
2671                 mtx_unlock(&softc->lock);
2672
2673                 /*
2674                  * We successfully added the LUN, attempt to enable it.
2675                  */
2676                 if (ctl_enable_lun(&lun->cbe_lun) != 0) {
2677                         printf("%s: ctl_enable_lun() failed!\n", __func__);
2678                         if (ctl_invalidate_lun(&lun->cbe_lun) != 0) {
2679                                 printf("%s: ctl_invalidate_lun() failed!\n",
2680                                        __func__);
2681                         }
2682                 }
2683
2684                 return;
2685         }
2686
2688         mtx_lock(&softc->lock);
2689         lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2690         lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2691         wakeup(lun);
2692         mtx_unlock(&softc->lock);
2693 }
2694
2696 static int
2697 ctl_be_block_config_write(union ctl_io *io)
2698 {
2699         struct ctl_be_block_lun *be_lun;
2700         struct ctl_be_lun *cbe_lun;
2701         int retval;
2702
2703         DPRINTF("entered\n");
2704
2705         cbe_lun = CTL_BACKEND_LUN(io);
2706         be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2707
2708         retval = 0;
2709         switch (io->scsiio.cdb[0]) {
2710         case SYNCHRONIZE_CACHE:
2711         case SYNCHRONIZE_CACHE_16:
2712         case WRITE_SAME_10:
2713         case WRITE_SAME_16:
2714         case UNMAP:
2715                 /*
2716                  * The upper level CTL code will filter out any CDBs with
2717                  * the immediate bit set and return the proper error.
2718                  *
2719                  * We don't really need to worry about what LBA range the
2720                  * user asked to be synced out.  When they issue a sync
2721                  * cache command, we'll sync out the whole thing.
2722                  */
2723                 mtx_lock(&be_lun->queue_lock);
2724                 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2725                                    links);
2726                 mtx_unlock(&be_lun->queue_lock);
2727                 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2728                 break;
2729         case START_STOP_UNIT: {
2730                 struct scsi_start_stop_unit *cdb;
2731                 struct ctl_lun_req req;
2732
2733                 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2734                 if ((cdb->how & SSS_PC_MASK) != 0) {
2735                         ctl_set_success(&io->scsiio);
2736                         ctl_config_write_done(io);
2737                         break;
2738                 }
2739                 if (cdb->how & SSS_START) {
2740                         if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
2741                                 retval = ctl_be_block_open(be_lun, &req);
2742                                 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2743                                 if (retval == 0) {
2744                                         cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2745                                         ctl_lun_has_media(cbe_lun);
2746                                 } else {
2747                                         cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2748                                         ctl_lun_no_media(cbe_lun);
2749                                 }
2750                         }
2751                         ctl_start_lun(cbe_lun);
2752                 } else {
2753                         ctl_stop_lun(cbe_lun);
2754                         if (cdb->how & SSS_LOEJ) {
2755                                 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2756                                 cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
2757                                 ctl_lun_ejected(cbe_lun);
2758                                 if (be_lun->vn != NULL)
2759                                         ctl_be_block_close(be_lun);
2760                         }
2761                 }
2762
2763                 ctl_set_success(&io->scsiio);
2764                 ctl_config_write_done(io);
2765                 break;
2766         }
2767         case PREVENT_ALLOW:
2768                 ctl_set_success(&io->scsiio);
2769                 ctl_config_write_done(io);
2770                 break;
2771         default:
2772                 ctl_set_invalid_opcode(&io->scsiio);
2773                 ctl_config_write_done(io);
2774                 retval = CTL_RETVAL_COMPLETE;
2775                 break;
2776         }
2777
2778         return (retval);
2779 }
2780
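/*
 * Handle "configuration read" commands.  Only SERVICE ACTION IN with the
 * GET LBA STATUS service action is supported; it is queued to the worker
 * thread.  Anything else is rejected as an invalid field or opcode.
 */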
2781 static int
2782 ctl_be_block_config_read(union ctl_io *io)
2783 {
2784         struct ctl_be_block_lun *be_lun;
2785         struct ctl_be_lun *cbe_lun;
2786         int retval = 0;
2787
2788         DPRINTF("entered\n");
2789
2790         cbe_lun = CTL_BACKEND_LUN(io);
2791         be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2792
2793         switch (io->scsiio.cdb[0]) {
2794         case SERVICE_ACTION_IN:
2795                 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2796                         mtx_lock(&be_lun->queue_lock);
2797                         STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2798                             &io->io_hdr, links);
2799                         mtx_unlock(&be_lun->queue_lock);
2800                         taskqueue_enqueue(be_lun->io_taskqueue,
2801                             &be_lun->io_task);
2802                         retval = CTL_RETVAL_QUEUED;
2803                         break;
2804                 }
2805                 ctl_set_invalid_field(&io->scsiio,
2806                                       /*sks_valid*/ 1,
2807                                       /*command*/ 1,
2808                                       /*field*/ 1,
2809                                       /*bit_valid*/ 1,
2810                                       /*bit*/ 4);
2811                 ctl_config_read_done(io);
2812                 retval = CTL_RETVAL_COMPLETE;
2813                 break;
2814         default:
2815                 ctl_set_invalid_opcode(&io->scsiio);
2816                 ctl_config_read_done(io);
2817                 retval = CTL_RETVAL_COMPLETE;
2818                 break;
2819         }
2820
2821         return (retval);
2822 }
2823
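/*
 * Emit backend-specific LUN information into the supplied sbuf as XML;
 * currently this is just "\t<num_threads>%d</num_threads>\n" with the
 * LUN's worker thread count.
 */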
2824 static int
2825 ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2826 {
2827         struct ctl_be_block_lun *lun;
2828         int retval;
2829
2830         lun = (struct ctl_be_block_lun *)be_lun;
2831
2832         retval = sbuf_printf(sb, "\t<num_threads>");
2833         if (retval != 0)
2834                 goto bailout;
2835         retval = sbuf_printf(sb, "%d", lun->num_threads);
2836         if (retval != 0)
2837                 goto bailout;
2838         retval = sbuf_printf(sb, "</num_threads>\n");
2839
2840 bailout:
2841         return (retval);
2842 }
2843
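/*
 * Return the value of a named backend attribute for this LUN, or
 * UINT64_MAX if the backing store provides no getattr method.
 *
 * Illustrative sketch of a hypothetical caller, assuming the backing
 * store exposes a "blocksused" attribute (report_usage() is a made-up
 * helper, not part of this driver):
 *
 *	uint64_t used = ctl_be_block_lun_attr(be_lun, "blocksused");
 *	if (used != UINT64_MAX)
 *		report_usage(used);
 */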
2844 static uint64_t
2845 ctl_be_block_lun_attr(void *be_lun, const char *attrname)
2846 {
2847         struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun;
2848
2849         if (lun->getattr == NULL)
2850                 return (UINT64_MAX);
2851         return (lun->getattr(lun, attrname));
2852 }
2853
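/*
 * One-time backend initialization: set up the softc mutex, the UMA zone
 * used to allocate ctl_be_block_io structures, and the LUN list.  These
 * hooks are wired into CTL through the backend's ctl_backend_driver
 * structure, declared elsewhere in this file.
 */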
2854 static int
2855 ctl_be_block_init(void)
2856 {
2857         struct ctl_be_block_softc *softc = &backend_block_softc;
2858
2859         mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2860         softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2861             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2862         STAILQ_INIT(&softc->lun_list);
2863         return (0);
2864 }
2865
2866
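/*
 * Backend shutdown: disable and invalidate every LUN on the list, then
 * tear down the beio UMA zone and the softc mutex.
 */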
2867 static int
2868 ctl_be_block_shutdown(void)
2869 {
2870         struct ctl_be_block_softc *softc = &backend_block_softc;
2871         struct ctl_be_block_lun *lun, *next_lun;
2872
2873         mtx_lock(&softc->lock);
2874         STAILQ_FOREACH_SAFE(lun, &softc->lun_list, links, next_lun) {
2875                 /*
2876                  * Drop our lock here.  Since ctl_invalidate_lun() can call
2877                  * back into us, holding it could lead to a recursive lock
2878                  * of the same mutex, which would cause a hang.
2879                  */
2880                 mtx_unlock(&softc->lock);
2881                 ctl_disable_lun(&lun->cbe_lun);
2882                 ctl_invalidate_lun(&lun->cbe_lun);
2883                 mtx_lock(&softc->lock);
2884         }
2885         mtx_unlock(&softc->lock);
2886
2887         uma_zdestroy(softc->beio_zone);
2888         mtx_destroy(&softc->lock);
2889         return (0);
2890 }