2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2003 Silicon Graphics International Corp.
5 * Copyright (c) 2009-2011 Spectra Logic Corporation
6 * Copyright (c) 2012 The FreeBSD Foundation
7 * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
10 * Portions of this software were developed by Edward Tomasz Napierala
11 * under sponsorship from the FreeBSD Foundation.
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions, and the following disclaimer,
18 * without modification.
19 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
20 * substantially similar to the "NO WARRANTY" disclaimer below
21 * ("Disclaimer") and any redistribution must be conditioned upon
22 * including a substantially similar Disclaimer requirement for further
23 * binary redistribution.
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGES.
38 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
41 * CAM Target Layer driver backend for block devices.
43 * Author: Ken Merry <ken@FreeBSD.org>
45 #include <sys/cdefs.h>
46 __FBSDID("$FreeBSD$");
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/types.h>
52 #include <sys/kthread.h>
54 #include <sys/fcntl.h>
55 #include <sys/limits.h>
57 #include <sys/mutex.h>
58 #include <sys/condvar.h>
59 #include <sys/malloc.h>
61 #include <sys/ioccom.h>
62 #include <sys/queue.h>
64 #include <sys/endian.h>
67 #include <sys/taskqueue.h>
68 #include <sys/vnode.h>
69 #include <sys/namei.h>
70 #include <sys/mount.h>
72 #include <sys/fcntl.h>
73 #include <sys/filedesc.h>
74 #include <sys/filio.h>
77 #include <sys/module.h>
79 #include <sys/devicestat.h>
80 #include <sys/sysctl.h>
85 #include <geom/geom.h>
88 #include <cam/scsi/scsi_all.h>
89 #include <cam/scsi/scsi_da.h>
90 #include <cam/ctl/ctl_io.h>
91 #include <cam/ctl/ctl.h>
92 #include <cam/ctl/ctl_backend.h>
93 #include <cam/ctl/ctl_ioctl.h>
94 #include <cam/ctl/ctl_ha.h>
95 #include <cam/ctl/ctl_scsi_all.h>
96 #include <cam/ctl/ctl_private.h>
97 #include <cam/ctl/ctl_error.h>
100 * The idea here is that we'll allocate enough S/G space to hold a 1MB
101 * I/O. If we get an I/O larger than that, we'll split it.
103 #define CTLBLK_HALF_IO_SIZE (512 * 1024)
104 #define CTLBLK_MAX_IO_SIZE (CTLBLK_HALF_IO_SIZE * 2)
105 #define CTLBLK_MAX_SEG MAXPHYS
106 #define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
107 #define CTLBLK_MAX_SEGS (CTLBLK_HALF_SEGS * 2)
110 #define DPRINTF(fmt, args...) \
111 printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
113 #define DPRINTF(fmt, args...) do {} while(0)
117 ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
119 ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
121 SDT_PROVIDER_DEFINE(cbb);
124 CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01,
125 CTL_BE_BLOCK_LUN_WAITING = 0x04,
126 } ctl_be_block_lun_flags;
134 struct ctl_be_block_filedata {
138 union ctl_be_block_bedata {
139 struct ctl_be_block_filedata file;
142 struct ctl_be_block_io;
143 struct ctl_be_block_lun;
145 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
146 struct ctl_be_block_io *beio);
147 typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
148 const char *attrname);
151 * Backend LUN structure. There is a 1:1 mapping between a block device
152 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
/*
 * Per-LUN state for the block backend.  There is a 1:1 mapping between
 * this structure, the backing store (file or device), and a CTL LUN.
 * NOTE(review): the embedded original line numbers jump (156 -> 158,
 * 166 -> 168, ...), so member declarations and the closing "};" are
 * missing from this listing; restore from the canonical file.
 */
154 struct ctl_be_block_lun {
155 struct ctl_be_lun cbe_lun; /* Must be first element. */
156 struct ctl_lun_create_params params;
158 ctl_be_block_type dev_type;
160 union ctl_be_block_bedata backend;
/*
 * Method table — the flush/unmap/getattr entry points below are invoked
 * through these pointers (see ctl_be_block_cw_dispatch_*); presumably
 * filled in with the _file or _dev variants at open time — TODO confirm.
 */
161 cbb_dispatch_t dispatch;
162 cbb_dispatch_t lun_flush;
163 cbb_dispatch_t unmap;
164 cbb_dispatch_t get_lba_status;
165 cbb_getattr_t getattr;
166 uint64_t size_blocks;
168 struct ctl_be_block_softc *softc;
169 struct devstat *disk_stats;
170 ctl_be_block_lun_flags flags;
171 SLIST_ENTRY(ctl_be_block_lun) links;
/* Worker taskqueue that drains the four STAILQs below. */
172 struct taskqueue *io_taskqueue;
175 STAILQ_HEAD(, ctl_io_hdr) input_queue;
176 STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
177 STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
178 STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
/*
 * io_lock guards devstat start/end and bio completion counters;
 * queue_lock guards the STAILQ queues (see ctl_be_block_move_done).
 */
179 struct mtx_padalign io_lock;
180 struct mtx_padalign queue_lock;
184 * Overall softc structure for the block backend module.
/*
 * Module-wide softc for the block backend (single static instance below).
 * NOTE(review): numbering jumps 187 -> 190; members (e.g. the buf_zone
 * used by ctl_free_beio) and the closing "};" are missing here.
 */
186 struct ctl_be_block_softc {
/* Serializes create/rm/modify requests against each other. */
187 struct sx modify_lock;
190 SLIST_HEAD(, ctl_be_block_lun) lun_list;
/* UMA zone from which ctl_alloc_beio() draws ctl_be_block_io structures. */
191 uma_zone_t beio_zone;
195 static struct ctl_be_block_softc backend_block_softc;
198 * Per-I/O information.
/*
 * Per-I/O context carried from dispatch to bio/VOP completion.
 * NOTE(review): numbering jumps (203 -> 211, 214 -> 218); counters such as
 * num_segs/num_bios_sent/num_bios_done referenced by the functions below
 * are among the dropped lines.
 */
200 struct ctl_be_block_io {
/* Scatter/gather list; second half holds the compare copy when two_sglists. */
202 struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS];
/* iovec mirror of sg_segs, built for VOP_READ/VOP_WRITE uio dispatch. */
203 struct iovec xiovecs[CTLBLK_MAX_SEGS];
/* Offset of the lowest-offset bio error, kept by ctl_be_block_biodone(). */
211 uint64_t first_error_offset;
/* devstat transaction start time and classification. */
212 struct bintime ds_t0;
213 devstat_tag_type ds_tag_type;
214 devstat_trans_flags ds_trans_type;
218 struct ctl_be_block_softc *softc;
219 struct ctl_be_block_lun *lun;
220 void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
223 extern struct ctl_softc *control_softc;
225 static int cbb_num_threads = 14;
226 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
227 "CAM Target Layer Block Backend");
228 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
229 &cbb_num_threads, 0, "Number of threads per backing file");
231 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
232 static void ctl_free_beio(struct ctl_be_block_io *beio);
233 static void ctl_complete_beio(struct ctl_be_block_io *beio);
234 static int ctl_be_block_move_done(union ctl_io *io);
235 static void ctl_be_block_biodone(struct bio *bio);
236 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
237 struct ctl_be_block_io *beio);
238 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
239 struct ctl_be_block_io *beio);
240 static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
241 struct ctl_be_block_io *beio);
242 static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
243 const char *attrname);
244 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
245 struct ctl_be_block_io *beio);
246 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
247 struct ctl_be_block_io *beio);
248 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
249 struct ctl_be_block_io *beio);
250 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
251 const char *attrname);
252 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
254 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
256 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
258 static void ctl_be_block_worker(void *context, int pending);
259 static int ctl_be_block_submit(union ctl_io *io);
260 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
261 int flag, struct thread *td);
262 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
263 struct ctl_lun_req *req);
264 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
265 struct ctl_lun_req *req);
266 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
267 static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
268 struct ctl_lun_req *req);
269 static int ctl_be_block_create(struct ctl_be_block_softc *softc,
270 struct ctl_lun_req *req);
271 static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
272 struct ctl_lun_req *req);
273 static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
274 struct ctl_lun_req *req);
275 static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun);
276 static int ctl_be_block_config_write(union ctl_io *io);
277 static int ctl_be_block_config_read(union ctl_io *io);
278 static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb);
279 static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
280 static int ctl_be_block_init(void);
281 static int ctl_be_block_shutdown(void);
/*
 * Backend driver registration table handed to CTL via
 * CTL_BACKEND_DECLARE() below.  NOTE(review): numbering jumps 283 -> 286;
 * the opening "{" (and likely a .name initializer) is missing from this
 * listing.
 */
283 static struct ctl_backend_driver ctl_be_block_driver =
286 .flags = CTL_BE_FLAG_HAS_CONFIG,
287 .init = ctl_be_block_init,
288 .shutdown = ctl_be_block_shutdown,
289 .data_submit = ctl_be_block_submit,
290 .data_move_done = ctl_be_block_move_done,
291 .config_read = ctl_be_block_config_read,
292 .config_write = ctl_be_block_config_write,
293 .ioctl = ctl_be_block_ioctl,
294 .lun_info = ctl_be_block_lun_info,
295 .lun_attr = ctl_be_block_lun_attr
298 MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend");
299 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
/*
 * Allocate a zeroed per-I/O context from the module's UMA zone.
 * M_WAITOK: may sleep, never returns NULL.  NOTE(review): numbering jumps
 * 306 -> 312; the "beio->softc = softc;", return statement and closing
 * brace are missing from this listing.
 */
301 static struct ctl_be_block_io *
302 ctl_alloc_beio(struct ctl_be_block_softc *softc)
304 struct ctl_be_block_io *beio;
306 beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
/*
 * Release all data buffers attached to a beio, then the beio itself.
 * When two_sglists is set (COMPARE), the duplicate buffers living in the
 * upper half of sg_segs[] are freed as well.  NOTE(review): numbering
 * jumps (314 -> 317, 323 -> 327); the declaration of 'i' and closing
 * braces are missing from this listing.
 */
312 ctl_free_beio(struct ctl_be_block_io *beio)
314 struct ctl_be_block_softc *softc = beio->softc;
317 for (i = 0; i < beio->num_segs; i++) {
318 uma_zfree(softc->buf_zone, beio->sg_segs[i].addr);
320 /* For compare we had two equal S/G lists. */
321 if (beio->two_sglists) {
322 uma_zfree(softc->buf_zone,
323 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
327 uma_zfree(softc->beio_zone, beio);
/*
 * Finish a backend I/O: hand off to the continuation callback when one is
 * registered, otherwise complete the CTL data-submit directly.
 * NOTE(review): the "} else {" between the two paths is missing from this
 * listing (337/338 dropped).
 */
331 ctl_complete_beio(struct ctl_be_block_io *beio)
333 union ctl_io *io = beio->io;
335 if (beio->beio_cont != NULL) {
336 beio->beio_cont(beio);
339 ctl_data_submit_done(io);
/*
 * Byte-wise comparison helper for COMPARE processing; presumably returns
 * the index of the first mismatching byte, or 'size' when the buffers are
 * equal (see the "res < len" check in ctl_be_block_compare) — body is
 * missing from this listing, confirm against the canonical file.
 */
344 cmp(uint8_t *a, uint8_t *b, size_t size)
348 for (i = 0; i < size; i++) {
/*
 * Compare the two S/G lists of a COMPARE command segment by segment:
 * the read-back data in sg_segs[0..num_segs) against the initiator data
 * in sg_segs[CTLBLK_HALF_SEGS..).  On the first mismatch, report
 * MISCOMPARE sense with the failing offset in the INFORMATION field;
 * otherwise report success.  NOTE(review): numbering jumps (363 -> 365,
 * 370 -> 373); the 'off' accumulation and loop break are among the
 * missing lines.
 */
356 ctl_be_block_compare(union ctl_io *io)
358 struct ctl_be_block_io *beio;
363 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
365 for (i = 0; i < beio->num_segs; i++) {
366 res = cmp(beio->sg_segs[i].addr,
367 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
368 beio->sg_segs[i].len);
/* res < len means cmp() found a mismatch inside this segment. */
370 if (res < beio->sg_segs[i].len)
/* Early loop exit implies a miscompare was found. */
373 if (i < beio->num_segs) {
374 scsi_u64to8b(off, info);
375 ctl_set_sense(&io->scsiio, /*current_error*/ 1,
376 /*sense_key*/ SSD_KEY_MISCOMPARE,
377 /*asc*/ 0x1D, /*ascq*/ 0x00,
378 /*type*/ SSD_ELEM_INFO,
379 /*size*/ sizeof(info), /*data*/ &info,
380 /*type*/ SSD_ELEM_NONE);
382 ctl_set_success(&io->scsiio);
/*
 * DMA-complete callback (ctl_backend_driver.data_move_done).  Accounts
 * DMA time, sets final status for reads and failed writes, and queues
 * successful writes to the LUN's datamove_queue so the taskqueue worker
 * can perform the (potentially blocking) backend I/O — this routine may
 * run in interrupt context and must not block.  NOTE(review): numbering
 * jumps throughout (390 -> 392, 441 -> 446, ...); the be_lun assignment,
 * several closing braces and return statements are missing from this
 * listing.
 */
386 ctl_be_block_move_done(union ctl_io *io)
388 struct ctl_be_block_io *beio;
389 struct ctl_be_block_lun *be_lun;
390 struct ctl_lba_len_flags *lbalen;
392 struct bintime cur_bt;
395 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
398 DPRINTF("entered\n");
/* Fold the just-finished DMA interval into the per-I/O DMA time total. */
401 getbinuptime(&cur_bt);
402 bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
403 bintime_add(&io->io_hdr.dma_bt, &cur_bt);
405 io->io_hdr.num_dmas++;
406 io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
/*
409 * We set status at this point for read commands, and write
410 * commands with errors.
 */
412 if (io->io_hdr.flags & CTL_FLAG_ABORT) {
414 } else if ((io->io_hdr.port_status != 0) &&
415 ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
416 (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
417 ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1,
418 /*retry_count*/ io->io_hdr.port_status);
419 } else if (io->scsiio.kern_data_resid != 0 &&
420 (io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT &&
421 ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
422 (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
/* Initiator supplied less write data than the CDB promised. */
423 ctl_set_invalid_field_ciu(&io->scsiio);
424 } else if ((io->io_hdr.port_status == 0) &&
425 ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
426 lbalen = ARGS(beio->io);
427 if (lbalen->flags & CTL_LLF_READ) {
428 ctl_set_success(&io->scsiio);
429 } else if (lbalen->flags & CTL_LLF_COMPARE) {
430 /* We have two data blocks ready for comparison. */
431 ctl_be_block_compare(io);
/*
436 * If this is a read, or a write with errors, it is done.
 */
438 if ((beio->bio_cmd == BIO_READ)
439 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
440 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
441 ctl_complete_beio(beio);
/*
446 * At this point, we have a write and the DMA completed
447 * successfully. We now have to queue it to the task queue to
448 * execute the backend I/O. That is because we do blocking
449 * memory allocations, and in the file backing case, blocking I/O.
450 * This move done routine is generally called in the SIM's
451 * interrupt context, and therefore we cannot block.
 */
453 mtx_lock(&be_lun->queue_lock);
454 STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
455 mtx_unlock(&be_lun->queue_lock);
456 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
/*
 * GEOM bio completion callback for the device-backed paths.  Records the
 * lowest-offset error, counts completions under io_lock, and when the
 * last outstanding bio finishes, closes the devstat transaction, maps the
 * errno to SCSI sense, and either completes the beio (write/flush/delete/
 * verify) or starts the DMA back to the initiator (read).
 * NOTE(review): numbering jumps throughout (465 -> 469, 484 -> 487, ...);
 * error-flag checks, g_destroy_bio() calls, returns and braces are
 * missing from this listing.
 */
462 ctl_be_block_biodone(struct bio *bio)
464 struct ctl_be_block_io *beio;
465 struct ctl_be_block_lun *be_lun;
469 beio = bio->bio_caller1;
473 DPRINTF("entered\n");
475 error = bio->bio_error;
476 mtx_lock(&be_lun->io_lock);
/* Remember only the error at the lowest offset, for sense reporting. */
478 (beio->first_error == 0 ||
479 bio->bio_offset < beio->first_error_offset)) {
480 beio->first_error = error;
481 beio->first_error_offset = bio->bio_offset;
484 beio->num_bios_done++;
/*
487 * XXX KDM will this cause WITNESS to complain? Holding a lock
488 * during the free might cause it to complain.
 */
/*
493 * If the send complete bit isn't set, or we aren't the last I/O to
494 * complete, then we're done.
 */
496 if ((beio->send_complete == 0)
497 || (beio->num_bios_done < beio->num_bios_sent)) {
498 mtx_unlock(&be_lun->io_lock);
/*
503 * At this point, we've verified that we are the last I/O to
504 * complete, so it's safe to drop the lock.
 */
506 devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
507 beio->ds_tag_type, beio->ds_trans_type,
508 /*now*/ NULL, /*then*/&beio->ds_t0);
509 mtx_unlock(&be_lun->io_lock);
/*
512 * If there are any errors from the backing device, we fail the
513 * entire I/O with a medium error.
 */
515 error = beio->first_error;
517 if (error == EOPNOTSUPP) {
518 ctl_set_invalid_opcode(&io->scsiio);
519 } else if (error == ENOSPC || error == EDQUOT) {
520 ctl_set_space_alloc_fail(&io->scsiio);
521 } else if (error == EROFS || error == EACCES) {
522 ctl_set_hw_write_protected(&io->scsiio);
523 } else if (beio->bio_cmd == BIO_FLUSH) {
524 /* XXX KDM is there a better error here? */
525 ctl_set_internal_failure(&io->scsiio,
527 /*retry_count*/ 0xbad2);
529 ctl_set_medium_error(&io->scsiio,
530 beio->bio_cmd == BIO_READ);
532 ctl_complete_beio(beio);
/*
537 * If this is a write, a flush, a delete or verify, we're all done.
538 * If this is a read, we can now send the data to the user.
 */
540 if ((beio->bio_cmd == BIO_WRITE)
541 || (beio->bio_cmd == BIO_FLUSH)
542 || (beio->bio_cmd == BIO_DELETE)
543 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
544 ctl_set_success(&io->scsiio);
545 ctl_complete_beio(beio);
547 if ((ARGS(io)->flags & CTL_LLF_READ) &&
548 beio->beio_cont == NULL) {
549 ctl_set_success(&io->scsiio);
/* Start timing the DMA of the read data back to the initiator. */
553 getbinuptime(&io->io_hdr.dma_start_bt);
/*
 * SYNCHRONIZE CACHE for the file-backed path: VOP_FSYNC() the backing
 * vnode under the appropriate vnode lock (shared when the filesystem
 * allows shared writes), with devstat accounting around the operation.
 * io_arg nonzero (IMMED) selects MNT_NOWAIT.  NOTE(review): numbering
 * jumps (582 -> 584, 592 -> 595); the VOP_FSYNC trailing arguments and
 * the "if (error == 0)" around the success/failure split are missing
 * from this listing.
 */
560 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
561 struct ctl_be_block_io *beio)
563 union ctl_io *io = beio->io;
564 struct mount *mountpoint;
565 int error, lock_flags;
567 DPRINTF("entered\n");
569 binuptime(&beio->ds_t0);
570 mtx_lock(&be_lun->io_lock);
571 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
572 mtx_unlock(&be_lun->io_lock);
574 (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
576 if (MNT_SHARED_WRITES(mountpoint) ||
577 ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
578 lock_flags = LK_SHARED;
580 lock_flags = LK_EXCLUSIVE;
581 vn_lock(be_lun->vn, lock_flags | LK_RETRY);
582 error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
584 VOP_UNLOCK(be_lun->vn, 0);
586 vn_finished_write(mountpoint);
588 mtx_lock(&be_lun->io_lock);
589 devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
590 beio->ds_tag_type, beio->ds_trans_type,
591 /*now*/ NULL, /*then*/&beio->ds_t0);
592 mtx_unlock(&be_lun->io_lock);
595 ctl_set_success(&io->scsiio);
597 /* XXX KDM is there a better error here? */
598 ctl_set_internal_failure(&io->scsiio,
600 /*retry_count*/ 0xbad1);
603 ctl_complete_beio(beio);
/* DTrace probes marking start/end of file-path reads and writes. */
606 SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
607 SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
608 SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t");
609 SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");
/*
 * READ/WRITE dispatch for the file-backed path.  Builds a kernel uio over
 * the beio's S/G list and issues VOP_READ/VOP_WRITE on the backing vnode,
 * translating DPO/FUA into IO_DIRECT/IO_SYNC flags, zero-filling a short
 * read (EOF), and mapping errors to SCSI sense.  NOTE(review): numbering
 * jumps throughout (615 -> 618, 627 -> 629, ...); several local variable
 * declarations, flag assignments, braces and returns are missing from
 * this listing.
 */
612 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
613 struct ctl_be_block_io *beio)
615 struct ctl_be_block_filedata *file_data;
618 struct iovec *xiovec;
622 DPRINTF("entered\n");
624 file_data = &be_lun->backend.file;
/* DPO/FUA CDB bits select direct / synchronous I/O below. */
627 if (ARGS(io)->flags & CTL_LLF_DPO)
629 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
632 bzero(&xuio, sizeof(xuio));
633 if (beio->bio_cmd == BIO_READ) {
634 SDT_PROBE0(cbb, , read, file_start);
635 xuio.uio_rw = UIO_READ;
637 SDT_PROBE0(cbb, , write, file_start);
638 xuio.uio_rw = UIO_WRITE;
640 xuio.uio_offset = beio->io_offset;
641 xuio.uio_resid = beio->io_len;
642 xuio.uio_segflg = UIO_SYSSPACE;
643 xuio.uio_iov = beio->xiovecs;
644 xuio.uio_iovcnt = beio->num_segs;
645 xuio.uio_td = curthread;
/* Mirror the S/G list into the iovec array used by the uio. */
647 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
648 xiovec->iov_base = beio->sg_segs[i].addr;
649 xiovec->iov_len = beio->sg_segs[i].len;
652 binuptime(&beio->ds_t0);
653 mtx_lock(&be_lun->io_lock);
654 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
655 mtx_unlock(&be_lun->io_lock);
657 if (beio->bio_cmd == BIO_READ) {
658 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
/*
661 * UFS pays attention to IO_DIRECT for reads. If the
662 * DIRECTIO option is configured into the kernel, it calls
663 * ffs_rawread(). But that only works for single-segment
664 * uios with user space addresses. In our case, with a
665 * kernel uio, it still reads into the buffer cache, but it
666 * will just try to release the buffer from the cache later
 *
669 * ZFS does not pay attention to IO_DIRECT for reads.
 *
671 * UFS does not pay attention to IO_SYNC for reads.
 *
673 * ZFS pays attention to IO_SYNC (which translates into the
674 * Solaris define FRSYNC for zfs_read()) for reads. It
675 * attempts to sync the file before reading.
 */
677 error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);
679 VOP_UNLOCK(be_lun->vn, 0);
680 SDT_PROBE0(cbb, , read, file_done);
681 if (error == 0 && xuio.uio_resid > 0) {
/*
683 * If we read less than requested (EOF), then
684 * we should clean the rest of the buffer.
 */
686 s = beio->io_len - xuio.uio_resid;
687 for (i = 0; i < beio->num_segs; i++) {
688 if (s >= beio->sg_segs[i].len) {
689 s -= beio->sg_segs[i].len;
692 bzero((uint8_t *)beio->sg_segs[i].addr + s,
693 beio->sg_segs[i].len - s);
698 struct mount *mountpoint;
701 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
/* Shared vnode lock when the FS supports concurrent writers. */
703 if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL)
704 && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
705 lock_flags = LK_SHARED;
707 lock_flags = LK_EXCLUSIVE;
708 vn_lock(be_lun->vn, lock_flags | LK_RETRY);
/*
711 * UFS pays attention to IO_DIRECT for writes. The write
712 * is done asynchronously. (Normally the write would just
713 * get put into cache.
 *
715 * UFS pays attention to IO_SYNC for writes. It will
716 * attempt to write the buffer out synchronously if that
 *
719 * ZFS does not pay attention to IO_DIRECT for writes.
 *
721 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
722 * for writes. It will flush the transaction from the
723 * cache before returning.
 */
725 error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
726 VOP_UNLOCK(be_lun->vn, 0);
728 vn_finished_write(mountpoint);
729 SDT_PROBE0(cbb, , write, file_done);
732 mtx_lock(&be_lun->io_lock);
733 devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
734 beio->ds_tag_type, beio->ds_trans_type,
735 /*now*/ NULL, /*then*/&beio->ds_t0);
736 mtx_unlock(&be_lun->io_lock);
/*
739 * If we got an error, set the sense data to "MEDIUM ERROR" and
740 * return the I/O to the user.
 */
743 if (error == ENOSPC || error == EDQUOT) {
744 ctl_set_space_alloc_fail(&io->scsiio);
745 } else if (error == EROFS || error == EACCES) {
746 ctl_set_hw_write_protected(&io->scsiio);
748 ctl_set_medium_error(&io->scsiio,
749 beio->bio_cmd == BIO_READ);
751 ctl_complete_beio(beio);
/*
756 * If this is a write or a verify, we're all done.
757 * If this is a read, we can now send the data to the user.
 */
759 if ((beio->bio_cmd == BIO_WRITE) ||
760 (ARGS(io)->flags & CTL_LLF_VERIFY)) {
761 ctl_set_success(&io->scsiio);
762 ctl_complete_beio(beio);
764 if ((ARGS(io)->flags & CTL_LLF_READ) &&
765 beio->beio_cont == NULL) {
766 ctl_set_success(&io->scsiio);
/* Start timing the DMA of the read data back to the initiator. */
770 getbinuptime(&io->io_hdr.dma_start_bt);
/*
 * GET LBA STATUS for the file-backed path.  Uses FIOSEEKHOLE/FIOSEEKDATA
 * ioctls on the backing vnode to classify the region starting at the
 * requested LBA as mapped (status 0) or deallocated (status 1), and fills
 * in one LBA status descriptor.  NOTE(review): numbering jumps (786 ->
 * 788, 798 -> 800, ...); the branch structure between the two ioctls and
 * the error fallback are partially missing from this listing.
 */
777 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
778 struct ctl_be_block_io *beio)
780 union ctl_io *io = beio->io;
781 struct ctl_lba_len_flags *lbalen = ARGS(io);
782 struct scsi_get_lba_status_data *data;
786 DPRINTF("entered\n");
788 off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
789 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
790 error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
791 0, curthread->td_ucred, curthread);
792 if (error == 0 && off > roff)
793 status = 0; /* mapped up to off */
795 error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
796 0, curthread->td_ucred, curthread);
797 if (error == 0 && off > roff)
798 status = 1; /* deallocated up to off */
/* Neither ioctl produced a boundary: report unknown to end of LUN. */
800 status = 0; /* unknown up to the end */
801 off = be_lun->size_bytes;
804 VOP_UNLOCK(be_lun->vn, 0);
806 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
807 scsi_u64to8b(lbalen->lba, data->descr[0].addr);
/* Length is clamped to 32 bits as required by the descriptor format. */
808 scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
809 lbalen->lba), data->descr[0].length);
810 data->descr[0].status = status;
812 ctl_complete_beio(beio);
/*
 * Attribute query for the file-backed path: "blocksused" from VOP_GETATTR
 * va_bytes, "blocksavail" from VFS_STATFS of the backing filesystem, both
 * converted to LUN blocks.  NOTE(review): numbering jumps (819 -> 824,
 * 828 -> 830, ...); the 'val' initialization (presumably UINT64_MAX for
 * unknown), error checks and return are missing from this listing.
 */
816 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
819 struct statfs statfs;
/* No backing vnode: nothing to report. */
824 if (be_lun->vn == NULL)
826 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
827 if (strcmp(attrname, "blocksused") == 0) {
828 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
830 val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
/* Skip statfs on a doomed (forcibly unmounted) vnode. */
832 if (strcmp(attrname, "blocksavail") == 0 &&
833 (be_lun->vn->v_iflag & VI_DOOMED) == 0) {
834 error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
836 val = statfs.f_bavail * statfs.f_bsize /
837 be_lun->cbe_lun.blocksize;
839 VOP_UNLOCK(be_lun->vn, 0);
/*
 * READ/WRITE dispatch for a ZFS zvol (character device) backing store.
 * Same uio construction as the file path, but issued through the cdevsw
 * d_read/d_write entry points obtained via devvn_refthread().
 * NOTE(review): numbering jumps throughout (845 -> 851, 886 -> 888, ...);
 * the csw == NULL / ENXIO handling, flag assignments and several braces
 * are missing from this listing.
 */
844 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
845 struct ctl_be_block_io *beio)
851 struct iovec *xiovec;
852 int error, flags, i, ref;
854 DPRINTF("entered\n");
/* DPO/FUA CDB bits select direct / synchronous I/O flags. */
858 if (ARGS(io)->flags & CTL_LLF_DPO)
860 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
863 bzero(&xuio, sizeof(xuio));
864 if (beio->bio_cmd == BIO_READ) {
865 SDT_PROBE0(cbb, , read, file_start);
866 xuio.uio_rw = UIO_READ;
868 SDT_PROBE0(cbb, , write, file_start);
869 xuio.uio_rw = UIO_WRITE;
871 xuio.uio_offset = beio->io_offset;
872 xuio.uio_resid = beio->io_len;
873 xuio.uio_segflg = UIO_SYSSPACE;
874 xuio.uio_iov = beio->xiovecs;
875 xuio.uio_iovcnt = beio->num_segs;
876 xuio.uio_td = curthread;
/* Mirror the S/G list into the iovec array used by the uio. */
878 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
879 xiovec->iov_base = beio->sg_segs[i].addr;
880 xiovec->iov_len = beio->sg_segs[i].len;
883 binuptime(&beio->ds_t0);
884 mtx_lock(&be_lun->io_lock);
885 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
886 mtx_unlock(&be_lun->io_lock);
/* Take a threaded reference on the cdev before calling into it. */
888 csw = devvn_refthread(be_lun->vn, &dev, &ref);
890 if (beio->bio_cmd == BIO_READ)
891 error = csw->d_read(dev, &xuio, flags);
893 error = csw->d_write(dev, &xuio, flags);
894 dev_relthread(dev, ref);
898 if (beio->bio_cmd == BIO_READ)
899 SDT_PROBE0(cbb, , read, file_done);
901 SDT_PROBE0(cbb, , write, file_done);
903 mtx_lock(&be_lun->io_lock);
904 devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
905 beio->ds_tag_type, beio->ds_trans_type,
906 /*now*/ NULL, /*then*/&beio->ds_t0);
907 mtx_unlock(&be_lun->io_lock);
/*
910 * If we got an error, set the sense data to "MEDIUM ERROR" and
911 * return the I/O to the user.
 */
914 if (error == ENOSPC || error == EDQUOT) {
915 ctl_set_space_alloc_fail(&io->scsiio);
916 } else if (error == EROFS || error == EACCES) {
917 ctl_set_hw_write_protected(&io->scsiio);
919 ctl_set_medium_error(&io->scsiio,
920 beio->bio_cmd == BIO_READ);
922 ctl_complete_beio(beio);
/*
927 * If this is a write or a verify, we're all done.
928 * If this is a read, we can now send the data to the user.
 */
930 if ((beio->bio_cmd == BIO_WRITE) ||
931 (ARGS(io)->flags & CTL_LLF_VERIFY)) {
932 ctl_set_success(&io->scsiio);
933 ctl_complete_beio(beio);
935 if ((ARGS(io)->flags & CTL_LLF_READ) &&
936 beio->beio_cont == NULL) {
937 ctl_set_success(&io->scsiio);
/* Start timing the DMA of the read data back to the initiator. */
941 getbinuptime(&io->io_hdr.dma_start_bt);
/*
 * GET LBA STATUS for the zvol path: same FIOSEEKHOLE/FIOSEEKDATA logic as
 * the file variant, but issued through the cdevsw d_ioctl entry point.
 * A failed devvn_refthread() yields "unknown up to the end".
 * NOTE(review): numbering jumps (957 -> 959, 968 -> 970, ...); branch
 * structure between the ioctls and the trailing ioctl arguments are
 * partially missing from this listing.
 */
948 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
949 struct ctl_be_block_io *beio)
951 union ctl_io *io = beio->io;
954 struct ctl_lba_len_flags *lbalen = ARGS(io);
955 struct scsi_get_lba_status_data *data;
957 int error, ref, status;
959 DPRINTF("entered\n");
961 csw = devvn_refthread(be_lun->vn, &dev, &ref);
/* Device gone: report unknown state for the whole remaining range. */
963 status = 0; /* unknown up to the end */
964 off = be_lun->size_bytes;
967 off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
968 error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
970 if (error == 0 && off > roff)
971 status = 0; /* mapped up to off */
973 error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
975 if (error == 0 && off > roff)
976 status = 1; /* deallocated up to off */
978 status = 0; /* unknown up to the end */
979 off = be_lun->size_bytes;
982 dev_relthread(dev, ref);
985 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
986 scsi_u64to8b(lbalen->lba, data->descr[0].addr);
/* Length is clamped to 32 bits as required by the descriptor format. */
987 scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
988 lbalen->lba), data->descr[0].length);
989 data->descr[0].status = status;
991 ctl_complete_beio(beio);
/*
 * SYNCHRONIZE CACHE for the device-backed path: send a single BIO_FLUSH
 * through the device's d_strategy; completion is handled by
 * ctl_be_block_biodone().  A failed devvn_refthread() fakes an ENXIO
 * completion.  NOTE(review): numbering jumps (1003 -> 1005, 1028 ->
 * 1031); local declarations and the csw NULL check are missing from this
 * listing.
 */
995 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
996 struct ctl_be_block_io *beio)
1003 DPRINTF("entered\n");
1005 /* This can't fail, it's a blocking allocation. */
1006 bio = g_alloc_bio();
1008 bio->bio_cmd = BIO_FLUSH;
1009 bio->bio_offset = 0;
1011 bio->bio_done = ctl_be_block_biodone;
1012 bio->bio_caller1 = beio;
1013 bio->bio_pblkno = 0;
/*
1016 * We don't need to acquire the LUN lock here, because we are only
1017 * sending one bio, and so there is no other context to synchronize
 */
1020 beio->num_bios_sent = 1;
1021 beio->send_complete = 1;
1023 binuptime(&beio->ds_t0);
1024 mtx_lock(&be_lun->io_lock);
1025 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1026 mtx_unlock(&be_lun->io_lock);
1028 csw = devvn_refthread(be_lun->vn, &dev, &ref);
1031 csw->d_strategy(bio);
1032 dev_relthread(dev, ref);
/* Device went away: complete the bio ourselves with ENXIO. */
1034 bio->bio_error = ENXIO;
1035 ctl_be_block_biodone(bio);
/*
 * Issue BIO_DELETE bios covering [off, off+len), splitting at 'maxlen'
 * (largest blocksize-aligned value <= LONG_MAX) per bio.  'last' marks the
 * final range of the UNMAP so send_complete can be set on the final bio.
 * NOTE(review): numbering jumps (1051 -> 1053, 1073 -> 1075); the loop
 * construct around the splitting and the csw NULL check are missing from
 * this listing.
 */
1040 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
1041 struct ctl_be_block_io *beio,
1042 uint64_t off, uint64_t len, int last)
1050 csw = devvn_refthread(be_lun->vn, &dev, &ref);
/* Largest per-bio length that stays a multiple of the block size. */
1051 maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
1053 bio = g_alloc_bio();
1054 bio->bio_cmd = BIO_DELETE;
1056 bio->bio_offset = off;
1057 bio->bio_length = MIN(len, maxlen);
1059 bio->bio_done = ctl_be_block_biodone;
1060 bio->bio_caller1 = beio;
1061 bio->bio_pblkno = off / be_lun->cbe_lun.blocksize;
1063 off += bio->bio_length;
1064 len -= bio->bio_length;
/* Counter updates must be atomic with respect to biodone. */
1066 mtx_lock(&be_lun->io_lock);
1067 beio->num_bios_sent++;
1068 if (last && len == 0)
1069 beio->send_complete = 1;
1070 mtx_unlock(&be_lun->io_lock);
1073 csw->d_strategy(bio);
/* Device went away: complete the bio ourselves with ENXIO. */
1075 bio->bio_error = ENXIO;
1076 ctl_be_block_biodone(bio);
1080 dev_relthread(dev, ref);
/*
 * UNMAP / WRITE SAME(unmap) entry point for the device path.  An
 * io_offset of -1 signals a descriptor-list UNMAP: walk the initiator's
 * scsi_unmap_desc list and delete each range; otherwise delete the single
 * [io_offset, io_len) range.  The last range is flagged so biodone can
 * finish the beio.  NOTE(review): numbering jumps (1089 -> 1094, 1112 ->
 * 1115); the "} else {" between the two paths and local declarations are
 * missing from this listing.
 */
1084 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
1085 struct ctl_be_block_io *beio)
1088 struct ctl_ptr_len_flags *ptrlen;
1089 struct scsi_unmap_desc *buf, *end;
1094 DPRINTF("entered\n");
1096 binuptime(&beio->ds_t0);
1097 mtx_lock(&be_lun->io_lock);
1098 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1099 mtx_unlock(&be_lun->io_lock);
1101 if (beio->io_offset == -1) {
1103 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1104 buf = (struct scsi_unmap_desc *)ptrlen->ptr;
1105 end = buf + ptrlen->len / sizeof(*buf);
1106 for (; buf < end; buf++) {
1107 len = (uint64_t)scsi_4btoul(buf->length) *
1108 be_lun->cbe_lun.blocksize;
1109 beio->io_len += len;
/* 'last' is TRUE only for the final descriptor in the list. */
1110 ctl_be_block_unmap_dev_range(be_lun, beio,
1111 scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
1112 len, (end - buf < 2) ? TRUE : FALSE);
1115 ctl_be_block_unmap_dev_range(be_lun, beio,
1116 beio->io_offset, beio->io_len, TRUE);
/*
 * READ/WRITE dispatch for the device-backed path.  Carves the beio's S/G
 * list into bios no larger than the device's si_iosize_max, collects them
 * on a local queue, then fires them all at d_strategy; completion is
 * counted in ctl_be_block_biodone().  NOTE(review): numbering jumps
 * (1123 -> 1128, 1141 -> 1143, ...); local declarations, the csw NULL
 * branch and several braces are missing from this listing.
 */
1120 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
1121 struct ctl_be_block_io *beio)
1123 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
1128 int i, max_iosize, ref;
1130 DPRINTF("entered\n");
1131 csw = devvn_refthread(be_lun->vn, &dev, &ref);
/*
1134 * We have to limit our I/O size to the maximum supported by the
1135 * backend device. Hopefully it is MAXPHYS. If the driver doesn't
1136 * set it properly, use DFLTPHYS.
 */
1139 max_iosize = dev->si_iosize_max;
1140 if (max_iosize < PAGE_SIZE)
1141 max_iosize = DFLTPHYS;
1143 max_iosize = DFLTPHYS;
1145 cur_offset = beio->io_offset;
1146 for (i = 0; i < beio->num_segs; i++) {
1150 cur_size = beio->sg_segs[i].len;
1151 cur_ptr = beio->sg_segs[i].addr;
/* Split each segment into max_iosize-bounded bios. */
1153 while (cur_size > 0) {
1154 /* This can't fail, it's a blocking allocation. */
1155 bio = g_alloc_bio();
1157 KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
1159 bio->bio_cmd = beio->bio_cmd;
1161 bio->bio_caller1 = beio;
1162 bio->bio_length = min(cur_size, max_iosize);
1163 bio->bio_offset = cur_offset;
1164 bio->bio_data = cur_ptr;
1165 bio->bio_done = ctl_be_block_biodone;
1166 bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;
1168 cur_offset += bio->bio_length;
1169 cur_ptr += bio->bio_length;
1170 cur_size -= bio->bio_length;
1172 TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
1173 beio->num_bios_sent++;
/* All bios are counted before any is sent, so set send_complete now. */
1176 binuptime(&beio->ds_t0);
1177 mtx_lock(&be_lun->io_lock);
1178 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1179 beio->send_complete = 1;
1180 mtx_unlock(&be_lun->io_lock);
/*
1183 * Fire off all allocated requests!
 */
1185 while ((bio = TAILQ_FIRST(&queue)) != NULL) {
1186 TAILQ_REMOVE(&queue, bio, bio_queue);
1188 csw->d_strategy(bio);
/* Device went away: complete the bio ourselves with ENXIO. */
1190 bio->bio_error = ENXIO;
1191 ctl_be_block_biodone(bio);
1195 dev_relthread(dev, ref);
/*
 * Attribute query for the device-backed path: forward the attribute name
 * to the device via DIOCGATTR and return its off value, or UINT64_MAX
 * when the device or the attribute is unavailable.  NOTE(review):
 * numbering jumps (1206 -> 1208, 1212 -> 1216); the trailing d_ioctl
 * arguments and the error test before the first return are missing from
 * this listing.
 */
1199 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1201 struct diocgattr_arg arg;
1206 csw = devvn_refthread(be_lun->vn, &dev, &ref);
1208 return (UINT64_MAX);
1209 strlcpy(arg.name, attrname, sizeof(arg.name));
1210 arg.len = sizeof(arg.value.off);
1212 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1216 dev_relthread(dev, ref);
1218 return (UINT64_MAX);
1219 return (arg.value.off);
/*
 * Config-write handler for SYNCHRONIZE CACHE: translate the CDB's
 * LBA/length into a byte range on the beio, note IMMED in io_arg, and
 * invoke the backend's lun_flush method (file or dev variant).
 * NOTE(review): numbering jumps (1228 -> 1230, 1238 -> 1240); the closing
 * brace and possibly a tag-type assignment are missing from this listing.
 */
1223 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1226 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1227 struct ctl_be_block_io *beio;
1228 struct ctl_lba_len_flags *lbalen;
1230 DPRINTF("entered\n");
1231 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1232 lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1234 beio->io_len = lbalen->len * cbe_lun->blocksize;
1235 beio->io_offset = lbalen->lba * cbe_lun->blocksize;
/* SSC_IMMED: complete before the flush finishes (best effort). */
1236 beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1237 beio->bio_cmd = BIO_FLUSH;
1238 beio->ds_trans_type = DEVSTAT_NO_DATA;
1240 be_lun->lun_flush(be_lun, beio);
/*
 * Completion callback for one pass of a (possibly multi-pass) WRITE
 * SAME: free the beio, then either finish the command if it was aborted
 * or already carries a non-success status, or requeue it through
 * ctl_be_block_config_write() to process the remaining range.
 * NOTE(review): excerpt elides some lines (e.g. the io assignment).
 */
1244 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1249 ctl_free_beio(beio);
1250 if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1251 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1252 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1253 ctl_config_write_done(io);
/* Not aborted/failed: rerun to cover the rest of the range. */
1257 ctl_be_block_config_write(io);
/*
 * Config-write handler for WRITE SAME: reject unsupported flag
 * combinations (and UNMAP/ANCHOR on backends without an unmap method)
 * with INVALID FIELD; map SWS_UNMAP/SWS_ANCHOR to a single BIO_DELETE;
 * otherwise materialize up to CTLBLK_MAX_SEGS buffer segments filled
 * with the initiator's block (or zeroes for SWS_NDOB, with the LBA
 * stamped in for SWS_LBDATA) and dispatch them as a BIO_WRITE.  If the
 * whole range does not fit in one pass, beio_cont reschedules a rerun
 * via ctl_be_block_cw_done_ws.
 * NOTE(review): excerpt elides many lines (loop braces, else branches,
 * the "adj" alignment computation); comments cover only visible code.
 */
1261 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
1264 struct ctl_be_block_softc *softc = be_lun->softc;
1265 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1266 struct ctl_be_block_io *beio;
1267 struct ctl_lba_len_flags *lbalen;
1268 uint64_t len_left, lba;
1269 uint32_t pb, pbo, adj;
1273 DPRINTF("entered\n");
1275 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1276 lbalen = ARGS(beio->io);
/* Validate flags before committing any resources. */
1278 if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
1279 (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
1280 ctl_free_beio(beio);
1281 ctl_set_invalid_field(&io->scsiio,
1287 ctl_config_write_done(io);
/* UNMAP/ANCHOR variant: delete the range instead of writing it. */
1291 if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
1292 beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1293 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
1294 beio->bio_cmd = BIO_DELETE;
1295 beio->ds_trans_type = DEVSTAT_FREE;
1297 be_lun->unmap(be_lun, beio);
1301 beio->bio_cmd = BIO_WRITE;
1302 beio->ds_trans_type = DEVSTAT_WRITE;
1304 DPRINTF("WRITE SAME at LBA %jx len %u\n",
1305 (uintmax_t)lbalen->lba, lbalen->len);
/* Physical block size/offset, used to align segment boundaries. */
1307 pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
1308 if (be_lun->cbe_lun.pblockoff > 0)
1309 pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
1312 len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
1313 for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
1316 * Setup the S/G entry for this chunk.
1318 seglen = MIN(CTLBLK_MAX_SEG, len_left);
1319 if (pb > cbe_lun->blocksize) {
1320 adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
/* Round each segment down to a whole number of logical blocks. */
1325 seglen -= seglen % cbe_lun->blocksize;
1327 seglen -= seglen % cbe_lun->blocksize;
1328 beio->sg_segs[i].len = seglen;
1329 beio->sg_segs[i].addr = uma_zalloc(softc->buf_zone, M_WAITOK);
1331 DPRINTF("segment %d addr %p len %zd\n", i,
1332 beio->sg_segs[i].addr, beio->sg_segs[i].len);
/* Replicate the pattern block (or zeroes for NDOB) across the segment. */
1337 buf = beio->sg_segs[i].addr;
1339 for (; buf < end; buf += cbe_lun->blocksize) {
1340 if (lbalen->flags & SWS_NDOB) {
1341 memset(buf, 0, cbe_lun->blocksize);
1343 memcpy(buf, io->scsiio.kern_data_ptr,
1344 cbe_lun->blocksize);
/* SWS_LBDATA: stamp the logical block address into each block. */
1346 if (lbalen->flags & SWS_LBDATA)
1347 scsi_ulto4b(lbalen->lba + lba, buf);
1352 beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1353 beio->io_len = lba * cbe_lun->blocksize;
1355 /* We can not do all in one run. Correct and schedule rerun. */
1359 beio->beio_cont = ctl_be_block_cw_done_ws;
1362 be_lun->dispatch(be_lun, beio);
/*
 * Config-write handler for UNMAP: fail with INVALID FIELD if any flag
 * other than SU_ANCHOR is set or the backend has no unmap method;
 * otherwise issue a BIO_DELETE via be_lun->unmap.  io_offset is set to
 * -1 -- presumably the ranges come from the ptrlen descriptor list
 * rather than a single offset (the consuming code is not visible here;
 * confirm in the backend unmap implementations).
 * NOTE(review): excerpt elides some lines of this function.
 */
1366 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1369 struct ctl_be_block_io *beio;
1370 struct ctl_ptr_len_flags *ptrlen;
1372 DPRINTF("entered\n");
1374 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1375 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1377 if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1378 ctl_free_beio(beio);
1379 ctl_set_invalid_field(&io->scsiio,
1385 ctl_config_write_done(io);
1390 beio->io_offset = -1;
1391 beio->bio_cmd = BIO_DELETE;
1392 beio->ds_trans_type = DEVSTAT_FREE;
1394 be_lun->unmap(be_lun, beio);
/*
 * Completion callback for config-read commands: release the beio and
 * report the read as done to CTL.
 * NOTE(review): excerpt elides lines (e.g. the io local's assignment).
 */
1398 ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1403 ctl_free_beio(beio);
1404 ctl_config_read_done(io);
/*
 * Dispatch a config-read command: allocate a beio with
 * ctl_be_block_cr_done as its continuation, then branch on the CDB
 * opcode.  Only SERVICE ACTION IN (GET LBA STATUS) is handled; it goes
 * to the backend's get_lba_status method when one exists, otherwise the
 * beio is completed immediately.  Any other opcode panics -- the
 * frontend is expected to have filtered them out.
 * NOTE(review): excerpt elides some lines of this function.
 */
1408 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1411 struct ctl_be_block_io *beio;
1412 struct ctl_be_block_softc *softc;
1414 DPRINTF("entered\n");
1416 softc = be_lun->softc;
1417 beio = ctl_alloc_beio(softc);
1420 beio->beio_cont = ctl_be_block_cr_done;
1421 PRIV(io)->ptr = (void *)beio;
1423 switch (io->scsiio.cdb[0]) {
1424 case SERVICE_ACTION_IN: /* GET LBA STATUS */
1426 beio->ds_trans_type = DEVSTAT_NO_DATA;
1427 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1429 if (be_lun->get_lba_status)
1430 be_lun->get_lba_status(be_lun, beio);
1432 ctl_be_block_cr_done(beio);
1435 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
/*
 * Completion callback for single-pass config-write commands: release
 * the beio and report the write as done to CTL.
 * NOTE(review): excerpt elides lines (e.g. the io local's assignment).
 */
1441 ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1446 ctl_free_beio(beio);
1447 ctl_config_write_done(io);
/*
 * Dispatch a config-write command: allocate a beio, translate the CTL
 * tag type into the corresponding devstat tag type, and fan out on the
 * CDB opcode to the SYNCHRONIZE CACHE / WRITE SAME / UNMAP handlers
 * above.  Unknown opcodes panic (filtered earlier by CTL).
 * NOTE(review): excerpt elides some lines (break statements, some case
 * labels); comments cover only visible code.
 */
1451 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1454 struct ctl_be_block_io *beio;
1455 struct ctl_be_block_softc *softc;
1457 DPRINTF("entered\n");
1459 softc = be_lun->softc;
1460 beio = ctl_alloc_beio(softc);
1463 beio->beio_cont = ctl_be_block_cw_done;
/* Map CTL tagging to devstat's accounting categories. */
1464 switch (io->scsiio.tag_type) {
1465 case CTL_TAG_ORDERED:
1466 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1468 case CTL_TAG_HEAD_OF_QUEUE:
1469 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1471 case CTL_TAG_UNTAGGED:
1472 case CTL_TAG_SIMPLE:
1475 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1478 PRIV(io)->ptr = (void *)beio;
1480 switch (io->scsiio.cdb[0]) {
1481 case SYNCHRONIZE_CACHE:
1482 case SYNCHRONIZE_CACHE_16:
1483 ctl_be_block_cw_dispatch_sync(be_lun, io);
1487 ctl_be_block_cw_dispatch_ws(be_lun, io);
1490 ctl_be_block_cw_dispatch_unmap(be_lun, io);
1493 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
/*
 * DTrace static probes for the backend data path; the start and
 * alloc_done probes are fired from ctl_be_block_dispatch() below.
 */
1498 SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
1499 SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
1500 SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
1501 SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");
/*
 * Continuation used when a READ/WRITE is split into multiple passes:
 * free the finished beio, complete the command if it was aborted or
 * already failed, otherwise clear the status back to NONE and requeue
 * the io on the LUN's input queue, waking the worker taskqueue to start
 * the next pass.
 * NOTE(review): excerpt elides lines (locals' assignments, returns).
 */
1504 ctl_be_block_next(struct ctl_be_block_io *beio)
1506 struct ctl_be_block_lun *be_lun;
1511 ctl_free_beio(beio);
1512 if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1513 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1514 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1515 ctl_data_submit_done(io);
/* Reset status so the next pass starts from a clean slate. */
1519 io->io_hdr.status &= ~CTL_STATUS_MASK;
1520 io->io_hdr.status |= CTL_STATUS_NONE;
1522 mtx_lock(&be_lun->queue_lock);
1523 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1524 mtx_unlock(&be_lun->queue_lock);
1525 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
/*
 * Main READ/WRITE/COMPARE data path.  Allocates a beio, records
 * devstat tag/transaction types, clamps this pass to the maximum I/O
 * size (half of it for COMPARE, which needs two S/G lists -- one for
 * the media read and one for the datamove), builds the scatter/gather
 * list from the buf UMA zone, then either dispatches immediately
 * (reads) or starts the datamove to pull data from the initiator
 * (writes).  If the request doesn't fit in one pass, beio_cont is set
 * to ctl_be_block_next so the remainder is rescheduled.
 * NOTE(review): excerpt elides many lines (break statements, else
 * branches, some assignments such as lbalen/bptrlen); comments cover
 * only what is visible.
 */
1529 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
1532 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1533 struct ctl_be_block_io *beio;
1534 struct ctl_be_block_softc *softc;
1535 struct ctl_lba_len_flags *lbalen;
1536 struct ctl_ptr_len_flags *bptrlen;
1537 uint64_t len_left, lbas;
1540 softc = be_lun->softc;
1542 DPRINTF("entered\n");
1545 if (lbalen->flags & CTL_LLF_WRITE) {
1546 SDT_PROBE0(cbb, , write, start);
1548 SDT_PROBE0(cbb, , read, start);
1551 beio = ctl_alloc_beio(softc);
1555 bptrlen->ptr = (void *)beio;
/* Map CTL tagging to devstat's accounting categories. */
1557 switch (io->scsiio.tag_type) {
1558 case CTL_TAG_ORDERED:
1559 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1561 case CTL_TAG_HEAD_OF_QUEUE:
1562 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1564 case CTL_TAG_UNTAGGED:
1565 case CTL_TAG_SIMPLE:
1568 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1572 if (lbalen->flags & CTL_LLF_WRITE) {
1573 beio->bio_cmd = BIO_WRITE;
1574 beio->ds_trans_type = DEVSTAT_WRITE;
1576 beio->bio_cmd = BIO_READ;
1577 beio->ds_trans_type = DEVSTAT_READ;
1580 DPRINTF("%s at LBA %jx len %u @%ju\n",
1581 (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
1582 (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
/* COMPARE keeps read data and datamove data in separate halves. */
1583 if (lbalen->flags & CTL_LLF_COMPARE) {
1584 beio->two_sglists = 1;
1585 lbas = CTLBLK_HALF_IO_SIZE;
1587 lbas = CTLBLK_MAX_IO_SIZE;
/* bptrlen->len tracks LBAs already covered by earlier passes. */
1589 lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
1590 beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
1591 beio->io_len = lbas * cbe_lun->blocksize;
1592 bptrlen->len += lbas;
1594 for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
1595 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
1596 i, CTLBLK_MAX_SEGS));
1599 * Setup the S/G entry for this chunk.
1601 beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
1602 beio->sg_segs[i].addr = uma_zalloc(softc->buf_zone, M_WAITOK);
1604 DPRINTF("segment %d addr %p len %zd\n", i,
1605 beio->sg_segs[i].addr, beio->sg_segs[i].len);
1607 /* Set up second segment for compare operation. */
1608 if (beio->two_sglists) {
1609 beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
1610 beio->sg_segs[i].len;
1611 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
1612 uma_zalloc(softc->buf_zone, M_WAITOK);
1616 len_left -= beio->sg_segs[i].len;
/* More LBAs remain after this pass: chain to ctl_be_block_next. */
1618 if (bptrlen->len < lbalen->len)
1619 beio->beio_cont = ctl_be_block_next;
1620 io->scsiio.be_move_done = ctl_be_block_move_done;
1621 /* For compare we have separate S/G lists for read and datamove. */
1622 if (beio->two_sglists)
1623 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
1625 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
1626 io->scsiio.kern_data_len = beio->io_len;
1627 io->scsiio.kern_sg_entries = beio->num_segs;
1628 io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
1631 * For the read case, we need to read the data into our buffers and
1632 * then we can send it back to the user. For the write case, we
1633 * need to get the data from the user first.
1635 if (beio->bio_cmd == BIO_READ) {
1636 SDT_PROBE0(cbb, , read, alloc_done);
1637 be_lun->dispatch(be_lun, beio);
1639 SDT_PROBE0(cbb, , write, alloc_done);
1641 getbinuptime(&io->io_hdr.dma_start_bt);
/*
 * Taskqueue handler for a LUN: drain the per-LUN queues, popping one io
 * at a time under queue_lock and releasing the lock before processing.
 * Queues are serviced in priority order -- datamove completions first,
 * then config writes, config reads, and finally new input.  Each pop
 * rechecks CTL_LUN_FLAG_NO_MEDIA (a race, per the comment below) and
 * answers BUSY instead of dispatching.  Exits when all queues are
 * empty.
 * NOTE(review): excerpt elides lines (the surrounding for(;;) loop,
 * "continue" statements, if-conditions on io != NULL); comments cover
 * only what is visible.
 */
1650 ctl_be_block_worker(void *context, int pending)
1651 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
1653 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1653 struct ctl_be_block_io *beio;
1655 DPRINTF("entered\n");
1657 * Fetch and process I/Os from all queues. If we detect LUN
1658 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race,
1659 * so make response maximally opaque to not confuse initiator.
1662 mtx_lock(&be_lun->queue_lock);
1663 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1665 DPRINTF("datamove queue\n");
1666 STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
1668 mtx_unlock(&be_lun->queue_lock);
1669 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1670 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1671 ctl_set_busy(&io->scsiio);
1672 ctl_complete_beio(beio);
1675 be_lun->dispatch(be_lun, beio);
1678 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1680 DPRINTF("config write queue\n");
1681 STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
1683 mtx_unlock(&be_lun->queue_lock);
1684 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1685 ctl_set_busy(&io->scsiio);
1686 ctl_config_write_done(io);
1689 ctl_be_block_cw_dispatch(be_lun, io);
1692 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
1694 DPRINTF("config read queue\n");
1695 STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr,
1697 mtx_unlock(&be_lun->queue_lock);
1698 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1699 ctl_set_busy(&io->scsiio);
1700 ctl_config_read_done(io);
1703 ctl_be_block_cr_dispatch(be_lun, io);
1706 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1708 DPRINTF("input queue\n");
1709 STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
1711 mtx_unlock(&be_lun->queue_lock);
1712 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1713 ctl_set_busy(&io->scsiio);
1714 ctl_data_submit_done(io);
1717 ctl_be_block_dispatch(be_lun, io);
1722 * If we get here, there is no work left in the queues, so
1723 * just break out and let the task queue go to sleep.
1725 mtx_unlock(&be_lun->queue_lock);
 * Entry point from CTL to the backend for I/O. We queue everything to a
 * work thread, so this just puts the I/O on a queue and wakes up the
/*
 * Accept an io from CTL: assert it is SCSI, append it to the LUN's
 * input queue under queue_lock, and kick the worker taskqueue.  Always
 * returns CTL_RETVAL_COMPLETE -- completion is reported asynchronously.
 * NOTE(review): excerpt elides lines of this function.
 */
1736 ctl_be_block_submit(union ctl_io *io)
1738 struct ctl_be_block_lun *be_lun;
1740 DPRINTF("entered\n");
1742 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
1745 * Make sure we only get SCSI I/O.
1747 KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
1748 "%#x) encountered", io->io_hdr.io_type));
1752 mtx_lock(&be_lun->queue_lock);
1753 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1754 mtx_unlock(&be_lun->queue_lock);
1755 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1757 return (CTL_RETVAL_COMPLETE);
/*
 * Character-device ioctl entry for the block backend: unpack the
 * ctl_lun_req and dispatch LUN create/remove/modify requests to the
 * matching handlers; unknown request types set CTL_LUN_ERROR with a
 * descriptive error string.
 * NOTE(review): excerpt elides lines (the enclosing switch on cmd,
 * break statements, the function's return).
 */
1761 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1762 int flag, struct thread *td)
1764 struct ctl_be_block_softc *softc = &backend_block_softc;
1770 struct ctl_lun_req *lun_req;
1772 lun_req = (struct ctl_lun_req *)addr;
1774 switch (lun_req->reqtype) {
1775 case CTL_LUNREQ_CREATE:
1776 error = ctl_be_block_create(softc, lun_req);
1779 error = ctl_be_block_rm(softc, lun_req);
1781 case CTL_LUNREQ_MODIFY:
1782 error = ctl_be_block_modify(softc, lun_req);
1785 lun_req->status = CTL_LUN_ERROR;
1786 snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1787 "invalid LUN request type %d",
/*
 * Configure a LUN backed by a plain file: install the file-based
 * method pointers (no unmap support), take the size from the create
 * params or VOP_GETATTR, pick the logical block size (params value,
 * 2048 for CDROM, else 512), and derive physical/unmap block geometry
 * from vattr.va_blocksize overridable by the pblocksize/pblockoffset
 * and ublocksize/ublockoffset LUN options.  Fails if the file is
 * smaller than one logical block.
 * NOTE(review): excerpt elides lines (error returns, some assignments);
 * comments cover only visible code.
 */
1802 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1804 struct ctl_be_lun *cbe_lun;
1805 struct ctl_be_block_filedata *file_data;
1806 struct ctl_lun_create_params *params;
1809 off_t ps, pss, po, pos, us, uss, uo, uos;
1812 cbe_lun = &be_lun->cbe_lun;
1813 file_data = &be_lun->backend.file;
1814 params = &be_lun->params;
1816 be_lun->dev_type = CTL_BE_BLOCK_FILE;
1817 be_lun->dispatch = ctl_be_block_dispatch_file;
1818 be_lun->lun_flush = ctl_be_block_flush_file;
1819 be_lun->get_lba_status = ctl_be_block_gls_file;
1820 be_lun->getattr = ctl_be_block_getattr_file;
/* Files cannot be unmapped. */
1821 be_lun->unmap = NULL;
1822 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
1824 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1826 snprintf(req->error_str, sizeof(req->error_str),
1827 "error calling VOP_GETATTR() for file %s",
/* Hold the opening thread's credentials for later file I/O. */
1832 file_data->cred = crhold(curthread->td_ucred);
1833 if (params->lun_size_bytes != 0)
1834 be_lun->size_bytes = params->lun_size_bytes;
1836 be_lun->size_bytes = vattr.va_size;
1839 * For files we can use any logical block size. Prefer 512 bytes
1840 * for compatibility reasons. If file's vattr.va_blocksize
1841 * (preferred I/O block size) is bigger and multiple to chosen
1842 * logical block size -- report it as physical block size.
1844 if (params->blocksize_bytes != 0)
1845 cbe_lun->blocksize = params->blocksize_bytes;
1846 else if (cbe_lun->lun_type == T_CDROM)
1847 cbe_lun->blocksize = 2048;
1849 cbe_lun->blocksize = 512;
1850 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
1851 cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
1852 0 : (be_lun->size_blocks - 1);
1854 us = ps = vattr.va_blocksize;
/* Physical block geometry: option override, validate, then publish. */
1857 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
1859 ctl_expand_number(value, &ps);
1860 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
1862 ctl_expand_number(value, &po);
1863 pss = ps / cbe_lun->blocksize;
1864 pos = po / cbe_lun->blocksize;
/* Accept only power-of-two multiples of the logical block size. */
1865 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
1866 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
1867 cbe_lun->pblockexp = fls(pss) - 1;
1868 cbe_lun->pblockoff = (pss - pos) % pss;
/* Same validation for the UNMAP-granularity geometry. */
1871 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
1873 ctl_expand_number(value, &us);
1874 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
1876 ctl_expand_number(value, &uo);
1877 uss = us / cbe_lun->blocksize;
1878 uos = uo / cbe_lun->blocksize;
1879 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
1880 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
1881 cbe_lun->ublockexp = fls(uss) - 1;
1882 cbe_lun->ublockoff = (uss - uos) % uss;
1886 * Sanity check. The media size has to be at least one
1889 if (be_lun->size_bytes < cbe_lun->blocksize) {
1891 snprintf(req->error_str, sizeof(req->error_str),
1892 "file %s size %ju < block size %u", be_lun->dev_path,
1893 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
1896 cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
/*
 * Configure a LUN backed by a character (disk) device: install zvol- or
 * generic-device method pointers, query sector and media size via
 * DIOCGSECTORSIZE/DIOCGMEDIASIZE, validate any user-requested block or
 * LUN size against the device, derive physical/unmap block geometry
 * from DIOCGSTRIPESIZE/DIOCGSTRIPEOFFSET overridable by LUN options,
 * and decide UNMAP support from GEOM::candelete plus the "unmap"
 * option.  Each error path drops the devvn_refthread() reference before
 * returning.
 * NOTE(review): excerpt elides lines (error returns, else branches,
 * some assignments); comments cover only visible code.
 */
1903 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1903 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1904 struct ctl_lun_create_params *params;
1908 int error, atomic, maxio, ref, unmap, tmp;
1909 off_t ps, pss, po, pos, us, uss, uo, uos, otmp;
1911 params = &be_lun->params;
1913 be_lun->dev_type = CTL_BE_BLOCK_DEV;
1914 csw = devvn_refthread(be_lun->vn, &dev, &ref);
/* zvols get a dedicated dispatch path and large atomic I/O size. */
1917 if (strcmp(csw->d_name, "zvol") == 0) {
1918 be_lun->dispatch = ctl_be_block_dispatch_zvol;
1919 be_lun->get_lba_status = ctl_be_block_gls_zvol;
1920 atomic = maxio = CTLBLK_MAX_IO_SIZE;
1922 be_lun->dispatch = ctl_be_block_dispatch_dev;
1923 be_lun->get_lba_status = NULL;
1925 maxio = dev->si_iosize_max;
1928 if (maxio > CTLBLK_MAX_IO_SIZE)
1929 maxio = CTLBLK_MAX_IO_SIZE;
1931 be_lun->lun_flush = ctl_be_block_flush_dev;
1932 be_lun->getattr = ctl_be_block_getattr_dev;
1933 be_lun->unmap = ctl_be_block_unmap_dev;
1935 if (!csw->d_ioctl) {
1936 dev_relthread(dev, ref);
1937 snprintf(req->error_str, sizeof(req->error_str),
1938 "no d_ioctl for device %s!", be_lun->dev_path);
1942 error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
1945 dev_relthread(dev, ref);
1946 snprintf(req->error_str, sizeof(req->error_str),
1947 "error %d returned for DIOCGSECTORSIZE ioctl "
1948 "on %s!", error, be_lun->dev_path);
1953 * If the user has asked for a blocksize that is greater than the
1954 * backing device's blocksize, we can do it only if the blocksize
1955 * the user is asking for is an even multiple of the underlying
1956 * device's blocksize.
1958 if ((params->blocksize_bytes != 0) &&
1959 (params->blocksize_bytes >= tmp)) {
1960 if (params->blocksize_bytes % tmp == 0) {
1961 cbe_lun->blocksize = params->blocksize_bytes;
1963 dev_relthread(dev, ref);
1964 snprintf(req->error_str, sizeof(req->error_str),
1965 "requested blocksize %u is not an even "
1966 "multiple of backing device blocksize %u",
1967 params->blocksize_bytes, tmp);
1970 } else if (params->blocksize_bytes != 0) {
1971 dev_relthread(dev, ref);
1972 snprintf(req->error_str, sizeof(req->error_str),
1973 "requested blocksize %u < backing device "
1974 "blocksize %u", params->blocksize_bytes, tmp);
1976 } else if (cbe_lun->lun_type == T_CDROM)
1977 cbe_lun->blocksize = MAX(tmp, 2048);
1979 cbe_lun->blocksize = tmp;
1981 error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
1984 dev_relthread(dev, ref);
1985 snprintf(req->error_str, sizeof(req->error_str),
1986 "error %d returned for DIOCGMEDIASIZE "
1987 " ioctl on %s!", error,
/* User-requested LUN size may shrink, but never exceed, the media. */
1992 if (params->lun_size_bytes != 0) {
1993 if (params->lun_size_bytes > otmp) {
1994 dev_relthread(dev, ref);
1995 snprintf(req->error_str, sizeof(req->error_str),
1996 "requested LUN size %ju > backing device "
1998 (uintmax_t)params->lun_size_bytes,
2003 be_lun->size_bytes = params->lun_size_bytes;
2005 be_lun->size_bytes = otmp;
2006 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2007 cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2008 0 : (be_lun->size_blocks - 1);
/* Default physical geometry from the device's stripe size/offset. */
2010 error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
2015 error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
2023 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
2025 ctl_expand_number(value, &ps);
2026 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
2028 ctl_expand_number(value, &po);
2029 pss = ps / cbe_lun->blocksize;
2030 pos = po / cbe_lun->blocksize;
/* Accept only power-of-two multiples of the logical block size. */
2031 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
2032 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
2033 cbe_lun->pblockexp = fls(pss) - 1;
2034 cbe_lun->pblockoff = (pss - pos) % pss;
2037 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
2039 ctl_expand_number(value, &us);
2040 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
2042 ctl_expand_number(value, &uo);
2043 uss = us / cbe_lun->blocksize;
2044 uos = uo / cbe_lun->blocksize;
2045 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
2046 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
2047 cbe_lun->ublockexp = fls(uss) - 1;
2048 cbe_lun->ublockoff = (uss - uos) % uss;
2051 cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
2052 cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
/* Probe GEOM::candelete to learn whether BIO_DELETE is supported. */
2054 if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
2057 struct diocgattr_arg arg;
2059 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
2060 arg.len = sizeof(arg.value.i);
2061 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
2063 unmap = (error == 0) ? arg.value.i : 0;
/* The "unmap" LUN option overrides the probed capability. */
2065 value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
2067 unmap = (strcmp(value, "on") == 0);
2069 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
2071 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
2073 dev_relthread(dev, ref);
/*
 * Tear down the backing store of a LUN: close the vnode (adding FWRITE
 * to the close flags unless the LUN is read-only), release per-type
 * resources (the file backend's held credential), and reset dev_type to
 * CTL_BE_BLOCK_NONE.  Unknown backend types panic.
 * NOTE(review): excerpt elides lines (flag setup, NULL checks, breaks).
 */
2078 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2080 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2085 if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2087 (void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2090 switch (be_lun->dev_type) {
2091 case CTL_BE_BLOCK_DEV:
2093 case CTL_BE_BLOCK_FILE:
2094 if (be_lun->backend.file.cred != NULL) {
2095 crfree(be_lun->backend.file.cred);
2096 be_lun->backend.file.cred = NULL;
2099 case CTL_BE_BLOCK_NONE:
2102 panic("Unexpected backend type %d", be_lun->dev_type);
2105 be_lun->dev_type = CTL_BE_BLOCK_NONE;
/*
 * Open the LUN's backing store named by the "file" option: vn_open()
 * the path (retrying with a "/dev/" prefix for bare names, and falling
 * back to read-only when a writable open fails with EROFS/EACCES), set
 * the READONLY flag accordingly, then hand off to the device or
 * plain-file setup routine depending on the vnode type.  Finally pick
 * the serialization policy (serseq): default depends on the dispatch
 * method, overridable by the "serseq" option (on/read/off).
 * NOTE(review): excerpt elides lines (flag setup, the open-retry loop
 * structure, error gotos); comments cover only visible code.
 */
2111 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
2113 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2114 struct nameidata nd;
/* Cannot resolve any path before root is mounted. */
2119 if (rootvnode == NULL) {
2120 snprintf(req->error_str, sizeof(req->error_str),
2121 "Root filesystem is not mounted");
2126 value = dnvlist_get_string(cbe_lun->options, "file", NULL);
2127 if (value == NULL) {
2128 snprintf(req->error_str, sizeof(req->error_str),
2129 "no file argument specified");
2132 free(be_lun->dev_path, M_CTLBLK);
2133 be_lun->dev_path = strdup(value, M_CTLBLK);
2136 value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
2137 if (value != NULL) {
2138 if (strcmp(value, "on") != 0)
2140 } else if (cbe_lun->lun_type == T_DIRECT)
2144 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
2145 error = vn_open(&nd, &flags, 0, NULL);
/* Writable open refused: retry the open read-only. */
2146 if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
2152 * This is the only reasonable guess we can make as far as
2153 * path if the user doesn't give us a fully qualified path.
2154 * If they want to specify a file, they need to specify the
2157 if (be_lun->dev_path[0] != '/') {
2160 asprintf(&dev_name, M_CTLBLK, "/dev/%s",
2162 free(be_lun->dev_path, M_CTLBLK);
2163 be_lun->dev_path = dev_name;
2166 snprintf(req->error_str, sizeof(req->error_str),
2167 "error opening %s: %d", be_lun->dev_path, error);
2171 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
2173 cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
2175 NDFREE(&nd, NDF_ONLY_PNBUF);
2176 be_lun->vn = nd.ni_vp;
2178 /* We only support disks and files. */
2179 if (vn_isdisk(be_lun->vn, &error)) {
2180 error = ctl_be_block_open_dev(be_lun, req);
2181 } else if (be_lun->vn->v_type == VREG) {
2182 error = ctl_be_block_open_file(be_lun, req);
2185 snprintf(req->error_str, sizeof(req->error_str),
2186 "%s is not a disk or plain file", be_lun->dev_path);
2188 VOP_UNLOCK(be_lun->vn, 0);
2191 ctl_be_block_close(be_lun);
2192 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2193 if (be_lun->dispatch != ctl_be_block_dispatch_dev)
2194 cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2195 value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
2196 if (value != NULL && strcmp(value, "on") == 0)
2197 cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
2198 else if (value != NULL && strcmp(value, "read") == 0)
2199 cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2200 else if (value != NULL && strcmp(value, "off") == 0)
2201 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
/*
 * Handle CTL_LUNREQ_CREATE: allocate and initialize a ctl_be_block_lun
 * (queues, locks, cloned option nvlist), open the backing store when
 * this node is primary (or HA is serialize-only), resolve the worker
 * thread count from the num_threads option, fill in serial number and
 * device ID (generated when not supplied, echoed back to the user),
 * create the per-LUN taskqueue and threads, register the LUN with CTL,
 * create its devstat entry, and link it into the softc's LUN list.  On
 * any failure the error path below unwinds everything allocated so far.
 * NOTE(review): excerpt elides many lines (error gotos, else branches,
 * some labels); comments cover only visible code.
 */
2206 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2208 struct ctl_be_lun *cbe_lun;
2209 struct ctl_be_block_lun *be_lun;
2210 struct ctl_lun_create_params *params;
2211 char num_thread_str[16];
2214 int retval, num_threads;
2215 int tmp_num_threads;
2217 params = &req->reqdata.create;
2219 req->status = CTL_LUN_OK;
2221 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2222 cbe_lun = &be_lun->cbe_lun;
2223 be_lun->params = req->reqdata.create;
2224 be_lun->softc = softc;
2225 STAILQ_INIT(&be_lun->input_queue);
2226 STAILQ_INIT(&be_lun->config_read_queue);
2227 STAILQ_INIT(&be_lun->config_write_queue);
2228 STAILQ_INIT(&be_lun->datamove_queue);
2229 mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF);
2230 mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF);
2231 cbe_lun->options = nvlist_clone(req->args_nvl);
2233 if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2234 cbe_lun->lun_type = params->device_type;
2236 cbe_lun->lun_type = T_DIRECT;
/* HA role: explicit ha_role option wins over the shelf-wide default. */
2239 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2240 if (value != NULL) {
2241 if (strcmp(value, "primary") == 0)
2242 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2243 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2244 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2246 if (cbe_lun->lun_type == T_DIRECT ||
2247 cbe_lun->lun_type == T_CDROM) {
2248 be_lun->size_bytes = params->lun_size_bytes;
2249 if (params->blocksize_bytes != 0)
2250 cbe_lun->blocksize = params->blocksize_bytes;
2251 else if (cbe_lun->lun_type == T_CDROM)
2252 cbe_lun->blocksize = 2048;
2254 cbe_lun->blocksize = 512;
2255 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2256 cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2257 0 : (be_lun->size_blocks - 1);
/* Only primaries (or serialize-only HA) open the backing store. */
2259 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2260 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2261 retval = ctl_be_block_open(be_lun, req);
2264 req->status = CTL_LUN_WARNING;
2267 num_threads = cbb_num_threads;
2272 value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL);
2273 if (value != NULL) {
2274 tmp_num_threads = strtol(value, NULL, 0);
2277 * We don't let the user specify less than one
2278 * thread, but hope he's clueful enough not to
2279 * specify 1000 threads.
2281 if (tmp_num_threads < 1) {
2282 snprintf(req->error_str, sizeof(req->error_str),
2283 "invalid number of threads %s",
2287 num_threads = tmp_num_threads;
2290 if (be_lun->vn == NULL)
2291 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2292 /* Tell the user the blocksize we ended up using */
2293 params->lun_size_bytes = be_lun->size_bytes;
2294 params->blocksize_bytes = cbe_lun->blocksize;
2295 if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2296 cbe_lun->req_lun_id = params->req_lun_id;
2297 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2299 cbe_lun->req_lun_id = 0;
2301 cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2302 cbe_lun->be = &ctl_be_block_driver;
/* Generate a serial number when the user didn't supply one. */
2304 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2305 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
2307 strncpy((char *)cbe_lun->serial_num, tmpstr,
2308 MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2310 /* Tell the user what we used for a serial number */
2311 strncpy((char *)params->serial_num, tmpstr,
2312 MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2314 strncpy((char *)cbe_lun->serial_num, params->serial_num,
2315 MIN(sizeof(cbe_lun->serial_num),
2316 sizeof(params->serial_num)));
2318 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2319 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
2320 strncpy((char *)cbe_lun->device_id, tmpstr,
2321 MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2323 /* Tell the user what we used for a device ID */
2324 strncpy((char *)params->device_id, tmpstr,
2325 MIN(sizeof(params->device_id), sizeof(tmpstr)));
2327 strncpy((char *)cbe_lun->device_id, params->device_id,
2328 MIN(sizeof(cbe_lun->device_id),
2329 sizeof(params->device_id)));
2332 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2334 be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK,
2335 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2337 if (be_lun->io_taskqueue == NULL) {
2338 snprintf(req->error_str, sizeof(req->error_str),
2339 "unable to create taskqueue");
2344 * Note that we start the same number of threads by default for
2345 * both the file case and the block device case. For the file
2346 * case, we need multiple threads to allow concurrency, because the
2347 * vnode interface is designed to be a blocking interface. For the
2348 * block device case, ZFS zvols at least will block the caller's
2349 * context in many instances, and so we need multiple threads to
2350 * overcome that problem. Other block devices don't need as many
2351 * threads, but they shouldn't cause too many problems.
2353 * If the user wants to just have a single thread for a block
2354 * device, he can specify that when the LUN is created, or change
2355 * the tunable/sysctl to alter the default number of threads.
2357 retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue,
2358 /*num threads*/num_threads,
2360 /*proc*/control_softc->ctl_proc,
2361 /*thread name*/"block");
2366 be_lun->num_threads = num_threads;
2368 retval = ctl_add_lun(&be_lun->cbe_lun);
2370 snprintf(req->error_str, sizeof(req->error_str),
2371 "ctl_add_lun() returned error %d, see dmesg for "
2377 be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id,
2379 DEVSTAT_ALL_SUPPORTED,
2381 | DEVSTAT_TYPE_IF_OTHER,
2382 DEVSTAT_PRIORITY_OTHER);
2384 mtx_lock(&softc->lock);
2386 SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links);
2387 mtx_unlock(&softc->lock);
2389 params->req_lun_id = cbe_lun->lun_id;
/* Error path: unwind everything allocated above, newest first. */
2394 req->status = CTL_LUN_ERROR;
2396 if (be_lun->io_taskqueue != NULL)
2397 taskqueue_free(be_lun->io_taskqueue);
2398 ctl_be_block_close(be_lun);
2399 if (be_lun->dev_path != NULL)
2400 free(be_lun->dev_path, M_CTLBLK);
2401 nvlist_destroy(cbe_lun->options);
2402 mtx_destroy(&be_lun->queue_lock);
2403 mtx_destroy(&be_lun->io_lock);
2404 free(be_lun, M_CTLBLK);
/*
 * Handle CTL_LUNREQ_RM: find the LUN by id under modify_lock/lock and
 * unlink it from the softc list; if it has a backing vnode, mark it
 * NO_MEDIA, drain the worker taskqueue, and close the backing store;
 * set CTL_BE_BLOCK_LUN_WAITING and call ctl_remove_lun(), then sleep
 * until the UNCONFIGURED flag appears before freeing the structure.
 * EINTR from msleep abandons the wait (the LUN is freed later by the
 * shutdown path -- presumably; that path is not visible here).
 * NOTE(review): excerpt elides lines (breaks, gotos, error labels);
 * comments cover only visible code.
 */
2410 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2412 struct ctl_lun_rm_params *params;
2413 struct ctl_be_block_lun *be_lun;
2414 struct ctl_be_lun *cbe_lun;
2417 params = &req->reqdata.rm;
2419 sx_xlock(&softc->modify_lock);
2420 mtx_lock(&softc->lock);
2421 SLIST_FOREACH(be_lun, &softc->lun_list, links) {
2422 if (be_lun->cbe_lun.lun_id == params->lun_id) {
2423 SLIST_REMOVE(&softc->lun_list, be_lun,
2424 ctl_be_block_lun, links);
2429 mtx_unlock(&softc->lock);
2430 sx_xunlock(&softc->modify_lock);
2431 if (be_lun == NULL) {
2432 snprintf(req->error_str, sizeof(req->error_str),
2433 "LUN %u is not managed by the block backend",
2437 cbe_lun = &be_lun->cbe_lun;
/* Quiesce in-flight work before releasing the backing store. */
2439 if (be_lun->vn != NULL) {
2440 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2441 ctl_lun_no_media(cbe_lun);
2442 taskqueue_drain_all(be_lun->io_taskqueue);
2443 ctl_be_block_close(be_lun);
2446 mtx_lock(&softc->lock);
2447 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2448 mtx_unlock(&softc->lock);
2450 retval = ctl_remove_lun(cbe_lun);
2452 snprintf(req->error_str, sizeof(req->error_str),
2453 "error %d returned from ctl_remove_lun() for "
2454 "LUN %d", retval, params->lun_id);
2455 mtx_lock(&softc->lock);
2456 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2457 mtx_unlock(&softc->lock);
/* Wait for the shutdown callback to mark the LUN unconfigured. */
2461 mtx_lock(&softc->lock);
2462 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2463 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0);
2464 if (retval == EINTR)
2467 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2468 if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2469 mtx_unlock(&softc->lock);
2470 free(be_lun, M_CTLBLK);
2472 mtx_unlock(&softc->lock);
2476 req->status = CTL_LUN_OK;
2480 req->status = CTL_LUN_ERROR;
/*
 * Handle a CTL_LUNREQ_MODIFY request for an existing block-backend LUN:
 * look the LUN up by id, apply a new size and/or option list, re-resolve
 * the HA (primary/secondary) role, and (re)open or close the backing
 * store accordingly.  Sets req->status to CTL_LUN_OK, CTL_LUN_WARNING
 * (reopen failed), or CTL_LUN_ERROR (LUN not found).
 * NOTE(review): several lines are elided from this view (declarations of
 * wasprim/value/oldsize/error, some braces and else arms, goto labels);
 * comments describe only the visible code.
 */
2485 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2487 struct ctl_lun_modify_params *params;
2488 struct ctl_be_block_lun *be_lun;
2489 struct ctl_be_lun *cbe_lun;
2494 params = &req->reqdata.modify;
/* modify_lock serializes modify/remove requests against each other. */
2496 sx_xlock(&softc->modify_lock);
2497 mtx_lock(&softc->lock);
/* Find the LUN with the requested id; be_lun is NULL if none matches. */
2498 SLIST_FOREACH(be_lun, &softc->lun_list, links) {
2499 if (be_lun->cbe_lun.lun_id == params->lun_id)
2502 mtx_unlock(&softc->lock);
2503 if (be_lun == NULL) {
2504 snprintf(req->error_str, sizeof(req->error_str),
2505 "LUN %u is not managed by the block backend",
2509 cbe_lun = &be_lun->cbe_lun;
/* A nonzero size is a request to resize; 0 means keep the current size. */
2511 if (params->lun_size_bytes != 0)
2512 be_lun->params.lun_size_bytes = params->lun_size_bytes;
/* Replace the LUN option list with a clone of the caller-supplied nvlist. */
2514 if (req->args_nvl != NULL) {
2515 nvlist_destroy(cbe_lun->options);
2516 cbe_lun->options = nvlist_clone(req->args_nvl);
/*
 * Recompute the HA role: an explicit "ha_role" option wins; otherwise
 * follow the controller's active-shelf flag.  Remember the old role so we
 * only notify CTL on an actual change.
 */
2519 wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
2520 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2521 if (value != NULL) {
2522 if (strcmp(value, "primary") == 0)
2523 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2525 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2526 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2527 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2529 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2530 if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
2531 if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
2532 ctl_lun_primary(cbe_lun);
2534 ctl_lun_secondary(cbe_lun);
/*
 * The primary side (or any side in SER_ONLY HA mode) keeps the backing
 * store open: open it if closed, or reopen via the dev/file path matching
 * the vnode type to pick up size/option changes.  Track the old size so a
 * capacity change can be reported below.
 */
2537 oldsize = be_lun->size_blocks;
2538 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2539 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2540 if (be_lun->vn == NULL)
2541 error = ctl_be_block_open(be_lun, req);
2542 else if (vn_isdisk(be_lun->vn, &error))
2543 error = ctl_be_block_open_dev(be_lun, req);
2544 else if (be_lun->vn->v_type == VREG) {
2545 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2546 error = ctl_be_block_open_file(be_lun, req);
2547 VOP_UNLOCK(be_lun->vn, 0);
/* Propagate media presence to CTL based on whether a vnode is attached. */
2550 if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
2551 be_lun->vn != NULL) {
2552 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2553 ctl_lun_has_media(cbe_lun);
2554 } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
2555 be_lun->vn == NULL) {
2556 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2557 ctl_lun_no_media(cbe_lun);
2559 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
/*
 * Secondary side (non-SER_ONLY): the backing store must be closed; drain
 * queued backend work first.
 */
2561 if (be_lun->vn != NULL) {
2562 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2563 ctl_lun_no_media(cbe_lun);
2564 taskqueue_drain_all(be_lun->io_taskqueue);
2565 error = ctl_be_block_close(be_lun);
/* Tell CTL if the (re)open changed the LUN capacity. */
2569 if (be_lun->size_blocks != oldsize)
2570 ctl_lun_capacity_changed(cbe_lun);
2572 /* Tell the user the exact size we ended up using */
2573 params->lun_size_bytes = be_lun->size_bytes;
2575 sx_xunlock(&softc->modify_lock);
/* A reopen failure is non-fatal: report it as a warning, not an error. */
2576 req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
/* Error path (LUN not found): label elided from this view. */
2580 sx_xunlock(&softc->modify_lock);
2581 req->status = CTL_LUN_ERROR;
/*
 * Backend callback invoked by CTL once a LUN has been removed: drain and
 * free the per-LUN taskqueue, release devstat/options/path/locks, then
 * either hand the final free to a waiting ctl_be_block_rm() or free the
 * structure here.
 */
2586 ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun)
2588 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun;
2589 struct ctl_be_block_softc *softc = be_lun->softc;
/* No more backend work may be queued; flush and destroy the taskqueue. */
2591 taskqueue_drain_all(be_lun->io_taskqueue);
2592 taskqueue_free(be_lun->io_taskqueue);
2593 if (be_lun->disk_stats != NULL)
2594 devstat_remove_entry(be_lun->disk_stats);
2595 nvlist_destroy(be_lun->cbe_lun.options);
2596 free(be_lun->dev_path, M_CTLBLK);
2597 mtx_destroy(&be_lun->queue_lock);
2598 mtx_destroy(&be_lun->io_lock);
/*
 * If a remover is sleeping on this LUN (CTL_BE_BLOCK_LUN_WAITING), it is
 * presumably woken here (the wakeup line is elided from this view) and
 * frees be_lun itself after seeing UNCONFIGURED; otherwise free it now.
 */
2600 mtx_lock(&softc->lock);
2601 be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2602 if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2605 free(be_lun, M_CTLBLK);
2606 mtx_unlock(&softc->lock);
/*
 * Dispatch a configuration-write CDB for a block-backend LUN.  Cache
 * syncs are queued to the per-LUN taskqueue; START STOP UNIT is handled
 * inline (start/stop and media load/eject); anything else is rejected as
 * an invalid opcode.
 * NOTE(review): some lines (break statements, a few braces/else arms, the
 * final return) are elided from this view; comments describe only the
 * visible code.
 */
2610 ctl_be_block_config_write(union ctl_io *io)
2612 struct ctl_be_block_lun *be_lun;
2613 struct ctl_be_lun *cbe_lun;
2616 DPRINTF("entered\n");
2618 cbe_lun = CTL_BACKEND_LUN(io);
2619 be_lun = (struct ctl_be_block_lun *)cbe_lun;
2622 switch (io->scsiio.cdb[0]) {
2623 case SYNCHRONIZE_CACHE:
2624 case SYNCHRONIZE_CACHE_16:
2629 * The upper level CTL code will filter out any CDBs with
2630 * the immediate bit set and return the proper error.
2632 * We don't really need to worry about what LBA range the
2633 * user asked to be synced out. When they issue a sync
2634 * cache command, we'll sync out the whole thing.
/* Queue the sync to the backend worker; completion happens there. */
2636 mtx_lock(&be_lun->queue_lock);
2637 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2639 mtx_unlock(&be_lun->queue_lock);
2640 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2642 case START_STOP_UNIT: {
2643 struct scsi_start_stop_unit *cdb;
2644 struct ctl_lun_req req;
2646 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
/* Power-condition requests are accepted but otherwise ignored. */
2647 if ((cdb->how & SSS_PC_MASK) != 0) {
2648 ctl_set_success(&io->scsiio);
2649 ctl_config_write_done(io);
2652 if (cdb->how & SSS_START) {
/* START with LOEJ and no backing vnode: (re)load the media. */
2653 if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
2654 retval = ctl_be_block_open(be_lun, &req);
2655 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2657 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2658 ctl_lun_has_media(cbe_lun);
2660 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2661 ctl_lun_no_media(cbe_lun);
2664 ctl_start_lun(cbe_lun);
2666 ctl_stop_lun(cbe_lun);
/* STOP with LOEJ: mark the media ejected and close the backing store. */
2667 if (cdb->how & SSS_LOEJ) {
2668 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2669 cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
2670 ctl_lun_ejected(cbe_lun);
2671 if (be_lun->vn != NULL)
2672 ctl_be_block_close(be_lun);
2676 ctl_set_success(&io->scsiio);
2677 ctl_config_write_done(io);
/* Remaining accepted opcodes (case labels elided) complete trivially. */
2681 ctl_set_success(&io->scsiio);
2682 ctl_config_write_done(io);
/* default: unsupported opcode. */
2685 ctl_set_invalid_opcode(&io->scsiio);
2686 ctl_config_write_done(io);
2687 retval = CTL_RETVAL_COMPLETE;
/*
 * Dispatch a configuration-read CDB for a block-backend LUN.  GET LBA
 * STATUS (SERVICE ACTION IN) is queued to the per-LUN taskqueue and
 * completes asynchronously; unknown service actions and opcodes are
 * rejected immediately.
 * NOTE(review): break statements, part of the ctl_set_invalid_field()
 * argument list, and the final return are elided from this view.
 */
2695 ctl_be_block_config_read(union ctl_io *io)
2697 struct ctl_be_block_lun *be_lun;
2700 DPRINTF("entered\n");
2702 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
2704 switch (io->scsiio.cdb[0]) {
2705 case SERVICE_ACTION_IN:
2706 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
/* GET LBA STATUS: hand off to the backend worker thread. */
2707 mtx_lock(&be_lun->queue_lock);
2708 STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2709 &io->io_hdr, links);
2710 mtx_unlock(&be_lun->queue_lock);
2711 taskqueue_enqueue(be_lun->io_taskqueue,
2713 retval = CTL_RETVAL_QUEUED;
/* Unsupported service action: flag the offending CDB field. */
2716 ctl_set_invalid_field(&io->scsiio,
2722 ctl_config_read_done(io);
2723 retval = CTL_RETVAL_COMPLETE;
/* default: unsupported opcode. */
2726 ctl_set_invalid_opcode(&io->scsiio);
2727 ctl_config_read_done(io);
2728 retval = CTL_RETVAL_COMPLETE;
/*
 * Emit this LUN's backend-specific status into the caller's sbuf as an
 * XML <num_threads> element.  Error checks between the sbuf_printf()
 * calls and the final return are elided from this view.
 */
2736 ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb)
2738 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
2741 retval = sbuf_printf(sb, "\t<num_threads>");
2744 retval = sbuf_printf(sb, "%d", lun->num_threads);
2747 retval = sbuf_printf(sb, "</num_threads>\n");
/*
 * Query a named attribute of the LUN via the backing store's getattr
 * method.  Returns UINT64_MAX when the backing store provides no getattr
 * hook (i.e. "attribute unavailable").
 */
2754 ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
2756 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
2758 if (lun->getattr == NULL)
2759 return (UINT64_MAX);
2760 return (lun->getattr(lun, attrname));
/*
 * One-time backend initialization: set up the softc's locks, create the
 * UMA zones for I/O descriptors (beio) and data segments (buf_zone), and
 * start with an empty LUN list.
 */
2764 ctl_be_block_init(void)
2766 struct ctl_be_block_softc *softc = &backend_block_softc;
2768 sx_init(&softc->modify_lock, "ctlblock modify");
2769 mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
/* Per-request I/O descriptors. */
2770 softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2771 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
/* Fixed-size data segment buffers of CTLBLK_MAX_SEG bytes each. */
2772 softc->buf_zone = uma_zcreate("ctlblock", CTLBLK_MAX_SEG,
2773 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2774 SLIST_INIT(&softc->lun_list);
/*
 * Backend teardown: remove every remaining LUN through CTL, then destroy
 * the UMA zones and the softc's locks.  Mirrors ctl_be_block_init().
 */
2780 ctl_be_block_shutdown(void)
2782 struct ctl_be_block_softc *softc = &backend_block_softc;
2783 struct ctl_be_block_lun *lun;
2785 mtx_lock(&softc->lock);
/* Pop LUNs off the list one at a time until it is empty. */
2786 while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
2787 SLIST_REMOVE_HEAD(&softc->lun_list, links);
2790 * Drop our lock here. Since ctl_remove_lun() can call
2791 * back into us, this could potentially lead to a recursive
2792 * lock of the same mutex, which would cause a hang.
2794 mtx_unlock(&softc->lock);
2795 ctl_remove_lun(&lun->cbe_lun);
2796 mtx_lock(&softc->lock);
2798 mtx_unlock(&softc->lock);
2799 uma_zdestroy(softc->buf_zone);
2800 uma_zdestroy(softc->beio_zone);
2801 mtx_destroy(&softc->lock);
2802 sx_destroy(&softc->modify_lock);