1 /*-
2  * Copyright (c) 2003 Silicon Graphics International Corp.
3  * Copyright (c) 2009-2011 Spectra Logic Corporation
4  * Copyright (c) 2012 The FreeBSD Foundation
5  * All rights reserved.
6  *
7  * Portions of this software were developed by Edward Tomasz Napierala
8  * under sponsorship from the FreeBSD Foundation.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions, and the following disclaimer,
15  *    without modification.
16  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
17  *    substantially similar to the "NO WARRANTY" disclaimer below
18  *    ("Disclaimer") and any redistribution must be conditioned upon
19  *    including a substantially similar Disclaimer requirement for further
20  *    binary redistribution.
21  *
22  * NO WARRANTY
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGES.
34  *
35  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
36  */
37 /*
38  * CAM Target Layer driver backend for block devices.
39  *
40  * Author: Ken Merry <ken@FreeBSD.org>
41  */
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
44
45 #include <opt_kdtrace.h>
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/kernel.h>
50 #include <sys/types.h>
51 #include <sys/kthread.h>
52 #include <sys/bio.h>
53 #include <sys/fcntl.h>
54 #include <sys/limits.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/condvar.h>
58 #include <sys/malloc.h>
59 #include <sys/conf.h>
60 #include <sys/ioccom.h>
61 #include <sys/queue.h>
62 #include <sys/sbuf.h>
63 #include <sys/endian.h>
64 #include <sys/uio.h>
65 #include <sys/buf.h>
66 #include <sys/taskqueue.h>
67 #include <sys/vnode.h>
68 #include <sys/namei.h>
69 #include <sys/mount.h>
70 #include <sys/disk.h>
71 #include <sys/fcntl.h>
72 #include <sys/filedesc.h>
73 #include <sys/proc.h>
74 #include <sys/pcpu.h>
75 #include <sys/module.h>
76 #include <sys/sdt.h>
77 #include <sys/devicestat.h>
78 #include <sys/sysctl.h>
79
80 #include <geom/geom.h>
81
82 #include <cam/cam.h>
83 #include <cam/scsi/scsi_all.h>
84 #include <cam/scsi/scsi_da.h>
85 #include <cam/ctl/ctl_io.h>
86 #include <cam/ctl/ctl.h>
87 #include <cam/ctl/ctl_backend.h>
88 #include <cam/ctl/ctl_frontend_internal.h>
89 #include <cam/ctl/ctl_ioctl.h>
90 #include <cam/ctl/ctl_scsi_all.h>
91 #include <cam/ctl/ctl_error.h>
92
93 /*
94  * The idea here is that we'll allocate enough S/G space to hold a 1MB
95  * I/O.  If we get an I/O larger than that, we'll split it.
96  */
97 #define CTLBLK_MAX_IO_SIZE      (1024 * 1024)
98 #define CTLBLK_MAX_SEG          MAXPHYS
99 #define CTLBLK_MAX_SEGS         MAX(CTLBLK_MAX_IO_SIZE / CTLBLK_MAX_SEG, 1)
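
/*
 * Worked example (illustrative, not part of the original source): with
 * the common MAXPHYS of 128KB, CTLBLK_MAX_SEGS comes out to
 * 1MB / 128KB = 8 segments per I/O.  Since the division truncates, a
 * compile-time check such as the one below (disabled with #if 0, the
 * same convention used elsewhere in this file) would catch a MAXPHYS
 * for which the segments no longer cover a full-sized I/O.
 */
#if 0
CTASSERT(CTLBLK_MAX_SEGS * CTLBLK_MAX_SEG >= CTLBLK_MAX_IO_SIZE);
#endif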
100
101 #ifdef CTLBLK_DEBUG
102 #define DPRINTF(fmt, args...) \
103     printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
104 #else
105 #define DPRINTF(fmt, args...) do {} while(0)
106 #endif
107
108 SDT_PROVIDER_DEFINE(cbb);
109
110 typedef enum {
111         CTL_BE_BLOCK_LUN_UNCONFIGURED   = 0x01,
112         CTL_BE_BLOCK_LUN_CONFIG_ERR     = 0x02,
113         CTL_BE_BLOCK_LUN_WAITING        = 0x04,
114         CTL_BE_BLOCK_LUN_MULTI_THREAD   = 0x08
115 } ctl_be_block_lun_flags;
116
117 typedef enum {
118         CTL_BE_BLOCK_NONE,
119         CTL_BE_BLOCK_DEV,
120         CTL_BE_BLOCK_FILE
121 } ctl_be_block_type;
122
123 struct ctl_be_block_devdata {
124         struct cdev *cdev;
125         struct cdevsw *csw;
126         int dev_ref;
127 };
128
129 struct ctl_be_block_filedata {
130         struct ucred *cred;
131 };
132
133 union ctl_be_block_bedata {
134         struct ctl_be_block_devdata dev;
135         struct ctl_be_block_filedata file;
136 };
137
138 struct ctl_be_block_io;
139 struct ctl_be_block_lun;
140
141 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
142                                struct ctl_be_block_io *beio);
143
144 /*
145  * Backend LUN structure.  There is a 1:1 mapping between a block device
146  * and a backend block LUN, and between a backend block LUN and a CTL LUN.
147  */
148 struct ctl_be_block_lun {
149         struct ctl_block_disk *disk;
150         char lunname[32];
151         char *dev_path;
152         ctl_be_block_type dev_type;
153         struct vnode *vn;
154         union ctl_be_block_bedata backend;
155         cbb_dispatch_t dispatch;
156         cbb_dispatch_t lun_flush;
157         cbb_dispatch_t unmap;
158         struct mtx lock;
159         uma_zone_t lun_zone;
160         uint64_t size_blocks;
161         uint64_t size_bytes;
162         uint32_t blocksize;
163         int blocksize_shift;
164         uint16_t pblockexp;
165         uint16_t pblockoff;
166         struct ctl_be_block_softc *softc;
167         struct devstat *disk_stats;
168         ctl_be_block_lun_flags flags;
169         STAILQ_ENTRY(ctl_be_block_lun) links;
170         struct ctl_be_lun ctl_be_lun;
171         struct taskqueue *io_taskqueue;
172         struct task io_task;
173         int num_threads;
174         STAILQ_HEAD(, ctl_io_hdr) input_queue;
175         STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
176         STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
177 };
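
/*
 * A sketch of how the callbacks above are bound (see
 * ctl_be_block_open_file() below; the device case follows the same
 * pattern): file-backed LUNs get ctl_be_block_dispatch_file and
 * ctl_be_block_flush_file, device-backed LUNs get the *_dev variants,
 * and only device-backed LUNs set an unmap callback, which is why the
 * WRITE SAME and UNMAP paths check be_lun->unmap for NULL.
 */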
178
179 /*
180  * Overall softc structure for the block backend module.
181  */
182 struct ctl_be_block_softc {
183         struct mtx                       lock;
184         int                              num_disks;
185         STAILQ_HEAD(, ctl_block_disk)    disk_list;
186         int                              num_luns;
187         STAILQ_HEAD(, ctl_be_block_lun)  lun_list;
188 };
189
190 static struct ctl_be_block_softc backend_block_softc;
191
192 /*
193  * Per-I/O information.
194  */
195 struct ctl_be_block_io {
196         union ctl_io                    *io;
197         struct ctl_sg_entry             sg_segs[CTLBLK_MAX_SEGS];
198         struct iovec                    xiovecs[CTLBLK_MAX_SEGS];
199         int                             bio_cmd;
200         int                             bio_flags;
201         int                             num_segs;
202         int                             num_bios_sent;
203         int                             num_bios_done;
204         int                             send_complete;
205         int                             num_errors;
206         struct bintime                  ds_t0;
207         devstat_tag_type                ds_tag_type;
208         devstat_trans_flags             ds_trans_type;
209         uint64_t                        io_len;
210         uint64_t                        io_offset;
211         struct ctl_be_block_softc       *softc;
212         struct ctl_be_block_lun         *lun;
213         void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
214 };
215
216 static int cbb_num_threads = 14;
217 TUNABLE_INT("kern.cam.ctl.block.num_threads", &cbb_num_threads);
218 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
219             "CAM Target Layer Block Backend");
220 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RW,
221            &cbb_num_threads, 0, "Number of threads per backing file");
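
/*
 * Usage sketch (the value is only an example): the thread count can be
 * set at boot via loader.conf(5), e.g. kern.cam.ctl.block.num_threads="32",
 * or adjusted at runtime with sysctl(8), since the OID is CTLFLAG_RW.
 */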
222
223 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
224 static void ctl_free_beio(struct ctl_be_block_io *beio);
225 static void ctl_complete_beio(struct ctl_be_block_io *beio);
226 static int ctl_be_block_move_done(union ctl_io *io);
227 static void ctl_be_block_biodone(struct bio *bio);
228 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
229                                     struct ctl_be_block_io *beio);
230 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
231                                        struct ctl_be_block_io *beio);
232 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
233                                    struct ctl_be_block_io *beio);
234 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
235                                    struct ctl_be_block_io *beio);
236 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
237                                       struct ctl_be_block_io *beio);
238 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
239                                     union ctl_io *io);
240 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
241                                   union ctl_io *io);
242 static void ctl_be_block_worker(void *context, int pending);
243 static int ctl_be_block_submit(union ctl_io *io);
244 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
245                                    int flag, struct thread *td);
246 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
247                                   struct ctl_lun_req *req);
248 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
249                                  struct ctl_lun_req *req);
250 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
251 static int ctl_be_block_open(struct ctl_be_block_softc *softc,
252                              struct ctl_be_block_lun *be_lun,
253                              struct ctl_lun_req *req);
254 static int ctl_be_block_create(struct ctl_be_block_softc *softc,
255                                struct ctl_lun_req *req);
256 static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
257                            struct ctl_lun_req *req);
258 static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
259                                   struct ctl_lun_req *req);
260 static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
261                                  struct ctl_lun_req *req);
262 static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
263                            struct ctl_lun_req *req);
264 static void ctl_be_block_lun_shutdown(void *be_lun);
265 static void ctl_be_block_lun_config_status(void *be_lun,
266                                            ctl_lun_config_status status);
267 static int ctl_be_block_config_write(union ctl_io *io);
268 static int ctl_be_block_config_read(union ctl_io *io);
269 static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
270 int ctl_be_block_init(void);
271
272 static struct ctl_backend_driver ctl_be_block_driver = 
273 {
274         .name = "block",
275         .flags = CTL_BE_FLAG_HAS_CONFIG,
276         .init = ctl_be_block_init,
277         .data_submit = ctl_be_block_submit,
278         .data_move_done = ctl_be_block_move_done,
279         .config_read = ctl_be_block_config_read,
280         .config_write = ctl_be_block_config_write,
281         .ioctl = ctl_be_block_ioctl,
282         .lun_info = ctl_be_block_lun_info
283 };
284
285 MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
286 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
287
288 static uma_zone_t beio_zone;
289
290 static struct ctl_be_block_io *
291 ctl_alloc_beio(struct ctl_be_block_softc *softc)
292 {
293         struct ctl_be_block_io *beio;
294
295         beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO);
296         beio->softc = softc;
297         return (beio);
298 }
299
300 static void
301 ctl_free_beio(struct ctl_be_block_io *beio)
302 {
303         int duplicate_free;
304         int i;
305
306         duplicate_free = 0;
307
308         for (i = 0; i < beio->num_segs; i++) {
309                 if (beio->sg_segs[i].addr == NULL)
310                         duplicate_free++;
311
312                 uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
313                 beio->sg_segs[i].addr = NULL;
314         }
315
316         if (duplicate_free > 0) {
317                 printf("%s: %d duplicate frees out of %d segments\n", __func__,
318                        duplicate_free, beio->num_segs);
319         }
320
321         uma_zfree(beio_zone, beio);
322 }
323
324 static void
325 ctl_complete_beio(struct ctl_be_block_io *beio)
326 {
327         union ctl_io *io;
328         int io_len;
329
330         io = beio->io;
331
332         if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)
333                 io_len = beio->io_len;
334         else
335                 io_len = 0;
336
337         devstat_end_transaction(beio->lun->disk_stats,
338                                 /*bytes*/ io_len,
339                                 beio->ds_tag_type,
340                                 beio->ds_trans_type,
341                                 /*now*/ NULL,
342                                 /*then*/&beio->ds_t0);
343
344         if (beio->beio_cont != NULL) {
345                 beio->beio_cont(beio);
346         } else {
347                 ctl_free_beio(beio);
348                 ctl_done(io);
349         }
350 }
351
352 static int
353 ctl_be_block_move_done(union ctl_io *io)
354 {
355         struct ctl_be_block_io *beio;
356         struct ctl_be_block_lun *be_lun;
357 #ifdef CTL_TIME_IO
358         struct bintime cur_bt;
359 #endif  
360
361         beio = (struct ctl_be_block_io *)
362                 io->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptr;
363
364         be_lun = beio->lun;
365
366         DPRINTF("entered\n");
367
368 #ifdef CTL_TIME_IO
369         getbintime(&cur_bt);
370         bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
371         bintime_add(&io->io_hdr.dma_bt, &cur_bt);
372         io->io_hdr.num_dmas++;
373 #endif  
374
375         /*
376          * We set status at this point for read commands, and write
377          * commands with errors.
378          */
379         if ((beio->bio_cmd == BIO_READ)
380          && (io->io_hdr.port_status == 0)
381          && ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0)
382          && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE))
383                 ctl_set_success(&io->scsiio);
384         else if ((io->io_hdr.port_status != 0)
385               && ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0)
386               && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
387                 /*
388                  * For hardware error sense keys, the sense key
389                  * specific value is defined to be a retry count,
390                  * but we use it to pass back an internal FETD
391                  * error code.  XXX KDM  Hopefully the FETD is only
392                  * using 16 bits for an error code, since that's
393                  * all the space we have in the sks field.
394                  */
395                 ctl_set_internal_failure(&io->scsiio,
396                                          /*sks_valid*/ 1,
397                                          /*retry_count*/
398                                          io->io_hdr.port_status);
399         }
400
401         /*
402          * If this is a read, or a write with errors, it is done.
403          */
404         if ((beio->bio_cmd == BIO_READ)
405          || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
406          || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
407                 ctl_complete_beio(beio);
408                 return (0);
409         }
410
411         /*
412          * At this point, we have a write and the DMA completed
413          * successfully.  We now have to queue it to the task queue to
414          * execute the backend I/O.  That is because we do blocking
415          * memory allocations, and in the file backing case, blocking I/O.
416          * This move done routine is generally called in the SIM's
417          * interrupt context, and therefore we cannot block.
418          */
419         mtx_lock(&be_lun->lock);
420         /*
421          * XXX KDM make sure that links is okay to use at this point.
422          * Otherwise, we either need to add another field to ctl_io_hdr,
423          * or deal with resource allocation here.
424          */
425         STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
426         mtx_unlock(&be_lun->lock);
427
428         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
429
430         return (0);
431 }
432
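/*
 * Per-bio completion handler.  Completion protocol sketch, as implemented
 * here and in the dispatch routines below: the sender bumps num_bios_sent
 * under the LUN lock for each bio it issues and sets send_complete just
 * before issuing the last one; this handler bumps num_bios_done, and only
 * the invocation that sees send_complete set and num_bios_done equal to
 * num_bios_sent goes on to finish the beio.
 */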
433 static void
434 ctl_be_block_biodone(struct bio *bio)
435 {
436         struct ctl_be_block_io *beio;
437         struct ctl_be_block_lun *be_lun;
438         union ctl_io *io;
439         int error;
440
441         beio = bio->bio_caller1;
442         be_lun = beio->lun;
443         io = beio->io;
444
445         DPRINTF("entered\n");
446
447         error = bio->bio_error;
448         mtx_lock(&be_lun->lock);
449         if (error != 0)
450                 beio->num_errors++;
451
452         beio->num_bios_done++;
453
454         /*
455          * XXX KDM will this cause WITNESS to complain?  Holding a lock
456          * during the free might cause it to complain.
457          */
458         g_destroy_bio(bio);
459
460         /*
461          * If the send complete bit isn't set, or we aren't the last I/O to
462          * complete, then we're done.
463          */
464         if ((beio->send_complete == 0)
465          || (beio->num_bios_done < beio->num_bios_sent)) {
466                 mtx_unlock(&be_lun->lock);
467                 return;
468         }
469
470         /*
471          * At this point, we've verified that we are the last I/O to
472          * complete, so it's safe to drop the lock.
473          */
474         mtx_unlock(&be_lun->lock);
475
476         /*
477          * If there are any errors from the backing device, we fail the
478          * entire I/O with a medium error.
479          */
480         if (beio->num_errors > 0) {
481                 if (error == EOPNOTSUPP) {
482                         ctl_set_invalid_opcode(&io->scsiio);
483                 } else if (beio->bio_cmd == BIO_FLUSH) {
484                         /* XXX KDM is there is a better error here? */
485                         ctl_set_internal_failure(&io->scsiio,
486                                                  /*sks_valid*/ 1,
487                                                  /*retry_count*/ 0xbad2);
488                 } else
489                         ctl_set_medium_error(&io->scsiio);
490                 ctl_complete_beio(beio);
491                 return;
492         }
493
494         /*
495          * If this is a write, a flush or a delete, we're all done.
496          * If this is a read, we can now send the data to the user.
497          */
498         if ((beio->bio_cmd == BIO_WRITE)
499          || (beio->bio_cmd == BIO_FLUSH)
500          || (beio->bio_cmd == BIO_DELETE)) {
501                 ctl_set_success(&io->scsiio);
502                 ctl_complete_beio(beio);
503         } else {
504 #ifdef CTL_TIME_IO
505                 getbintime(&io->io_hdr.dma_start_bt);
506 #endif  
507                 ctl_datamove(io);
508         }
509 }
510
511 static void
512 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
513                         struct ctl_be_block_io *beio)
514 {
515         union ctl_io *io;
516         struct mount *mountpoint;
517         int error, lock_flags;
518
519         DPRINTF("entered\n");
520
521         io = beio->io;
522
523         (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
524
525         if (MNT_SHARED_WRITES(mountpoint)
526          || ((mountpoint == NULL)
527           && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
528                 lock_flags = LK_SHARED;
529         else
530                 lock_flags = LK_EXCLUSIVE;
531
532         vn_lock(be_lun->vn, lock_flags | LK_RETRY);
533
534         binuptime(&beio->ds_t0);
535         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
536
537         error = VOP_FSYNC(be_lun->vn, MNT_WAIT, curthread);
538         VOP_UNLOCK(be_lun->vn, 0);
539
540         vn_finished_write(mountpoint);
541
542         if (error == 0)
543                 ctl_set_success(&io->scsiio);
544         else {
545                 /* XXX KDM is there is a better error here? */
546                 ctl_set_internal_failure(&io->scsiio,
547                                          /*sks_valid*/ 1,
548                                          /*retry_count*/ 0xbad1);
549         }
550
551         ctl_complete_beio(beio);
552 }
553
554 SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t");
555 SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t");
556 SDT_PROBE_DEFINE1(cbb, kernel, read, file_done, "uint64_t");
557 SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t");
558
559 static void
560 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
561                            struct ctl_be_block_io *beio)
562 {
563         struct ctl_be_block_filedata *file_data;
564         union ctl_io *io;
565         struct uio xuio;
566         struct iovec *xiovec;
567         int flags;
568         int error, i;
569
570         DPRINTF("entered\n");
571
572         file_data = &be_lun->backend.file;
573         io = beio->io;
574         flags = beio->bio_flags;
575
576         if (beio->bio_cmd == BIO_READ) {
577                 SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
578         } else {
579                 SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
580         }
581
582         bzero(&xuio, sizeof(xuio));
583         if (beio->bio_cmd == BIO_READ)
584                 xuio.uio_rw = UIO_READ;
585         else
586                 xuio.uio_rw = UIO_WRITE;
587
588         xuio.uio_offset = beio->io_offset;
589         xuio.uio_resid = beio->io_len;
590         xuio.uio_segflg = UIO_SYSSPACE;
591         xuio.uio_iov = beio->xiovecs;
592         xuio.uio_iovcnt = beio->num_segs;
593         xuio.uio_td = curthread;
594
595         for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
596                 xiovec->iov_base = beio->sg_segs[i].addr;
597                 xiovec->iov_len = beio->sg_segs[i].len;
598         }
599
600         if (beio->bio_cmd == BIO_READ) {
601                 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
602
603                 binuptime(&beio->ds_t0);
604                 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
605
606                 /*
607                  * UFS pays attention to IO_DIRECT for reads.  If the
608                  * DIRECTIO option is configured into the kernel, it calls
609                  * ffs_rawread().  But that only works for single-segment
610                  * uios with user space addresses.  In our case, with a
611                  * kernel uio, it still reads into the buffer cache, but it
612                  * will just try to release the buffer from the cache later
613                  * on in ffs_read().
614                  *
615                  * ZFS does not pay attention to IO_DIRECT for reads.
616                  *
617                  * UFS does not pay attention to IO_SYNC for reads.
618                  *
619                  * ZFS pays attention to IO_SYNC (which translates into the
620                  * Solaris define FRSYNC for zfs_read()) for reads.  It
621                  * attempts to sync the file before reading.
622                  *
623                  * So, to attempt to provide some barrier semantics in the
624                  * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC.
625                  */
626                 error = VOP_READ(be_lun->vn, &xuio, (flags & BIO_ORDERED) ?
627                                  (IO_DIRECT|IO_SYNC) : 0, file_data->cred);
628
629                 VOP_UNLOCK(be_lun->vn, 0);
630         } else {
631                 struct mount *mountpoint;
632                 int lock_flags;
633
634                 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
635
636                 if (MNT_SHARED_WRITES(mountpoint)
637                  || ((mountpoint == NULL)
638                   && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
639                         lock_flags = LK_SHARED;
640                 else
641                         lock_flags = LK_EXCLUSIVE;
642
643                 vn_lock(be_lun->vn, lock_flags | LK_RETRY);
644
645                 binuptime(&beio->ds_t0);
646                 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
647
648                 /*
649                  * UFS pays attention to IO_DIRECT for writes.  The write
650                  * is done asynchronously.  (Normally the write would just
651                  * get put into the cache.)
652                  *
653                  * UFS pays attention to IO_SYNC for writes.  It will
654                  * attempt to write the buffer out synchronously if that
655                  * flag is set.
656                  *
657                  * ZFS does not pay attention to IO_DIRECT for writes.
658                  *
659                  * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
660                  * for writes.  It will flush the transaction from the
661                  * cache before returning.
662                  *
663                  * So if we've got the BIO_ORDERED flag set, we want
664                  * IO_SYNC in either the UFS or ZFS case.
665                  */
666                 error = VOP_WRITE(be_lun->vn, &xuio, (flags & BIO_ORDERED) ?
667                                   IO_SYNC : 0, file_data->cred);
668                 VOP_UNLOCK(be_lun->vn, 0);
669
670                 vn_finished_write(mountpoint);
671         }
672
673         /*
674          * If we got an error, set the sense data to "MEDIUM ERROR" and
675          * return the I/O to the user.
676          */
677         if (error != 0) {
678                 char path_str[32];
679
680                 ctl_scsi_path_string(io, path_str, sizeof(path_str));
681                 /*
682                  * XXX KDM ZFS returns ENOSPC when the underlying
683                  * filesystem fills up.  What kind of SCSI error should we
684                  * return for that?
685                  */
686                 printf("%s%s command returned errno %d\n", path_str,
687                        (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error);
688                 ctl_set_medium_error(&io->scsiio);
689                 ctl_complete_beio(beio);
690                 return;
691         }
692
693         /*
694          * If this is a write, we're all done.
695          * If this is a read, we can now send the data to the user.
696          */
697         if (beio->bio_cmd == BIO_WRITE) {
698                 ctl_set_success(&io->scsiio);
699                 SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
700                 ctl_complete_beio(beio);
701         } else {
702                 SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
703 #ifdef CTL_TIME_IO
704                 getbintime(&io->io_hdr.dma_start_bt);
705 #endif  
706                 ctl_datamove(io);
707         }
708 }
709
710 static void
711 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
712                        struct ctl_be_block_io *beio)
713 {
714         struct bio *bio;
715         union ctl_io *io;
716         struct ctl_be_block_devdata *dev_data;
717
718         dev_data = &be_lun->backend.dev;
719         io = beio->io;
720
721         DPRINTF("entered\n");
722
723         /* This can't fail, it's a blocking allocation. */
724         bio = g_alloc_bio();
725
726         bio->bio_cmd        = BIO_FLUSH;
727         bio->bio_flags     |= BIO_ORDERED;
728         bio->bio_dev        = dev_data->cdev;
729         bio->bio_offset     = 0;
730         bio->bio_data       = 0;
731         bio->bio_done       = ctl_be_block_biodone;
732         bio->bio_caller1    = beio;
733         bio->bio_pblkno     = 0;
734
735         /*
736          * We don't need to acquire the LUN lock here, because we are only
737          * sending one bio, and so there is no other context to synchronize
738          * with.
739          */
740         beio->num_bios_sent = 1;
741         beio->send_complete = 1;
742
743         binuptime(&beio->ds_t0);
744         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
745
746         (*dev_data->csw->d_strategy)(bio);
747 }
748
749 static void
750 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
751                        struct ctl_be_block_io *beio,
752                        uint64_t off, uint64_t len, int last)
753 {
754         struct bio *bio;
755         struct ctl_be_block_devdata *dev_data;
756         uint64_t maxlen;
757
758         dev_data = &be_lun->backend.dev;
759         maxlen = LONG_MAX - (LONG_MAX % be_lun->blocksize);
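        /*
         * maxlen is LONG_MAX rounded down to a whole number of blocks, so
         * each BIO_DELETE issued below stays block aligned while still
         * fitting in bio_length.  With 512-byte blocks, for example, that
         * works out to LONG_MAX & ~511.
         */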
760         while (len > 0) {
761                 bio = g_alloc_bio();
762                 bio->bio_cmd        = BIO_DELETE;
763                 bio->bio_flags     |= beio->bio_flags;
764                 bio->bio_dev        = dev_data->cdev;
765                 bio->bio_offset     = off;
766                 bio->bio_length     = MIN(len, maxlen);
767                 bio->bio_data       = 0;
768                 bio->bio_done       = ctl_be_block_biodone;
769                 bio->bio_caller1    = beio;
770                 bio->bio_pblkno     = off / be_lun->blocksize;
771
772                 off += bio->bio_length;
773                 len -= bio->bio_length;
774
775                 mtx_lock(&be_lun->lock);
776                 beio->num_bios_sent++;
777                 if (last && len == 0)
778                         beio->send_complete = 1;
779                 mtx_unlock(&be_lun->lock);
780
781                 (*dev_data->csw->d_strategy)(bio);
782         }
783 }
784
785 static void
786 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
787                        struct ctl_be_block_io *beio)
788 {
789         union ctl_io *io;
790         struct ctl_be_block_devdata *dev_data;
791         struct ctl_ptr_len_flags ptrlen;
792         struct scsi_unmap_desc *buf, *end;
793         uint64_t len;
794
795         dev_data = &be_lun->backend.dev;
796         io = beio->io;
797
798         DPRINTF("entered\n");
799
800         binuptime(&beio->ds_t0);
801         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
802
803         if (beio->io_offset == -1) {
804                 beio->io_len = 0;
805                 memcpy(&ptrlen, io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].bytes,
806                        sizeof(ptrlen));
807                 buf = (struct scsi_unmap_desc *)ptrlen.ptr;
808                 end = buf + ptrlen.len / sizeof(*buf);
809                 for (; buf < end; buf++) {
810                         len = (uint64_t)scsi_4btoul(buf->length) *
811                             be_lun->blocksize;
812                         beio->io_len += len;
813                         ctl_be_block_unmap_dev_range(be_lun, beio,
814                             scsi_8btou64(buf->lba) * be_lun->blocksize, len,
815                             (end - buf < 2) ? TRUE : FALSE);
816                 }
817         } else
818                 ctl_be_block_unmap_dev_range(be_lun, beio,
819                     beio->io_offset, beio->io_len, TRUE);
820 }
821
822 static void
823 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
824                           struct ctl_be_block_io *beio)
825 {
826         int i;
827         struct bio *bio;
828         struct ctl_be_block_devdata *dev_data;
829         off_t cur_offset;
830         int max_iosize;
831
832         DPRINTF("entered\n");
833
834         dev_data = &be_lun->backend.dev;
835
836         /*
837          * We have to limit our I/O size to the maximum supported by the
838          * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
839          * set it properly, use DFLTPHYS.
840          */
841         max_iosize = dev_data->cdev->si_iosize_max;
842         if (max_iosize < PAGE_SIZE)
843                 max_iosize = DFLTPHYS;
844
845         cur_offset = beio->io_offset;
846
847         /*
848          * XXX KDM need to accurately reflect the number of I/Os outstanding
849          * to a device.
850          */
851         binuptime(&beio->ds_t0);
852         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
853
854         for (i = 0; i < beio->num_segs; i++) {
855                 size_t cur_size;
856                 uint8_t *cur_ptr;
857
858                 cur_size = beio->sg_segs[i].len;
859                 cur_ptr = beio->sg_segs[i].addr;
860
861                 while (cur_size > 0) {
862                         /* This can't fail, it's a blocking allocation. */
863                         bio = g_alloc_bio();
864
865                         KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
866
867                         bio->bio_cmd = beio->bio_cmd;
868                         bio->bio_flags |= beio->bio_flags;
869                         bio->bio_dev = dev_data->cdev;
870                         bio->bio_caller1 = beio;
871                         bio->bio_length = min(cur_size, max_iosize);
872                         bio->bio_offset = cur_offset;
873                         bio->bio_data = cur_ptr;
874                         bio->bio_done = ctl_be_block_biodone;
875                         bio->bio_pblkno = cur_offset / be_lun->blocksize;
876
877                         cur_offset += bio->bio_length;
878                         cur_ptr += bio->bio_length;
879                         cur_size -= bio->bio_length;
880
881                         /*
882                          * Make sure we set the complete bit just before we
883                          * issue the last bio so we don't wind up with a
884                          * race.
885                          *
886                          * Use the LUN mutex here instead of a combination
887                          * of atomic variables for simplicity.
888                          *
889                          * XXX KDM we could have a per-IO lock, but that
890                          * would cause additional per-IO setup and teardown
891                          * overhead.  Hopefully there won't be too much
892                          * contention on the LUN lock.
893                          */
894                         mtx_lock(&be_lun->lock);
895
896                         beio->num_bios_sent++;
897
898                         if ((i == beio->num_segs - 1)
899                          && (cur_size == 0))
900                                 beio->send_complete = 1;
901
902                         mtx_unlock(&be_lun->lock);
903
904                         (*dev_data->csw->d_strategy)(bio);
905                 }
906         }
907 }
908
909 static void
910 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
911 {
912         union ctl_io *io;
913
914         io = beio->io;
915         ctl_free_beio(beio);
916         if (((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)
917           && ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
918                 ctl_config_write_done(io);
919                 return;
920         }
921
922         ctl_be_block_config_write(io);
923 }
924
925 static void
926 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
927                             union ctl_io *io)
928 {
929         struct ctl_be_block_io *beio;
930         struct ctl_be_block_softc *softc;
931         struct ctl_lba_len_flags lbalen;
932         uint64_t len_left, lba;
933         int i, seglen;
934         uint8_t *buf, *end;
935
936         DPRINTF("entered\n");
937
938         beio = io->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptr;
939         softc = be_lun->softc;
940         memcpy(&lbalen, io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].bytes,
941                sizeof(lbalen));
942
943         if (lbalen.flags & ~(SWS_LBDATA | SWS_UNMAP) ||
944             (lbalen.flags & SWS_UNMAP && be_lun->unmap == NULL)) {
945                 ctl_free_beio(beio);
946                 ctl_set_invalid_field(&io->scsiio,
947                                       /*sks_valid*/ 1,
948                                       /*command*/ 1,
949                                       /*field*/ 1,
950                                       /*bit_valid*/ 0,
951                                       /*bit*/ 0);
952                 ctl_config_write_done(io);
953                 return;
954         }
955
956         /*
957          * If the I/O came down with an ordered or head of queue tag, set
958          * the BIO_ORDERED attribute.  For head of queue tags, that's
959          * pretty much the best we can do.
960          */
961         if ((io->scsiio.tag_type == CTL_TAG_ORDERED)
962          || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE))
963                 beio->bio_flags = BIO_ORDERED;
964
965         switch (io->scsiio.tag_type) {
966         case CTL_TAG_ORDERED:
967                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
968                 break;
969         case CTL_TAG_HEAD_OF_QUEUE:
970                 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
971                 break;
972         case CTL_TAG_UNTAGGED:
973         case CTL_TAG_SIMPLE:
974         case CTL_TAG_ACA:
975         default:
976                 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
977                 break;
978         }
979
980         if (lbalen.flags & SWS_UNMAP) {
981                 beio->io_offset = lbalen.lba * be_lun->blocksize;
982                 beio->io_len = (uint64_t)lbalen.len * be_lun->blocksize;
983                 beio->bio_cmd = BIO_DELETE;
984                 beio->ds_trans_type = DEVSTAT_FREE;
985
986                 be_lun->unmap(be_lun, beio);
987                 return;
988         }
989
990         beio->bio_cmd = BIO_WRITE;
991         beio->ds_trans_type = DEVSTAT_WRITE;
992
993         DPRINTF("WRITE SAME at LBA %jx len %u\n",
994                (uintmax_t)lbalen.lba, lbalen.len);
995
996         len_left = (uint64_t)lbalen.len * be_lun->blocksize;
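        /*
         * Replicate the initiator's single block of data across each S/G
         * segment one be_lun->blocksize chunk at a time; when SWS_LBDATA
         * is set, the first four bytes of every block are then overwritten
         * with that block's LBA (the scsi_ulto4b() call below), per the
         * WRITE SAME LBDATA semantics.
         */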
997         for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
998
999                 /*
1000                  * Setup the S/G entry for this chunk.
1001                  */
1002                 seglen = MIN(CTLBLK_MAX_SEG, len_left);
1003                 seglen -= seglen % be_lun->blocksize;
1004                 beio->sg_segs[i].len = seglen;
1005                 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
1006
1007                 DPRINTF("segment %d addr %p len %zd\n", i,
1008                         beio->sg_segs[i].addr, beio->sg_segs[i].len);
1009
1010                 beio->num_segs++;
1011                 len_left -= seglen;
1012
1013                 buf = beio->sg_segs[i].addr;
1014                 end = buf + seglen;
1015                 for (; buf < end; buf += be_lun->blocksize) {
1016                         memcpy(buf, io->scsiio.kern_data_ptr, be_lun->blocksize);
1017                         if (lbalen.flags & SWS_LBDATA)
1018                                 scsi_ulto4b(lbalen.lba + lba, buf);
1019                         lba++;
1020                 }
1021         }
1022
1023         beio->io_offset = lbalen.lba * be_lun->blocksize;
1024         beio->io_len = lba * be_lun->blocksize;
1025
1026         /* We cannot do it all in one pass; adjust the range and schedule a rerun. */
1027         if (len_left > 0) {
1028                 lbalen.lba += lba;
1029                 lbalen.len -= lba;
1030                 memcpy(io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].bytes, &lbalen,
1031                        sizeof(lbalen));
1032                 beio->beio_cont = ctl_be_block_cw_done_ws;
1033         }
1034
1035         be_lun->dispatch(be_lun, beio);
1036 }
1037
1038 static void
1039 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1040                             union ctl_io *io)
1041 {
1042         struct ctl_be_block_io *beio;
1043         struct ctl_be_block_softc *softc;
1044         struct ctl_ptr_len_flags ptrlen;
1045
1046         DPRINTF("entered\n");
1047
1048         beio = io->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptr;
1049         softc = be_lun->softc;
1050         memcpy(&ptrlen, io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].bytes,
1051                sizeof(ptrlen));
1052
1053         if (ptrlen.flags != 0 || be_lun->unmap == NULL) {
1054                 ctl_free_beio(beio);
1055                 ctl_set_invalid_field(&io->scsiio,
1056                                       /*sks_valid*/ 0,
1057                                       /*command*/ 1,
1058                                       /*field*/ 0,
1059                                       /*bit_valid*/ 0,
1060                                       /*bit*/ 0);
1061                 ctl_config_write_done(io);
1062                 return;
1063         }
1064
1065         /*
1066          * If the I/O came down with an ordered or head of queue tag, set
1067          * the BIO_ORDERED attribute.  For head of queue tags, that's
1068          * pretty much the best we can do.
1069          */
1070         if ((io->scsiio.tag_type == CTL_TAG_ORDERED)
1071          || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE))
1072                 beio->bio_flags = BIO_ORDERED;
1073
1074         switch (io->scsiio.tag_type) {
1075         case CTL_TAG_ORDERED:
1076                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1077                 break;
1078         case CTL_TAG_HEAD_OF_QUEUE:
1079                 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1080                 break;
1081         case CTL_TAG_UNTAGGED:
1082         case CTL_TAG_SIMPLE:
1083         case CTL_TAG_ACA:
1084         default:
1085                 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1086                 break;
1087         }
1088
1089         beio->io_len = 0;
1090         beio->io_offset = -1;
1091
1092         beio->bio_cmd = BIO_DELETE;
1093         beio->ds_trans_type = DEVSTAT_FREE;
1094
1095         DPRINTF("WRITE SAME at LBA %jx len %u\n",
1096                (uintmax_t)lbalen.lba, lbalen.len);
1097
1098         be_lun->unmap(be_lun, beio);
1099 }
1100
1101 static void
1102 ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1103 {
1104         union ctl_io *io;
1105
1106         io = beio->io;
1107         ctl_free_beio(beio);
1108         ctl_config_write_done(io);
1109 }
1110
1111 static void
1112 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1113                          union ctl_io *io)
1114 {
1115         struct ctl_be_block_io *beio;
1116         struct ctl_be_block_softc *softc;
1117
1118         DPRINTF("entered\n");
1119
1120         softc = be_lun->softc;
1121         beio = ctl_alloc_beio(softc);
1122         beio->io = io;
1123         beio->lun = be_lun;
1124         beio->beio_cont = ctl_be_block_cw_done;
1125         io->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptr = beio;
1126
1127         switch (io->scsiio.cdb[0]) {
1128         case SYNCHRONIZE_CACHE:
1129         case SYNCHRONIZE_CACHE_16:
1130                 beio->bio_cmd = BIO_FLUSH;
1131                 beio->ds_trans_type = DEVSTAT_NO_DATA;
1132                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1133                 beio->io_len = 0;
1134                 be_lun->lun_flush(be_lun, beio);
1135                 break;
1136         case WRITE_SAME_10:
1137         case WRITE_SAME_16:
1138                 ctl_be_block_cw_dispatch_ws(be_lun, io);
1139                 break;
1140         case UNMAP:
1141                 ctl_be_block_cw_dispatch_unmap(be_lun, io);
1142                 break;
1143         default:
1144                 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1145                 break;
1146         }
1147 }
1148
1149 SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t");
1150 SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t");
1151 SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t");
1152 SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t");
1153
1154 static void
1155 ctl_be_block_next(struct ctl_be_block_io *beio)
1156 {
1157         struct ctl_be_block_lun *be_lun;
1158         union ctl_io *io;
1159
1160         io = beio->io;
1161         be_lun = beio->lun;
1162         ctl_free_beio(beio);
1163         if (((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)
1164           && ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1165                 ctl_done(io);
1166                 return;
1167         }
1168
1169         io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
1170         io->io_hdr.status &= ~CTL_STATUS_MASK;
1171         io->io_hdr.status |= CTL_STATUS_NONE;
1172
1173         mtx_lock(&be_lun->lock);
1174         /*
1175          * XXX KDM make sure that links is okay to use at this point.
1176          * Otherwise, we either need to add another field to ctl_io_hdr,
1177          * or deal with resource allocation here.
1178          */
1179         STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1180         mtx_unlock(&be_lun->lock);
1181
1182         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1183 }
1184
1185 static void
1186 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
1187                            union ctl_io *io)
1188 {
1189         struct ctl_be_block_io *beio;
1190         struct ctl_be_block_softc *softc;
1191         struct ctl_lba_len lbalen;
1192         uint64_t len_left, lbaoff;
1193         int i;
1194
1195         softc = be_lun->softc;
1196
1197         DPRINTF("entered\n");
1198
1199         if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) {
1200                 SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0);
1201         } else {
1202                 SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0);
1203         }
1204
1205         beio = ctl_alloc_beio(softc);
1206         beio->io = io;
1207         beio->lun = be_lun;
1208         io->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptr = beio;
1209
1210         /*
1211          * If the I/O came down with an ordered or head of queue tag, set
1212          * the BIO_ORDERED attribute.  For head of queue tags, that's
1213          * pretty much the best we can do.
1214          *
1215          * XXX KDM we don't have a great way to easily know about the FUA
1216          * bit right now (it is decoded in ctl_read_write(), but we don't
1217          * pass that knowledge to the backend), and in any case we would
1218          * need to determine how to handle it.  
1219          */
1220         if ((io->scsiio.tag_type == CTL_TAG_ORDERED)
1221          || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE))
1222                 beio->bio_flags = BIO_ORDERED;
1223
1224         switch (io->scsiio.tag_type) {
1225         case CTL_TAG_ORDERED:
1226                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1227                 break;
1228         case CTL_TAG_HEAD_OF_QUEUE:
1229                 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1230                 break;
1231         case CTL_TAG_UNTAGGED:
1232         case CTL_TAG_SIMPLE:
1233         case CTL_TAG_ACA:
1234         default:
1235                 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1236                 break;
1237         }
1238
1239         /*
1240          * This path handles read and write only.  The config write path
1241          * handles flush operations.
1242          */
1243         if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) {
1244                 beio->bio_cmd = BIO_READ;
1245                 beio->ds_trans_type = DEVSTAT_READ;
1246         } else {
1247                 beio->bio_cmd = BIO_WRITE;
1248                 beio->ds_trans_type = DEVSTAT_WRITE;
1249         }
1250
1251         memcpy(&lbalen, io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].bytes,
1252                sizeof(lbalen));
1253         DPRINTF("%s at LBA %jx len %u @%ju\n",
1254                (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
1255                (uintmax_t)lbalen.lba, lbalen.len, lbaoff);
1256         lbaoff = io->scsiio.kern_rel_offset / be_lun->blocksize;
1257         beio->io_offset = (lbalen.lba + lbaoff) * be_lun->blocksize;
1258         beio->io_len = MIN((lbalen.len - lbaoff) * be_lun->blocksize,
1259             CTLBLK_MAX_IO_SIZE);
1260         beio->io_len -= beio->io_len % be_lun->blocksize;
1261
1262         for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
1263                 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
1264                     i, CTLBLK_MAX_SEGS));
1265
1266                 /*
1267                  * Setup the S/G entry for this chunk.
1268                  */
1269                 beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
1270                 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
1271
1272                 DPRINTF("segment %d addr %p len %zd\n", i,
1273                         beio->sg_segs[i].addr, beio->sg_segs[i].len);
1274
1275                 beio->num_segs++;
1276                 len_left -= beio->sg_segs[i].len;
1277         }
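        /*
         * If this chunk does not cover the whole request, hook up
         * ctl_be_block_next() as the continuation: it clears the status,
         * advances kern_rel_offset and requeues the I/O on the input
         * queue, so requests larger than CTLBLK_MAX_IO_SIZE are carved
         * into successive chunks.
         */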
1278         if (io->scsiio.kern_rel_offset + beio->io_len <
1279             io->scsiio.kern_total_len)
1280                 beio->beio_cont = ctl_be_block_next;
1281         io->scsiio.be_move_done = ctl_be_block_move_done;
1282         io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
1283         io->scsiio.kern_data_len = beio->io_len;
1284         io->scsiio.kern_data_resid = 0;
1285         io->scsiio.kern_sg_entries = beio->num_segs;
1286         io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST;
1287
1288         /*
1289          * For the read case, we need to read the data into our buffers and
1290          * then we can send it back to the user.  For the write case, we
1291          * need to get the data from the user first.
1292          */
1293         if (beio->bio_cmd == BIO_READ) {
1294                 SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0);
1295                 be_lun->dispatch(be_lun, beio);
1296         } else {
1297                 SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
1298 #ifdef CTL_TIME_IO
1299                 getbintime(&io->io_hdr.dma_start_bt);
1300 #endif  
1301                 ctl_datamove(io);
1302         }
1303 }
1304
1305 static void
1306 ctl_be_block_worker(void *context, int pending)
1307 {
1308         struct ctl_be_block_lun *be_lun;
1309         struct ctl_be_block_softc *softc;
1310         union ctl_io *io;
1311
1312         be_lun = (struct ctl_be_block_lun *)context;
1313         softc = be_lun->softc;
1314
1315         DPRINTF("entered\n");
1316
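        /*
         * Queue service order: in-flight datamoves are drained first, then
         * config writes, then fresh input, so work already under way is
         * pushed toward completion before new I/O is started.
         */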
1317         mtx_lock(&be_lun->lock);
1318         for (;;) {
1319                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1320                 if (io != NULL) {
1321                         struct ctl_be_block_io *beio;
1322
1323                         DPRINTF("datamove queue\n");
1324
1325                         STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
1326                                       ctl_io_hdr, links);
1327
1328                         mtx_unlock(&be_lun->lock);
1329
1330                         beio = (struct ctl_be_block_io *)
1331                             io->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptr;
1332
1333                         be_lun->dispatch(be_lun, beio);
1334
1335                         mtx_lock(&be_lun->lock);
1336                         continue;
1337                 }
1338                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1339                 if (io != NULL) {
1340
1341                         DPRINTF("config write queue\n");
1342
1343                         STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
1344                                       ctl_io_hdr, links);
1345
1346                         mtx_unlock(&be_lun->lock);
1347
1348                         ctl_be_block_cw_dispatch(be_lun, io);
1349
1350                         mtx_lock(&be_lun->lock);
1351                         continue;
1352                 }
1353                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1354                 if (io != NULL) {
1355                         DPRINTF("input queue\n");
1356
1357                         STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
1358                                       ctl_io_hdr, links);
1359                         mtx_unlock(&be_lun->lock);
1360
1361                         /*
1362                          * We must drop the lock, since this routine and
1363                          * its children may sleep.
1364                          */
1365                         ctl_be_block_dispatch(be_lun, io);
1366
1367                         mtx_lock(&be_lun->lock);
1368                         continue;
1369                 }
1370
1371                 /*
1372                  * If we get here, there is no work left in the queues, so
1373                  * just break out and let the task queue go to sleep.
1374                  */
1375                 break;
1376         }
1377         mtx_unlock(&be_lun->lock);
1378 }
1379
1380 /*
1381  * Entry point from CTL to the backend for I/O.  We queue everything to a
1382  * work thread, so this just puts the I/O on a queue and wakes up the
1383  * thread.
1384  */
1385 static int
1386 ctl_be_block_submit(union ctl_io *io)
1387 {
1388         struct ctl_lba_len lbalen;
1389         struct ctl_be_block_lun *be_lun;
1390         struct ctl_be_lun *ctl_be_lun;
1391         int retval;
1392
1393         DPRINTF("entered\n");
1394
1395         retval = CTL_RETVAL_COMPLETE;
1396
1397         ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
1398                 CTL_PRIV_BACKEND_LUN].ptr;
1399         be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;
1400
1401         /*
1402          * Make sure we only get SCSI I/O.
1403          */
1404         KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
1405                 "%#x) encountered", io->io_hdr.io_type));
1406
1407         memcpy(&lbalen, io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].bytes,
1408                sizeof(lbalen));
1409         io->scsiio.kern_total_len = lbalen.len * be_lun->blocksize;
1410         io->scsiio.kern_rel_offset = 0;
1411
1412         mtx_lock(&be_lun->lock);
1413         /*
1414          * XXX KDM make sure that links is okay to use at this point.
1415          * Otherwise, we either need to add another field to ctl_io_hdr,
1416          * or deal with resource allocation here.
1417          */
1418         STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1419         mtx_unlock(&be_lun->lock);
1420
1421         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1422
1423         return (retval);
1424 }
1425
1426 static int
1427 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1428                         int flag, struct thread *td)
1429 {
1430         struct ctl_be_block_softc *softc;
1431         int error;
1432
1433         softc = &backend_block_softc;
1434
1435         error = 0;
1436
1437         switch (cmd) {
1438         case CTL_LUN_REQ: {
1439                 struct ctl_lun_req *lun_req;
1440
1441                 lun_req = (struct ctl_lun_req *)addr;
1442
1443                 switch (lun_req->reqtype) {
1444                 case CTL_LUNREQ_CREATE:
1445                         error = ctl_be_block_create(softc, lun_req);
1446                         break;
1447                 case CTL_LUNREQ_RM:
1448                         error = ctl_be_block_rm(softc, lun_req);
1449                         break;
1450                 case CTL_LUNREQ_MODIFY:
1451                         error = ctl_be_block_modify(softc, lun_req);
1452                         break;
1453                 default:
1454                         lun_req->status = CTL_LUN_ERROR;
1455                         snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1456                                  "%s: invalid LUN request type %d", __func__,
1457                                  lun_req->reqtype);
1458                         break;
1459                 }
1460                 break;
1461         }
1462         default:
1463                 error = ENOTTY;
1464                 break;
1465         }
1466
1467         return (error);
1468 }
1469
1470 static int
1471 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1472 {
1473         struct ctl_be_block_filedata *file_data;
1474         struct ctl_lun_create_params *params;
1475         struct vattr                  vattr;
1476         int                           error;
1477
1478         error = 0;
1479         file_data = &be_lun->backend.file;
1480         params = &req->reqdata.create;
1481
1482         be_lun->dev_type = CTL_BE_BLOCK_FILE;
1483         be_lun->dispatch = ctl_be_block_dispatch_file;
1484         be_lun->lun_flush = ctl_be_block_flush_file;
1485
1486         error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1487         if (error != 0) {
1488                 snprintf(req->error_str, sizeof(req->error_str),
1489                          "%s: error calling VOP_GETATTR() for file %s",
1490                          __func__, be_lun->dev_path);
1491                 return (error);
1492         }
1493
1494         /*
1495          * Verify that we have the ability to upgrade to exclusive
1496          * access on this file so we can trap errors at open instead
1497          * of reporting them during first access.
1498          */
1499         if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) {
1500                 vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY);
1501                 if (be_lun->vn->v_iflag & VI_DOOMED) {
1502                         error = EBADF;
1503                         snprintf(req->error_str, sizeof(req->error_str),
1504                                  "error locking file %s", be_lun->dev_path);
1505                         return (error);
1506                 }
1507         }
1508
1509
1510         file_data->cred = crhold(curthread->td_ucred);
1511         if (params->lun_size_bytes != 0)
1512                 be_lun->size_bytes = params->lun_size_bytes;
1513         else
1514                 be_lun->size_bytes = vattr.va_size;
1515         /*
1516          * We set the multi thread flag for file operations because all
1517          * filesystems (in theory) are capable of allowing multiple readers
1518          * of a file at once.  So we want to get the maximum possible
1519          * concurrency.
1520          */
1521         be_lun->flags |= CTL_BE_BLOCK_LUN_MULTI_THREAD;
1522
1523         /*
1524          * XXX KDM vattr.va_blocksize may be larger than 512 bytes here.
1525          * With ZFS, it is 131072 bytes.  Block sizes that large don't work
1526          * with disklabel and UFS on FreeBSD at least.  Large block sizes
1527          * may not work with other OSes as well.  So just export a sector
1528          * size of 512 bytes, which should work with any OS or
1529          * application.  Since our backing is a file, any block size will
1530          * work fine for the backing store.
1531          */
1532 #if 0
1533         be_lun->blocksize = vattr.va_blocksize;
1534 #endif
1535         if (params->blocksize_bytes != 0)
1536                 be_lun->blocksize = params->blocksize_bytes;
1537         else
1538                 be_lun->blocksize = 512;
1539
1540         /*
1541          * Sanity check.  The media size has to be at least one
1542          * sector long.
1543          */
1544         if (be_lun->size_bytes < be_lun->blocksize) {
1545                 error = EINVAL;
1546                 snprintf(req->error_str, sizeof(req->error_str),
1547                          "file %s size %ju < block size %u", be_lun->dev_path,
1548                          (uintmax_t)be_lun->size_bytes, be_lun->blocksize);
1549         }
1550         return (error);
1551 }
1552
1553 static int
1554 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1555 {
1556         struct ctl_lun_create_params *params;
1557         struct vattr                  vattr;
1558         struct cdev                  *dev;
1559         struct cdevsw                *devsw;
1560         int                           error;
1561         off_t                         ps, pss, po, pos;
1562
1563         params = &req->reqdata.create;
1564
1565         be_lun->dev_type = CTL_BE_BLOCK_DEV;
1566         be_lun->dispatch = ctl_be_block_dispatch_dev;
1567         be_lun->lun_flush = ctl_be_block_flush_dev;
1568         be_lun->unmap = ctl_be_block_unmap_dev;
1569         be_lun->backend.dev.cdev = be_lun->vn->v_rdev;
1570         be_lun->backend.dev.csw = dev_refthread(be_lun->backend.dev.cdev,
1571                                              &be_lun->backend.dev.dev_ref);
1572         if (be_lun->backend.dev.csw == NULL)
1573                 panic("Unable to retrieve device switch");
1574
1575         error = VOP_GETATTR(be_lun->vn, &vattr, NOCRED);
1576         if (error) {
1577                 snprintf(req->error_str, sizeof(req->error_str),
1578                          "%s: error getting vnode attributes for device %s",
1579                          __func__, be_lun->dev_path);
1580                 return (error);
1581         }
1582
1583         dev = be_lun->vn->v_rdev;
1584         devsw = dev->si_devsw;
1585         if (!devsw->d_ioctl) {
1586                 snprintf(req->error_str, sizeof(req->error_str),
1587                          "%s: no d_ioctl for device %s!", __func__,
1588                          be_lun->dev_path);
1589                 return (ENODEV);
1590         }
1591
1592         error = devsw->d_ioctl(dev, DIOCGSECTORSIZE,
1593                                (caddr_t)&be_lun->blocksize, FREAD,
1594                                curthread);
1595         if (error) {
1596                 snprintf(req->error_str, sizeof(req->error_str),
1597                          "%s: error %d returned for DIOCGSECTORSIZE ioctl "
1598                          "on %s!", __func__, error, be_lun->dev_path);
1599                 return (error);
1600         }
1601
1602         /*
1603          * If the user has asked for a blocksize that is greater than the
1604          * backing device's blocksize, we can do it only if the blocksize
1605          * the user is asking for is an even multiple of the underlying 
1606          * device's blocksize.
1607          */
1608         if ((params->blocksize_bytes != 0)
1609          && (params->blocksize_bytes > be_lun->blocksize)) {
1610                 uint32_t bs_multiple, tmp_blocksize;
1611
1612                 bs_multiple = params->blocksize_bytes / be_lun->blocksize;
1613
1614                 tmp_blocksize = bs_multiple * be_lun->blocksize;
1615
1616                 if (tmp_blocksize == params->blocksize_bytes) {
1617                         be_lun->blocksize = params->blocksize_bytes;
1618                 } else {
1619                         snprintf(req->error_str, sizeof(req->error_str),
1620                                  "%s: requested blocksize %u is not an even "
1621                                  "multiple of backing device blocksize %u",
1622                                  __func__, params->blocksize_bytes,
1623                                  be_lun->blocksize);
1624                         return (EINVAL);
1625                         
1626                 }
1627         } else if ((params->blocksize_bytes != 0)
1628                 && (params->blocksize_bytes != be_lun->blocksize)) {
1629                 snprintf(req->error_str, sizeof(req->error_str),
1630                          "%s: requested blocksize %u < backing device "
1631                          "blocksize %u", __func__, params->blocksize_bytes,
1632                          be_lun->blocksize);
1633                 return (EINVAL);
1634         }
1635
1636         error = devsw->d_ioctl(dev, DIOCGMEDIASIZE,
1637                                (caddr_t)&be_lun->size_bytes, FREAD,
1638                                curthread);
1639         if (error) {
1640                 snprintf(req->error_str, sizeof(req->error_str),
1641                          "%s: error %d returned for DIOCGMEDIASIZE ioctl "
1642                          "on %s!", __func__, error,
1643                          be_lun->dev_path);
1644                 return (error);
1645         }
1646
1647         if (params->lun_size_bytes != 0) {
1648                 if (params->lun_size_bytes > be_lun->size_bytes) {
1649                         snprintf(req->error_str, sizeof(req->error_str),
1650                                  "%s: requested LUN size %ju > backing device "
1651                                  "size %ju", __func__,
1652                                  (uintmax_t)params->lun_size_bytes,
1653                                  (uintmax_t)be_lun->size_bytes);
1654                         return (EINVAL);
1655                 }
1656
1657                 be_lun->size_bytes = params->lun_size_bytes;
1658         }
1659
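        /*
         * Ask the device for its stripe size and offset (both in bytes)
         * so that we can report physical block geometry to the
         * initiator.  A failed ioctl simply means that no stripe
         * information is available.
         */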
1660         error = devsw->d_ioctl(dev, DIOCGSTRIPESIZE,
1661                                (caddr_t)&ps, FREAD, curthread);
1662         if (error)
1663                 ps = po = 0;
1664         else {
1665                 error = devsw->d_ioctl(dev, DIOCGSTRIPEOFFSET,
1666                                        (caddr_t)&po, FREAD, curthread);
1667                 if (error)
1668                         po = 0;
1669         }
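        /*
         * Convert the stripe size and offset to units of logical blocks.
         * Only export the geometry if the stripe is a power-of-two number
         * of whole blocks and the offset is block-aligned and no larger
         * than the stripe: pblockexp is then log2(logical blocks per
         * physical block) and pblockoff is the lowest aligned LBA.
         */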
1670         pss = ps / be_lun->blocksize;
1671         pos = po / be_lun->blocksize;
1672         if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) &&
1673             ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) {
1674                 be_lun->pblockexp = fls(pss) - 1;
1675                 be_lun->pblockoff = (pss - pos) % pss;
1676         }
1677
1678         return (0);
1679 }
1680
1681 static int
1682 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
1683 {
1684         DROP_GIANT();
1685         if (be_lun->vn) {
1686                 int flags = FREAD | FWRITE;
1687
1688                 switch (be_lun->dev_type) {
1689                 case CTL_BE_BLOCK_DEV:
1690                         if (be_lun->backend.dev.csw) {
1691                                 dev_relthread(be_lun->backend.dev.cdev,
1692                                               be_lun->backend.dev.dev_ref);
1693                                 be_lun->backend.dev.csw  = NULL;
1694                                 be_lun->backend.dev.cdev = NULL;
1695                         }
1696                         break;
1697                 case CTL_BE_BLOCK_FILE:
1698                         break;
1699                 case CTL_BE_BLOCK_NONE:
1700                         break;
1701                 default:
1702                         panic("Unexpected backend type.");
1703                         break;
1704                 }
1705
1706                 (void)vn_close(be_lun->vn, flags, NOCRED, curthread);
1707                 be_lun->vn = NULL;
1708
1709                 switch (be_lun->dev_type) {
1710                 case CTL_BE_BLOCK_DEV:
1711                         break;
1712                 case CTL_BE_BLOCK_FILE:
1713                         if (be_lun->backend.file.cred != NULL) {
1714                                 crfree(be_lun->backend.file.cred);
1715                                 be_lun->backend.file.cred = NULL;
1716                         }
1717                         break;
1718                 case CTL_BE_BLOCK_NONE:
1719                         break;
1720                 default:
1721                         panic("Unexpected backend type.");
1722                         break;
1723                 }
1724         }
1725         PICKUP_GIANT();
1726
1727         return (0);
1728 }
1729
1730 static int
1731 ctl_be_block_open(struct ctl_be_block_softc *softc,
1732                        struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1733 {
1734         struct nameidata nd;
1735         int              flags;
1736         int              error;
1737
1738         /*
1739          * XXX KDM allow a read-only option?
1740          */
1741         flags = FREAD | FWRITE;
1742         error = 0;
1743
1744         if (rootvnode == NULL) {
1745                 snprintf(req->error_str, sizeof(req->error_str),
1746                          "%s: Root filesystem is not mounted", __func__);
1747                 return (1);
1748         }
1749
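        /*
         * The namei() lookup done by vn_open() below needs current, root
         * and jail directories.  If this thread's process has never had
         * them set (e.g. a request arriving from a kernel context during
         * boot), fall back to the root vnode.
         */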
1750         if (!curthread->td_proc->p_fd->fd_cdir) {
1751                 curthread->td_proc->p_fd->fd_cdir = rootvnode;
1752                 VREF(rootvnode);
1753         }
1754         if (!curthread->td_proc->p_fd->fd_rdir) {
1755                 curthread->td_proc->p_fd->fd_rdir = rootvnode;
1756                 VREF(rootvnode);
1757         }
1758         if (!curthread->td_proc->p_fd->fd_jdir) {
1759                 curthread->td_proc->p_fd->fd_jdir = rootvnode;
1760                 VREF(rootvnode);
1761         }
1762
1763  again:
1764         NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
1765         error = vn_open(&nd, &flags, 0, NULL);
1766         if (error) {
1767                 /*
1768                  * If the user didn't give us a fully qualified path,
1769                  * prepending /dev/ is the only reasonable guess we can
1770                  * make.  Anyone who wants a plain file backing store
1771                  * has to specify the full path.
1772                  */
1773                 if (be_lun->dev_path[0] != '/') {
1774                         char *dev_path = "/dev/";
1775                         char *dev_name;
1776
1777                         /* Try adding device path at beginning of name */
1778                         dev_name = malloc(strlen(be_lun->dev_path)
1779                                         + strlen(dev_path) + 1,
1780                                           M_CTLBLK, M_WAITOK);
1781                         if (dev_name) {
1782                                 sprintf(dev_name, "%s%s", dev_path,
1783                                         be_lun->dev_path);
1784                                 free(be_lun->dev_path, M_CTLBLK);
1785                                 be_lun->dev_path = dev_name;
1786                                 goto again;
1787                         }
1788                 }
1789                 snprintf(req->error_str, sizeof(req->error_str),
1790                          "%s: error opening %s", __func__, be_lun->dev_path);
1791                 return (error);
1792         }
1793
1794         NDFREE(&nd, NDF_ONLY_PNBUF);
1795                 
1796         be_lun->vn = nd.ni_vp;
1797
1798         /* We only support disks and files. */
1799         if (vn_isdisk(be_lun->vn, &error)) {
1800                 error = ctl_be_block_open_dev(be_lun, req);
1801         } else if (be_lun->vn->v_type == VREG) {
1802                 error = ctl_be_block_open_file(be_lun, req);
1803         } else {
1804                 error = EINVAL;
1805                 snprintf(req->error_str, sizeof(req->error_str),
1806                          "%s is not a disk or plain file", be_lun->dev_path);
1807         }
1808         VOP_UNLOCK(be_lun->vn, 0);
1809
1810         if (error != 0) {
1811                 ctl_be_block_close(be_lun);
1812                 return (error);
1813         }
1814
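        /*
         * Note that the shift-based conversions below assume that the
         * blocksize is a power of 2; that holds for the 512 byte default
         * and for any normal device sector size.
         */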
1815         be_lun->blocksize_shift = fls(be_lun->blocksize) - 1;
1816         be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift;
1817
1818         return (0);
1819 }
1820
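/*
 * Create a new LUN.  The backing store comes from the "file" backend
 * argument; everything else (size, blocksize, serial number, device ID,
 * worker thread count) is taken from the request or given a default.
 */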
1821 static int
1822 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
1823 {
1824         struct ctl_be_block_lun *be_lun;
1825         struct ctl_lun_create_params *params;
1826         struct ctl_be_arg *file_arg;
1828         char tmpstr[32];
1829         char *value;
1830         int retval, num_threads, unmap;
1831         int i;
1832         int tmp_num_threads;
1833
1834         params = &req->reqdata.create;
1835         retval = 0;
1836
1837         num_threads = cbb_num_threads;
1838
1839         file_arg = NULL;
1840
1841         be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
1842
1843         be_lun->softc = softc;
1844         STAILQ_INIT(&be_lun->input_queue);
1845         STAILQ_INIT(&be_lun->config_write_queue);
1846         STAILQ_INIT(&be_lun->datamove_queue);
1847         sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
1848         mtx_init(&be_lun->lock, be_lun->lunname, NULL, MTX_DEF);
1849         ctl_init_opts(&be_lun->ctl_be_lun, req);
1850
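        /*
         * Data buffers for this LUN's I/O are carved from a private UMA
         * zone, in segments of up to CTLBLK_MAX_SEG bytes each.
         */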
1851         be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
1852             NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
1853
1854         if (be_lun->lun_zone == NULL) {
1855                 snprintf(req->error_str, sizeof(req->error_str),
1856                          "%s: error allocating UMA zone", __func__);
1857                 goto bailout_error;
1858         }
1859
1860         if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
1861                 be_lun->ctl_be_lun.lun_type = params->device_type;
1862         else
1863                 be_lun->ctl_be_lun.lun_type = T_DIRECT;
1864
1865         if (be_lun->ctl_be_lun.lun_type == T_DIRECT) {
1866                 for (i = 0; i < req->num_be_args; i++) {
1867                         if (strcmp(req->kern_be_args[i].kname, "file") == 0) {
1868                                 file_arg = &req->kern_be_args[i];
1869                                 break;
1870                         }
1871                 }
1872
1873                 if (file_arg == NULL) {
1874                         snprintf(req->error_str, sizeof(req->error_str),
1875                                  "%s: no file argument specified", __func__);
1876                         goto bailout_error;
1877                 }
1878
1879                 be_lun->dev_path = malloc(file_arg->vallen, M_CTLBLK,
1880                                           M_WAITOK | M_ZERO);
1881
1882                 strlcpy(be_lun->dev_path, (char *)file_arg->kvalue,
1883                         file_arg->vallen);
1884
1885                 retval = ctl_be_block_open(softc, be_lun, req);
1886                 if (retval != 0) {
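                        /*
                         * The open routine already filled in req->error_str;
                         * clear retval so the ioctl itself succeeds and the
                         * failure is reported through req->status instead.
                         */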
1887                         retval = 0;
1888                         goto bailout_error;
1889                 }
1890
1891                 /*
1892                  * Tell the user the size of the file/device.
1893                  */
1894                 params->lun_size_bytes = be_lun->size_bytes;
1895
1896                 /*
1897                  * The maximum LBA is the size - 1.
1898                  */
1899                 be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1;
1900         } else {
1901                 /*
1902                  * For processor devices, we don't have any size.
1903                  */
1904                 be_lun->blocksize = 0;
1905                 be_lun->pblockexp = 0;
1906                 be_lun->pblockoff = 0;
1907                 be_lun->size_blocks = 0;
1908                 be_lun->size_bytes = 0;
1909                 be_lun->ctl_be_lun.maxlba = 0;
1910                 params->lun_size_bytes = 0;
1911
1912                 /*
1913                  * Default to just 1 thread for processor devices.
1914                  */
1915                 num_threads = 1;
1916         }
1917
1918         /*
1919          * XXX This searching loop might be refactored to be combined with
1920          * the loop above.
1921          */
1922         value = ctl_get_opt(&be_lun->ctl_be_lun, "num_threads");
1923         if (value != NULL) {
1924                 tmp_num_threads = strtol(value, NULL, 0);
1925
1926                 /*
1927                  * We don't let the user specify less than one
1928                  * thread, but hope he's clueful enough not to
1929                  * specify 1000 threads.
1930                  */
1931                 if (tmp_num_threads < 1) {
1932                         snprintf(req->error_str, sizeof(req->error_str),
1933                                  "%s: invalid number of threads %s",
1934                                  __func__, value);
1935                         goto bailout_error;
1936                 }
1937                 num_threads = tmp_num_threads;
1938         }
1939         unmap = 0;
1940         value = ctl_get_opt(&be_lun->ctl_be_lun, "unmap");
1941         if (value != NULL && strcmp(value, "on") == 0)
1942                 unmap = 1;
1943
1944         be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
1945         be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY;
1946         if (unmap)
1947                 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP;
1948         be_lun->ctl_be_lun.be_lun = be_lun;
1949         be_lun->ctl_be_lun.blocksize = be_lun->blocksize;
1950         be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp;
1951         be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff;
1952         /* Tell the user the blocksize we ended up using */
1953         params->blocksize_bytes = be_lun->blocksize;
1954         if (params->flags & CTL_LUN_FLAG_ID_REQ) {
1955                 be_lun->ctl_be_lun.req_lun_id = params->req_lun_id;
1956                 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ;
1957         } else
1958                 be_lun->ctl_be_lun.req_lun_id = 0;
1959
1960         be_lun->ctl_be_lun.lun_shutdown = ctl_be_block_lun_shutdown;
1961         be_lun->ctl_be_lun.lun_config_status =
1962                 ctl_be_block_lun_config_status;
1963         be_lun->ctl_be_lun.be = &ctl_be_block_driver;
1964
1965         if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
1966                 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
1967                          softc->num_luns);
1968                 strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr,
1969                         ctl_min(sizeof(be_lun->ctl_be_lun.serial_num),
1970                         sizeof(tmpstr)));
1971
1972                 /* Tell the user what we used for a serial number */
1973                 strncpy((char *)params->serial_num, tmpstr,
1974                         ctl_min(sizeof(params->serial_num), sizeof(tmpstr)));
1975         } else { 
1976                 strncpy((char *)be_lun->ctl_be_lun.serial_num,
1977                         params->serial_num,
1978                         ctl_min(sizeof(be_lun->ctl_be_lun.serial_num),
1979                         sizeof(params->serial_num)));
1980         }
1981         if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
1982                 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
1983                 strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr,
1984                         ctl_min(sizeof(be_lun->ctl_be_lun.device_id),
1985                         sizeof(tmpstr)));
1986
1987                 /* Tell the user what we used for a device ID */
1988                 strncpy((char *)params->device_id, tmpstr,
1989                         ctl_min(sizeof(params->device_id), sizeof(tmpstr)));
1990         } else {
1991                 strncpy((char *)be_lun->ctl_be_lun.device_id,
1992                         params->device_id,
1993                         ctl_min(sizeof(be_lun->ctl_be_lun.device_id),
1994                                 sizeof(params->device_id)));
1995         }
1996
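        /*
         * The worker runs as a task on a dedicated per-LUN taskqueue.
         * Passing a pointer to the (not yet created) taskqueue as the
         * enqueue context is the usual taskqueue_thread_enqueue idiom;
         * the queue's threads are started further below.
         */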
1997         TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
1998
1999         be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
2000             taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2001
2002         if (be_lun->io_taskqueue == NULL) {
2003                 snprintf(req->error_str, sizeof(req->error_str),
2004                          "%s: Unable to create taskqueue", __func__);
2005                 goto bailout_error;
2006         }
2007
2008         /*
2009          * Note that we start the same number of threads by default for
2010          * both the file case and the block device case.  For the file
2011          * case, we need multiple threads to allow concurrency, because the
2012          * vnode interface is designed to be a blocking interface.  For the
2013          * block device case, ZFS zvols at least will block the caller's
2014          * context in many instances, and so we need multiple threads to
2015          * overcome that problem.  Other block devices don't need as many
2016          * threads, but they shouldn't cause too many problems.
2017          *
2018          * If the user wants to just have a single thread for a block
2019          * device, he can specify that when the LUN is created, or change
2020          * the tunable/sysctl to alter the default number of threads.
2021          */
2022         retval = taskqueue_start_threads(&be_lun->io_taskqueue,
2023                                          /*num threads*/num_threads,
2024                                          /*priority*/PWAIT,
2025                                          /*thread name*/
2026                                          "%s taskq", be_lun->lunname);
2027
2028         if (retval != 0)
2029                 goto bailout_error;
2030
2031         be_lun->num_threads = num_threads;
2032
2033         mtx_lock(&softc->lock);
2034         softc->num_luns++;
2035         STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2036
2037         mtx_unlock(&softc->lock);
2038
2039         retval = ctl_add_lun(&be_lun->ctl_be_lun);
2040         if (retval != 0) {
2041                 mtx_lock(&softc->lock);
2042                 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2043                               links);
2044                 softc->num_luns--;
2045                 mtx_unlock(&softc->lock);
2046                 snprintf(req->error_str, sizeof(req->error_str),
2047                          "%s: ctl_add_lun() returned error %d, see dmesg for "
2048                          "details", __func__, retval);
2049                 retval = 0;
2050                 goto bailout_error;
2051         }
2052
2053         mtx_lock(&softc->lock);
2054
2055         /*
2056          * Tell the config_status routine that we're waiting so it won't
2057          * clean up the LUN in the event of an error.
2058          */
2059         be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2060
2061         while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2062                 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2063                 if (retval == EINTR)
2064                         break;
2065         }
2066         be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2067
2068         if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2069                 snprintf(req->error_str, sizeof(req->error_str),
2070                          "%s: LUN configuration error, see dmesg for details",
2071                          __func__);
2072                 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2073                               links);
2074                 softc->num_luns--;
2075                 mtx_unlock(&softc->lock);
2076                 goto bailout_error;
2077         } else {
2078                 params->req_lun_id = be_lun->ctl_be_lun.lun_id;
2079         }
2080
2081         mtx_unlock(&softc->lock);
2082
2083         be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2084                                                be_lun->blocksize,
2085                                                DEVSTAT_ALL_SUPPORTED,
2086                                                be_lun->ctl_be_lun.lun_type
2087                                                | DEVSTAT_TYPE_IF_OTHER,
2088                                                DEVSTAT_PRIORITY_OTHER);
2089
2090
2091         req->status = CTL_LUN_OK;
2092
2093         return (retval);
2094
2095 bailout_error:
2096         req->status = CTL_LUN_ERROR;
2097
2098         if (be_lun->io_taskqueue != NULL)
2099                 taskqueue_free(be_lun->io_taskqueue);
2100         ctl_be_block_close(be_lun);
2101         if (be_lun->dev_path != NULL)
2102                 free(be_lun->dev_path, M_CTLBLK);
2103         if (be_lun->lun_zone != NULL)
2104                 uma_zdestroy(be_lun->lun_zone);
2105         ctl_free_opts(&be_lun->ctl_be_lun);
2106         mtx_destroy(&be_lun->lock);
2107         free(be_lun, M_CTLBLK);
2108
2109         return (retval);
2110 }
2111
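/*
 * Tear down a LUN, e.g. in response to "ctladm remove -b block -l <id>":
 * disable and invalidate it in the CTL core, wait for the shutdown
 * callback to fire, then release the backend resources.
 */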
2112 static int
2113 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2114 {
2115         struct ctl_lun_rm_params *params;
2116         struct ctl_be_block_lun *be_lun;
2117         int retval;
2118
2119         params = &req->reqdata.rm;
2120
2121         mtx_lock(&softc->lock);
2122
2123         be_lun = NULL;
2124
2125         STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2126                 if (be_lun->ctl_be_lun.lun_id == params->lun_id)
2127                         break;
2128         }
2129         mtx_unlock(&softc->lock);
2130
2131         if (be_lun == NULL) {
2132                 snprintf(req->error_str, sizeof(req->error_str),
2133                          "%s: LUN %u is not managed by the block backend",
2134                          __func__, params->lun_id);
2135                 goto bailout_error;
2136         }
2137
2138         retval = ctl_disable_lun(&be_lun->ctl_be_lun);
2139
2140         if (retval != 0) {
2141                 snprintf(req->error_str, sizeof(req->error_str),
2142                          "%s: error %d returned from ctl_disable_lun() for "
2143                          "LUN %u", __func__, retval, params->lun_id);
2144                 goto bailout_error;
2145
2146         }
2147
2148         retval = ctl_invalidate_lun(&be_lun->ctl_be_lun);
2149         if (retval != 0) {
2150                 snprintf(req->error_str, sizeof(req->error_str),
2151                          "%s: error %d returned from ctl_invalidate_lun() for "
2152                          "LUN %u", __func__, retval, params->lun_id);
2153                 goto bailout_error;
2154         }
2155
2156         mtx_lock(&softc->lock);
2157
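        /*
         * Flag that we're waiting, then sleep until lun_shutdown() marks
         * the LUN unconfigured and wakes us up.  PCATCH lets a signal
         * interrupt the wait; that case is reported just below.
         */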
2158         be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2159
2160         while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2161                 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2162                 if (retval == EINTR)
2163                         break;
2164         }
2165
2166         be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2167
2168         if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2169                 snprintf(req->error_str, sizeof(req->error_str),
2170                          "%s: interrupted waiting for LUN to be freed", 
2171                          __func__);
2172                 mtx_unlock(&softc->lock);
2173                 goto bailout_error;
2174         }
2175
2176         STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);
2177
2178         softc->num_luns--;
2179         mtx_unlock(&softc->lock);
2180
2181         taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task);
2182
2183         taskqueue_free(be_lun->io_taskqueue);
2184
2185         ctl_be_block_close(be_lun);
2186
2187         if (be_lun->disk_stats != NULL)
2188                 devstat_remove_entry(be_lun->disk_stats);
2189
2190         uma_zdestroy(be_lun->lun_zone);
2191
2192         ctl_free_opts(&be_lun->ctl_be_lun);
2193         free(be_lun->dev_path, M_CTLBLK);
2194
2195         free(be_lun, M_CTLBLK);
2196
2197         req->status = CTL_LUN_OK;
2198
2199         return (0);
2200
2201 bailout_error:
2202
2203         req->status = CTL_LUN_ERROR;
2204
2205         return (0);
2206 }
2207
2208 static int
2209 ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
2210                          struct ctl_lun_req *req)
2211 {
2212         struct vattr vattr;
2213         int error;
2214         struct ctl_lun_modify_params *params;
2215
2216         params = &req->reqdata.modify;
2217
2218         if (params->lun_size_bytes != 0) {
2219                 be_lun->size_bytes = params->lun_size_bytes;
2220         } else  {
2221                 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
2222                 if (error != 0) {
2223                         snprintf(req->error_str, sizeof(req->error_str),
2224                                  "%s: error calling VOP_GETATTR() for file %s",
2225                                  __func__, be_lun->dev_path);
2226                         return (error);
2227                 }
2228
2229                 be_lun->size_bytes = vattr.va_size;
2230         }
2231
2232         return (0);
2233 }
2234
2235 static int
2236 ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
2237                         struct ctl_lun_req *req)
2238 {
2239         struct cdev *dev;
2240         struct cdevsw *devsw;
2241         int error;
2242         struct ctl_lun_modify_params *params;
2243         uint64_t size_bytes;
2244
2245         params = &req->reqdata.modify;
2246
2247         dev = be_lun->vn->v_rdev;
2248         devsw = dev->si_devsw;
2249         if (!devsw->d_ioctl) {
2250                 snprintf(req->error_str, sizeof(req->error_str),
2251                          "%s: no d_ioctl for device %s!", __func__,
2252                          be_lun->dev_path);
2253                 return (ENODEV);
2254         }
2255
2256         error = devsw->d_ioctl(dev, DIOCGMEDIASIZE,
2257                                (caddr_t)&size_bytes, FREAD,
2258                                curthread);
2259         if (error) {
2260                 snprintf(req->error_str, sizeof(req->error_str),
2261                          "%s: error %d returned for DIOCGMEDIASIZE ioctl "
2262                          "on %s!", __func__, error, be_lun->dev_path);
2263                 return (error);
2264         }
2265
2266         if (params->lun_size_bytes != 0) {
2267                 if (params->lun_size_bytes > size_bytes) {
2268                         snprintf(req->error_str, sizeof(req->error_str),
2269                                  "%s: requested LUN size %ju > backing device "
2270                                  "size %ju", __func__,
2271                                  (uintmax_t)params->lun_size_bytes,
2272                                  (uintmax_t)size_bytes);
2273                         return (EINVAL);
2274                 }
2275
2276                 be_lun->size_bytes = params->lun_size_bytes;
2277         } else {
2278                 be_lun->size_bytes = size_bytes;
2279         }
2280
2281         return (0);
2282 }
2283
2284 static int
2285 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2286 {
2287         struct ctl_lun_modify_params *params;
2288         struct ctl_be_block_lun *be_lun;
2289         int error;
2290
2291         params = &req->reqdata.modify;
2292
2293         mtx_lock(&softc->lock);
2294
2295         be_lun = NULL;
2296
2297         STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2298                 if (be_lun->ctl_be_lun.lun_id == params->lun_id)
2299                         break;
2300         }
2301         mtx_unlock(&softc->lock);
2302
2303         if (be_lun == NULL) {
2304                 snprintf(req->error_str, sizeof(req->error_str),
2305                          "%s: LUN %u is not managed by the block backend",
2306                          __func__, params->lun_id);
2307                 goto bailout_error;
2308         }
2309
2310         if (params->lun_size_bytes != 0) {
2311                 if (params->lun_size_bytes < be_lun->blocksize) {
2312                         snprintf(req->error_str, sizeof(req->error_str),
2313                                 "%s: LUN size %ju < blocksize %u", __func__,
2314                                 (uintmax_t)params->lun_size_bytes, be_lun->blocksize);
2315                         goto bailout_error;
2316                 }
2317         }
2318
2319         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2320
2321         if (be_lun->vn->v_type == VREG)
2322                 error = ctl_be_block_modify_file(be_lun, req);
2323         else
2324                 error = ctl_be_block_modify_dev(be_lun, req);
2325
2326         VOP_UNLOCK(be_lun->vn, 0);
2327
2328         if (error != 0)
2329                 goto bailout_error;
2330
2331         be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift;
2332
2333         /*
2334          * The maximum LBA is the size - 1.
2335          *
2336          * XXX: Note that this field is being updated without locking,
2337          *      which might cause problems on 32-bit architectures.
2338          */
2339         be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1;
2340         ctl_lun_capacity_changed(&be_lun->ctl_be_lun);
2341
2342         /* Tell the user the exact size we ended up using */
2343         params->lun_size_bytes = be_lun->size_bytes;
2344
2345         req->status = CTL_LUN_OK;
2346
2347         return (0);
2348
2349 bailout_error:
2350         req->status = CTL_LUN_ERROR;
2351
2352         return (0);
2353 }
2354
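/*
 * Called back by the CTL core as the LUN is torn down; mark it
 * unconfigured and wake anyone (create or remove) waiting on that state.
 */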
2355 static void
2356 ctl_be_block_lun_shutdown(void *be_lun)
2357 {
2358         struct ctl_be_block_lun *lun;
2359         struct ctl_be_block_softc *softc;
2360
2361         lun = (struct ctl_be_block_lun *)be_lun;
2362
2363         softc = lun->softc;
2364
2365         mtx_lock(&softc->lock);
2366         lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2367         if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2368                 wakeup(lun);
2369         mtx_unlock(&softc->lock);
2370
2371 }
2372
2373 static void
2374 ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2375 {
2376         struct ctl_be_block_lun *lun;
2377         struct ctl_be_block_softc *softc;
2378
2379         lun = (struct ctl_be_block_lun *)be_lun;
2380         softc = lun->softc;
2381
2382         if (status == CTL_LUN_CONFIG_OK) {
2383                 mtx_lock(&softc->lock);
2384                 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2385                 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2386                         wakeup(lun);
2387                 mtx_unlock(&softc->lock);
2388
2389                 /*
2390                  * We successfully added the LUN, attempt to enable it.
2391                  */
2392                 if (ctl_enable_lun(&lun->ctl_be_lun) != 0) {
2393                         printf("%s: ctl_enable_lun() failed!\n", __func__);
2394                         if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) {
2395                                 printf("%s: ctl_invalidate_lun() failed!\n",
2396                                        __func__);
2397                         }
2398                 }
2399
2400                 return;
2401         }
2402
2403
2404         mtx_lock(&softc->lock);
2405         lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2406         lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2407         wakeup(lun);
2408         mtx_unlock(&softc->lock);
2409 }
2410
2411
2412 static int
2413 ctl_be_block_config_write(union ctl_io *io)
2414 {
2415         struct ctl_be_block_lun *be_lun;
2416         struct ctl_be_lun *ctl_be_lun;
2417         int retval;
2418
2419         retval = 0;
2420
2421         DPRINTF("entered\n");
2422
2423         ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2424                 CTL_PRIV_BACKEND_LUN].ptr;
2425         be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;
2426
2427         switch (io->scsiio.cdb[0]) {
2428         case SYNCHRONIZE_CACHE:
2429         case SYNCHRONIZE_CACHE_16:
2430         case WRITE_SAME_10:
2431         case WRITE_SAME_16:
2432         case UNMAP:
2433                 /*
2434                  * The upper level CTL code will filter out any CDBs with
2435                  * the immediate bit set and return the proper error.
2436                  *
2437                  * We don't really need to worry about what LBA range the
2438                  * user asked to be synced out.  When they issue a sync
2439                  * cache command, we'll sync out the whole thing.
2440                  */
2441                 mtx_lock(&be_lun->lock);
2442                 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2443                                    links);
2444                 mtx_unlock(&be_lun->lock);
2445                 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2446                 break;
2447         case START_STOP_UNIT: {
2448                 struct scsi_start_stop_unit *cdb;
2449
2450                 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2451
2452                 if (cdb->how & SSS_START)
2453                         retval = ctl_start_lun(ctl_be_lun);
2454                 else {
2455                         retval = ctl_stop_lun(ctl_be_lun);
2456                         /*
2457                          * XXX KDM Copan-specific offline behavior.
2458                          * Figure out a reasonable way to port this?
2459                          */
2460 #ifdef NEEDTOPORT
2461                         if ((retval == 0)
2462                          && (cdb->byte2 & SSS_ONOFFLINE))
2463                                 retval = ctl_lun_offline(ctl_be_lun);
2464 #endif
2465                 }
2466
2467                 /*
2468                  * In general, the above routines should not fail.  They
2469                  * just set state for the LUN.  So we've got something
2470                  * pretty wrong here if we can't start or stop the LUN.
2471                  */
2472                 if (retval != 0) {
2473                         ctl_set_internal_failure(&io->scsiio,
2474                                                  /*sks_valid*/ 1,
2475                                                  /*retry_count*/ 0xf051);
2476                         retval = CTL_RETVAL_COMPLETE;
2477                 } else {
2478                         ctl_set_success(&io->scsiio);
2479                 }
2480                 ctl_config_write_done(io);
2481                 break;
2482         }
2483         default:
2484                 ctl_set_invalid_opcode(&io->scsiio);
2485                 ctl_config_write_done(io);
2486                 retval = CTL_RETVAL_COMPLETE;
2487                 break;
2488         }
2489
2490         return (retval);
2491
2492 }
2493
2494 static int
2495 ctl_be_block_config_read(union ctl_io *io)
2496 {
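        /*
         * This backend has no config read commands to handle yet, so
         * there is nothing to do here.
         */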
2497         return (0);
2498 }
2499
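/*
 * Append backend-specific status (the worker thread count and the backing
 * file or device path) as XML elements to the LUN list that the CTL core
 * builds for userland, e.g. for "ctladm devlist -v".
 */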
2500 static int
2501 ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2502 {
2503         struct ctl_be_block_lun *lun;
2504         int retval;
2505
2506         lun = (struct ctl_be_block_lun *)be_lun;
2507         retval = 0;
2508
2509         retval = sbuf_printf(sb, "<num_threads>");
2510
2511         if (retval != 0)
2512                 goto bailout;
2513
2514         retval = sbuf_printf(sb, "%d", lun->num_threads);
2515
2516         if (retval != 0)
2517                 goto bailout;
2518
2519         retval = sbuf_printf(sb, "</num_threads>");
2520
2521         /*
2522          * For processor devices, we don't have a path variable.
2523          */
2524         if ((retval != 0)
2525          || (lun->dev_path == NULL))
2526                 goto bailout;
2527
2528         retval = sbuf_printf(sb, "<file>");
2529
2530         if (retval != 0)
2531                 goto bailout;
2532
2533         retval = ctl_sbuf_printf_esc(sb, lun->dev_path);
2534
2535         if (retval != 0)
2536                 goto bailout;
2537
2538         retval = sbuf_printf(sb, "</file>\n");
2539
2540 bailout:
2541
2542         return (retval);
2543 }
2544
2545 int
2546 ctl_be_block_init(void)
2547 {
2548         struct ctl_be_block_softc *softc;
2549         int retval;
2550
2551         softc = &backend_block_softc;
2552         retval = 0;
2553
2554         mtx_init(&softc->lock, "ctlblk", NULL, MTX_DEF);
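        /* Per-I/O state (struct ctl_be_block_io) is allocated from this zone. */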
2555         beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2556             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2557         STAILQ_INIT(&softc->disk_list);
2558         STAILQ_INIT(&softc->lun_list);
2559
2560         return (retval);
2561 }