/*-
 * Copyright (c) 1997-2007 Kenneth D. Merry
 * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Ken Merry           (Spectra Logic Corporation)
 */

/*
 * This is eventually intended to be:
 * - A basic data transfer/copy utility
 * - A simple benchmark utility
 * - An example of how to use the asynchronous pass(4) driver interface.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/ioctl.h>
#include <sys/stdint.h>
#include <sys/types.h>
#include <sys/endian.h>
#include <sys/param.h>
#include <sys/sbuf.h>
#include <sys/stat.h>
#include <sys/event.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <vm/vm.h>
#include <sys/bus.h>
#include <sys/bus_dma.h>
#include <sys/mtio.h>
#include <sys/conf.h>
#include <sys/disk.h>

#include <stdio.h>
#include <stdlib.h>
#include <semaphore.h>
#include <string.h>
#include <unistd.h>
#include <inttypes.h>
#include <limits.h>
#include <fcntl.h>
#include <ctype.h>
#include <err.h>
#include <libutil.h>
#include <pthread.h>
#include <assert.h>
#include <bsdxml.h>

#include <cam/cam.h>
#include <cam/cam_debug.h>
#include <cam/cam_ccb.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/scsi/scsi_pass.h>
#include <cam/scsi/scsi_message.h>
#include <cam/scsi/smp_all.h>
#include <cam/nvme/nvme_all.h>
#include <camlib.h>
#include <mtlib.h>
#include <zlib.h>

typedef enum {
        CAMDD_CMD_NONE          = 0x00000000,
        CAMDD_CMD_HELP          = 0x00000001,
        CAMDD_CMD_WRITE         = 0x00000002,
        CAMDD_CMD_READ          = 0x00000003
} camdd_cmdmask;
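
/*
 * Note: despite the "mask" in the name, these are sequential command
 * codes rather than independent flag bits -- CAMDD_CMD_READ (0x03) is
 * CAMDD_CMD_HELP | CAMDD_CMD_WRITE bit-wise, so values of this type
 * should be compared for equality, not OR'd together.
 */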

typedef enum {
        CAMDD_ARG_NONE          = 0x00000000,
        CAMDD_ARG_VERBOSE       = 0x00000001,
        CAMDD_ARG_DEVICE        = 0x00000002,
        CAMDD_ARG_BUS           = 0x00000004,
        CAMDD_ARG_TARGET        = 0x00000008,
        CAMDD_ARG_LUN           = 0x00000010,
        CAMDD_ARG_UNIT          = 0x00000020,
        CAMDD_ARG_TIMEOUT       = 0x00000040,
        CAMDD_ARG_ERR_RECOVER   = 0x00000080,
        CAMDD_ARG_RETRIES       = 0x00000100
} camdd_argmask;

typedef enum {
        CAMDD_DEV_NONE          = 0x00,
        CAMDD_DEV_PASS          = 0x01,
        CAMDD_DEV_FILE          = 0x02
} camdd_dev_type;

struct camdd_io_opts {
        camdd_dev_type  dev_type;
        char            *dev_name;
        uint64_t        blocksize;
        uint64_t        queue_depth;
        uint64_t        offset;
        int             min_cmd_size;
        int             write_dev;
        uint64_t        debug;
};
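
/*
 * Illustrative example (hypothetical values; the exact option names are
 * handled by camdd_parse_io_opts(), outside this excerpt): an input spec
 * such as
 *
 *      -i pass=da0,bs=1M,depth=4
 *
 * would be expected to yield dev_type = CAMDD_DEV_PASS, dev_name = "da0",
 * blocksize = 1048576, queue_depth = 4 and write_dev = 0.
 */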

typedef enum {
        CAMDD_BUF_NONE,
        CAMDD_BUF_DATA,
        CAMDD_BUF_INDIRECT
} camdd_buf_type;

struct camdd_buf_indirect {
        /*
         * Pointer to the source buffer.
         */
        struct camdd_buf *src_buf;

        /*
         * Offset into the source buffer, in bytes.
         */
        uint64_t          offset;
        /*
         * Pointer to the starting point in the source buffer.
         */
        uint8_t          *start_ptr;

        /*
         * Length of this chunk in bytes.
         */
        size_t            len;
};

struct camdd_buf_data {
        /*
         * Buffer allocated when we allocate this camdd_buf.  This should
         * be the size of the blocksize for this device.
         */
        uint8_t                 *buf;

        /*
         * The amount of backing store allocated in buf.  Generally this
         * will be the blocksize of the device.
         */
        uint32_t                 alloc_len;

        /*
         * The amount of data that was put into the buffer (on reads) or
         * the amount of data we have put onto the src_list so far (on
         * writes).
         */
        uint32_t                 fill_len;

        /*
         * The amount of data that was not transferred.
         */
        uint32_t                 resid;

        /*
         * Starting byte offset on the reader.
         */
        uint64_t                 src_start_offset;

        /*
         * CCB used for pass(4) device targets.
         */
        union ccb                ccb;

        /*
         * Number of scatter/gather segments.
         */
        int                      sg_count;

        /*
         * Set if we had to tack on an extra buffer to round the transfer
         * up to a sector size.
         */
        int                      extra_buf;

        /*
         * Scatter/gather list used generally when we're the writer for a
         * pass(4) device.
         */
        bus_dma_segment_t       *segs;

        /*
         * Scatter/gather list used generally when we're the writer for a
         * file or block device.
         */
        struct iovec            *iovec;
};

union camdd_buf_types {
        struct camdd_buf_indirect       indirect;
        struct camdd_buf_data           data;
};

typedef enum {
        CAMDD_STATUS_NONE,
        CAMDD_STATUS_OK,
        CAMDD_STATUS_SHORT_IO,
        CAMDD_STATUS_EOF,
        CAMDD_STATUS_ERROR
} camdd_buf_status;

struct camdd_buf {
        camdd_buf_type           buf_type;
        union camdd_buf_types    buf_type_spec;

        camdd_buf_status         status;

        uint64_t                 lba;
        size_t                   len;

        /*
         * A reference count of how many indirect buffers point to this
         * buffer.
         */
        int                      refcount;

        /*
         * A link back to our parent device.
         */
        struct camdd_dev        *dev;
        STAILQ_ENTRY(camdd_buf)  links;
        STAILQ_ENTRY(camdd_buf)  work_links;

        /*
         * A count of the buffers on the src_list.
         */
        int                      src_count;

        /*
         * List of buffers from our partner thread that are the components
         * of this buffer for the I/O.  Uses src_links.
         */
        STAILQ_HEAD(,camdd_buf)  src_list;
        STAILQ_ENTRY(camdd_buf)  src_links;
};
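
/*
 * How the two flavors relate: a CAMDD_BUF_DATA buffer owns real backing
 * store, while a CAMDD_BUF_INDIRECT buffer is a zero-copy window
 * (start_ptr/len) into someone else's data buffer.  When one side's
 * blocksize is smaller than its peer's, a large data buffer is carved
 * into indirect chunks, and the data buffer's refcount tracks how many
 * indirect buffers still point at it.
 */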

#define NUM_DEV_TYPES   2

struct camdd_dev_pass {
        int                      scsi_dev_type;
        int                      protocol;
        struct cam_device       *dev;
        uint64_t                 max_sector;
        uint32_t                 block_len;
        uint32_t                 cpi_maxio;
};

typedef enum {
        CAMDD_FILE_NONE,
        CAMDD_FILE_REG,
        CAMDD_FILE_STD,
        CAMDD_FILE_PIPE,
        CAMDD_FILE_DISK,
        CAMDD_FILE_TAPE,
        CAMDD_FILE_TTY,
        CAMDD_FILE_MEM
} camdd_file_type;

typedef enum {
        CAMDD_FF_NONE           = 0x00,
        CAMDD_FF_CAN_SEEK       = 0x01
} camdd_file_flags;

struct camdd_dev_file {
        int                      fd;
        struct stat              sb;
        char                     filename[MAXPATHLEN + 1];
        camdd_file_type          file_type;
        camdd_file_flags         file_flags;
        uint8_t                 *tmp_buf;
};

struct camdd_dev_block {
        int                      fd;
        uint64_t                 size_bytes;
        uint32_t                 block_len;
};

union camdd_dev_spec {
        struct camdd_dev_pass   pass;
        struct camdd_dev_file   file;
        struct camdd_dev_block  block;
};

typedef enum {
        CAMDD_DEV_FLAG_NONE             = 0x00,
        CAMDD_DEV_FLAG_EOF              = 0x01,
        CAMDD_DEV_FLAG_PEER_EOF         = 0x02,
        CAMDD_DEV_FLAG_ACTIVE           = 0x04,
        CAMDD_DEV_FLAG_EOF_SENT         = 0x08,
        CAMDD_DEV_FLAG_EOF_QUEUED       = 0x10
} camdd_dev_flags;

struct camdd_dev {
        camdd_dev_type           dev_type;
        union camdd_dev_spec     dev_spec;
        camdd_dev_flags          flags;
        char                     device_name[MAXPATHLEN+1];
        uint32_t                 blocksize;
        uint32_t                 sector_size;
        uint64_t                 max_sector;
        uint64_t                 sector_io_limit;
        int                      min_cmd_size;
        int                      write_dev;
        int                      retry_count;
        int                      io_timeout;
        int                      debug;
        uint64_t                 start_offset_bytes;
        uint64_t                 next_io_pos_bytes;
        uint64_t                 next_peer_pos_bytes;
        uint64_t                 next_completion_pos_bytes;
        uint64_t                 peer_bytes_queued;
        uint64_t                 bytes_transferred;
        uint32_t                 target_queue_depth;
        uint32_t                 cur_active_io;
        uint8_t                 *extra_buf;
        uint32_t                 extra_buf_len;
        struct camdd_dev        *peer_dev;
        pthread_mutex_t          mutex;
        pthread_cond_t           cond;
        int                      kq;

        int                      (*run)(struct camdd_dev *dev);
        int                      (*fetch)(struct camdd_dev *dev);

        /*
         * Buffers that are available for I/O.  Uses links.
         */
        STAILQ_HEAD(,camdd_buf)  free_queue;

        /*
         * Free indirect buffers.  These are used for breaking a large
         * buffer into multiple pieces.
         */
        STAILQ_HEAD(,camdd_buf)  free_indirect_queue;

        /*
         * Buffers that have been queued to the kernel.  Uses links.
         */
        STAILQ_HEAD(,camdd_buf)  active_queue;

        /*
         * Will generally contain one of our buffers that is waiting for enough
         * I/O from our partner thread to be able to execute.  This will
         * generally happen when our per-I/O-size is larger than the
         * partner thread's per-I/O-size.  Uses links.
         */
        STAILQ_HEAD(,camdd_buf)  pending_queue;

        /*
         * Number of buffers on the pending queue.
         */
        int                      num_pending_queue;

        /*
         * Buffers that are filled and ready to execute.  This is used when
         * our partner (reader) thread sends us blocks that are larger than
         * our blocksize, and so we have to split them into multiple pieces.
         */
        STAILQ_HEAD(,camdd_buf)  run_queue;

        /*
         * Number of buffers on the run queue.
         */
        int                      num_run_queue;

        /*
         * Buffers whose completions arrived out of order, held until the
         * completions for all earlier offsets have come in.  Uses links.
         */
        STAILQ_HEAD(,camdd_buf)  reorder_queue;

        /*
         * Number of buffers on the reorder queue.
         */
        int                      num_reorder_queue;

        /*
         * Buffers that have been queued to us by our partner thread
         * (generally the reader thread) to be written out.  Uses
         * work_links.
         */
        STAILQ_HEAD(,camdd_buf)  work_queue;

        /*
         * Buffers that have been completed by our partner thread.  Uses
         * work_links.
         */
        STAILQ_HEAD(,camdd_buf)  peer_done_queue;

        /*
         * Number of buffers on the peer done queue.
         */
        uint32_t                 num_peer_done_queue;

        /*
         * A list of buffers that we have queued to our peer thread.  Uses
         * links.
         */
        STAILQ_HEAD(,camdd_buf)  peer_work_queue;

        /*
         * Number of buffers on the peer work queue.
         */
        uint32_t                 num_peer_work_queue;
};
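
/*
 * A rough sketch of how a buffer moves between the queues above, pieced
 * together from the per-queue comments (the details live in the worker
 * functions further down in the file): buffers start on free_queue (or
 * free_indirect_queue), are filled and staged on pending_queue/run_queue,
 * sit on active_queue while the kernel owns them, and, once completed,
 * are handed to the peer via its work_queue.  The peer returns finished
 * buffers through peer_done_queue, and reorder_queue parks completions
 * that arrive ahead of next_completion_pos_bytes until the gaps fill in.
 */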

static sem_t camdd_sem;
static sig_atomic_t need_exit = 0;
static sig_atomic_t error_exit = 0;
static sig_atomic_t need_status = 0;

#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif


/* Generically useful offsets into the peripheral private area */
#define ppriv_ptr0 periph_priv.entries[0].ptr
#define ppriv_ptr1 periph_priv.entries[1].ptr
#define ppriv_field0 periph_priv.entries[0].field
#define ppriv_field1 periph_priv.entries[1].field

#define ccb_buf ppriv_ptr0

#define CAMDD_FILE_DEFAULT_BLOCK        524288
#define CAMDD_FILE_DEFAULT_DEPTH        1
#define CAMDD_PASS_MAX_BLOCK            1048576
#define CAMDD_PASS_DEFAULT_DEPTH        6
#define CAMDD_PASS_RW_TIMEOUT           (60 * 1000)

static int parse_btl(char *tstr, int *bus, int *target, int *lun,
                     camdd_argmask *arglst);
void camdd_free_dev(struct camdd_dev *dev);
struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
                                  struct kevent *new_ke, int num_ke,
                                  int retry_count, int timeout);
static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
                                         camdd_buf_type buf_type);
void camdd_release_buf(struct camdd_buf *buf);
struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
                        uint32_t sector_size, uint32_t *num_sectors_used,
                        int *double_buf_needed);
uint32_t camdd_buf_get_len(struct camdd_buf *buf);
void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
                     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
         camdd_argmask arglist, int probe_retry_count,
         int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
int camdd_probe_pass_nvme(struct cam_device *cam_dev, union ccb *ccb,
         camdd_argmask arglist, int probe_retry_count,
         int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
                                   int retry_count, int timeout);
struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
                                   struct camdd_io_opts *io_opts,
                                   camdd_argmask arglist, int probe_retry_count,
                                   int probe_timeout, int io_retry_count,
                                   int io_timeout);
void nvme_read_write(struct ccb_nvmeio *nvmeio, uint32_t retries,
                void (*cbfcnp)(struct cam_periph *, union ccb *),
                uint32_t nsid, int readop, uint64_t lba,
                uint32_t block_count, uint8_t *data_ptr, uint32_t dxfer_len,
                uint32_t timeout);
void *camdd_file_worker(void *arg);
camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol);
int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd);
int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
void camdd_peer_done(struct camdd_buf *buf);
void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
                        int *error_count);
int camdd_pass_fetch(struct camdd_dev *dev);
int camdd_file_run(struct camdd_dev *dev);
int camdd_pass_run(struct camdd_dev *dev);
int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
                     uint32_t *peer_depth, uint32_t *our_bytes,
                     uint32_t *peer_bytes);
void *camdd_worker(void *arg);
void camdd_sig_handler(int sig);
void camdd_print_status(struct camdd_dev *camdd_dev,
                        struct camdd_dev *other_dev,
                        struct timespec *start_time);
int camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts,
             uint64_t max_io, int retry_count, int timeout);
int camdd_parse_io_opts(char *args, int is_write,
                        struct camdd_io_opts *io_opts);
void usage(void);

/*
 * Parse out a bus, or a bus, target and lun in the following
 * format:
 * bus
 * bus:target
 * bus:target:lun
 *
 * Returns the number of parsed components, or 0.
 */
static int
parse_btl(char *tstr, int *bus, int *target, int *lun, camdd_argmask *arglst)
{
        char *tmpstr;
        int convs = 0;

        while (isspace((unsigned char)*tstr) && (*tstr != '\0'))
                tstr++;

        tmpstr = strtok(tstr, ":");
        if ((tmpstr != NULL) && (*tmpstr != '\0')) {
                *bus = strtol(tmpstr, NULL, 0);
                *arglst |= CAMDD_ARG_BUS;
                convs++;
                tmpstr = strtok(NULL, ":");
                if ((tmpstr != NULL) && (*tmpstr != '\0')) {
                        *target = strtol(tmpstr, NULL, 0);
                        *arglst |= CAMDD_ARG_TARGET;
                        convs++;
                        tmpstr = strtok(NULL, ":");
                        if ((tmpstr != NULL) && (*tmpstr != '\0')) {
                                *lun = strtol(tmpstr, NULL, 0);
                                *arglst |= CAMDD_ARG_LUN;
                                convs++;
                        }
                }
        }

        return (convs);
}
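
/*
 * Example: parse_btl("1:2:0", &bus, &target, &lun, &args) sets bus = 1,
 * target = 2 and lun = 0, ORs CAMDD_ARG_BUS, CAMDD_ARG_TARGET and
 * CAMDD_ARG_LUN into args, and returns 3.
 */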

/*
 * XXX KDM clean up and free all of the buffers on the queue!
 */
void
camdd_free_dev(struct camdd_dev *dev)
{
        if (dev == NULL)
                return;

        switch (dev->dev_type) {
        case CAMDD_DEV_FILE: {
                struct camdd_dev_file *file_dev = &dev->dev_spec.file;

                if (file_dev->fd != -1)
                        close(file_dev->fd);
                free(file_dev->tmp_buf);
                break;
        }
        case CAMDD_DEV_PASS: {
                struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;

                if (pass_dev->dev != NULL)
                        cam_close_device(pass_dev->dev);
                break;
        }
        default:
                break;
        }

        free(dev);
}

struct camdd_dev *
camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
                int retry_count, int timeout)
{
        struct camdd_dev *dev = NULL;
        struct kevent *ke = NULL;
        size_t ke_size;
        int retval = 0;

        dev = calloc(1, sizeof(*dev));
        if (dev == NULL) {
                warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
                goto bailout;
        }

        dev->dev_type = dev_type;
        dev->io_timeout = timeout;
        dev->retry_count = retry_count;
        STAILQ_INIT(&dev->free_queue);
        STAILQ_INIT(&dev->free_indirect_queue);
        STAILQ_INIT(&dev->active_queue);
        STAILQ_INIT(&dev->pending_queue);
        STAILQ_INIT(&dev->run_queue);
        STAILQ_INIT(&dev->reorder_queue);
        STAILQ_INIT(&dev->work_queue);
        STAILQ_INIT(&dev->peer_done_queue);
        STAILQ_INIT(&dev->peer_work_queue);
        retval = pthread_mutex_init(&dev->mutex, NULL);
        if (retval != 0) {
                warnc(retval, "%s: failed to initialize mutex", __func__);
                goto bailout;
        }

        retval = pthread_cond_init(&dev->cond, NULL);
        if (retval != 0) {
                warnc(retval, "%s: failed to initialize condition variable",
                      __func__);
                goto bailout;
        }

        dev->kq = kqueue();
        if (dev->kq == -1) {
                warn("%s: Unable to create kqueue", __func__);
                goto bailout;
        }

        ke_size = sizeof(struct kevent) * (num_ke + 4);
        ke = calloc(1, ke_size);
        if (ke == NULL) {
                warn("%s: unable to malloc %zu bytes", __func__, ke_size);
                goto bailout;
        }
        if (num_ke > 0)
                bcopy(new_ke, ke, num_ke * sizeof(struct kevent));

        EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
               EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
        EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
               EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
        EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
        EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);

        retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
        if (retval == -1) {
                warn("%s: Unable to register kevents", __func__);
                goto bailout;
        }

        /* The kernel has copied the changelist; the array can go now. */
        free(ke);

        return (dev);

bailout:
        free(ke);
        free(dev);

        return (NULL);
}
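
/*
 * The two EVFILT_USER events above use the work_queue and peer_done_queue
 * addresses as idents.  The peer thread can wake this device's worker by
 * re-posting the matching ident with NOTE_TRIGGER in fflags (the standard
 * EVFILT_USER mechanism); the EVFILT_SIGNAL entries let the worker pick up
 * SIGINFO and SIGINT from its kevent(2) loop instead of an async handler.
 */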

static struct camdd_buf *
camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
{
        struct camdd_buf *buf = NULL;
        uint8_t *data_ptr = NULL;

        /*
         * We only need to allocate data space for data buffers.
         */
        switch (buf_type) {
        case CAMDD_BUF_DATA:
                data_ptr = malloc(dev->blocksize);
                if (data_ptr == NULL) {
                        warn("unable to allocate %u bytes", dev->blocksize);
                        goto bailout_error;
                }
                break;
        default:
                break;
        }

        buf = calloc(1, sizeof(*buf));
        if (buf == NULL) {
                warn("unable to allocate %zu bytes", sizeof(*buf));
                goto bailout_error;
        }

        buf->buf_type = buf_type;
        buf->dev = dev;
        switch (buf_type) {
        case CAMDD_BUF_DATA: {
                struct camdd_buf_data *data;

                data = &buf->buf_type_spec.data;

                data->alloc_len = dev->blocksize;
                data->buf = data_ptr;
                break;
        }
        case CAMDD_BUF_INDIRECT:
                break;
        default:
                break;
        }
        STAILQ_INIT(&buf->src_list);

        return (buf);

bailout_error:
        free(data_ptr);

        return (NULL);
}

void
camdd_release_buf(struct camdd_buf *buf)
{
        struct camdd_dev *dev;

        dev = buf->dev;

        switch (buf->buf_type) {
        case CAMDD_BUF_DATA: {
                struct camdd_buf_data *data;

                data = &buf->buf_type_spec.data;

                if (data->segs != NULL) {
                        if (data->extra_buf != 0) {
                                void *extra_buf;

                                extra_buf = (void *)
                                    data->segs[data->sg_count - 1].ds_addr;
                                free(extra_buf);
                                data->extra_buf = 0;
                        }
                        free(data->segs);
                        data->segs = NULL;
                        data->sg_count = 0;
                } else if (data->iovec != NULL) {
                        if (data->extra_buf != 0) {
                                free(data->iovec[data->sg_count - 1].iov_base);
                                data->extra_buf = 0;
                        }
                        free(data->iovec);
                        data->iovec = NULL;
                        data->sg_count = 0;
                }
                STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
                break;
        }
        case CAMDD_BUF_INDIRECT:
                STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
                break;
        default:
                errx(1, "%s: Invalid buffer type %d for released buffer",
                    __func__, buf->buf_type);
                break;
        }
}

struct camdd_buf *
camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
{
        struct camdd_buf *buf = NULL;

        switch (buf_type) {
        case CAMDD_BUF_DATA:
                buf = STAILQ_FIRST(&dev->free_queue);
                if (buf != NULL) {
                        struct camdd_buf_data *data;
                        uint8_t *data_ptr;
                        uint32_t alloc_len;

                        STAILQ_REMOVE_HEAD(&dev->free_queue, links);
                        data = &buf->buf_type_spec.data;
                        data_ptr = data->buf;
                        alloc_len = data->alloc_len;
                        bzero(buf, sizeof(*buf));
                        data->buf = data_ptr;
                        data->alloc_len = alloc_len;
                }
                break;
        case CAMDD_BUF_INDIRECT:
                buf = STAILQ_FIRST(&dev->free_indirect_queue);
                if (buf != NULL) {
                        STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);

                        bzero(buf, sizeof(*buf));
                }
                break;
        default:
                warnx("Unknown buffer type %d requested", buf_type);
                break;
        }

        if (buf == NULL)
                return (camdd_alloc_buf(dev, buf_type));
        else {
                STAILQ_INIT(&buf->src_list);
                buf->dev = dev;
                buf->buf_type = buf_type;

                return (buf);
        }
}
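
/*
 * Note the recycling contract above: a data buffer pulled off the free
 * list is zeroed for reuse, but its backing pointer and alloc_len are
 * saved and restored around the bzero() so the (blocksize-sized) data
 * allocation survives from one I/O to the next.
 */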

int
camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
                    uint32_t *num_sectors_used, int *double_buf_needed)
{
        struct camdd_buf *tmp_buf;
        struct camdd_buf_data *data;
        uint8_t *extra_buf = NULL;
        size_t extra_buf_len = 0;
        int extra_buf_attached = 0;
        int i, retval = 0;

        data = &buf->buf_type_spec.data;

        data->sg_count = buf->src_count;
        /*
         * Compose a scatter/gather list from all of the buffers in the list.
         * If the length of the buffer isn't a multiple of the sector size,
         * we'll have to add an extra buffer.  This should only happen
         * at the end of a transfer.
         */
        if ((data->fill_len % sector_size) != 0) {
                extra_buf_len = sector_size - (data->fill_len % sector_size);
                extra_buf = calloc(extra_buf_len, 1);
                if (extra_buf == NULL) {
                        warn("%s: unable to allocate %zu bytes for extra "
                            "buffer space", __func__, extra_buf_len);
                        retval = 1;
                        goto bailout;
                }
                data->extra_buf = 1;
                data->sg_count++;
        }
        if (iovec == 0) {
                data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
                if (data->segs == NULL) {
                        warn("%s: unable to allocate %zu bytes for S/G list",
                            __func__, sizeof(bus_dma_segment_t) *
                            data->sg_count);
                        retval = 1;
                        goto bailout;
                }
        } else {
                data->iovec = calloc(data->sg_count, sizeof(struct iovec));
                if (data->iovec == NULL) {
                        warn("%s: unable to allocate %zu bytes for S/G list",
                            __func__, sizeof(struct iovec) * data->sg_count);
                        retval = 1;
                        goto bailout;
                }
        }

        for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
             i < buf->src_count && tmp_buf != NULL; i++,
             tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {

                if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
                        struct camdd_buf_data *tmp_data;

                        tmp_data = &tmp_buf->buf_type_spec.data;
                        if (iovec == 0) {
                                data->segs[i].ds_addr =
                                    (bus_addr_t)tmp_data->buf;
                                data->segs[i].ds_len = tmp_data->fill_len -
                                    tmp_data->resid;
                        } else {
                                data->iovec[i].iov_base = tmp_data->buf;
                                data->iovec[i].iov_len = tmp_data->fill_len -
                                    tmp_data->resid;
                        }
                        if (((tmp_data->fill_len - tmp_data->resid) %
                             sector_size) != 0)
                                *double_buf_needed = 1;
                } else {
                        struct camdd_buf_indirect *tmp_ind;

                        tmp_ind = &tmp_buf->buf_type_spec.indirect;
                        if (iovec == 0) {
                                data->segs[i].ds_addr =
                                    (bus_addr_t)tmp_ind->start_ptr;
                                data->segs[i].ds_len = tmp_ind->len;
                        } else {
                                data->iovec[i].iov_base = tmp_ind->start_ptr;
                                data->iovec[i].iov_len = tmp_ind->len;
                        }
                        if ((tmp_ind->len % sector_size) != 0)
                                *double_buf_needed = 1;
                }
        }

        if (extra_buf != NULL) {
                if (iovec == 0) {
                        data->segs[i].ds_addr = (bus_addr_t)extra_buf;
                        data->segs[i].ds_len = extra_buf_len;
                } else {
                        data->iovec[i].iov_base = extra_buf;
                        data->iovec[i].iov_len = extra_buf_len;
                }
                extra_buf_attached = 1;
                i++;
        }
        if ((tmp_buf != NULL) || (i != data->sg_count)) {
                warnx("buffer source count does not match "
                      "number of buffers in list!");
                retval = 1;
                goto bailout;
        }

bailout:
        if (retval == 0) {
                *num_sectors_used = (data->fill_len + extra_buf_len) /
                    sector_size;
        } else if ((extra_buf != NULL) && (extra_buf_attached == 0)) {
                /*
                 * If extra_buf isn't attached yet, we need to free it
                 * to avoid leaking.
                 */
                free(extra_buf);
                data->extra_buf = 0;
                data->sg_count--;
        }
        return (retval);
}
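
/*
 * Worked example of the padding logic: with fill_len = 524388 bytes and a
 * 512-byte sector size, fill_len % 512 = 100, so a 412-byte zeroed extra
 * buffer is appended as a final S/G element and *num_sectors_used becomes
 * (524388 + 412) / 512 = 1025.
 */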

uint32_t
camdd_buf_get_len(struct camdd_buf *buf)
{
        uint32_t len = 0;

        if (buf->buf_type != CAMDD_BUF_DATA) {
                struct camdd_buf_indirect *indirect;

                indirect = &buf->buf_type_spec.indirect;
                len = indirect->len;
        } else {
                struct camdd_buf_data *data;

                data = &buf->buf_type_spec.data;
                len = data->fill_len;
        }

        return (len);
}

void
camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
{
        struct camdd_buf_data *data;

        assert(buf->buf_type == CAMDD_BUF_DATA);

        data = &buf->buf_type_spec.data;

        STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
        buf->src_count++;

        data->fill_len += camdd_buf_get_len(child_buf);
}

typedef enum {
        CAMDD_TS_MAX_BLK,
        CAMDD_TS_MIN_BLK,
        CAMDD_TS_BLK_GRAN,
        CAMDD_TS_EFF_IOSIZE
} camdd_status_item_index;

static struct camdd_status_items {
        const char *name;
        struct mt_status_entry *entry;
} req_status_items[] = {
        { "max_blk", NULL },
        { "min_blk", NULL },
        { "blk_gran", NULL },
        { "max_effective_iosize", NULL }
};

int
camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
                 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
{
        struct mt_status_data status_data;
        char *xml_str = NULL;
        unsigned int i;
        int retval = 0;

        retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str);
        if (retval != 0)
                err(1, "Couldn't get XML string from %s", filename);

        retval = mt_get_status(xml_str, &status_data);
        if (retval != XML_STATUS_OK) {
                warn("couldn't get status for %s", filename);
                retval = 1;
                goto bailout;
        } else
                retval = 0;

        if (status_data.error != 0) {
                warnx("%s", status_data.error_str);
                retval = 1;
                goto bailout;
        }

        for (i = 0; i < nitems(req_status_items); i++) {
                char *name;

                name = __DECONST(char *, req_status_items[i].name);
                req_status_items[i].entry = mt_status_entry_find(&status_data,
                    name);
                if (req_status_items[i].entry == NULL) {
                        errx(1, "Cannot find status entry %s",
                            req_status_items[i].name);
                }
        }

        *max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
        *max_blk = req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
        *min_blk = req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
        *blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
bailout:

        free(xml_str);
        mt_status_free(&status_data);

        return (retval);
}
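
/*
 * The four values above come out of the tape driver's XML status report
 * (MTIOCEXTGET, parsed via mtlib).  Note that the caller treats blk_gran
 * as a power-of-two exponent: a granularity of n means block lengths must
 * be multiples of (1 << n).
 */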

struct camdd_dev *
camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
    int timeout)
{
        struct camdd_dev *dev = NULL;
        struct camdd_dev_file *file_dev;
        uint64_t blocksize = io_opts->blocksize;

        dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
        if (dev == NULL)
                goto bailout;

        file_dev = &dev->dev_spec.file;
        file_dev->fd = fd;
        strlcpy(file_dev->filename, io_opts->dev_name,
            sizeof(file_dev->filename));
        strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
        if (blocksize == 0)
                dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
        else
                dev->blocksize = blocksize;

        if ((io_opts->queue_depth != 0)
         && (io_opts->queue_depth != 1)) {
                warnx("Queue depth %ju for %s ignored, only 1 outstanding "
                    "command supported", (uintmax_t)io_opts->queue_depth,
                    io_opts->dev_name);
        }
        dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
        dev->run = camdd_file_run;
        dev->fetch = NULL;

        /*
         * We can effectively access files on byte boundaries.  We'll reset
         * this for devices like disks that can be accessed on sector
         * boundaries.
         */
        dev->sector_size = 1;

        if ((fd != STDIN_FILENO)
         && (fd != STDOUT_FILENO)) {
                int retval;

                retval = fstat(fd, &file_dev->sb);
                if (retval != 0) {
                        warn("Cannot stat %s", dev->device_name);
                        goto bailout_error;
                }
                if (S_ISREG(file_dev->sb.st_mode)) {
                        file_dev->file_type = CAMDD_FILE_REG;
                } else if (S_ISCHR(file_dev->sb.st_mode)) {
                        int type;

                        if (ioctl(fd, FIODTYPE, &type) == -1)
                                err(1, "FIODTYPE ioctl failed on %s",
                                    dev->device_name);
                        else {
                                if (type & D_TAPE)
                                        file_dev->file_type = CAMDD_FILE_TAPE;
                                else if (type & D_DISK)
                                        file_dev->file_type = CAMDD_FILE_DISK;
                                else if (type & D_MEM)
                                        file_dev->file_type = CAMDD_FILE_MEM;
                                else if (type & D_TTY)
                                        file_dev->file_type = CAMDD_FILE_TTY;
                        }
                } else if (S_ISDIR(file_dev->sb.st_mode)) {
                        errx(1, "cannot operate on directory %s",
                            dev->device_name);
                } else if (S_ISFIFO(file_dev->sb.st_mode)) {
                        file_dev->file_type = CAMDD_FILE_PIPE;
                } else
                        errx(1, "Cannot determine file type for %s",
                            dev->device_name);

                switch (file_dev->file_type) {
                case CAMDD_FILE_REG:
                        if (file_dev->sb.st_size != 0)
                                dev->max_sector = file_dev->sb.st_size - 1;
                        else
                                dev->max_sector = 0;
                        file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
                        break;
                case CAMDD_FILE_TAPE: {
                        uint64_t max_iosize, max_blk, min_blk, blk_gran;
                        /*
                         * Check block limits and maximum effective iosize.
                         * Make sure the blocksize is within the block
                         * limits (and a multiple of the minimum blocksize)
                         * and that the blocksize is <= maximum effective
                         * iosize.
                         */
                        retval = camdd_probe_tape(fd, dev->device_name,
                            &max_iosize, &max_blk, &min_blk, &blk_gran);
                        if (retval != 0)
                                errx(1, "Unable to probe tape %s",
                                    dev->device_name);

                        /*
                         * The blocksize needs to be <= the maximum
                         * effective I/O size of the tape device.  Note
                         * that this also takes into account the maximum
                         * blocksize reported by READ BLOCK LIMITS.
                         */
                        if (dev->blocksize > max_iosize) {
                                warnx("Blocksize %u too big for %s, limiting "
                                    "to %ju", dev->blocksize, dev->device_name,
                                    (uintmax_t)max_iosize);
                                dev->blocksize = max_iosize;
                        }

                        /*
                         * The blocksize needs to be at least min_blk.
                         */
                        if (dev->blocksize < min_blk) {
                                warnx("Blocksize %u too small for %s, "
                                    "increasing to %ju", dev->blocksize,
                                    dev->device_name, (uintmax_t)min_blk);
                                dev->blocksize = min_blk;
                        }

                        /*
                         * And the blocksize needs to be a multiple of
                         * the block granularity.
                         */
                        if ((blk_gran != 0)
                         && (dev->blocksize % (1 << blk_gran))) {
                                warnx("Blocksize %u for %s not a multiple of "
                                    "%d, adjusting to %d", dev->blocksize,
                                    dev->device_name, (1 << blk_gran),
                                    dev->blocksize & ~((1 << blk_gran) - 1));
                                dev->blocksize &= ~((1 << blk_gran) - 1);
                        }

                        if (dev->blocksize == 0) {
                                errx(1, "Unable to derive valid blocksize for "
                                    "%s", dev->device_name);
                        }

                        /*
                         * For tape drives, set the sector size to the
                         * blocksize so that we make sure not to write
                         * less than the blocksize out to the drive.
                         */
                        dev->sector_size = dev->blocksize;
                        break;
                }
                case CAMDD_FILE_DISK: {
                        off_t media_size;
                        unsigned int sector_size;

                        file_dev->file_flags |= CAMDD_FF_CAN_SEEK;

                        if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
                                err(1, "DIOCGSECTORSIZE ioctl failed on %s",
                                    dev->device_name);
                        }

                        if (sector_size == 0) {
                                errx(1, "DIOCGSECTORSIZE ioctl returned "
                                    "invalid sector size %u for %s",
                                    sector_size, dev->device_name);
                        }

                        if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
                                err(1, "DIOCGMEDIASIZE ioctl failed on %s",
                                    dev->device_name);
                        }

                        if (media_size == 0) {
                                errx(1, "DIOCGMEDIASIZE ioctl returned "
                                    "invalid media size %ju for %s",
                                    (uintmax_t)media_size, dev->device_name);
                        }

                        if (dev->blocksize % sector_size) {
                                errx(1, "%s blocksize %u not a multiple of "
                                    "sector size %u", dev->device_name,
                                    dev->blocksize, sector_size);
                        }

                        dev->sector_size = sector_size;
                        dev->max_sector = (media_size / sector_size) - 1;
                        break;
                }
                case CAMDD_FILE_MEM:
                        file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
                        break;
                default:
                        break;
                }
        }

        if ((io_opts->offset != 0)
         && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
                warnx("Offset %ju specified for %s, but we cannot seek on %s",
                    (uintmax_t)io_opts->offset, io_opts->dev_name,
                    io_opts->dev_name);
                goto bailout_error;
        }
#if 0
        else if ((io_opts->offset != 0)
                && ((io_opts->offset % dev->sector_size) != 0)) {
                warnx("Offset %ju for %s is not a multiple of the "
                      "sector size %u", io_opts->offset,
                      io_opts->dev_name, dev->sector_size);
                goto bailout_error;
        } else {
                dev->start_offset_bytes = io_opts->offset;
        }
#endif

bailout:
        return (dev);

bailout_error:
        camdd_free_dev(dev);
        return (NULL);
}

/*
 * Fetch the device information (XPT_GDEV_TYPE) CCB for the specified device.
 */
int
camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd)
{
        union ccb *ccb;
        int retval = 0;

        ccb = cam_getccb(device);

        if (ccb == NULL) {
                warnx("%s: couldn't allocate CCB", __func__);
                return (-1);
        }

        CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cgd);

        ccb->ccb_h.func_code = XPT_GDEV_TYPE;

        if (cam_send_ccb(device, ccb) < 0) {
                warn("%s: error sending Get Device Information CCB", __func__);
                cam_error_print(device, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);
                retval = -1;
                goto bailout;
        }

        if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
                cam_error_print(device, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);
                retval = -1;
                goto bailout;
        }

        bcopy(&ccb->cgd, cgd, sizeof(struct ccb_getdev));

bailout:
        cam_freeccb(ccb);

        return (retval);
}
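
/*
 * Among other things, the cgd filled in here carries the transport
 * protocol (cgd.protocol), which camdd_probe_pass() below uses to choose
 * between the SCSI and NVMe probe paths.
 */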

int
camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
                 camdd_argmask arglist, int probe_retry_count,
                 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
{
        struct scsi_read_capacity_data rcap;
        struct scsi_read_capacity_data_long rcaplong;
        int retval = -1;

        if (ccb == NULL) {
                warnx("%s: passed ccb is NULL", __func__);
                goto bailout;
        }

        CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);

        scsi_read_capacity(&ccb->csio,
                           /*retries*/ probe_retry_count,
                           /*cbfcnp*/ NULL,
                           /*tag_action*/ MSG_SIMPLE_Q_TAG,
                           &rcap,
                           SSD_FULL_SIZE,
                           /*timeout*/ probe_timeout ? probe_timeout : 5000);

        /* Disable freezing the device queue */
        ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;

        if (arglist & CAMDD_ARG_ERR_RECOVER)
                ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;

        if (cam_send_ccb(cam_dev, ccb) < 0) {
                warn("error sending READ CAPACITY command");

                cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);

                goto bailout;
        }

        if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
                cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
                goto bailout;
        }

        *maxsector = scsi_4btoul(rcap.addr);
        *block_len = scsi_4btoul(rcap.length);

        /*
         * A last block of 2^32-1 means that the true capacity is over 2TB,
         * and we need to issue the long READ CAPACITY to get the real
         * capacity.  Otherwise, we're all set.
         */
        if (*maxsector != 0xffffffff) {
                retval = 0;
                goto bailout;
        }

        scsi_read_capacity_16(&ccb->csio,
                              /*retries*/ probe_retry_count,
                              /*cbfcnp*/ NULL,
                              /*tag_action*/ MSG_SIMPLE_Q_TAG,
                              /*lba*/ 0,
                              /*reladdr*/ 0,
                              /*pmi*/ 0,
                              (uint8_t *)&rcaplong,
                              sizeof(rcaplong),
                              /*sense_len*/ SSD_FULL_SIZE,
                              /*timeout*/ probe_timeout ? probe_timeout : 5000);

        /* Disable freezing the device queue */
        ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;

        if (arglist & CAMDD_ARG_ERR_RECOVER)
                ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;

        if (cam_send_ccb(cam_dev, ccb) < 0) {
                warn("error sending READ CAPACITY (16) command");
                cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);
                goto bailout;
        }

        if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
                cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
                goto bailout;
        }

        *maxsector = scsi_8btou64(rcaplong.addr);
        *block_len = scsi_4btoul(rcaplong.length);

        retval = 0;

bailout:
        return (retval);
}
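
/*
 * A note on the 2TB boundary above: READ CAPACITY(10) returns the last
 * LBA as a 32-bit value, so with 512-byte blocks any device of 2^32 * 512
 * bytes (2 TiB) or more reports 0xffffffff and forces the 16-byte variant.
 */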

int
camdd_probe_pass_nvme(struct cam_device *cam_dev, union ccb *ccb,
                 camdd_argmask arglist, int probe_retry_count,
                 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
{
        struct nvme_command *nc = NULL;
        struct nvme_namespace_data nsdata;
        uint32_t nsid = cam_dev->target_lun & UINT32_MAX;
        uint8_t format = 0, lbads = 0;
        int retval = -1;

        if (ccb == NULL) {
                warnx("%s: passed ccb is NULL", __func__);
                goto bailout;
        }

        CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->nvmeio);

        /* Send Identify Namespace to get block size and capacity */
        nc = &ccb->nvmeio.cmd;
        nc->opc = NVME_OPC_IDENTIFY;

        nc->nsid = nsid;
        nc->cdw10 = 0; /* Identify Namespace is CNS = 0 */

        cam_fill_nvmeadmin(&ccb->nvmeio,
                        /*retries*/ probe_retry_count,
                        /*cbfcnp*/ NULL,
                        CAM_DIR_IN,
                        (uint8_t *)&nsdata,
                        sizeof(nsdata),
                        probe_timeout);

        /* Disable freezing the device queue */
        ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;

        if (arglist & CAMDD_ARG_ERR_RECOVER)
                ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;

        if (cam_send_ccb(cam_dev, ccb) < 0) {
                warn("error sending Identify Namespace command");

                cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);

                goto bailout;
        }

        if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
                cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
                goto bailout;
        }

        *maxsector = nsdata.nsze;
        /* The LBA Data Size (LBADS) is reported as a power of 2 */
        format = nsdata.flbas & NVME_NS_DATA_FLBAS_FORMAT_MASK;
        lbads = (nsdata.lbaf[format] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) &
            NVME_NS_DATA_LBAF_LBADS_MASK;
        *block_len = 1 << lbads;

        retval = 0;

bailout:
        return (retval);
}
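
/*
 * Example of the LBADS math above: lbads = 9 yields 1 << 9 = 512-byte
 * blocks, lbads = 12 yields 4096-byte blocks.  Also note that NVMe's nsze
 * is the namespace size in logical blocks, so unlike the SCSI path (which
 * reports the last LBA), *maxsector here is a block count.
 */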
1456
1457 /*
1458  * Need to implement this.  Do a basic probe:
1459  * - Check the inquiry data, make sure we're talking to a device that we
1460  *   can reasonably expect to talk to -- direct, RBC, CD, WORM.
1461  * - Send a test unit ready, make sure the device is available.
1462  * - Get the capacity and block size.
1463  */
1464 struct camdd_dev *
1465 camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
1466                  camdd_argmask arglist, int probe_retry_count,
1467                  int probe_timeout, int io_retry_count, int io_timeout)
1468 {
1469         union ccb *ccb;
1470         uint64_t maxsector = 0;
1471         uint32_t cpi_maxio, max_iosize, pass_numblocks;
1472         uint32_t block_len = 0;
1473         struct camdd_dev *dev = NULL;
1474         struct camdd_dev_pass *pass_dev;
1475         struct kevent ke;
1476         struct ccb_getdev cgd;
1477         int retval;
1478         int scsi_dev_type = T_NODEVICE;
1479
1480         if ((retval = camdd_get_cgd(cam_dev, &cgd)) != 0) {
1481                 warnx("%s: error retrieving CGD", __func__);
1482                 return NULL;
1483         }
1484
1485         ccb = cam_getccb(cam_dev);
1486
1487         if (ccb == NULL) {
1488                 warnx("%s: error allocating ccb", __func__);
1489                 goto bailout;
1490         }
1491
1492         switch (cgd.protocol) {
1493         case PROTO_SCSI:
1494                 scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
1495
1496                 /*
1497                  * For devices that support READ CAPACITY, we'll attempt to
1498                  * get the capacity.  We don't support tape or other device
1499                  * types via SCSI passthrough, so return an error for them.
1500                  */
1501                 switch (scsi_dev_type) {
1502                 case T_DIRECT:
1503                 case T_WORM:
1504                 case T_CDROM:
1505                 case T_OPTICAL:
1506                 case T_RBC:
1507                 case T_ZBC_HM:
1508                         break;
1509                 default:
1510                         errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
1511                         break; /*NOTREACHED*/
1512                 }
1513
1514                 if ((retval = camdd_probe_pass_scsi(cam_dev, ccb, arglist,
1515                                                 probe_retry_count, probe_timeout,
1516                                                 &maxsector, &block_len))) {
1517                         goto bailout;
1518                 }
1519                 break;
1520         case PROTO_NVME:
1521                 if ((retval = camdd_probe_pass_nvme(cam_dev, ccb, arglist,
1522                                                 probe_retry_count, probe_timeout,
1523                                                 &maxsector, &block_len))) {
1524                         goto bailout;
1525                 }
1526                 break;
1527         default:
1528                 errx(1, "Unsupported PROTO type %d", cgd.protocol);
1529                 break; /*NOTREACHED*/
1530         }
1531
1532         if (block_len == 0) {
1533                 warnx("Sector size for %s%u is 0, cannot continue",
1534                     cam_dev->device_name, cam_dev->dev_unit_num);
1535                 goto bailout_error;
1536         }
1537
1538         CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cpi);
1539
1540         ccb->ccb_h.func_code = XPT_PATH_INQ;
1541         ccb->ccb_h.flags = CAM_DIR_NONE;
1542         ccb->ccb_h.retry_count = 1;
1543         
1544         if (cam_send_ccb(cam_dev, ccb) < 0) {
1545                 warn("error sending XPT_PATH_INQ CCB");
1546
1547                 cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1548                                 CAM_EPF_ALL, stderr);
1549                 goto bailout;
1550         }
1551
1552         EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1553
1554         dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
1555                               io_timeout);
1556         if (dev == NULL)
1557                 goto bailout;
1558
1559         pass_dev = &dev->dev_spec.pass;
1560         pass_dev->scsi_dev_type = scsi_dev_type;
1561         pass_dev->protocol = cgd.protocol;
1562         pass_dev->dev = cam_dev;
1563         pass_dev->max_sector = maxsector;
1564         pass_dev->block_len = block_len;
1565         pass_dev->cpi_maxio = ccb->cpi.maxio;
1566         snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
1567                  pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
1568         dev->sector_size = block_len;
1569         dev->max_sector = maxsector;
1570         
1571
1572         /*
1573          * Determine the optimal blocksize to use for this device.
1574          */
1575
1576         /*
1577          * If the controller has not specified a maximum I/O size,
1578          * just go with 128K as a somewhat conservative value.
1579          */
1580         if (pass_dev->cpi_maxio == 0)
1581                 cpi_maxio = 131072;
1582         else
1583                 cpi_maxio = pass_dev->cpi_maxio;
1584
1585         /*
1586          * If the controller has a large maximum I/O size, limit it
1587          * to something smaller so that the kernel doesn't have trouble
1588          * allocating buffers to copy data in and out for us.
1589          * XXX KDM this is until we have unmapped I/O support in the kernel.
1590          */
1591         max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
1592
1593         /*
1594          * If we weren't able to get a block size for some reason,
1595          * default to 512 bytes.
1596          */
1597         block_len = pass_dev->block_len;
1598         if (block_len == 0)
1599                 block_len = 512;
1600
1601         /*
1602          * Figure out how many blocksize chunks will fit in the
1603          * maximum I/O size.
1604          */
1605         pass_numblocks = max_iosize / block_len;
1606
1607         /*
1608          * And finally, multiply the number of blocks by the block
1609          * length to get our default blocksize.
1610          */
1611         dev->blocksize = pass_numblocks * block_len;
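        /*
         * For example, with cpi_maxio == 0 the 131072 byte fallback
         * applies; assuming CAMDD_PASS_MAX_BLOCK does not cap it
         * further, a 512-byte block_len gives pass_numblocks =
         * 131072 / 512 = 256 and a default blocksize of 256 * 512 =
         * 131072 bytes.  A 4096-byte block_len gives 32 blocks and
         * the same 131072-byte blocksize.
         */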
1612
1613         if (io_opts->blocksize != 0) {
1614                 if ((io_opts->blocksize % dev->sector_size) != 0) {
1615                         warnx("Blocksize %ju for %s is not a multiple of "
1616                               "sector size %u", (uintmax_t)io_opts->blocksize, 
1617                               dev->device_name, dev->sector_size);
1618                         goto bailout_error;
1619                 }
1620                 dev->blocksize = io_opts->blocksize;
1621         }
1622         dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
1623         if (io_opts->queue_depth != 0)
1624                 dev->target_queue_depth = io_opts->queue_depth;
1625
1626         if (io_opts->offset != 0) {
1627                 if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
1628                         warnx("Offset %ju is past the end of device %s",
1629                             (uintmax_t)io_opts->offset, dev->device_name);
1630                         goto bailout_error;
1631                 }
1632 #if 0
1633                 else if ((io_opts->offset % dev->sector_size) != 0) {
1634                         warnx("Offset %ju for %s is not a multiple of the "
1635                               "sector size %u", io_opts->offset, 
1636                               dev->device_name, dev->sector_size);
1637                         goto bailout_error;
1638                 }
1639                 dev->start_offset_bytes = io_opts->offset;
1640 #endif
1641         }
1642
1643         dev->min_cmd_size = io_opts->min_cmd_size;
1644
1645         dev->run = camdd_pass_run;
1646         dev->fetch = camdd_pass_fetch;
1647
1648 bailout:
1649         cam_freeccb(ccb);
1650
1651         return (dev);
1652
1653 bailout_error:
1654         cam_freeccb(ccb);
1655
1656         camdd_free_dev(dev);
1657
1658         return (NULL);
1659 }
1660
1661 void
1662 nvme_read_write(struct ccb_nvmeio *nvmeio, uint32_t retries,
1663                 void (*cbfcnp)(struct cam_periph *, union ccb *),
1664                 uint32_t nsid, int readop, uint64_t lba,
1665                 uint32_t block_count, uint8_t *data_ptr, uint32_t dxfer_len,
1666                 uint32_t timeout)
1667 {
1668         struct nvme_command *nc = &nvmeio->cmd;
1669
1670         nc->opc = readop ? NVME_OPC_READ : NVME_OPC_WRITE;
1671
1672         nc->nsid = nsid;
1673
1674         nc->cdw10 = lba & UINT32_MAX;
1675         nc->cdw11 = lba >> 32;
1676
1677         /* NLB (bits 15:0) is a zero based value */
1678         nc->cdw12 = (block_count - 1) & UINT16_MAX;
1679
1680         cam_fill_nvmeio(nvmeio,
1681                         retries,
1682                         cbfcnp,
1683                         readop ? CAM_DIR_IN : CAM_DIR_OUT,
1684                         data_ptr,
1685                         dxfer_len,
1686                         timeout);
1687 }
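
/*
 * An example of the dword packing above: for lba 0x123456789 and
 * block_count 8, cdw10 is 0x23456789 (the low 32 bits of the LBA),
 * cdw11 is 0x1 (the high 32 bits), and cdw12 is 7 (NLB is zero-based,
 * so eight blocks are encoded as 7).
 */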
1688
1689 void *
1690 camdd_worker(void *arg)
1691 {
1692         struct camdd_dev *dev = arg;
1693         struct camdd_buf *buf;
1694         struct timespec ts, *kq_ts;
1695
1696         ts.tv_sec = 0;
1697         ts.tv_nsec = 0;
1698
1699         pthread_mutex_lock(&dev->mutex);
1700
1701         dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
1702
1703         for (;;) {
1704                 struct kevent ke;
1705                 int retval = 0;
1706
1707                 /*
1708                  * XXX KDM check the reorder queue depth?
1709                  */
1710                 if (dev->write_dev == 0) {
1711                         uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
1712                         uint32_t target_depth = dev->target_queue_depth;
1713                         uint32_t peer_target_depth =
1714                             dev->peer_dev->target_queue_depth;
1715                         uint32_t peer_blocksize = dev->peer_dev->blocksize;
1716
1717                         camdd_get_depth(dev, &our_depth, &peer_depth,
1718                                         &our_bytes, &peer_bytes);
1719
1720 #if 0
1721                         while (((our_depth < target_depth)
1722                              && (peer_depth < peer_target_depth))
1723                             || ((peer_bytes + our_bytes) <
1724                                  (peer_blocksize * 2))) {
1725 #endif
1726                         while (((our_depth + peer_depth) <
1727                                 (target_depth + peer_target_depth))
1728                             || ((peer_bytes + our_bytes) <
1729                                 (peer_blocksize * 3))) {
1730
1731                                 retval = camdd_queue(dev, NULL);
1732                                 if (retval == 1)
1733                                         break;
1734                                 else if (retval != 0) {
1735                                         error_exit = 1;
1736                                         goto bailout;
1737                                 }
1738
1739                                 camdd_get_depth(dev, &our_depth, &peer_depth,
1740                                                 &our_bytes, &peer_bytes);
1741                         }
1742                 }
1743                 /*
1744                  * See if we have any I/O that is ready to execute.
1745                  */
1746                 buf = STAILQ_FIRST(&dev->run_queue);
1747                 if (buf != NULL) {
1748                         while (dev->target_queue_depth > dev->cur_active_io) {
1749                                 retval = dev->run(dev);
1750                                 if (retval == -1) {
1751                                         dev->flags |= CAMDD_DEV_FLAG_EOF;
1752                                         error_exit = 1;
1753                                         break;
1754                                 } else if (retval != 0) {
1755                                         break;
1756                                 }
1757                         }
1758                 }
1759
1760                 /*
1761                  * We've reached EOF, or our partner has reached EOF.
1762                  */
1763                 if ((dev->flags & CAMDD_DEV_FLAG_EOF)
1764                  || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
1765                         if (dev->write_dev != 0) {
1766                                 if ((STAILQ_EMPTY(&dev->work_queue))
1767                                  && (dev->num_run_queue == 0)
1768                                  && (dev->cur_active_io == 0)) {
1769                                         goto bailout;
1770                                 }
1771                         } else {
1772                                 /*
1773                                  * If we're the reader and the writer
1774                                  * hit EOF, it is already done.  If we hit
1775                                  * EOF, we need to wait until everything
1776                                  * has been flushed out to the writer.
1777                                  */
1778                                 if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
1779                                         goto bailout;
1780                                 } else if ((dev->num_peer_work_queue == 0)
1781                                         && (dev->num_peer_done_queue == 0)
1782                                         && (dev->cur_active_io == 0)
1783                                         && (dev->num_run_queue == 0)) {
1784                                         goto bailout;
1785                                 }
1786                         }
1787                         /*
1788                          * XXX KDM need to do something about the pending
1789                          * queue and cleanup resources.
1790                          */
1791                 } 
1792
1793                 if ((dev->write_dev == 0)
1794                  && (dev->cur_active_io == 0)
1795                  && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
1796                         kq_ts = &ts;
1797                 else
1798                         kq_ts = NULL;
1799
1800                 /*
1801                  * Run kevent to see if there are events to process.
1802                  */
1803                 pthread_mutex_unlock(&dev->mutex);
1804                 retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
1805                 pthread_mutex_lock(&dev->mutex);
1806                 if (retval == -1) {
1807                         warn("%s: error returned from kevent", __func__);
1808                         goto bailout;
1809                 } else if (retval != 0) {
1810                         switch (ke.filter) {
1811                         case EVFILT_READ:
1812                                 if (dev->fetch != NULL) {
1813                                         retval = dev->fetch(dev);
1814                                         if (retval == -1) {
1815                                                 error_exit = 1;
1816                                                 goto bailout;
1817                                         }
1818                                 }
1819                                 break;
1820                         case EVFILT_SIGNAL:
1821                                 /*
1822                                  * We register for this so we don't get
1823                                  * an error as a result of a SIGINFO or a
1824                                  * SIGINT.  It will actually get handled
1825                                  * by the signal handler.  If we get a
1826                                  * SIGINT, bail out without printing an
1827                                  * error message.  Any other signals 
1828                                  * will result in the error message above.
1829                                  */
1830                                 if (ke.ident == SIGINT)
1831                                         goto bailout;
1832                                 break;
1833                         case EVFILT_USER:
1834                                 retval = 0;
1835                                 /*
1836                                  * Check to see if the other thread has
1837                                  * queued any I/O for us to do.  (In this
1838                                  * case we're the writer.)
1839                                  */
1840                                 for (buf = STAILQ_FIRST(&dev->work_queue);
1841                                      buf != NULL;
1842                                      buf = STAILQ_FIRST(&dev->work_queue)) {
1843                                         STAILQ_REMOVE_HEAD(&dev->work_queue,
1844                                                            work_links);
1845                                         retval = camdd_queue(dev, buf);
1846                                         /*
1847                                          * We keep going unless we get an
1848                                          * actual error.  If we get EOF, we
1849                                          * still want to remove the buffers
1850                                          * from the queue and send them back
1851                                          * to the reader thread.
1852                                          */
1853                                         if (retval == -1) {
1854                                                 error_exit = 1;
1855                                                 goto bailout;
1856                                         } else
1857                                                 retval = 0;
1858                                 }
1859
1860                                 /*
1861                                  * Next check to see if the other thread has
1862                                  * queued any completed buffers back to us.
1863                                  * (In this case we're the reader.)
1864                                  */
1865                                 for (buf = STAILQ_FIRST(&dev->peer_done_queue);
1866                                      buf != NULL;
1867                                      buf = STAILQ_FIRST(&dev->peer_done_queue)){
1868                                         STAILQ_REMOVE_HEAD(
1869                                             &dev->peer_done_queue, work_links);
1870                                         dev->num_peer_done_queue--;
1871                                         camdd_peer_done(buf);
1872                                 }
1873                                 break;
1874                         default:
1875                                 warnx("%s: unknown kevent filter %d",
1876                                       __func__, ke.filter);
1877                                 break;
1878                         }
1879                 }
1880         }
1881
1882 bailout:
1883
1884         dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
1885
1886         /* XXX KDM cleanup resources here? */
1887
1888         pthread_mutex_unlock(&dev->mutex);
1889
1890         need_exit = 1;
1891         sem_post(&camdd_sem);
1892
1893         return (NULL);
1894 }
1895
1896 /*
1897  * Simplistic translation of CCB status to our local status.
1898  */
1899 camdd_buf_status
1900 camdd_ccb_status(union ccb *ccb, int protocol)
1901 {
1902         camdd_buf_status status = CAMDD_STATUS_NONE;
1903         cam_status ccb_status;
1904
1905         ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
1906
1907         switch (protocol) {
1908         case PROTO_SCSI:
1909                 switch (ccb_status) {
1910                 case CAM_REQ_CMP: {
1911                         if (ccb->csio.resid == 0) {
1912                                 status = CAMDD_STATUS_OK;
1913                         } else if (ccb->csio.dxfer_len > ccb->csio.resid) {
1914                                 status = CAMDD_STATUS_SHORT_IO;
1915                         } else {
1916                                 status = CAMDD_STATUS_EOF;
1917                         }
1918                         break;
1919                 }
1920                 case CAM_SCSI_STATUS_ERROR: {
1921                         switch (ccb->csio.scsi_status) {
1922                         case SCSI_STATUS_OK:
1923                         case SCSI_STATUS_COND_MET:
1924                         case SCSI_STATUS_INTERMED:
1925                         case SCSI_STATUS_INTERMED_COND_MET:
1926                                 status = CAMDD_STATUS_OK;
1927                                 break;
1928                         case SCSI_STATUS_CMD_TERMINATED:
1929                         case SCSI_STATUS_CHECK_COND:
1930                         case SCSI_STATUS_QUEUE_FULL:
1931                         case SCSI_STATUS_BUSY:
1932                         case SCSI_STATUS_RESERV_CONFLICT:
1933                         default:
1934                                 status = CAMDD_STATUS_ERROR;
1935                                 break;
1936                         }
1937                         break;
1938                 }
1939                 default:
1940                         status = CAMDD_STATUS_ERROR;
1941                         break;
1942                 }
1943                 break;
1944         case PROTO_NVME:
1945                 switch (ccb_status) {
1946                 case CAM_REQ_CMP:
1947                         status = CAMDD_STATUS_OK;
1948                         break;
1949                 default:
1950                         status = CAMDD_STATUS_ERROR;
1951                         break;
1952                 }
1953                 break;
1954         default:
1955                 status = CAMDD_STATUS_ERROR;
1956                 break;
1957         }
1958
1959         return (status);
1960 }
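
/*
 * For example, a SCSI read that transfers every byte completes with
 * CAM_REQ_CMP and csio.resid == 0 and maps to CAMDD_STATUS_OK; the
 * same CCB with a partial residual maps to CAMDD_STATUS_SHORT_IO, and
 * a residual equal to the full dxfer_len (nothing transferred) maps
 * to CAMDD_STATUS_EOF.
 */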
1961
1962 /*
1963  * Queue a buffer to our peer's work thread for writing.
1964  *
1965  * Returns 0 for success, -1 for failure, 1 if the other thread exited.
1966  */
1967 int
1968 camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
1969 {
1970         struct kevent ke;
1971         STAILQ_HEAD(, camdd_buf) local_queue;
1972         struct camdd_buf *buf1, *buf2;
1973         struct camdd_buf_data *data = NULL;
1974         uint64_t peer_bytes_queued = 0;
1975         int active = 1;
1976         int retval = 0;
1977
1978         STAILQ_INIT(&local_queue);
1979
1980         /*
1981          * Since we're the reader, we need to queue our I/O to the writer
1982          * in sequential order in order to make sure it gets written out
1983          * in sequential order.
1984          *
1985          * Check the next expected I/O starting offset.  If this doesn't
1986          * match, put it on the reorder queue.
1987          */
1988         if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
1989
1990                 /*
1991                  * If there is nothing on the queue, there is no sorting
1992                  * needed.
1993                  */
1994                 if (STAILQ_EMPTY(&dev->reorder_queue)) {
1995                         STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
1996                         dev->num_reorder_queue++;
1997                         goto bailout;
1998                 }
1999
2000                 /*
2001                  * Sort in ascending order by starting LBA.  There should
2002                  * be no identical LBAs.
2003                  */
2004                 for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
2005                      buf1 = buf2) {
2006                         buf2 = STAILQ_NEXT(buf1, links);
2007                         if (buf->lba < buf1->lba) {
2008                                 /*
2009                                  * If we're less than the first one, then
2010                                  * we insert at the head of the list
2011                                  * because this has to be the first element
2012                                  * on the list.
2013                                  */
2014                                 STAILQ_INSERT_HEAD(&dev->reorder_queue,
2015                                                    buf, links);
2016                                 dev->num_reorder_queue++;
2017                                 break;
2018                         } else if (buf->lba > buf1->lba) {
2019                                 if (buf2 == NULL) {
2020                                         STAILQ_INSERT_TAIL(&dev->reorder_queue, 
2021                                             buf, links);
2022                                         dev->num_reorder_queue++;
2023                                         break;
2024                                 } else if (buf->lba < buf2->lba) {
2025                                         STAILQ_INSERT_AFTER(&dev->reorder_queue,
2026                                             buf1, buf, links);
2027                                         dev->num_reorder_queue++;
2028                                         break;
2029                                 }
2030                         } else {
2031                                 errx(1, "Found buffers with duplicate LBA %ju!",
2032                                      (uintmax_t)buf->lba);
2033                         }
2034                 }
2035                 goto bailout;
2036         } else {
2037
2038                 /*
2039                  * We're the next expected I/O completion, so put ourselves
2040                  * on the local queue to be sent to the writer.  We use
2041                  * work_links here so that we can queue this to the 
2042                  * peer_work_queue before taking the buffer off of the
2043                  * local_queue.
2044                  */
2045                 dev->next_completion_pos_bytes += buf->len;
2046                 STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
2047
2048                 /*
2049                  * Go through the reorder queue looking for more sequential
2050                  * I/O and add it to the local queue.
2051                  */
2052                 for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
2053                      buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
2054                         /*
2055                          * As soon as we see an I/O that is out of sequence,
2056                          * we're done.
2057                          */
2058                         if ((buf1->lba * dev->sector_size) !=
2059                              dev->next_completion_pos_bytes)
2060                                 break;
2061
2062                         STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
2063                         dev->num_reorder_queue--;
2064                         STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
2065                         dev->next_completion_pos_bytes += buf1->len;
2066                 }
2067         }
2068
2069         /*
2070          * Setup the event to let the other thread know that it has work
2071          * pending.
2072          */
2073         EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
2074                NOTE_TRIGGER, 0, NULL);
2075
2076         /*
2077          * Put this on our shadow queue so that we know what we've queued
2078          * to the other thread.
2079          */
2080         STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
2081                 if (buf1->buf_type != CAMDD_BUF_DATA) {
2082                         errx(1, "%s: should have a data buffer, not an "
2083                             "indirect buffer", __func__);
2084                 }
2085                 data = &buf1->buf_type_spec.data;
2086
2087                 /*
2088                  * We only need to send one EOF to the writer, and don't
2089                  * need to continue sending EOFs after that.
2090                  */
2091                 if (buf1->status == CAMDD_STATUS_EOF) {
2092                         if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
2093                                 STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
2094                                     work_links);
2095                                 camdd_release_buf(buf1);
2096                                 retval = 1;
2097                                 continue;
2098                         }
2099                         dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
2100                 }
2101
2102
2103                 STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
2104                 peer_bytes_queued += (data->fill_len - data->resid);
2105                 dev->peer_bytes_queued += (data->fill_len - data->resid);
2106                 dev->num_peer_work_queue++;
2107         }
2108
2109         if (STAILQ_FIRST(&local_queue) == NULL)
2110                 goto bailout;
2111
2112         /*
2113          * Drop our mutex and pick up the other thread's mutex.  We need to
2114          * do this to avoid deadlocks.
2115          */
2116         pthread_mutex_unlock(&dev->mutex);
2117         pthread_mutex_lock(&dev->peer_dev->mutex);
2118
2119         if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
2120                 /*
2121                  * Put the buffers on the other thread's incoming work queue.
2122                  */
2123                 for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2124                      buf1 = STAILQ_FIRST(&local_queue)) {
2125                         STAILQ_REMOVE_HEAD(&local_queue, work_links);
2126                         STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
2127                                            work_links);
2128                 }
2129                 /*
2130                  * Send an event to the other thread's kqueue to let it know
2131                  * that there is something on the work queue.
2132                  */
2133                 retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2134                 if (retval == -1)
2135                         warn("%s: unable to add peer work_queue kevent",
2136                              __func__);
2137                 else
2138                         retval = 0;
2139         } else
2140                 active = 0;
2141
2142         pthread_mutex_unlock(&dev->peer_dev->mutex);
2143         pthread_mutex_lock(&dev->mutex);
2144
2145         /*
2146          * If the other side isn't active, run through the queue and
2147          * release all of the buffers.
2148          */
2149         if (active == 0) {
2150                 for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2151                      buf1 = STAILQ_FIRST(&local_queue)) {
2152                         STAILQ_REMOVE_HEAD(&local_queue, work_links);
2153                         STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
2154                                       links);
2155                         dev->num_peer_work_queue--;
2156                         camdd_release_buf(buf1);
2157                 }
2158                 dev->peer_bytes_queued -= peer_bytes_queued;
2159                 retval = 1;
2160         }
2161
2162 bailout:
2163         return (retval);
2164 }
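
/*
 * A worked example of the reordering above: with 512-byte sectors and
 * 65536-byte buffers, suppose reads for LBAs 0, 128 and 256 complete
 * in the order 128, 0, 256.  LBA 128 does not match
 * next_completion_pos_bytes (0), so it is parked on the reorder
 * queue.  When LBA 0 completes, it is queued to the writer and
 * next_completion_pos_bytes advances to 65536, which lets the loop
 * above drain LBA 128 from the reorder queue as well; LBA 256 then
 * matches immediately once it completes.
 */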
2165
2166 /*
2167  * Return a buffer to the reader thread when we have completed writing it.
2168  */
2169 int
2170 camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
2171 {
2172         struct kevent ke;
2173         int retval = 0;
2174
2175         /*
2176          * Setup the event to let the other thread know that we have
2177          * completed a buffer.
2178          */
2179         EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0,
2180                NOTE_TRIGGER, 0, NULL);
2181
2182         /*
2183          * Drop our lock and acquire the other thread's lock before
2184          * manipulating its queues.
2185          */
2186         pthread_mutex_unlock(&dev->mutex);
2187         pthread_mutex_lock(&dev->peer_dev->mutex);
2188
2189         /*
2190          * Put the buffer on the reader thread's peer done queue now that
2191          * we have completed it.
2192          */
2193         STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf,
2194                            work_links);
2195         dev->peer_dev->num_peer_done_queue++;
2196
2197         /*
2198          * Send an event to the peer thread to let it know that we've added
2199          * something to its peer done queue.
2200          */
2201         retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2202         if (retval == -1)
2203                 warn("%s: unable to add peer_done_queue kevent", __func__);
2204         else
2205                 retval = 0;
2206
2207         /*
2208          * Drop the other thread's lock and reacquire ours.
2209          */
2210         pthread_mutex_unlock(&dev->peer_dev->mutex);
2211         pthread_mutex_lock(&dev->mutex);
2212
2213         return (retval);
2214 }
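
/*
 * Both functions above wake the peer with the kqueue EVFILT_USER
 * filter.  A minimal sketch of the trigger side, assuming the
 * receiving thread has already registered the same ident on its
 * kqueue (the ident is the address of the queue being signaled, so
 * one kqueue can tell work_queue and peer_done_queue events apart):
 *
 *      struct kevent ke;
 *
 *      EV_SET(&ke, (uintptr_t)&queue, EVFILT_USER, 0, NOTE_TRIGGER,
 *             0, NULL);
 *      if (kevent(peer_kq, &ke, 1, NULL, 0, NULL) == -1)
 *              warn("unable to trigger peer kevent");
 */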
2215
2216 /*
2217  * Free a buffer that was written out by the writer thread and returned to
2218  * the reader thread.
2219  */
2220 void
2221 camdd_peer_done(struct camdd_buf *buf)
2222 {
2223         struct camdd_dev *dev;
2224         struct camdd_buf_data *data;
2225
2226         dev = buf->dev;
2227         if (buf->buf_type != CAMDD_BUF_DATA) {
2228                 errx(1, "%s: should have a data buffer, not an "
2229                     "indirect buffer", __func__);
2230         }
2231
2232         data = &buf->buf_type_spec.data;
2233
2234         STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
2235         dev->num_peer_work_queue--;
2236         dev->peer_bytes_queued -= (data->fill_len - data->resid);
2237
2238         if (buf->status == CAMDD_STATUS_EOF)
2239                 dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2240
2241         STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2242 }
2243
2244 /*
2245  * Assumes caller holds the lock for this device.
2246  */
2247 void
2248 camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
2249                    int *error_count)
2250 {
2251         int retval = 0;
2252
2253         /*
2254          * If we're the reader, we need to send the completed I/O
2255          * to the writer.  If we're the writer, we need to just
2256          * free up resources, or let the reader know if we've
2257          * encountered an error.
2258          */
2259         if (dev->write_dev == 0) {
2260                 retval = camdd_queue_peer_buf(dev, buf);
2261                 if (retval != 0)
2262                         (*error_count)++;
2263         } else {
2264                 struct camdd_buf *tmp_buf, *next_buf;
2265
2266                 STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
2267                                     next_buf) {
2268                         struct camdd_buf *src_buf;
2269                         struct camdd_buf_indirect *indirect;
2270
2271                         STAILQ_REMOVE(&buf->src_list, tmp_buf,
2272                                       camdd_buf, src_links);
2273
2274                         tmp_buf->status = buf->status;
2275
2276                         if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
2277                                 camdd_complete_peer_buf(dev, tmp_buf);
2278                                 continue;
2279                         }
2280
2281                         indirect = &tmp_buf->buf_type_spec.indirect;
2282                         src_buf = indirect->src_buf;
2283                         src_buf->refcount--;
2284                         /*
2285                          * XXX KDM we probably need to account for
2286                          * exactly how many bytes we were able to
2287                          * write.  Allocate the residual to the
2288                          * first N buffers?  Or just track the
2289                          * number of bytes written?  Right now the reader
2290                          * doesn't do anything with a residual.
2291                          */
2292                         src_buf->status = buf->status;
2293                         if (src_buf->refcount <= 0)
2294                                 camdd_complete_peer_buf(dev, src_buf);
2295                         STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
2296                                            tmp_buf, links);
2297                 }
2298
2299                 STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2300         }
2301 }
2302
2303 /*
2304  * Fetch all completed commands from the pass(4) device.
2305  *
2306  * Returns the number of commands received, or -1 if any of the commands
2307  * completed with an error.  Returns 0 if no commands are available.
2308  */
2309 int
2310 camdd_pass_fetch(struct camdd_dev *dev)
2311 {
2312         struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2313         union ccb ccb;
2314         int retval = 0, num_fetched = 0, error_count = 0;
2315
2316         pthread_mutex_unlock(&dev->mutex);
2317         /*
2318          * XXX KDM we don't distinguish between EFAULT and ENOENT.
2319          */
2320         while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
2321                 struct camdd_buf *buf;
2322                 struct camdd_buf_data *data;
2323                 cam_status ccb_status;
2324                 union ccb *buf_ccb;
2325
2326                 buf = ccb.ccb_h.ccb_buf;
2327                 data = &buf->buf_type_spec.data;
2328                 buf_ccb = &data->ccb;
2329
2330                 num_fetched++;
2331
2332                 /*
2333                  * Copy the CCB back out so we get status, sense data, etc.
2334                  */
2335                 bcopy(&ccb, buf_ccb, sizeof(ccb));
2336
2337                 pthread_mutex_lock(&dev->mutex);
2338
2339                 /*
2340                  * We're now done, so take this off the active queue.
2341                  */
2342                 STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
2343                 dev->cur_active_io--;
2344
2345                 ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
2346                 if (ccb_status != CAM_REQ_CMP) {
2347                         cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
2348                                         CAM_EPF_ALL, stderr);
2349                 }
2350
2351                 switch (pass_dev->protocol) {
2352                 case PROTO_SCSI:
2353                         data->resid = ccb.csio.resid;
2354                         dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
2355                         break;
2356                 case PROTO_NVME:
2357                         data->resid = 0;
2358                         dev->bytes_transferred += ccb.nvmeio.dxfer_len;
2359                         break;
2360                 default:
2361                         return (-1);
2362                         break; /*NOTREACHED*/
2363                 }
2364
2365                 if (buf->status == CAMDD_STATUS_NONE)
2366                         buf->status = camdd_ccb_status(&ccb, pass_dev->protocol);
2367                 if (buf->status == CAMDD_STATUS_ERROR)
2368                         error_count++;
2369                 else if (buf->status == CAMDD_STATUS_EOF) {
2370                         /*
2371                          * Once we queue this buffer to our partner thread,
2372                          * he will know that we've hit EOF.
2373                          */
2374                         dev->flags |= CAMDD_DEV_FLAG_EOF;
2375                 }
2376
2377                 camdd_complete_buf(dev, buf, &error_count);
2378
2379                 /*
2380                  * Unlock in preparation for the ioctl call.
2381                  */
2382                 pthread_mutex_unlock(&dev->mutex);
2383         }
2384
2385         pthread_mutex_lock(&dev->mutex);
2386
2387         if (error_count > 0)
2388                 return (-1);
2389         else
2390                 return (num_fetched);
2391 }
2392
2393 /*
2394  * Returns -1 for error, 0 for success/continue, and 1 for resource
2395  * shortage/stop processing.
2396  */
2397 int
2398 camdd_file_run(struct camdd_dev *dev)
2399 {
2400         struct camdd_dev_file *file_dev = &dev->dev_spec.file;
2401         struct camdd_buf_data *data;
2402         struct camdd_buf *buf;
2403         off_t io_offset;
2404         int retval = 0, write_dev = dev->write_dev;
2405         int error_count = 0, no_resources = 0, double_buf_needed = 0;
2406         uint32_t num_sectors = 0, db_len = 0;
2407
2408         buf = STAILQ_FIRST(&dev->run_queue);
2409         if (buf == NULL) {
2410                 no_resources = 1;
2411                 goto bailout;
2412         } else if ((dev->write_dev == 0)
2413                 && (dev->flags & (CAMDD_DEV_FLAG_EOF |
2414                                   CAMDD_DEV_FLAG_EOF_SENT))) {
2415                 STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2416                 dev->num_run_queue--;
2417                 buf->status = CAMDD_STATUS_EOF;
2418                 error_count++;
2419                 goto bailout;
2420         }
2421
2422         /*
2423          * If we're writing, we need to go through the source buffer list
2424          * and create an S/G list.
2425          */
2426         if (write_dev != 0) {
2427                 retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
2428                     dev->sector_size, &num_sectors, &double_buf_needed);
2429                 if (retval != 0) {
2430                         no_resources = 1;
2431                         goto bailout;
2432                 }
2433         }
2434
2435         STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2436         dev->num_run_queue--;
2437
2438         data = &buf->buf_type_spec.data;
2439
2440         /*
2441          * pread(2) and pwrite(2) offsets are byte offsets.
2442          */
2443         io_offset = buf->lba * dev->sector_size;
2444
2445         /*
2446          * Unlock the mutex while we read or write.
2447          */
2448         pthread_mutex_unlock(&dev->mutex);
2449
2450         /*
2451          * Note that we don't need to double buffer if we're the reader
2452          * because in that case, we have allocated a single buffer of
2453          * sufficient size to do the read.  This copy is necessary on
2454          * writes because if one of the components of the S/G list is not
2455          * a sector size multiple, the kernel will reject the write.  This
2456          * is unfortunate but not surprising.  So this will make sure that
2457          * we're using a single buffer that is a multiple of the sector size.
2458          */
2459         if ((double_buf_needed != 0)
2460          && (data->sg_count > 1)
2461          && (write_dev != 0)) {
2462                 uint32_t cur_offset;
2463                 int i;
2464
2465                 if (file_dev->tmp_buf == NULL)
2466                         file_dev->tmp_buf = calloc(dev->blocksize, 1);
2467                 if (file_dev->tmp_buf == NULL) {
2468                         buf->status = CAMDD_STATUS_ERROR;
2469                         error_count++;
2470                         pthread_mutex_lock(&dev->mutex);
2471                         goto bailout;
2472                 }
2473                 for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
2474                         bcopy(data->iovec[i].iov_base,
2475                             &file_dev->tmp_buf[cur_offset],
2476                             data->iovec[i].iov_len);
2477                         cur_offset += data->iovec[i].iov_len;
2478                 }
2479                 db_len = cur_offset;
2480         }
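        /*
         * For example, an S/G list of 1000-, 1048- and 2048-byte
         * elements (4096 bytes in all) contains pieces that are not
         * sector size multiples, so they are copied back to back into
         * tmp_buf above and written as a single 4096-byte request.
         */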
2481
2482         if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
2483                 if (write_dev == 0) {
2484                         /*
2485                          * XXX KDM is there any way we would need a S/G
2486                          * list here?
2487                          */
2488                         retval = pread(file_dev->fd, data->buf,
2489                             buf->len, io_offset);
2490                 } else {
2491                         if (double_buf_needed != 0) {
2492                                 retval = pwrite(file_dev->fd, file_dev->tmp_buf,
2493                                     db_len, io_offset);
2494                         } else if (data->sg_count == 0) {
2495                                 retval = pwrite(file_dev->fd, data->buf,
2496                                     data->fill_len, io_offset);
2497                         } else {
2498                                 retval = pwritev(file_dev->fd, data->iovec,
2499                                     data->sg_count, io_offset);
2500                         }
2501                 }
2502         } else {
2503                 if (write_dev == 0) {
2504                         /*
2505                          * XXX KDM is there any way we would need a S/G
2506                          * list here?
2507                          */
2508                         retval = read(file_dev->fd, data->buf, buf->len);
2509                 } else {
2510                         if (double_buf_needed != 0) {
2511                                 retval = write(file_dev->fd, file_dev->tmp_buf,
2512                                     db_len);
2513                         } else if (data->sg_count == 0) {
2514                                 retval = write(file_dev->fd, data->buf,
2515                                     data->fill_len);
2516                         } else {
2517                                 retval = writev(file_dev->fd, data->iovec,
2518                                     data->sg_count);
2519                         }
2520                 }
2521         }
2522
2523         /* We're done, re-acquire the lock */
2524         pthread_mutex_lock(&dev->mutex);
2525
2526         if (retval >= (ssize_t)data->fill_len) {
2527                  * If the number of bytes transferred exceeds the request size,
2528                  * If the bytes transferred is more than the request size,
2529                  * that indicates an overrun, which should only happen at
2530                  * the end of a transfer if we have to round up to a sector
2531                  * boundary.
2532                  */
2533                 if (buf->status == CAMDD_STATUS_NONE)
2534                         buf->status = CAMDD_STATUS_OK;
2535                 data->resid = 0;
2536                 dev->bytes_transferred += retval;
2537         } else if (retval == -1) {
2538                 warn("Error %s %s", (write_dev) ? "writing to" :
2539                     "reading from", file_dev->filename);
2540
2541                 buf->status = CAMDD_STATUS_ERROR;
2542                 data->resid = data->fill_len;
2543                 error_count++;
2544
2545                 if (dev->debug == 0)
2546                         goto bailout;
2547
2548                 if ((double_buf_needed != 0)
2549                  && (write_dev != 0)) {
2550                         fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
2551                             "offset %ju\n", __func__, file_dev->fd,
2552                             file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
2553                             (uintmax_t)io_offset);
2554                 } else if (data->sg_count == 0) {
2555                         fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
2556                             "offset %ju\n", __func__, file_dev->fd, data->buf,
2557                             data->fill_len, (uintmax_t)buf->lba,
2558                             (uintmax_t)io_offset);
2559                 } else {
2560                         int i;
2561
2562                         fprintf(stderr, "%s: fd %d, len %u, lba %ju "
2563                             "offset %ju\n", __func__, file_dev->fd, 
2564                             data->fill_len, (uintmax_t)buf->lba,
2565                             (uintmax_t)io_offset);
2566
2567                         for (i = 0; i < data->sg_count; i++) {
2568                                 fprintf(stderr, "index %d ptr %p len %zu\n",
2569                                     i, data->iovec[i].iov_base,
2570                                     data->iovec[i].iov_len);
2571                         }
2572                 }
2573         } else if (retval == 0) {
2574                 buf->status = CAMDD_STATUS_EOF;
2575                 if (dev->debug != 0)
2576                         printf("%s: got EOF from %s!\n", __func__,
2577                             file_dev->filename);
2578                 data->resid = data->fill_len;
2579                 error_count++;
2580         } else if (retval < (ssize_t)data->fill_len) {
2581                 if (buf->status == CAMDD_STATUS_NONE)
2582                         buf->status = CAMDD_STATUS_SHORT_IO;
2583                 data->resid = data->fill_len - retval;
2584                 dev->bytes_transferred += retval;
2585         }
2586
2587 bailout:
2588         if (buf != NULL) {
2589                 if (buf->status == CAMDD_STATUS_EOF) {
2590                         struct camdd_buf *buf2;
2591                         dev->flags |= CAMDD_DEV_FLAG_EOF;
2592                         STAILQ_FOREACH(buf2, &dev->run_queue, links)
2593                                 buf2->status = CAMDD_STATUS_EOF;
2594                 }
2595
2596                 camdd_complete_buf(dev, buf, &error_count);
2597         }
2598
2599         if (error_count != 0)
2600                 return (-1);
2601         else if (no_resources != 0)
2602                 return (1);
2603         else
2604                 return (0);
2605 }
2606
2607 /*
2608  * Execute one command from the run queue.  Returns 0 for success, 1 for
2609  * stop processing, and -1 for error.
2610  */
2611 int
2612 camdd_pass_run(struct camdd_dev *dev)
2613 {
2614         struct camdd_buf *buf = NULL;
2615         struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2616         struct camdd_buf_data *data;
2617         uint32_t num_blocks, sectors_used = 0;
2618         union ccb *ccb;
2619         int retval = 0, is_write = dev->write_dev;
2620         int double_buf_needed = 0;
2621
2622         buf = STAILQ_FIRST(&dev->run_queue);
2623         if (buf == NULL) {
2624                 retval = 1;
2625                 goto bailout;
2626         }
2627
2628         /*
2629          * If we're writing, we need to go through the source buffer list
2630          * and create an S/G list.
2631          */
2632         if (is_write != 0) {
2633                 retval = camdd_buf_sg_create(buf, /*iovec*/ 0, dev->sector_size,
2634                     &sectors_used, &double_buf_needed);
2635                 if (retval != 0) {
2636                         retval = -1;
2637                         goto bailout;
2638                 }
2639         }
2640
2641         STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2642         dev->num_run_queue--;
2643
2644         data = &buf->buf_type_spec.data;
2645
2646         /*
2647          * In almost every case the transfer should be a full buffer's
2648          * worth of blocks.  The exception may be at the end of an I/O stream
2649          * for a partial block or at the end of a device.
2650          */
2651         if (is_write != 0)
2652                 num_blocks = sectors_used;
2653         else
2654                 num_blocks = data->fill_len / pass_dev->block_len;
2655
2656         ccb = &data->ccb;
2657
2658         switch (pass_dev->protocol) {
2659         case PROTO_SCSI:
2660                 CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
2661
2662                 scsi_read_write(&ccb->csio,
2663                                 /*retries*/ dev->retry_count,
2664                                 /*cbfcnp*/ NULL,
2665                                 /*tag_action*/ MSG_SIMPLE_Q_TAG,
2666                                 /*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
2667                                            SCSI_RW_WRITE,
2668                                 /*byte2*/ 0,
2669                                 /*minimum_cmd_size*/ dev->min_cmd_size,
2670                                 /*lba*/ buf->lba,
2671                                 /*block_count*/ num_blocks,
2672                                 /*data_ptr*/ (data->sg_count != 0) ?
2673                                              (uint8_t *)data->segs : data->buf,
2674                                 /*dxfer_len*/ (num_blocks * pass_dev->block_len),
2675                                 /*sense_len*/ SSD_FULL_SIZE,
2676                                 /*timeout*/ dev->io_timeout);
2677
2678                 if (data->sg_count != 0) {
2679                         ccb->csio.sglist_cnt = data->sg_count;
2680                 }
2681                 break;
2682         case PROTO_NVME:
2683                 CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->nvmeio);
2684
2685                 nvme_read_write(&ccb->nvmeio,
2686                                 /*retries*/ dev->retry_count,
2687                                 /*cbfcnp*/ NULL,
2688                                 /*nsid*/ pass_dev->dev->target_lun & UINT32_MAX,
2689                                 /*readop*/ dev->write_dev == 0,
2690                                 /*lba*/ buf->lba,
2691                                 /*block_count*/ num_blocks,
2692                                 /*data_ptr*/ (data->sg_count != 0) ?
2693                                              (uint8_t *)data->segs : data->buf,
2694                                 /*dxfer_len*/ (num_blocks * pass_dev->block_len),
2695                                 /*timeout*/ dev->io_timeout);
2696
2697                 ccb->nvmeio.sglist_cnt = data->sg_count;
2698                 break;
2699         default:
2700                 retval = -1;
2701                 goto bailout;
2702         }
2703
2704         /* Disable freezing the device queue */
2705         ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
2706
2707         if (dev->retry_count != 0)
2708                 ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
2709
2710         if (data->sg_count != 0) {
2711                 ccb->ccb_h.flags |= CAM_DATA_SG;
2712         }
2713
2714         /*
2715          * Store a pointer to the buffer in the CCB.  The kernel will
2716          * restore this when we get it back, and we'll use it to identify
2717          * the buffer this CCB came from.
2718          */
2719         ccb->ccb_h.ccb_buf = buf;
2720
2721         /*
2722          * Unlock our mutex in preparation for issuing the ioctl.
2723          */
2724         pthread_mutex_unlock(&dev->mutex);
2725         /*
2726          * Queue the CCB to the pass(4) driver.
2727          */
2728         if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
2729                 pthread_mutex_lock(&dev->mutex);
2730
2731                 warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
2732                      pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
2733                 warn("%s: CCB address is %p", __func__, ccb);
2734                 retval = -1;
2735
2736                 STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2737         } else {
2738                 pthread_mutex_lock(&dev->mutex);
2739
2740                 dev->cur_active_io++;
2741                 STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
2742         }
2743
2744 bailout:
2745         return (retval);
2746 }
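
/*
 * Taken together, camdd_pass_run() and camdd_pass_fetch() implement
 * the asynchronous pass(4) pattern; a condensed sketch, with error
 * handling omitted:
 *
 *      ccb->ccb_h.ccb_buf = buf;                       tag the CCB
 *      ioctl(fd, CAMIOQUEUE, ccb);                     submit, returns at once
 *      kevent(kq, ...);                                wait for EVFILT_READ on fd
 *      while (ioctl(fd, CAMIOGET, &done) != -1)        drain completions
 *              handle done.ccb_h.ccb_buf
 */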
2747
2748 int
2749 camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
2750 {
2751         struct camdd_dev_pass *pass_dev;
2752         uint32_t num_blocks;
2753         int retval = 0;
2754
2755         pass_dev = &dev->dev_spec.pass;
2756
2757         *lba = dev->next_io_pos_bytes / dev->sector_size;
2758         *len = dev->blocksize;
2759         num_blocks = *len / dev->sector_size;
2760
2761         /*
2762          * If max_sector is 0, then we have no set limit.  This can happen
2763          * if we're writing to a file in a filesystem, or reading from
2764          * something like /dev/zero.
2765          */
2766         if ((dev->max_sector != 0)
2767          || (dev->sector_io_limit != 0)) {
2768                 uint64_t max_sector;
2769
2770                 if ((dev->max_sector != 0)
2771                  && (dev->sector_io_limit != 0)) 
2772                         max_sector = min(dev->sector_io_limit, dev->max_sector);
2773                 else if (dev->max_sector != 0)
2774                         max_sector = dev->max_sector;
2775                 else
2776                         max_sector = dev->sector_io_limit;
2777
2778
2779                 /*
2780                  * Check to see whether we're starting off past the end of
2781                  * the device.  If so, we need to just send an EOF      
2782                  * notification to the writer.
2783                  */
2784                 if (*lba > max_sector) {
2785                         *len = 0;
2786                         retval = 1;
2787                 } else if (((*lba + num_blocks) > max_sector + 1)
2788                         || ((*lba + num_blocks) < *lba)) {
2789                         /*
2790                          * If we get here, the request runs past the end
2791                          * of the device (or would wrap), so trim the
2792                          * length down to end at the last usable sector.
2793                          */
2794                         num_blocks = (max_sector + 1) - *lba;
2795                         *len = num_blocks * dev->sector_size;
2796                         retval = 1;
2797                 }
2798         }
2799
2800         dev->next_io_pos_bytes += *len;
2801
2802         return (retval);
2803 }
2804
2805 /*
2806  * Returns 0 for success, 1 for EOF detected, and -1 for failure.
2807  */
2808 int
2809 camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
2810 {
2811         struct camdd_buf *buf = NULL;
2812         struct camdd_buf_data *data;
2813         struct camdd_dev_pass *pass_dev;
2814         size_t new_len;
2815         struct camdd_buf_data *rb_data;
2816         int is_write = dev->write_dev;
2817         int eof_flush_needed = 0;
2818         int retval = 0;
2819         int error;
2820
2821         pass_dev = &dev->dev_spec.pass;
2822
2823         /*
2824          * If we've gotten EOF or our partner has, we should not continue
2825          * queueing I/O.  If we're a writer, though, we should continue
2826          * to write any buffers that don't have EOF status.
2827          */
2828         if ((dev->flags & CAMDD_DEV_FLAG_EOF)
2829          || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
2830           && (is_write == 0))) {
2831                 /*
2832                  * Tell the worker thread that we have seen EOF.
2833                  */
2834                 retval = 1;
2835
2836                 /*
2837                  * If we're the writer, send the buffer back with EOF status.
2838                  */
2839                 if (is_write) {
2840                         read_buf->status = CAMDD_STATUS_EOF;
2841
2842                         error = camdd_complete_peer_buf(dev, read_buf);
2843                 }
2844                 goto bailout;
2845         }
2846
2847         if (is_write == 0) {
2848                 buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2849                 if (buf == NULL) {
2850                         retval = -1;
2851                         goto bailout;
2852                 }
2853                 data = &buf->buf_type_spec.data;
2854
2855                 retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
2856                 if (retval != 0) {
2857                         buf->status = CAMDD_STATUS_EOF;
2858
2859                         if ((buf->len == 0)
2860                          && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
2861                              CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
2862                                 camdd_release_buf(buf);
2863                                 goto bailout;
2864                         }
2865                         dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
2866                 }
2867
2868                 data->fill_len = buf->len;
2869                 data->src_start_offset = buf->lba * dev->sector_size;
2870
2871                 /*
2872                  * Put this on the run queue.
2873                  */
2874                 STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2875                 dev->num_run_queue++;
2876
2877                 /* We're done. */
2878                 goto bailout;
2879         }
2880
2881         /*
2882          * Check for new EOF status from the reader.
2883          */
2884         if ((read_buf->status == CAMDD_STATUS_EOF)
2885          || (read_buf->status == CAMDD_STATUS_ERROR)) {
2886                 dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2887                 if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
2888                  && (read_buf->len == 0)) {
2889                         camdd_complete_peer_buf(dev, read_buf);
2890                         retval = 1;
2891                         goto bailout;
2892                 } else
2893                         eof_flush_needed = 1;
2894         }
2895
2896         /*
2897          * See if we have a buffer we're composing with pieces from our
2898          * partner thread.
2899          */
2900         buf = STAILQ_FIRST(&dev->pending_queue);
2901         if (buf == NULL) {
2902                 uint64_t lba;
2903                 ssize_t len;
2904
2905                 retval = camdd_get_next_lba_len(dev, &lba, &len);
2906                 if (retval != 0) {
2907                         read_buf->status = CAMDD_STATUS_EOF;
2908
2909                         if (len == 0) {
2910                                 dev->flags |= CAMDD_DEV_FLAG_EOF;
2911                                 error = camdd_complete_peer_buf(dev, read_buf);
2912                                 goto bailout;
2913                         }
2914                 }
2915
2916                 /*
2917                  * If we don't have a pending buffer, we need to grab a new
2918                  * one from the free list or allocate another one.
2919                  */
2920                 buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2921                 if (buf == NULL) {
2922                         retval = 1;
2923                         goto bailout;
2924                 }
2925
2926                 buf->lba = lba;
2927                 buf->len = len;
2928
2929                 STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
2930                 dev->num_pending_queue++;
2931         }
2932
2933         data = &buf->buf_type_spec.data;
2934
2935         rb_data = &read_buf->buf_type_spec.data;
2936
2937         if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
2938          && (dev->debug != 0)) {
2939                 printf("%s: WARNING: reader offset %#jx != expected offset "
2940                     "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
2941                     (uintmax_t)dev->next_peer_pos_bytes);
2942         }
2943         dev->next_peer_pos_bytes = rb_data->src_start_offset +
2944             (rb_data->fill_len - rb_data->resid);
2945
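             /*
              * new_len is how full this write buffer would be after
              * absorbing the usable data from read_buf; compare it against
              * the full request length to decide how to proceed.
              */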
2946         new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
2947         if (new_len < buf->len) {
2948                 /*
2949                  * There are three cases here:
2950                  * 1. We need more data to fill up a block, so we put 
2951                  *    this I/O on the queue and wait for more I/O.
2952                  * 2. We have a pending buffer in the queue that is
2953                  *    smaller than our blocksize, but we got an EOF.  So we
2954                  *    need to go ahead and flush the write out.
2955                  * 3. We got an error.
2956                  */
2957
2958                 /*
2959                  * Increment our fill length.
2960                  */
2961                 data->fill_len += (rb_data->fill_len - rb_data->resid);
2962
2963                 /*
2964                  * Add the new read buffer to the list for writing.
2965                  */
2966                 STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2967
2968                 /* Increment the count */
2969                 buf->src_count++;
2970
2971                 if (eof_flush_needed == 0) {
2972                         /*
2973                          * We need to exit, because we don't have enough
2974                          * data yet.
2975                          */
2976                         goto bailout;
2977                 } else {
2978                         /*
2979                          * Take the buffer off of the pending queue.
2980                          */
2981                         STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
2982                                       links);
2983                         dev->num_pending_queue--;
2984
2985                         /*
2986                          * If we need an EOF flush, but there is no data
2987                          * to flush, go ahead and return this buffer.
2988                          */
2989                         if (data->fill_len == 0) {
2990                                 camdd_complete_buf(dev, buf, /*error_count*/0);
2991                                 retval = 1;
2992                                 goto bailout;
2993                         }
2994
2995                         /*
2996                          * Put this on the next queue for execution.
2997                          */
2998                         STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2999                         dev->num_run_queue++;
3000                 }
3001         } else if (new_len == buf->len) {
3002                 /*
3003                  * We have enough data to completely fill one block,
3004                  * so we're ready to issue the I/O.
3005                  */
3006
3007                 /*
3008                  * Take the buffer off of the pending queue.
3009                  */
3010                 STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
3011                 dev->num_pending_queue--;
3012
3013                 /*
3014                  * Add the new read buffer to the list for writing.
3015                  */
3016                 STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
3017
3018                 /* Increment the count */
3019                 buf->src_count++;
3020
3021                 /*
3022                  * Increment our fill length.
3023                  */
3024                 data->fill_len += (rb_data->fill_len - rb_data->resid);
3025
3026                 /*
3027                  * Put this on the next queue for execution.
3028                  */
3029                 STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
3030                 dev->num_run_queue++;
3031         } else {
3032                 struct camdd_buf *idb;
3033                 struct camdd_buf_indirect *indirect;
3034                 uint32_t len_to_go, cur_offset;
3035
3036
3037                 idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
3038                 if (idb == NULL) {
3039                         retval = 1;
3040                         goto bailout;
3041                 }
3042                 indirect = &idb->buf_type_spec.indirect;
3043                 indirect->src_buf = read_buf;
3044                 read_buf->refcount++;
3045                 indirect->offset = 0;
3046                 indirect->start_ptr = rb_data->buf;
3047                 /*
3048                  * We've already established that there is more
3049                  * data in read_buf than we have room for in our
3050                  * current write request.  So this particular chunk
3051                  * of the request should just be the remainder
3052                  * needed to fill up a block.
3053                  */
3054                 indirect->len = buf->len - (data->fill_len - data->resid);
3055
3056                 camdd_buf_add_child(buf, idb);
3057
3058                 /*
3059                  * This buffer is ready to execute, so we can take
3060                  * it off the pending queue and put it on the run
3061                  * queue.
3062                  */
3063                 STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
3064                               links);
3065                 dev->num_pending_queue--;
3066                 STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
3067                 dev->num_run_queue++;
3068
3069                 cur_offset = indirect->offset + indirect->len;
3070
3071                 /*
3072                  * The resulting I/O would be too large to fit in
3073                  * one block.  We need to split this I/O into
3074                  * multiple pieces.  Allocate as many buffers as needed.
3075                  */
3076                 for (len_to_go = rb_data->fill_len - rb_data->resid -
3077                      indirect->len; len_to_go > 0;) {
3078                         struct camdd_buf *new_buf;
3079                         struct camdd_buf_data *new_data;
3080                         uint64_t lba;
3081                         ssize_t len;
3082
3083                         retval = camdd_get_next_lba_len(dev, &lba, &len);
3084                         if ((retval != 0)
3085                          && (len == 0)) {
3086                                 /*
3087                                  * The device has already been marked
3088                                  * as EOF, and there is no space left.
3089                                  */
3090                                 goto bailout;
3091                         }
3092
3093                         new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
3094                         if (new_buf == NULL) {
3095                                 retval = 1;
3096                                 goto bailout;
3097                         }
3098
3099                         new_buf->lba = lba;
3100                         new_buf->len = len;
3101
3102                         idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
3103                         if (idb == NULL) {
3104                                 retval = 1;
3105                                 goto bailout;
3106                         }
3107
3108                         indirect = &idb->buf_type_spec.indirect;
3109
3110                         indirect->src_buf = read_buf;
3111                         read_buf->refcount++;
3112                         indirect->offset = cur_offset;
3113                         indirect->start_ptr = rb_data->buf + cur_offset;
3114                         indirect->len = min(len_to_go, new_buf->len);
3115 #if 0
3116                         if (((indirect->len % dev->sector_size) != 0)
3117                          || ((indirect->offset % dev->sector_size) != 0)) {
3118                                 warnx("offset %ju len %ju not aligned with "
3119                                     "sector size %u", (uintmax_t)indirect->offset,
3120                                     (uintmax_t)indirect->len, dev->sector_size);
3121                         }
3122 #endif
3123                         cur_offset += indirect->len;
3124                         len_to_go -= indirect->len;
3125
3126                         camdd_buf_add_child(new_buf, idb);
3127
3128                         new_data = &new_buf->buf_type_spec.data;
3129
3130                         if ((new_data->fill_len == new_buf->len)
3131                          || (eof_flush_needed != 0)) {
3132                                 STAILQ_INSERT_TAIL(&dev->run_queue,
3133                                                    new_buf, links);
3134                                 dev->num_run_queue++;
3135                         } else if (new_data->fill_len < buf->len) {
3136                                 STAILQ_INSERT_TAIL(&dev->pending_queue,
3137                                                 new_buf, links);
3138                                 dev->num_pending_queue++;
3139                         } else {
3140                                 warnx("%s: too much data in new "
3141                                       "buffer!", __func__);
3142                                 retval = 1;
3143                                 goto bailout;
3144                         }
3145                 }
3146         }
3147
3148 bailout:
3149         return (retval);
3150 }
3151
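     /*
      * Report this device's current queue depth (active I/O plus the run
      * queue) and its peer's, along with the corresponding byte counts.
      */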
3152 void
3153 camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
3154                 uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
3155 {
3156         *our_depth = dev->cur_active_io + dev->num_run_queue;
3157         if (dev->num_peer_work_queue >
3158             dev->num_peer_done_queue)
3159                 *peer_depth = dev->num_peer_work_queue -
3160                               dev->num_peer_done_queue;
3161         else
3162                 *peer_depth = 0;
3163         *our_bytes = *our_depth * dev->blocksize;
3164         *peer_bytes = dev->peer_bytes_queued;
3165 }
3166
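     /*
      * SIGINFO requests a status dump; any other signal we catch requests
      * an exit.  Either way, wake the main loop via the semaphore.
      */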
3167 void
3168 camdd_sig_handler(int sig)
3169 {
3170         if (sig == SIGINFO)
3171                 need_status = 1;
3172         else {
3173                 need_exit = 1;
3174                 error_exit = 1;
3175         }
3176
3177         sem_post(&camdd_sem);
3178 }
3179
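     /*
      * Print the bytes transferred by each device, the elapsed time, and
      * the throughput based on the smaller of the two byte counts.
      */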
3180 void
3181 camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev, 
3182                    struct timespec *start_time)
3183 {
3184         struct timespec done_time;
3185         uint64_t total_ns;
3186         long double mb_sec, total_sec;
3187         int error = 0;
3188
3189         error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
3190         if (error != 0) {
3191                 warn("Unable to get done time");
3192                 return;
3193         }
3194
3195         timespecsub(&done_time, start_time, &done_time);
3196
3197         total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000);
3198         total_sec = total_ns;
3199         total_sec /= 1000000000;
3200
3201         fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
3202                 "%.4Lf seconds elapsed\n",
3203                 (uintmax_t)camdd_dev->bytes_transferred,
3204                 (camdd_dev->write_dev == 0) ? "read from" : "written to",
3205                 camdd_dev->device_name,
3206                 (uintmax_t)other_dev->bytes_transferred,
3207                 (other_dev->write_dev == 0) ? "read from" : "written to",
3208                 other_dev->device_name, total_sec);
3209
3210         mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred);
3211         mb_sec /= 1024 * 1024;
3212         mb_sec *= 1000000000;
3213         mb_sec /= total_ns;
3214         fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
3215 }
3216
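     /*
      * Open and probe both devices, wire them together as peers, start a
      * worker thread for each, and wait for completion, a fatal signal, or
      * a SIGINFO status request.
      */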
3217 int
3218 camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts, uint64_t max_io,
3219          int retry_count, int timeout)
3220 {
3221         struct cam_device *new_cam_dev = NULL;
3222         struct camdd_dev *devs[2];
3223         struct timespec start_time;
3224         pthread_t threads[2];
3225         int unit = 0;
3226         int error = 0;
3227         int i;
3228
3229         bzero(devs, sizeof(devs));
3230
3231         if (num_io_opts != 2) {
3232                 warnx("Must have one input and one output path");
3233                 error = 1;
3234                 goto bailout;
3235         }
3236
3237         for (i = 0; i < num_io_opts; i++) {
3238                 switch (io_opts[i].dev_type) {
3239                 case CAMDD_DEV_PASS: {
3240                         if (isdigit(io_opts[i].dev_name[0])) {
3241                                 camdd_argmask new_arglist = CAMDD_ARG_NONE;
3242                                 int bus = 0, target = 0, lun = 0;
3243                                 int rv;
3244
3245                                 /* device specified as bus:target[:lun] */
3246                                 rv = parse_btl(io_opts[i].dev_name, &bus,
3247                                     &target, &lun, &new_arglist);
3248                                 if (rv < 2) {
3249                                         warnx("numeric device specification "
3250                                              "must be either bus:target, or "
3251                                              "bus:target:lun");
3252                                         error = 1;
3253                                         goto bailout;
3254                                 }
3255                                 /* default to 0 if lun was not specified */
3256                                 if ((new_arglist & CAMDD_ARG_LUN) == 0) {
3257                                         lun = 0;
3258                                         new_arglist |= CAMDD_ARG_LUN;
3259                                 }
3260                                 new_cam_dev = cam_open_btl(bus, target, lun,
3261                                     O_RDWR, NULL);
3262                         } else {
3263                                 char name[30];
3264
3265                                 if (cam_get_device(io_opts[i].dev_name, name,
3266                                                    sizeof name, &unit) == -1) {
3267                                         warnx("%s", cam_errbuf);
3268                                         error = 1;
3269                                         goto bailout;
3270                                 }
3271                                 new_cam_dev = cam_open_spec_device(name, unit,
3272                                     O_RDWR, NULL);
3273                         }
3274
3275                         if (new_cam_dev == NULL) {
3276                                 warnx("%s", cam_errbuf);
3277                                 error = 1;
3278                                 goto bailout;
3279                         }
3280
3281                         devs[i] = camdd_probe_pass(new_cam_dev,
3282                             /*io_opts*/ &io_opts[i],
3283                             CAMDD_ARG_ERR_RECOVER, 
3284                             /*probe_retry_count*/ 3,
3285                             /*probe_timeout*/ 5000,
3286                             /*io_retry_count*/ retry_count,
3287                             /*io_timeout*/ timeout);
3288                         if (devs[i] == NULL) {
3289                                 warn("Unable to probe device %s%u",
3290                                      new_cam_dev->device_name,
3291                                      new_cam_dev->dev_unit_num);
3292                                 error = 1;
3293                                 goto bailout;
3294                         }
3295                         break;
3296                 }
3297                 case CAMDD_DEV_FILE: {
3298                         int fd = -1;
3299
3300                         if (io_opts[i].dev_name[0] == '-') {
3301                                 if (io_opts[i].write_dev != 0)
3302                                         fd = STDOUT_FILENO;
3303                                 else
3304                                         fd = STDIN_FILENO;
3305                         } else {
3306                                 if (io_opts[i].write_dev != 0) {
3307                                         fd = open(io_opts[i].dev_name,
3308                                             O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
3309                                 } else {
3310                                         fd = open(io_opts[i].dev_name,
3311                                             O_RDONLY);
3312                                 }
3313                         }
3314                         if (fd == -1) {
3315                                 warn("error opening file %s",
3316                                     io_opts[i].dev_name);
3317                                 error = 1;
3318                                 goto bailout;
3319                         }
3320
3321                         devs[i] = camdd_probe_file(fd, &io_opts[i],
3322                             retry_count, timeout);
3323                         if (devs[i] == NULL) {
3324                                 error = 1;
3325                                 goto bailout;
3326                         }
3327
3328                         break;
3329                 }
3330                 default:
3331                         warnx("Unknown device type %d (%s)",
3332                             io_opts[i].dev_type, io_opts[i].dev_name);
3333                         error = 1;
3334                         goto bailout;
3335                         break; /*NOTREACHED*/
3336                 }
3337
3338                 devs[i]->write_dev = io_opts[i].write_dev;
3339
3340                 devs[i]->start_offset_bytes = io_opts[i].offset;
3341
3342                 if (max_io != 0) {
3343                         devs[i]->sector_io_limit =
3344                             (devs[i]->start_offset_bytes /
3345                             devs[i]->sector_size) +
3346                             (max_io / devs[i]->sector_size) - 1;
3347                 }
3348
3349                 devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
3350         devs[i]->next_completion_pos_bytes = devs[i]->start_offset_bytes;
3351         }
3352
3353         devs[0]->peer_dev = devs[1];
3354         devs[1]->peer_dev = devs[0];
3355         devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
3356         devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
3357
3358         sem_init(&camdd_sem, /*pshared*/ 0, 0);
3359
3360         signal(SIGINFO, camdd_sig_handler);
3361         signal(SIGINT, camdd_sig_handler);
3362
3363         error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
3364         if (error != 0) {
3365                 warn("Unable to get start time");
3366                 goto bailout;
3367         }
3368
3369         for (i = 0; i < num_io_opts; i++) {
3370                 error = pthread_create(&threads[i], NULL, camdd_worker,
3371                                        (void *)devs[i]);
3372                 if (error != 0) {
3373                         warnc(error, "pthread_create() failed");
3374                         goto bailout;
3375                 }
3376         }
3377
3378         for (;;) {
3379                 if ((sem_wait(&camdd_sem) == -1)
3380                  || (need_exit != 0)) {
3381                         struct kevent ke;
3382
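                             /*
                              * Post a NOTE_TRIGGER user event to each worker's
                              * kqueue so it wakes up and notices the EOF flag
                              * set below.
                              */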
3383                         for (i = 0; i < num_io_opts; i++) {
3384                                 EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
3385                                     EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
3386
3387                                 devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
3388
3389                                 error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
3390                                                 NULL);
3391                                 if (error == -1)
3392                                         warn("%s: unable to wake up thread",
3393                                             __func__);
3394                                 error = 0;
3395                         }
3396                         break;
3397                 } else if (need_status != 0) {
3398                         camdd_print_status(devs[0], devs[1], &start_time);
3399                         need_status = 0;
3400                 }
3401         }
3402         for (i = 0; i < num_io_opts; i++) {
3403                 pthread_join(threads[i], NULL);
3404         }
3405
3406         camdd_print_status(devs[0], devs[1], &start_time);
3407
3408 bailout:
3409
3410         for (i = 0; i < num_io_opts; i++)
3411                 camdd_free_dev(devs[i]);
3412
3413         return (error + error_exit);
3414 }
3415
3416 void
3417 usage(void)
3418 {
3419         fprintf(stderr,
3420 "usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
3421 "              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
3422 "              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
3423 "              <-i|-o file=/dev/nsa0,bs=512K>\n"
3424 "              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
3425 "Option description\n"
3426 "-i <arg=val>  Specify input device/file and parameters\n"
3427 "-o <arg=val>  Specify output device/file and parameters\n"
3428 "Input and Output parameters\n"
3429 "pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
3430 "file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
3431 "              or - for stdin/stdout\n"
3432 "bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
3433 "offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
3434 "              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
3435 "depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
3436 "mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
3437 "Optional arguments\n"
3438 "-C retry_cnt  Specify a retry count for pass(4) devices\n"
3439 "-E            Enable CAM error recovery for pass(4) devices\n"
3440 "-m max_io     Specify the maximum amount to be transferred in bytes or\n"
3441 "              using K, G, M, etc. suffixes\n"
3442 "-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
3443 "-v            Enable verbose error recovery\n"
3444 "-h            Print this message\n");
3445 }
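     /*
      * An illustrative invocation (the device and file names here are just
      * examples): copy at most 100M from pass0 to a file, 1M at a time,
      * with a queue depth of 4:
      *
      *   camdd -i pass=pass0,bs=1M,depth=4 -o file=/tmp/file.img -m 100M
      */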
3446
3447
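     /*
      * Parse the comma-separated name=value list given to -i or -o into
      * the supplied camdd_io_opts structure.
      */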
3448 int
3449 camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
3450 {
3451         char *tmpstr, *tmpstr2;
3452         char *orig_tmpstr = NULL;
3453         int retval = 0;
3454
3455         io_opts->write_dev = is_write;
3456
3457         tmpstr = strdup(args);
3458         if (tmpstr == NULL) {
3459                 warn("strdup failed");
3460                 retval = 1;
3461                 goto bailout;
3462         }
3463         orig_tmpstr = tmpstr;
3464         while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
3465                 char *name, *value;
3466
3467                 /*
3468                  * If the user creates an empty parameter by putting in two
3469                  * commas, skip over it and look for the next field.
3470                  */
3471                 if (*tmpstr2 == '\0')
3472                         continue;
3473
3474                 name = strsep(&tmpstr2, "=");
3475                 if (*name == '\0') {
3476                         warnx("Got empty I/O parameter name");
3477                         retval = 1;
3478                         goto bailout;
3479                 }
3480                 value = strsep(&tmpstr2, "=");
3481                 if ((value == NULL)
3482                  || (*value == '\0')) {
3483                         warnx("Empty I/O parameter value for %s", name);
3484                         retval = 1;
3485                         goto bailout;
3486                 }
3487                 if (strncasecmp(name, "file", 4) == 0) {
3488                         io_opts->dev_type = CAMDD_DEV_FILE;
3489                         io_opts->dev_name = strdup(value);
3490                         if (io_opts->dev_name == NULL) {
3491                                 warn("Error allocating memory");
3492                                 retval = 1;
3493                                 goto bailout;
3494                         }
3495                 } else if (strncasecmp(name, "pass", 4) == 0) {
3496                         io_opts->dev_type = CAMDD_DEV_PASS;
3497                         io_opts->dev_name = strdup(value);
3498                         if (io_opts->dev_name == NULL) {
3499                                 warn("Error allocating memory");
3500                                 retval = 1;
3501                                 goto bailout;
3502                         }
3503                 } else if ((strncasecmp(name, "bs", 2) == 0)
3504                         || (strncasecmp(name, "blocksize", 9) == 0)) {
3505                         retval = expand_number(value, &io_opts->blocksize);
3506                         if (retval == -1) {
3507                                 warn("expand_number(3) failed on %s=%s", name,
3508                                     value);
3509                                 retval = 1;
3510                                 goto bailout;
3511                         }
3512                 } else if (strncasecmp(name, "depth", 5) == 0) {
3513                         char *endptr;
3514
3515                         io_opts->queue_depth = strtoull(value, &endptr, 0);
3516                         if (*endptr != '\0') {
3517                                 warnx("invalid queue depth %s", value);
3518                                 retval = 1;
3519                                 goto bailout;
3520                         }
3521                 } else if (strncasecmp(name, "mcs", 3) == 0) {
3522                         char *endptr;
3523
3524                         io_opts->min_cmd_size = strtol(value, &endptr, 0);
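                             /*
                              * SCSI CDBs are at most 16 bytes, so valid minimum
                              * command sizes run from 0 through 16.
                              */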
3525                         if ((*endptr != '\0')
3526                          || ((io_opts->min_cmd_size > 16)
3527                           || (io_opts->min_cmd_size < 0))) {
3528                                 warnx("invalid minimum cmd size %s", value);
3529                                 retval = 1;
3530                                 goto bailout;
3531                         }
3532                 } else if (strncasecmp(name, "offset", 6) == 0) {
3533                         retval = expand_number(value, &io_opts->offset);
3534                         if (retval == -1) {
3535                                 warn("expand_number(3) failed on %s=%s", name,
3536                                     value);
3537                                 retval = 1;
3538                                 goto bailout;
3539                         }
3540                 } else if (strncasecmp(name, "debug", 5) == 0) {
3541                         char *endptr;
3542
3543                         io_opts->debug = strtoull(value, &endptr, 0);
3544                         if (*endptr != '\0') {
3545                                 warnx("invalid debug level %s", value);
3546                                 retval = 1;
3547                                 goto bailout;
3548                         }
3549                 } else {
3550                         warnx("Unrecognized parameter %s=%s", name, value);
3551                 }
3552         }
3553 bailout:
3554         free(orig_tmpstr);
3555
3556         return (retval);
3557 }
3558
3559 int
3560 main(int argc, char **argv)
3561 {
3562         int c;
3563         camdd_argmask arglist = CAMDD_ARG_NONE;
3564         int timeout = 0, retry_count = 1;
3565         int error = 0;
3566         uint64_t max_io = 0;
3567         struct camdd_io_opts *opt_list = NULL;
3568
3569         if (argc == 1) {
3570                 usage();
3571                 exit(1);
3572         }
3573
3574         opt_list = calloc(2, sizeof(struct camdd_io_opts));
3575         if (opt_list == NULL) {
3576                 warn("Unable to allocate option list");
3577                 error = 1;
3578                 goto bailout;
3579         }
3580
3581         while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1) {
3582                 switch (c) {
3583                 case 'C':
3584                         retry_count = strtol(optarg, NULL, 0);
3585                         if (retry_count < 0)
3586                                 errx(1, "retry count %d is < 0",
3587                                      retry_count);
3588                         arglist |= CAMDD_ARG_RETRIES;
3589                         break;
3590                 case 'E':
3591                         arglist |= CAMDD_ARG_ERR_RECOVER;
3592                         break;
3593                 case 'i':
3594                 case 'o':
3595                         if (((c == 'i')
3596                           && (opt_list[0].dev_type != CAMDD_DEV_NONE))
3597                          || ((c == 'o')
3598                           && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
3599                                 errx(1, "Only one input and output path "
3600                                     "allowed");
3601                         }
3602                         error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
3603                             (c == 'o') ? &opt_list[1] : &opt_list[0]);
3604                         if (error != 0)
3605                                 goto bailout;
3606                         break;
3607                 case 'm':
3608                         error = expand_number(optarg, &max_io);
3609                         if (error == -1) {
3610                                 warn("invalid maximum I/O amount %s", optarg);
3611                                 error = 1;
3612                                 goto bailout;
3613                         }
3614                         break;
3615                 case 't':
3616                         timeout = strtol(optarg, NULL, 0);
3617                         if (timeout < 0)
3618                                 errx(1, "invalid timeout %d", timeout);
3619                         /* Convert the timeout from seconds to ms */
3620                         timeout *= 1000;
3621                         arglist |= CAMDD_ARG_TIMEOUT;
3622                         break;
3623                 case 'v':
3624                         arglist |= CAMDD_ARG_VERBOSE;
3625                         break;
3626                 case 'h':
3627                 default:
3628                         usage();
3629                         exit(1);
3630                         break; /*NOTREACHED*/
3631                 }
3632         }
3633
3634         if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
3635          || (opt_list[1].dev_type == CAMDD_DEV_NONE))
3636                 errx(1, "Must specify both -i and -o");
3637
3638         /*
3639          * Use the default timeout if the user hasn't specified one.
3640          */
3641         if (timeout == 0)
3642                 timeout = CAMDD_PASS_RW_TIMEOUT;
3643
3644         error = camdd_rw(opt_list, 2, max_io, retry_count, timeout);
3645
3646 bailout:
3647         free(opt_list);
3648
3649         exit(error);
3650 }