/*-
 * Copyright (c) 1997-2007 Kenneth D. Merry
 * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Ken Merry           (Spectra Logic Corporation)
 */

/*
 * This is eventually intended to be:
 * - A basic data transfer/copy utility
 * - A simple benchmark utility
 * - An example of how to use the asynchronous pass(4) driver interface.
 */
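
/*
 * Example invocations (illustrative only; device names are hypothetical,
 * and the authoritative option syntax is in usage() below):
 *
 *	camdd -i pass=da8,bs=512k,depth=4 -o pass=da3,bs=512k,depth=4
 *	camdd -i pass=da8,bs=512k,depth=4 -o file=/tmp/da8.img
 *
 * Each -i/-o argument names either a pass(4) device or a file, with
 * optional per-device blocksize (bs) and queue depth (depth) settings.
 */
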
#include <sys/cdefs.h>
#include <sys/ioctl.h>
#include <sys/stdint.h>
#include <sys/types.h>
#include <sys/endian.h>
#include <sys/param.h>
#include <sys/sbuf.h>
#include <sys/stat.h>
#include <sys/event.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <vm/vm.h>
#include <sys/bus.h>
#include <sys/bus_dma.h>
#include <sys/mtio.h>
#include <sys/conf.h>
#include <sys/disk.h>

#include <stdio.h>
#include <stdlib.h>
#include <semaphore.h>
#include <string.h>
#include <unistd.h>
#include <inttypes.h>
#include <limits.h>
#include <fcntl.h>
#include <ctype.h>
#include <err.h>
#include <libutil.h>
#include <pthread.h>
#include <assert.h>
#include <bsdxml.h>

#include <cam/cam.h>
#include <cam/cam_debug.h>
#include <cam/cam_ccb.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/scsi/scsi_pass.h>
#include <cam/scsi/scsi_message.h>
#include <cam/scsi/smp_all.h>
#include <cam/nvme/nvme_all.h>
#include <camlib.h>
#include <mtlib.h>
#include <zlib.h>
typedef enum {
        CAMDD_CMD_NONE          = 0x00000000,
        CAMDD_CMD_HELP          = 0x00000001,
        CAMDD_CMD_WRITE         = 0x00000002,
        CAMDD_CMD_READ          = 0x00000003
} camdd_cmdmask;

typedef enum {
        CAMDD_ARG_NONE          = 0x00000000,
        CAMDD_ARG_VERBOSE       = 0x00000001,
        CAMDD_ARG_ERR_RECOVER   = 0x00000080,
} camdd_argmask;

typedef enum {
        CAMDD_DEV_NONE          = 0x00,
        CAMDD_DEV_PASS          = 0x01,
        CAMDD_DEV_FILE          = 0x02
} camdd_dev_type;

struct camdd_io_opts {
        camdd_dev_type  dev_type;
        char            *dev_name;
        uint64_t        blocksize;
        uint64_t        queue_depth;
        uint64_t        offset;
        int             min_cmd_size;
        int             write_dev;
        uint64_t        debug;
};

typedef enum {
        CAMDD_BUF_NONE,
        CAMDD_BUF_DATA,
        CAMDD_BUF_INDIRECT
} camdd_buf_type;

struct camdd_buf_indirect {
        /*
         * Pointer to the source buffer.
         */
        struct camdd_buf *src_buf;

        /*
         * Offset into the source buffer, in bytes.
         */
        uint64_t          offset;
        /*
         * Pointer to the starting point in the source buffer.
         */
        uint8_t          *start_ptr;

        /*
         * Length of this chunk in bytes.
         */
        size_t            len;
};
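
/*
 * Sketch of how an indirect buffer windows into its source (values are
 * purely illustrative): an indirect chunk covering the second half of a
 * 128k source data buffer would look roughly like:
 *
 *	indirect->src_buf   = src;
 *	indirect->offset    = 65536;
 *	indirect->start_ptr = src->buf_type_spec.data.buf + 65536;
 *	indirect->len       = 65536;
 */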

struct camdd_buf_data {
        /*
         * Buffer allocated when we allocate this camdd_buf.  This should
         * be the size of the blocksize for this device.
         */
        uint8_t                 *buf;

        /*
         * The amount of backing store allocated in buf.  Generally this
         * will be the blocksize of the device.
         */
        uint32_t                 alloc_len;

        /*
         * The amount of data that was put into the buffer (on reads) or
         * the amount of data we have put onto the src_list so far (on
         * writes).
         */
        uint32_t                 fill_len;

        /*
         * The amount of data that was not transferred.
         */
        uint32_t                 resid;

        /*
         * Starting byte offset on the reader.
         */
        uint64_t                 src_start_offset;

        /*
         * CCB used for pass(4) device targets.
         */
        union ccb                ccb;

        /*
         * Number of scatter/gather segments.
         */
        int                      sg_count;

        /*
         * Set if we had to tack on an extra buffer to round the transfer
         * up to a sector size.
         */
        int                      extra_buf;

        /*
         * Scatter/gather list used generally when we're the writer for a
         * pass(4) device.
         */
        bus_dma_segment_t       *segs;

        /*
         * Scatter/gather list used generally when we're the writer for a
         * file or block device.
         */
        struct iovec            *iovec;
};

union camdd_buf_types {
        struct camdd_buf_indirect       indirect;
        struct camdd_buf_data           data;
};

typedef enum {
        CAMDD_STATUS_NONE,
        CAMDD_STATUS_OK,
        CAMDD_STATUS_SHORT_IO,
        CAMDD_STATUS_EOF,
        CAMDD_STATUS_ERROR
} camdd_buf_status;

struct camdd_buf {
        camdd_buf_type           buf_type;
        union camdd_buf_types    buf_type_spec;

        camdd_buf_status         status;

        uint64_t                 lba;
        size_t                   len;

        /*
         * A reference count of how many indirect buffers point to this
         * buffer.
         */
        int                      refcount;

        /*
         * A link back to our parent device.
         */
        struct camdd_dev        *dev;
        STAILQ_ENTRY(camdd_buf)  links;
        STAILQ_ENTRY(camdd_buf)  work_links;

        /*
         * A count of the buffers on the src_list.
         */
        int                      src_count;

        /*
         * List of buffers from our partner thread that are the components
         * of this buffer for the I/O.  Uses src_links.
         */
        STAILQ_HEAD(,camdd_buf)  src_list;
        STAILQ_ENTRY(camdd_buf)  src_links;
};
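
/*
 * The src_list/refcount machinery above is what ties the reader's and
 * writer's buffers together: when one side's blocksize is larger than the
 * other's, a single data buffer is carved into several indirect buffers,
 * each of which lands on a peer buffer's src_list, while the refcount
 * tracks how many indirect chunks still reference the backing data buffer.
 */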

#define NUM_DEV_TYPES   2

struct camdd_dev_pass {
        int                      scsi_dev_type;
        int                      protocol;
        struct cam_device       *dev;
        uint64_t                 max_sector;
        uint32_t                 block_len;
        uint32_t                 cpi_maxio;
};

typedef enum {
        CAMDD_FILE_NONE,
        CAMDD_FILE_REG,
        CAMDD_FILE_STD,
        CAMDD_FILE_PIPE,
        CAMDD_FILE_DISK,
        CAMDD_FILE_TAPE,
        CAMDD_FILE_TTY,
        CAMDD_FILE_MEM
} camdd_file_type;

typedef enum {
        CAMDD_FF_NONE           = 0x00,
        CAMDD_FF_CAN_SEEK       = 0x01
} camdd_file_flags;

struct camdd_dev_file {
        int                      fd;
        struct stat              sb;
        char                     filename[MAXPATHLEN + 1];
        camdd_file_type          file_type;
        camdd_file_flags         file_flags;
        uint8_t                 *tmp_buf;
};

struct camdd_dev_block {
        int                      fd;
        uint64_t                 size_bytes;
        uint32_t                 block_len;
};

union camdd_dev_spec {
        struct camdd_dev_pass   pass;
        struct camdd_dev_file   file;
        struct camdd_dev_block  block;
};

typedef enum {
        CAMDD_DEV_FLAG_NONE             = 0x00,
        CAMDD_DEV_FLAG_EOF              = 0x01,
        CAMDD_DEV_FLAG_PEER_EOF         = 0x02,
        CAMDD_DEV_FLAG_ACTIVE           = 0x04,
        CAMDD_DEV_FLAG_EOF_SENT         = 0x08,
        CAMDD_DEV_FLAG_EOF_QUEUED       = 0x10
} camdd_dev_flags;

struct camdd_dev {
        camdd_dev_type           dev_type;
        union camdd_dev_spec     dev_spec;
        camdd_dev_flags          flags;
        char                     device_name[MAXPATHLEN+1];
        uint32_t                 blocksize;
        uint32_t                 sector_size;
        uint64_t                 max_sector;
        uint64_t                 sector_io_limit;
        int                      min_cmd_size;
        int                      write_dev;
        int                      retry_count;
        int                      io_timeout;
        int                      debug;
        uint64_t                 start_offset_bytes;
        uint64_t                 next_io_pos_bytes;
        uint64_t                 next_peer_pos_bytes;
        uint64_t                 next_completion_pos_bytes;
        uint64_t                 peer_bytes_queued;
        uint64_t                 bytes_transferred;
        uint32_t                 target_queue_depth;
        uint32_t                 cur_active_io;
        uint8_t                 *extra_buf;
        uint32_t                 extra_buf_len;
        struct camdd_dev        *peer_dev;
        pthread_mutex_t          mutex;
        pthread_cond_t           cond;
        int                      kq;

        int                      (*run)(struct camdd_dev *dev);
        int                      (*fetch)(struct camdd_dev *dev);

        /*
         * Buffers that are available for I/O.  Uses links.
         */
        STAILQ_HEAD(,camdd_buf)  free_queue;

        /*
         * Free indirect buffers.  These are used for breaking a large
         * buffer into multiple pieces.
         */
        STAILQ_HEAD(,camdd_buf)  free_indirect_queue;

        /*
         * Buffers that have been queued to the kernel.  Uses links.
         */
        STAILQ_HEAD(,camdd_buf)  active_queue;

        /*
         * Will generally contain one of our buffers that is waiting for enough
         * I/O from our partner thread to be able to execute.  This will
         * generally happen when our per-I/O-size is larger than the
         * partner thread's per-I/O-size.  Uses links.
         */
        STAILQ_HEAD(,camdd_buf)  pending_queue;

        /*
         * Number of buffers on the pending queue.
         */
        int                      num_pending_queue;

        /*
         * Buffers that are filled and ready to execute.  This is used when
         * our partner (reader) thread sends us blocks that are larger than
         * our blocksize, and so we have to split them into multiple pieces.
         */
        STAILQ_HEAD(,camdd_buf)  run_queue;

        /*
         * Number of buffers on the run queue.
         */
        int                      num_run_queue;

        STAILQ_HEAD(,camdd_buf)  reorder_queue;

        int                      num_reorder_queue;

        /*
         * Buffers that have been queued to us by our partner thread
         * (generally the reader thread) to be written out.  Uses
         * work_links.
         */
        STAILQ_HEAD(,camdd_buf)  work_queue;

        /*
         * Buffers that have been completed by our partner thread.  Uses
         * work_links.
         */
        STAILQ_HEAD(,camdd_buf)  peer_done_queue;

        /*
         * Number of buffers on the peer done queue.
         */
        uint32_t                 num_peer_done_queue;

        /*
         * A list of buffers that we have queued to our peer thread.  Uses
         * links.
         */
        STAILQ_HEAD(,camdd_buf)  peer_work_queue;

        /*
         * Number of buffers on the peer work queue.
         */
        uint32_t                 num_peer_work_queue;
};
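
/*
 * A rough sketch of how a buffer moves through the queues above, as
 * described in the per-queue comments (the exact transitions live in the
 * run/fetch/queue routines later in this file):
 *
 *	free_queue -> active_queue          (I/O queued to the kernel)
 *	active_queue -> peer_work_queue     (handed to the partner thread)
 *	peer_done_queue -> free_queue       (partner finished with it)
 *
 * On the receiving side, buffers from the partner arrive on work_queue
 * and sit on pending_queue or run_queue until enough data has accumulated
 * to issue a full-sized I/O.
 */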

static sem_t camdd_sem;
static sig_atomic_t need_exit = 0;
static sig_atomic_t error_exit = 0;
static sig_atomic_t need_status = 0;

#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif

/* Generically useful offsets into the peripheral private area */
#define ppriv_ptr0 periph_priv.entries[0].ptr
#define ppriv_ptr1 periph_priv.entries[1].ptr
#define ppriv_field0 periph_priv.entries[0].field
#define ppriv_field1 periph_priv.entries[1].field

#define ccb_buf ppriv_ptr0

#define CAMDD_FILE_DEFAULT_BLOCK        524288
#define CAMDD_FILE_DEFAULT_DEPTH        1
#define CAMDD_PASS_MAX_BLOCK            1048576
#define CAMDD_PASS_DEFAULT_DEPTH        6
#define CAMDD_PASS_RW_TIMEOUT           (60 * 1000)

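/*
 * Note that pass(4) CCB timeouts, including CAMDD_PASS_RW_TIMEOUT above,
 * are expressed in milliseconds, so the default read/write timeout works
 * out to 60 seconds.
 */
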
static int parse_btl(char *tstr, int *bus, int *target, int *lun);
void camdd_free_dev(struct camdd_dev *dev);
struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
                                  struct kevent *new_ke, int num_ke,
                                  int retry_count, int timeout);
static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
                                         camdd_buf_type buf_type);
void camdd_release_buf(struct camdd_buf *buf);
struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
                        uint32_t sector_size, uint32_t *num_sectors_used,
                        int *double_buf_needed);
uint32_t camdd_buf_get_len(struct camdd_buf *buf);
void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
                     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
         camdd_argmask arglist, int probe_retry_count,
         int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
int camdd_probe_pass_nvme(struct cam_device *cam_dev, union ccb *ccb,
         camdd_argmask arglist, int probe_retry_count,
         int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
                                   int retry_count, int timeout);
struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
                                   struct camdd_io_opts *io_opts,
                                   camdd_argmask arglist, int probe_retry_count,
                                   int probe_timeout, int io_retry_count,
                                   int io_timeout);
void nvme_read_write(struct ccb_nvmeio *nvmeio, uint32_t retries,
                void (*cbfcnp)(struct cam_periph *, union ccb *),
                uint32_t nsid, int readop, uint64_t lba,
                uint32_t block_count, uint8_t *data_ptr, uint32_t dxfer_len,
                uint32_t timeout);
void *camdd_file_worker(void *arg);
camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol);
int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd);
int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
void camdd_peer_done(struct camdd_buf *buf);
void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
                        int *error_count);
int camdd_pass_fetch(struct camdd_dev *dev);
int camdd_file_run(struct camdd_dev *dev);
int camdd_pass_run(struct camdd_dev *dev);
int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
                     uint32_t *peer_depth, uint32_t *our_bytes,
                     uint32_t *peer_bytes);
void *camdd_worker(void *arg);
void camdd_sig_handler(int sig);
void camdd_print_status(struct camdd_dev *camdd_dev,
                        struct camdd_dev *other_dev,
                        struct timespec *start_time);
int camdd_rw(struct camdd_io_opts *io_opts, camdd_argmask arglist,
             int num_io_opts, uint64_t max_io, int retry_count, int timeout);
int camdd_parse_io_opts(char *args, int is_write,
                        struct camdd_io_opts *io_opts);
void usage(void);

/*
 * Parse out a bus, or a bus, target and lun in the following
 * format:
 * bus
 * bus:target
 * bus:target:lun
 *
 * Returns the number of parsed components, or 0.
 */
static int
parse_btl(char *tstr, int *bus, int *target, int *lun)
{
        char *tmpstr;
        int convs = 0;

        while (isspace((unsigned char)*tstr) && (*tstr != '\0'))
                tstr++;

        tmpstr = strtok(tstr, ":");
        if ((tmpstr != NULL) && (*tmpstr != '\0')) {
                *bus = strtol(tmpstr, NULL, 0);
                convs++;
                tmpstr = strtok(NULL, ":");
                if ((tmpstr != NULL) && (*tmpstr != '\0')) {
                        *target = strtol(tmpstr, NULL, 0);
                        convs++;
                        tmpstr = strtok(NULL, ":");
                        if ((tmpstr != NULL) && (*tmpstr != '\0')) {
                                *lun = strtol(tmpstr, NULL, 0);
                                convs++;
                        }
                }
        }

        return convs;
}
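
/*
 * For example (hypothetical input), calling parse_btl() on a writable
 * copy of the string "0:1:2" would set bus = 0, target = 1, lun = 2 and
 * return 3.  Note that strtok() modifies the string passed in, so a
 * string literal must not be handed to this function directly.
 */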

/*
 * XXX KDM clean up and free all of the buffers on the queue!
 */
void
camdd_free_dev(struct camdd_dev *dev)
{
        if (dev == NULL)
                return;

        switch (dev->dev_type) {
        case CAMDD_DEV_FILE: {
                struct camdd_dev_file *file_dev = &dev->dev_spec.file;

                if (file_dev->fd != -1)
                        close(file_dev->fd);
                free(file_dev->tmp_buf);
                break;
        }
        case CAMDD_DEV_PASS: {
                struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;

                if (pass_dev->dev != NULL)
                        cam_close_device(pass_dev->dev);
                break;
        }
        default:
                break;
        }

        free(dev);
}

struct camdd_dev *
camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
                int retry_count, int timeout)
{
        struct camdd_dev *dev = NULL;
        struct kevent *ke = NULL;
        size_t ke_size;
        int retval = 0;

        dev = calloc(1, sizeof(*dev));
        if (dev == NULL) {
                warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
                goto bailout;
        }

        dev->dev_type = dev_type;
        dev->io_timeout = timeout;
        dev->retry_count = retry_count;
        STAILQ_INIT(&dev->free_queue);
        STAILQ_INIT(&dev->free_indirect_queue);
        STAILQ_INIT(&dev->active_queue);
        STAILQ_INIT(&dev->pending_queue);
        STAILQ_INIT(&dev->run_queue);
        STAILQ_INIT(&dev->reorder_queue);
        STAILQ_INIT(&dev->work_queue);
        STAILQ_INIT(&dev->peer_done_queue);
        STAILQ_INIT(&dev->peer_work_queue);
        retval = pthread_mutex_init(&dev->mutex, NULL);
        if (retval != 0) {
                warnc(retval, "%s: failed to initialize mutex", __func__);
                goto bailout;
        }

        retval = pthread_cond_init(&dev->cond, NULL);
        if (retval != 0) {
                warnc(retval, "%s: failed to initialize condition variable",
                      __func__);
                goto bailout;
        }

        dev->kq = kqueue();
        if (dev->kq == -1) {
                warn("%s: Unable to create kqueue", __func__);
                goto bailout;
        }

        ke_size = sizeof(struct kevent) * (num_ke + 4);
        ke = calloc(1, ke_size);
        if (ke == NULL) {
                warn("%s: unable to malloc %zu bytes", __func__, ke_size);
                goto bailout;
        }
        if (num_ke > 0)
                bcopy(new_ke, ke, num_ke * sizeof(struct kevent));

        EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
               EV_ADD|EV_ENABLE|EV_CLEAR, 0, 0, 0);
        EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
               EV_ADD|EV_ENABLE|EV_CLEAR, 0, 0, 0);
        EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
        EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);

        retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
        if (retval == -1) {
                warn("%s: Unable to register kevents", __func__);
                goto bailout;
        }

        free(ke);

        return (dev);

bailout:
        free(ke);
        free(dev);

        return (NULL);
}
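
/*
 * The EVFILT_USER events registered above are how the peer thread wakes
 * this device's worker.  A minimal sketch of the trigger side (assuming
 * the convention above, where the queue address is the event identifier):
 *
 *	struct kevent ke;
 *
 *	EV_SET(&ke, (uintptr_t)&dev->work_queue, EVFILT_USER, 0,
 *	       NOTE_TRIGGER, 0, NULL);
 *	kevent(dev->kq, &ke, 1, NULL, 0, NULL);
 */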

static struct camdd_buf *
camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
{
        struct camdd_buf *buf = NULL;
        uint8_t *data_ptr = NULL;

        /*
         * We only need to allocate data space for data buffers.
         */
        switch (buf_type) {
        case CAMDD_BUF_DATA:
                data_ptr = malloc(dev->blocksize);
                if (data_ptr == NULL) {
                        warn("unable to allocate %u bytes", dev->blocksize);
                        goto bailout_error;
                }
                break;
        default:
                break;
        }

        buf = calloc(1, sizeof(*buf));
        if (buf == NULL) {
                warn("unable to allocate %zu bytes", sizeof(*buf));
                goto bailout_error;
        }

        buf->buf_type = buf_type;
        buf->dev = dev;
        switch (buf_type) {
        case CAMDD_BUF_DATA: {
                struct camdd_buf_data *data;

                data = &buf->buf_type_spec.data;

                data->alloc_len = dev->blocksize;
                data->buf = data_ptr;
                break;
        }
        case CAMDD_BUF_INDIRECT:
                break;
        default:
                break;
        }
        STAILQ_INIT(&buf->src_list);

        return (buf);

bailout_error:
        free(data_ptr);

        return (NULL);
}

void
camdd_release_buf(struct camdd_buf *buf)
{
        struct camdd_dev *dev;

        dev = buf->dev;

        switch (buf->buf_type) {
        case CAMDD_BUF_DATA: {
                struct camdd_buf_data *data;

                data = &buf->buf_type_spec.data;

                if (data->segs != NULL) {
                        if (data->extra_buf != 0) {
                                void *extra_buf;

                                extra_buf = (void *)
                                    data->segs[data->sg_count - 1].ds_addr;
                                free(extra_buf);
                                data->extra_buf = 0;
                        }
                        free(data->segs);
                        data->segs = NULL;
                        data->sg_count = 0;
                } else if (data->iovec != NULL) {
                        if (data->extra_buf != 0) {
                                free(data->iovec[data->sg_count - 1].iov_base);
                                data->extra_buf = 0;
                        }
                        free(data->iovec);
                        data->iovec = NULL;
                        data->sg_count = 0;
                }
                STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
                break;
        }
        case CAMDD_BUF_INDIRECT:
                STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
                break;
        default:
                errx(1, "%s: Invalid buffer type %d for released buffer",
                    __func__, buf->buf_type);
                break;
        }
}

struct camdd_buf *
camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
{
        struct camdd_buf *buf = NULL;

        switch (buf_type) {
        case CAMDD_BUF_DATA:
                buf = STAILQ_FIRST(&dev->free_queue);
                if (buf != NULL) {
                        struct camdd_buf_data *data;
                        uint8_t *data_ptr;
                        uint32_t alloc_len;

                        STAILQ_REMOVE_HEAD(&dev->free_queue, links);
                        data = &buf->buf_type_spec.data;
                        data_ptr = data->buf;
                        alloc_len = data->alloc_len;
                        bzero(buf, sizeof(*buf));
                        data->buf = data_ptr;
                        data->alloc_len = alloc_len;
                }
                break;
        case CAMDD_BUF_INDIRECT:
                buf = STAILQ_FIRST(&dev->free_indirect_queue);
                if (buf != NULL) {
                        STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);

                        bzero(buf, sizeof(*buf));
                }
                break;
        default:
                warnx("Unknown buffer type %d requested", buf_type);
                break;
        }

        if (buf == NULL)
                return (camdd_alloc_buf(dev, buf_type));
        else {
                STAILQ_INIT(&buf->src_list);
                buf->dev = dev;
                buf->buf_type = buf_type;

                return (buf);
        }
}

int
camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
                    uint32_t *num_sectors_used, int *double_buf_needed)
{
        struct camdd_buf *tmp_buf;
        struct camdd_buf_data *data;
        uint8_t *extra_buf = NULL;
        size_t extra_buf_len = 0;
        int extra_buf_attached = 0;
        int i, retval = 0;

        data = &buf->buf_type_spec.data;

        data->sg_count = buf->src_count;
        /*
         * Compose a scatter/gather list from all of the buffers in the list.
         * If the length of the buffer isn't a multiple of the sector size,
         * we'll have to add an extra buffer.  This should only happen
         * at the end of a transfer.
         */
        if ((data->fill_len % sector_size) != 0) {
                extra_buf_len = sector_size - (data->fill_len % sector_size);
                extra_buf = calloc(extra_buf_len, 1);
                if (extra_buf == NULL) {
                        warn("%s: unable to allocate %zu bytes for extra "
                            "buffer space", __func__, extra_buf_len);
                        retval = 1;
                        goto bailout;
                }
                data->extra_buf = 1;
                data->sg_count++;
        }
        if (iovec == 0) {
                data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
                if (data->segs == NULL) {
                        warn("%s: unable to allocate %zu bytes for S/G list",
                            __func__, sizeof(bus_dma_segment_t) *
                            data->sg_count);
                        retval = 1;
                        goto bailout;
                }
        } else {
                data->iovec = calloc(data->sg_count, sizeof(struct iovec));
                if (data->iovec == NULL) {
                        warn("%s: unable to allocate %zu bytes for S/G list",
                            __func__, sizeof(struct iovec) * data->sg_count);
                        retval = 1;
                        goto bailout;
                }
        }

        for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
             i < buf->src_count && tmp_buf != NULL; i++,
             tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {

                if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
                        struct camdd_buf_data *tmp_data;

                        tmp_data = &tmp_buf->buf_type_spec.data;
                        if (iovec == 0) {
                                data->segs[i].ds_addr =
                                    (bus_addr_t) tmp_data->buf;
                                data->segs[i].ds_len = tmp_data->fill_len -
                                    tmp_data->resid;
                        } else {
                                data->iovec[i].iov_base = tmp_data->buf;
                                data->iovec[i].iov_len = tmp_data->fill_len -
                                    tmp_data->resid;
                        }
                        if (((tmp_data->fill_len - tmp_data->resid) %
                             sector_size) != 0)
                                *double_buf_needed = 1;
                } else {
                        struct camdd_buf_indirect *tmp_ind;

                        tmp_ind = &tmp_buf->buf_type_spec.indirect;
                        if (iovec == 0) {
                                data->segs[i].ds_addr =
                                    (bus_addr_t)tmp_ind->start_ptr;
                                data->segs[i].ds_len = tmp_ind->len;
                        } else {
                                data->iovec[i].iov_base = tmp_ind->start_ptr;
                                data->iovec[i].iov_len = tmp_ind->len;
                        }
                        if ((tmp_ind->len % sector_size) != 0)
                                *double_buf_needed = 1;
                }
        }

        if (extra_buf != NULL) {
                if (iovec == 0) {
                        data->segs[i].ds_addr = (bus_addr_t)extra_buf;
                        data->segs[i].ds_len = extra_buf_len;
                } else {
                        data->iovec[i].iov_base = extra_buf;
                        data->iovec[i].iov_len = extra_buf_len;
                }
                extra_buf_attached = 1;
                i++;
        }
        if ((tmp_buf != NULL) || (i != data->sg_count)) {
                warnx("buffer source count does not match "
                      "number of buffers in list!");
                retval = 1;
                goto bailout;
        }

bailout:
        if (retval == 0) {
                *num_sectors_used = (data->fill_len + extra_buf_len) /
                    sector_size;
        } else if ((data->extra_buf != 0) && (extra_buf_attached == 0)) {
                /*
                 * If extra_buf isn't attached yet, we need to free it
                 * to avoid leaking.
                 */
                free(extra_buf);
                data->extra_buf = 0;
                data->sg_count--;
        }
        return (retval);
}
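
/*
 * Worked example of the padding logic above (illustrative numbers): with
 * fill_len = 1000 and sector_size = 512, extra_buf_len = 512 - (1000 %
 * 512) = 24, one extra all-zero segment is appended to the S/G list, and
 * *num_sectors_used = (1000 + 24) / 512 = 2.
 */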

uint32_t
camdd_buf_get_len(struct camdd_buf *buf)
{
        uint32_t len = 0;

        if (buf->buf_type != CAMDD_BUF_DATA) {
                struct camdd_buf_indirect *indirect;

                indirect = &buf->buf_type_spec.indirect;
                len = indirect->len;
        } else {
                struct camdd_buf_data *data;

                data = &buf->buf_type_spec.data;
                len = data->fill_len;
        }

        return (len);
}

void
camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
{
        struct camdd_buf_data *data;

        assert(buf->buf_type == CAMDD_BUF_DATA);

        data = &buf->buf_type_spec.data;

        STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
        buf->src_count++;

        data->fill_len += camdd_buf_get_len(child_buf);
}

typedef enum {
        CAMDD_TS_MAX_BLK,
        CAMDD_TS_MIN_BLK,
        CAMDD_TS_BLK_GRAN,
        CAMDD_TS_EFF_IOSIZE
} camdd_status_item_index;

static struct camdd_status_items {
        const char *name;
        struct mt_status_entry *entry;
} req_status_items[] = {
        { "max_blk", NULL },
        { "min_blk", NULL },
        { "blk_gran", NULL },
        { "max_effective_iosize", NULL }
};

int
camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
                 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
{
        struct mt_status_data status_data;
        char *xml_str = NULL;
        unsigned int i;
        int retval = 0;

        retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str);
        if (retval != 0)
                err(1, "Couldn't get XML string from %s", filename);

        retval = mt_get_status(xml_str, &status_data);
        if (retval != XML_STATUS_OK) {
                warn("couldn't get status for %s", filename);
                retval = 1;
                goto bailout;
        } else
                retval = 0;

        if (status_data.error != 0) {
                warnx("%s", status_data.error_str);
                retval = 1;
                goto bailout;
        }

        for (i = 0; i < nitems(req_status_items); i++) {
                char *name;

                name = __DECONST(char *, req_status_items[i].name);
                req_status_items[i].entry = mt_status_entry_find(&status_data,
                    name);
                if (req_status_items[i].entry == NULL) {
                        errx(1, "Cannot find status entry %s",
                            req_status_items[i].name);
                }
        }

        *max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
        *max_blk = req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
        *min_blk = req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
        *blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
bailout:

        free(xml_str);
        mt_status_free(&status_data);

        return (retval);
}

struct camdd_dev *
camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
    int timeout)
{
        struct camdd_dev *dev = NULL;
        struct camdd_dev_file *file_dev;
        uint64_t blocksize = io_opts->blocksize;

        dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
        if (dev == NULL)
                goto bailout;

        file_dev = &dev->dev_spec.file;
        file_dev->fd = fd;
        strlcpy(file_dev->filename, io_opts->dev_name,
            sizeof(file_dev->filename));
        strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
        if (blocksize == 0)
                dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
        else
                dev->blocksize = blocksize;

        if ((io_opts->queue_depth != 0)
         && (io_opts->queue_depth != 1)) {
                warnx("Queue depth %ju for %s ignored, only 1 outstanding "
                    "command supported", (uintmax_t)io_opts->queue_depth,
                    io_opts->dev_name);
        }
        dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
        dev->run = camdd_file_run;
        dev->fetch = NULL;

        /*
         * We can effectively access files on byte boundaries.  We'll reset
         * this for devices like disks that can be accessed on sector
         * boundaries.
         */
        dev->sector_size = 1;

        if ((fd != STDIN_FILENO)
         && (fd != STDOUT_FILENO)) {
                int retval;

                retval = fstat(fd, &file_dev->sb);
                if (retval != 0) {
                        warn("Cannot stat %s", dev->device_name);
                        goto bailout_error;
                }
                if (S_ISREG(file_dev->sb.st_mode)) {
                        file_dev->file_type = CAMDD_FILE_REG;
                } else if (S_ISCHR(file_dev->sb.st_mode)) {
                        int type;

                        if (ioctl(fd, FIODTYPE, &type) == -1)
                                err(1, "FIODTYPE ioctl failed on %s",
                                    dev->device_name);
                        else {
                                if (type & D_TAPE)
                                        file_dev->file_type = CAMDD_FILE_TAPE;
                                else if (type & D_DISK)
                                        file_dev->file_type = CAMDD_FILE_DISK;
                                else if (type & D_MEM)
                                        file_dev->file_type = CAMDD_FILE_MEM;
                                else if (type & D_TTY)
                                        file_dev->file_type = CAMDD_FILE_TTY;
                        }
                } else if (S_ISDIR(file_dev->sb.st_mode)) {
                        errx(1, "cannot operate on directory %s",
                            dev->device_name);
                } else if (S_ISFIFO(file_dev->sb.st_mode)) {
                        file_dev->file_type = CAMDD_FILE_PIPE;
                } else
                        errx(1, "Cannot determine file type for %s",
                            dev->device_name);

                switch (file_dev->file_type) {
                case CAMDD_FILE_REG:
                        if (file_dev->sb.st_size != 0)
                                dev->max_sector = file_dev->sb.st_size - 1;
                        else
                                dev->max_sector = 0;
                        file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
                        break;
                case CAMDD_FILE_TAPE: {
                        uint64_t max_iosize, max_blk, min_blk, blk_gran;
                        /*
                         * Check block limits and maximum effective iosize.
                         * Make sure the blocksize is within the block
                         * limits (and a multiple of the minimum blocksize)
                         * and that the blocksize is <= maximum effective
                         * iosize.
                         */
                        retval = camdd_probe_tape(fd, dev->device_name,
                            &max_iosize, &max_blk, &min_blk, &blk_gran);
                        if (retval != 0)
                                errx(1, "Unable to probe tape %s",
                                    dev->device_name);

                        /*
                         * The blocksize needs to be <= the maximum
                         * effective I/O size of the tape device.  Note
                         * that this also takes into account the maximum
                         * blocksize reported by READ BLOCK LIMITS.
                         */
                        if (dev->blocksize > max_iosize) {
                                warnx("Blocksize %u too big for %s, limiting "
                                    "to %ju", dev->blocksize, dev->device_name,
                                    max_iosize);
                                dev->blocksize = max_iosize;
                        }

                        /*
                         * The blocksize needs to be at least min_blk.
                         */
                        if (dev->blocksize < min_blk) {
                                warnx("Blocksize %u too small for %s, "
                                    "increasing to %ju", dev->blocksize,
                                    dev->device_name, min_blk);
                                dev->blocksize = min_blk;
                        }

                        /*
                         * And the blocksize needs to be a multiple of
                         * the block granularity.
                         */
                        if ((blk_gran != 0)
                         && (dev->blocksize % (1 << blk_gran))) {
                                warnx("Blocksize %u for %s not a multiple of "
                                    "%d, adjusting to %d", dev->blocksize,
                                    dev->device_name, (1 << blk_gran),
                                    dev->blocksize & ~((1 << blk_gran) - 1));
                                dev->blocksize &= ~((1 << blk_gran) - 1);
                        }

                        if (dev->blocksize == 0) {
                                errx(1, "Unable to derive valid blocksize for "
                                    "%s", dev->device_name);
                        }

                        /*
                         * For tape drives, set the sector size to the
                         * blocksize so that we make sure not to write
                         * less than the blocksize out to the drive.
                         */
                        dev->sector_size = dev->blocksize;
                        break;
                }
                case CAMDD_FILE_DISK: {
                        off_t media_size;
                        unsigned int sector_size;

                        file_dev->file_flags |= CAMDD_FF_CAN_SEEK;

                        if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
                                err(1, "DIOCGSECTORSIZE ioctl failed on %s",
                                    dev->device_name);
                        }

                        if (sector_size == 0) {
                                errx(1, "DIOCGSECTORSIZE ioctl returned "
                                    "invalid sector size %u for %s",
                                    sector_size, dev->device_name);
                        }

                        if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
                                err(1, "DIOCGMEDIASIZE ioctl failed on %s",
                                    dev->device_name);
                        }

                        if (media_size == 0) {
                                errx(1, "DIOCGMEDIASIZE ioctl returned "
                                    "invalid media size %ju for %s",
                                    (uintmax_t)media_size, dev->device_name);
                        }

                        if (dev->blocksize % sector_size) {
                                errx(1, "%s blocksize %u not a multiple of "
                                    "sector size %u", dev->device_name,
                                    dev->blocksize, sector_size);
                        }

                        dev->sector_size = sector_size;
                        dev->max_sector = (media_size / sector_size) - 1;
                        break;
                }
                case CAMDD_FILE_MEM:
                        file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
                        break;
                default:
                        break;
                }
        }

        if ((io_opts->offset != 0)
         && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
                warnx("Offset %ju specified for %s, but we cannot seek on %s",
                    io_opts->offset, io_opts->dev_name, io_opts->dev_name);
                goto bailout_error;
        }
#if 0
        else if ((io_opts->offset != 0)
                && ((io_opts->offset % dev->sector_size) != 0)) {
                warnx("Offset %ju for %s is not a multiple of the "
                      "sector size %u", io_opts->offset,
                      io_opts->dev_name, dev->sector_size);
                goto bailout_error;
        } else {
                dev->start_offset_bytes = io_opts->offset;
        }
#endif

bailout:
        return (dev);

bailout_error:
        camdd_free_dev(dev);
        return (NULL);
}

/*
 * Get a get device CCB for the specified device.
 */
int
camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd)
{
        union ccb *ccb;
        int retval = 0;

        ccb = cam_getccb(device);

        if (ccb == NULL) {
                warnx("%s: couldn't allocate CCB", __func__);
                return -1;
        }

        CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cgd);

        ccb->ccb_h.func_code = XPT_GDEV_TYPE;

        if (cam_send_ccb(device, ccb) < 0) {
                warn("%s: error sending Get Device Information CCB", __func__);
                cam_error_print(device, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);
                retval = -1;
                goto bailout;
        }

        if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
                cam_error_print(device, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);
                retval = -1;
                goto bailout;
        }

        bcopy(&ccb->cgd, cgd, sizeof(struct ccb_getdev));

bailout:
        cam_freeccb(ccb);

        return retval;
}

int
camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
                 camdd_argmask arglist, int probe_retry_count,
                 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
{
        struct scsi_read_capacity_data rcap;
        struct scsi_read_capacity_data_long rcaplong;
        int retval = -1;

        if (ccb == NULL) {
                warnx("%s: error passed ccb is NULL", __func__);
                goto bailout;
        }

        CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);

        scsi_read_capacity(&ccb->csio,
                           /*retries*/ probe_retry_count,
                           /*cbfcnp*/ NULL,
                           /*tag_action*/ MSG_SIMPLE_Q_TAG,
                           &rcap,
                           SSD_FULL_SIZE,
                           /*timeout*/ probe_timeout ? probe_timeout : 5000);

        /* Disable freezing the device queue */
        ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;

        if (arglist & CAMDD_ARG_ERR_RECOVER)
                ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;

        if (cam_send_ccb(cam_dev, ccb) < 0) {
                warn("error sending READ CAPACITY command");

                cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);

                goto bailout;
        }

        if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
                cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
                goto bailout;
        }

        *maxsector = scsi_4btoul(rcap.addr);
        *block_len = scsi_4btoul(rcap.length);

        /*
         * A last block of 2^32-1 means that the true capacity is over 2TB,
         * and we need to issue the long READ CAPACITY to get the real
         * capacity.  Otherwise, we're all set.
         */
        if (*maxsector != 0xffffffff) {
                retval = 0;
                goto bailout;
        }

        scsi_read_capacity_16(&ccb->csio,
                              /*retries*/ probe_retry_count,
                              /*cbfcnp*/ NULL,
                              /*tag_action*/ MSG_SIMPLE_Q_TAG,
                              /*lba*/ 0,
                              /*reladdr*/ 0,
                              /*pmi*/ 0,
                              (uint8_t *)&rcaplong,
                              sizeof(rcaplong),
                              /*sense_len*/ SSD_FULL_SIZE,
                              /*timeout*/ probe_timeout ? probe_timeout : 5000);

        /* Disable freezing the device queue */
        ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;

        if (arglist & CAMDD_ARG_ERR_RECOVER)
                ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;

        if (cam_send_ccb(cam_dev, ccb) < 0) {
                warn("error sending READ CAPACITY (16) command");
                cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);
                goto bailout;
        }

        if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
                cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
                goto bailout;
        }

        *maxsector = scsi_8btou64(rcaplong.addr);
        *block_len = scsi_4btoul(rcaplong.length);

        retval = 0;

bailout:
        return retval;
}
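
/*
 * To make the 10-byte/16-byte fallback above concrete: a typical 4TB disk
 * with 512-byte blocks has a last LBA around 7814037167, which does not
 * fit in the 32-bit READ CAPACITY(10) result, so the device reports
 * 0xffffffff and we reissue READ CAPACITY(16) to learn the real values.
 */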

int
camdd_probe_pass_nvme(struct cam_device *cam_dev, union ccb *ccb,
                 camdd_argmask arglist, int probe_retry_count,
                 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
{
        struct nvme_command *nc = NULL;
        struct nvme_namespace_data nsdata;
        uint32_t nsid = cam_dev->target_lun & UINT32_MAX;
        uint8_t format = 0, lbads = 0;
        int retval = -1;

        if (ccb == NULL) {
                warnx("%s: error passed ccb is NULL", __func__);
                goto bailout;
        }

        CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->nvmeio);

        /* Send Identify Namespace to get block size and capacity */
        nc = &ccb->nvmeio.cmd;
        nc->opc = NVME_OPC_IDENTIFY;

        nc->nsid = nsid;
        nc->cdw10 = 0; /* Identify Namespace is CNS = 0 */

        cam_fill_nvmeadmin(&ccb->nvmeio,
                        /*retries*/ probe_retry_count,
                        /*cbfcnp*/ NULL,
                        CAM_DIR_IN,
                        (uint8_t *)&nsdata,
                        sizeof(nsdata),
                        probe_timeout);

        /* Disable freezing the device queue */
        ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;

        if (arglist & CAMDD_ARG_ERR_RECOVER)
                ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;

        if (cam_send_ccb(cam_dev, ccb) < 0) {
                warn("error sending Identify Namespace command");

                cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);

                goto bailout;
        }

        if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
                cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
                goto bailout;
        }

        *maxsector = nsdata.nsze;
        /* The LBA Data Size (LBADS) is reported as a power of 2 */
        format = nsdata.flbas & NVME_NS_DATA_FLBAS_FORMAT_MASK;
        lbads = (nsdata.lbaf[format] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) &
            NVME_NS_DATA_LBAF_LBADS_MASK;
        *block_len = 1 << lbads;

        retval = 0;

bailout:
        return retval;
}
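
/*
 * For example, a namespace whose active LBA format has LBADS = 9 yields
 * *block_len = 1 << 9 = 512 bytes, and LBADS = 12 yields 4096 bytes, the
 * two block sizes in common use.
 */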
1443
1444 /*
1445  * Need to implement this.  Do a basic probe:
1446  * - Check the inquiry data, make sure we're talking to a device that we
1447  *   can reasonably expect to talk to -- direct, RBC, CD, WORM.
1448  * - Send a test unit ready, make sure the device is available.
1449  * - Get the capacity and block size.
1450  */
1451 struct camdd_dev *
1452 camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
1453                  camdd_argmask arglist, int probe_retry_count,
1454                  int probe_timeout, int io_retry_count, int io_timeout)
1455 {
1456         union ccb *ccb;
1457         uint64_t maxsector = 0;
1458         uint32_t cpi_maxio, max_iosize, pass_numblocks;
1459         uint32_t block_len = 0;
1460         struct camdd_dev *dev = NULL;
1461         struct camdd_dev_pass *pass_dev;
1462         struct kevent ke;
1463         struct ccb_getdev cgd;
1464         int retval;
1465         int scsi_dev_type = T_NODEVICE;
1466
1467         if ((retval = camdd_get_cgd(cam_dev, &cgd)) != 0) {
1468                 warnx("%s: error retrieving CGD", __func__);
1469                 return NULL;
1470         }
1471
1472         ccb = cam_getccb(cam_dev);
1473
1474         if (ccb == NULL) {
1475                 warnx("%s: error allocating ccb", __func__);
1476                 goto bailout;
1477         }
1478
1479         switch (cgd.protocol) {
1480         case PROTO_SCSI:
1481                 scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
1482
1483                 /*
1484                  * For devices that support READ CAPACITY, we'll attempt to get the
1485                  * capacity.  Otherwise, we really don't support tape or other
1486                  * devices via SCSI passthrough, so just return an error in that case.
1487                  */
1488                 switch (scsi_dev_type) {
1489                 case T_DIRECT:
1490                 case T_WORM:
1491                 case T_CDROM:
1492                 case T_OPTICAL:
1493                 case T_RBC:
1494                 case T_ZBC_HM:
1495                         break;
1496                 default:
1497                         errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
1498                         break; /*NOTREACHED*/
1499                 }
1500
1501                 if ((retval = camdd_probe_pass_scsi(cam_dev, ccb, arglist,
1502                                                 probe_retry_count, probe_timeout,
1503                                                 &maxsector, &block_len))) {
1504                         goto bailout;
1505                 }
1506                 break;
1507         case PROTO_NVME:
1508                 if ((retval = camdd_probe_pass_nvme(cam_dev, ccb, arglist,
1509                                                 probe_retry_count, probe_timeout,
1510                                                 &maxsector, &block_len))) {
1511                         goto bailout;
1512                 }
1513                 break;
1514         default:
1515                 errx(1, "Unsupported PROTO type %d", cgd.protocol);
1516                 break; /*NOTREACHED*/
1517         }
1518
1519         if (block_len == 0) {
1520                 warnx("Sector size for %s%u is 0, cannot continue",
1521                     cam_dev->device_name, cam_dev->dev_unit_num);
1522                 goto bailout_error;
1523         }
1524
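        /*
         * Issue a path inquiry so we can pick up the controller's
         * maximum I/O size (cpi.maxio) for the blocksize calculation
         * below.
         */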
1525         CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cpi);
1526
1527         ccb->ccb_h.func_code = XPT_PATH_INQ;
1528         ccb->ccb_h.flags = CAM_DIR_NONE;
1529         ccb->ccb_h.retry_count = 1;
1530         
1531         if (cam_send_ccb(cam_dev, ccb) < 0) {
1532                 warn("error sending XPT_PATH_INQ CCB");
1533
1534                 cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1535                                 CAM_EPF_ALL, stderr);
1536                 goto bailout;
1537         }
1538
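        /*
         * Prepare an EVFILT_READ kevent for the pass(4) fd; the fd
         * becomes readable when queued CCBs complete, which is how the
         * worker's kevent loop knows to fetch them.
         */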
1539         EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1540
1541         dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
1542                               io_timeout);
1543         if (dev == NULL)
1544                 goto bailout;
1545
1546         pass_dev = &dev->dev_spec.pass;
1547         pass_dev->scsi_dev_type = scsi_dev_type;
1548         pass_dev->protocol = cgd.protocol;
1549         pass_dev->dev = cam_dev;
1550         pass_dev->max_sector = maxsector;
1551         pass_dev->block_len = block_len;
1552         pass_dev->cpi_maxio = ccb->cpi.maxio;
1553         snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
1554                  pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
1555         dev->sector_size = block_len;
1556         dev->max_sector = maxsector;
1557         
1558
1559         /*
1560          * Determine the optimal blocksize to use for this device.
1561          */
1562
1563         /*
1564          * If the controller has not specified a maximum I/O size,
1565          * just go with 128K as a somewhat conservative value.
1566          */
1567         if (pass_dev->cpi_maxio == 0)
1568                 cpi_maxio = 131072;
1569         else
1570                 cpi_maxio = pass_dev->cpi_maxio;
1571
1572         /*
1573          * If the controller has a large maximum I/O size, limit it
1574          * to something smaller so that the kernel doesn't have trouble
1575          * allocating buffers to copy data in and out for us.
1576          * XXX KDM this is until we have unmapped I/O support in the kernel.
1577          */
1578         max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
1579
1580         /*
1581          * If we weren't able to get a block size for some reason,
1582          * default to 512 bytes.
1583          */
1584         block_len = pass_dev->block_len;
1585         if (block_len == 0)
1586                 block_len = 512;
1587
1588         /*
1589          * Figure out how many blocksize chunks will fit in the
1590          * maximum I/O size.
1591          */
1592         pass_numblocks = max_iosize / block_len;
1593
1594         /*
1595          * And finally, multiply the number of blocks by the block
1596          * length to get our default blocksize.
1597          */
1598         dev->blocksize = pass_numblocks * block_len;
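        /*
         * For example, a 128KB max_iosize with 512-byte blocks gives
         * 256 blocks per I/O, for a default blocksize of 131072 bytes.
         */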
1599
1600         if (io_opts->blocksize != 0) {
1601                 if ((io_opts->blocksize % dev->sector_size) != 0) {
1602                         warnx("Blocksize %ju for %s is not a multiple of "
1603                               "sector size %u", (uintmax_t)io_opts->blocksize, 
1604                               dev->device_name, dev->sector_size);
1605                         goto bailout_error;
1606                 }
1607                 dev->blocksize = io_opts->blocksize;
1608         }
1609         dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
1610         if (io_opts->queue_depth != 0)
1611                 dev->target_queue_depth = io_opts->queue_depth;
1612
1613         if (io_opts->offset != 0) {
1614                 if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
1615                         warnx("Offset %ju is past the end of device %s",
1616                             (uintmax_t)io_opts->offset, dev->device_name);
1617                         goto bailout_error;
1618                 }
1619 #if 0
1620                 else if ((io_opts->offset % dev->sector_size) != 0) {
1621                         warnx("Offset %ju for %s is not a multiple of the "
1622                               "sector size %u", io_opts->offset, 
1623                               dev->device_name, dev->sector_size);
1624                         goto bailout_error;
1625                 }
1626                 dev->start_offset_bytes = io_opts->offset;
1627 #endif
1628         }
1629
1630         dev->min_cmd_size = io_opts->min_cmd_size;
1631
1632         dev->run = camdd_pass_run;
1633         dev->fetch = camdd_pass_fetch;
1634
1635 bailout:
1636         cam_freeccb(ccb);
1637
1638         return (dev);
1639
1640 bailout_error:
1641         cam_freeccb(ccb);
1642
1643         camdd_free_dev(dev);
1644
1645         return (NULL);
1646 }
1647
1648 void
1649 nvme_read_write(struct ccb_nvmeio *nvmeio, uint32_t retries,
1650                 void (*cbfcnp)(struct cam_periph *, union ccb *),
1651                 uint32_t nsid, int readop, uint64_t lba,
1652                 uint32_t block_count, uint8_t *data_ptr, uint32_t dxfer_len,
1653                 uint32_t timeout)
1654 {
1655         struct nvme_command *nc = &nvmeio->cmd;
1656
1657         nc->opc = readop ? NVME_OPC_READ : NVME_OPC_WRITE;
1658
1659         nc->nsid = nsid;
1660
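        /*
         * The 64-bit starting LBA is split across CDW10 (low 32 bits)
         * and CDW11 (high 32 bits).
         */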
1661         nc->cdw10 = lba & UINT32_MAX;
1662         nc->cdw11 = lba >> 32;
1663
1664         /* NLB (bits 15:0) is a zero based value */
1665         nc->cdw12 = (block_count - 1) & UINT16_MAX;
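        /*
         * Since NLB is a 16-bit, zero-based field, a single command
         * can cover at most 65536 blocks.
         */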
1666
1667         cam_fill_nvmeio(nvmeio,
1668                         retries,
1669                         cbfcnp,
1670                         readop ? CAM_DIR_IN : CAM_DIR_OUT,
1671                         data_ptr,
1672                         dxfer_len,
1673                         timeout);
1674 }
1675
1676 void *
1677 camdd_worker(void *arg)
1678 {
1679         struct camdd_dev *dev = arg;
1680         struct camdd_buf *buf;
1681         struct timespec ts, *kq_ts;
1682
1683         ts.tv_sec = 0;
1684         ts.tv_nsec = 0;
1685
1686         pthread_mutex_lock(&dev->mutex);
1687
1688         dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
1689
1690         for (;;) {
1691                 struct kevent ke;
1692                 int retval = 0;
1693
1694                 /*
1695                  * XXX KDM check the reorder queue depth?
1696                  */
1697                 if (dev->write_dev == 0) {
1698                         uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
1699                         uint32_t target_depth = dev->target_queue_depth;
1700                         uint32_t peer_target_depth =
1701                             dev->peer_dev->target_queue_depth;
1702                         uint32_t peer_blocksize = dev->peer_dev->blocksize;
1703
1704                         camdd_get_depth(dev, &our_depth, &peer_depth,
1705                                         &our_bytes, &peer_bytes);
1706
1707 #if 0
1708                         while (((our_depth < target_depth)
1709                              && (peer_depth < peer_target_depth))
1710                             || ((peer_bytes + our_bytes) <
1711                                  (peer_blocksize * 2))) {
1712 #endif
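                        /*
                         * Keep queueing reads until the combined depth
                         * of both queues reaches the combined target
                         * and at least three writer-sized blocks are
                         * in flight.
                         */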
1713                         while (((our_depth + peer_depth) <
1714                                 (target_depth + peer_target_depth))
1715                             || ((peer_bytes + our_bytes) <
1716                                 (peer_blocksize * 3))) {
1717
1718                                 retval = camdd_queue(dev, NULL);
1719                                 if (retval == 1)
1720                                         break;
1721                                 else if (retval != 0) {
1722                                         error_exit = 1;
1723                                         goto bailout;
1724                                 }
1725
1726                                 camdd_get_depth(dev, &our_depth, &peer_depth,
1727                                                 &our_bytes, &peer_bytes);
1728                         }
1729                 }
1730                 /*
1731                  * See if we have any I/O that is ready to execute.
1732                  */
1733                 buf = STAILQ_FIRST(&dev->run_queue);
1734                 if (buf != NULL) {
1735                         while (dev->target_queue_depth > dev->cur_active_io) {
1736                                 retval = dev->run(dev);
1737                                 if (retval == -1) {
1738                                         dev->flags |= CAMDD_DEV_FLAG_EOF;
1739                                         error_exit = 1;
1740                                         break;
1741                                 } else if (retval != 0) {
1742                                         break;
1743                                 }
1744                         }
1745                 }
1746
1747                 /*
1748                  * We've reached EOF, or our partner has reached EOF.
1749                  */
1750                 if ((dev->flags & CAMDD_DEV_FLAG_EOF)
1751                  || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
1752                         if (dev->write_dev != 0) {
1753                                 if ((STAILQ_EMPTY(&dev->work_queue))
1754                                  && (dev->num_run_queue == 0)
1755                                  && (dev->cur_active_io == 0)) {
1756                                         goto bailout;
1757                                 }
1758                         } else {
1759                                 /*
1760                                  * If we're the reader, and the writer
1761                                  * got EOF, it is already done.  If we got
1762                                  * the EOF, then we need to wait until
1763                                  * everything is flushed out for the writer.
1764                                  */
1765                                 if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
1766                                         goto bailout;
1767                                 } else if ((dev->num_peer_work_queue == 0)
1768                                         && (dev->num_peer_done_queue == 0)
1769                                         && (dev->cur_active_io == 0)
1770                                         && (dev->num_run_queue == 0)) {
1771                                         goto bailout;
1772                                 }
1773                         }
1774                         /*
1775                          * XXX KDM need to do something about the pending
1776                          * queue and cleanup resources.
1777                          */
1778                 } 
1779
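                /*
                 * An idle reader with less than one writer-blocksize of
                 * data queued polls kevent without blocking (the zeroed
                 * timespec) so it can go queue more reads; otherwise we
                 * can afford to block until an event arrives.
                 */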
1780                 if ((dev->write_dev == 0)
1781                  && (dev->cur_active_io == 0)
1782                  && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
1783                         kq_ts = &ts;
1784                 else
1785                         kq_ts = NULL;
1786
1787                 /*
1788                  * Run kevent to see if there are events to process.
1789                  */
1790                 pthread_mutex_unlock(&dev->mutex);
1791                 retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
1792                 pthread_mutex_lock(&dev->mutex);
1793                 if (retval == -1) {
1794                         warn("%s: error returned from kevent", __func__);
1795                         goto bailout;
1796                 } else if (retval != 0) {
1797                         switch (ke.filter) {
1798                         case EVFILT_READ:
1799                                 if (dev->fetch != NULL) {
1800                                         retval = dev->fetch(dev);
1801                                         if (retval == -1) {
1802                                                 error_exit = 1;
1803                                                 goto bailout;
1804                                         }
1805                                 }
1806                                 break;
1807                         case EVFILT_SIGNAL:
1808                                 /*
1809                                  * We register for this so we don't get
1810                                  * an error as a result of a SIGINFO or a
1811                                  * SIGINT.  It will actually get handled
1812                                  * by the signal handler.  If we get a
1813                                  * SIGINT, bail out without printing an
1814                                  * error message.  Any other signals 
1815                                  * will result in the error message above.
1816                                  */
1817                                 if (ke.ident == SIGINT)
1818                                         goto bailout;
1819                                 break;
1820                         case EVFILT_USER:
1821                                 retval = 0;
1822                                 /*
1823                                  * Check to see if the other thread has
1824                                  * queued any I/O for us to do.  (In this
1825                                  * case we're the writer.)
1826                                  */
1827                                 for (buf = STAILQ_FIRST(&dev->work_queue);
1828                                      buf != NULL;
1829                                      buf = STAILQ_FIRST(&dev->work_queue)) {
1830                                         STAILQ_REMOVE_HEAD(&dev->work_queue,
1831                                                            work_links);
1832                                         retval = camdd_queue(dev, buf);
1833                                         /*
1834                                          * We keep going unless we get an
1835                                          * actual error.  If we get EOF, we
1836                                          * still want to remove the buffers
1837                                          * from the queue and send them back
1838                                          * to the reader thread.
1839                                          */
1840                                         if (retval == -1) {
1841                                                 error_exit = 1;
1842                                                 goto bailout;
1843                                         } else
1844                                                 retval = 0;
1845                                 }
1846
1847                                 /*
1848                                  * Next check to see if the other thread has
1849                                  * queued any completed buffers back to us.
1850                                  * (In this case we're the reader.)
1851                                  */
1852                                 for (buf = STAILQ_FIRST(&dev->peer_done_queue);
1853                                      buf != NULL;
1854                                      buf = STAILQ_FIRST(&dev->peer_done_queue)){
1855                                         STAILQ_REMOVE_HEAD(
1856                                             &dev->peer_done_queue, work_links);
1857                                         dev->num_peer_done_queue--;
1858                                         camdd_peer_done(buf);
1859                                 }
1860                                 break;
1861                         default:
1862                                 warnx("%s: unknown kevent filter %d",
1863                                       __func__, ke.filter);
1864                                 break;
1865                         }
1866                 }
1867         }
1868
1869 bailout:
1870
1871         dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
1872
1873         /* XXX KDM cleanup resources here? */
1874
1875         pthread_mutex_unlock(&dev->mutex);
1876
1877         need_exit = 1;
1878         sem_post(&camdd_sem);
1879
1880         return (NULL);
1881 }
1882
1883 /*
1884  * Simplistic translation of CCB status to our local status.
1885  */
1886 camdd_buf_status
1887 camdd_ccb_status(union ccb *ccb, int protocol)
1888 {
1889         camdd_buf_status status = CAMDD_STATUS_NONE;
1890         cam_status ccb_status;
1891
1892         ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
1893
1894         switch (protocol) {
1895         case PROTO_SCSI:
1896                 switch (ccb_status) {
1897                 case CAM_REQ_CMP: {
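                        /*
                         * Classify by residual: nothing left over is a
                         * full transfer, a partial residual is a short
                         * I/O, and a transfer where nothing moved is
                         * treated as EOF.
                         */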
1898                         if (ccb->csio.resid == 0) {
1899                                 status = CAMDD_STATUS_OK;
1900                         } else if (ccb->csio.dxfer_len > ccb->csio.resid) {
1901                                 status = CAMDD_STATUS_SHORT_IO;
1902                         } else {
1903                                 status = CAMDD_STATUS_EOF;
1904                         }
1905                         break;
1906                 }
1907                 case CAM_SCSI_STATUS_ERROR: {
1908                         switch (ccb->csio.scsi_status) {
1909                         case SCSI_STATUS_OK:
1910                         case SCSI_STATUS_COND_MET:
1911                         case SCSI_STATUS_INTERMED:
1912                         case SCSI_STATUS_INTERMED_COND_MET:
1913                                 status = CAMDD_STATUS_OK;
1914                                 break;
1915                         case SCSI_STATUS_CMD_TERMINATED:
1916                         case SCSI_STATUS_CHECK_COND:
1917                         case SCSI_STATUS_QUEUE_FULL:
1918                         case SCSI_STATUS_BUSY:
1919                         case SCSI_STATUS_RESERV_CONFLICT:
1920                         default:
1921                                 status = CAMDD_STATUS_ERROR;
1922                                 break;
1923                         }
1924                         break;
1925                 }
1926                 default:
1927                         status = CAMDD_STATUS_ERROR;
1928                         break;
1929                 }
1930                 break;
1931         case PROTO_NVME:
1932                 switch (ccb_status) {
1933                 case CAM_REQ_CMP:
1934                         status = CAMDD_STATUS_OK;
1935                         break;
1936                 default:
1937                         status = CAMDD_STATUS_ERROR;
1938                         break;
1939                 }
1940                 break;
1941         default:
1942                 status = CAMDD_STATUS_ERROR;
1943                 break;
1944         }
1945
1946         return (status);
1947 }
1948
1949 /*
1950  * Queue a buffer to our peer's work thread for writing.
1951  *
1952  * Returns 0 for success, -1 for failure, 1 if the other thread exited.
1953  */
1954 int
1955 camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
1956 {
1957         struct kevent ke;
1958         STAILQ_HEAD(, camdd_buf) local_queue;
1959         struct camdd_buf *buf1, *buf2;
1960         struct camdd_buf_data *data = NULL;
1961         uint64_t peer_bytes_queued = 0;
1962         int active = 1;
1963         int retval = 0;
1964
1965         STAILQ_INIT(&local_queue);
1966
1967         /*
1968          * Since we're the reader, we need to queue our I/O to the writer
1969          * in sequential order in order to make sure it gets written out
1970          * in sequential order.
1971          *
1972          * Check the next expected I/O starting offset.  If this doesn't
1973          * match, put it on the reorder queue.
1974          */
1975         if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
1976
1977                 /*
1978                  * If there is nothing on the queue, there is no sorting
1979                  * needed.
1980                  */
1981                 if (STAILQ_EMPTY(&dev->reorder_queue)) {
1982                         STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
1983                         dev->num_reorder_queue++;
1984                         goto bailout;
1985                 }
1986
1987                 /*
1988                  * Sort in ascending order by starting LBA.  There should
1989                  * be no identical LBAs.
1990                  */
1991                 for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1992                      buf1 = buf2) {
1993                         buf2 = STAILQ_NEXT(buf1, links);
1994                         if (buf->lba < buf1->lba) {
1995                                 /*
1996                                  * If we're less than the first one, then
1997                                  * we insert at the head of the list
1998                                  * because this has to be the first element
1999                                  * on the list.
2000                                  */
2001                                 STAILQ_INSERT_HEAD(&dev->reorder_queue,
2002                                                    buf, links);
2003                                 dev->num_reorder_queue++;
2004                                 break;
2005                         } else if (buf->lba > buf1->lba) {
2006                                 if (buf2 == NULL) {
2007                                         STAILQ_INSERT_TAIL(&dev->reorder_queue, 
2008                                             buf, links);
2009                                         dev->num_reorder_queue++;
2010                                         break;
2011                                 } else if (buf->lba < buf2->lba) {
2012                                         STAILQ_INSERT_AFTER(&dev->reorder_queue,
2013                                             buf1, buf, links);
2014                                         dev->num_reorder_queue++;
2015                                         break;
2016                                 }
2017                         } else {
2018                                 errx(1, "Found buffers with duplicate LBA %ju!",
2019                                      (uintmax_t)buf->lba);
2020                         }
2021                 }
2022                 goto bailout;
2023         } else {
2024
2025                 /*
2026                  * We're the next expected I/O completion, so put ourselves
2027                  * on the local queue to be sent to the writer.  We use
2028                  * work_links here so that we can queue this to the 
2029                  * peer_work_queue before taking the buffer off of the
2030                  * local_queue.
2031                  */
2032                 dev->next_completion_pos_bytes += buf->len;
2033                 STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
2034
2035                 /*
2036                  * Go through the reorder queue looking for more sequential
2037                  * I/O and add it to the local queue.
2038                  */
2039                 for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
2040                      buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
2041                         /*
2042                          * As soon as we see an I/O that is out of sequence,
2043                          * we're done.
2044                          */
2045                         if ((buf1->lba * dev->sector_size) !=
2046                              dev->next_completion_pos_bytes)
2047                                 break;
2048
2049                         STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
2050                         dev->num_reorder_queue--;
2051                         STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
2052                         dev->next_completion_pos_bytes += buf1->len;
2053                 }
2054         }
2055
2056         /*
2057          * Setup the event to let the other thread know that it has work
2058          * pending.
2059          */
2060         EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
2061                NOTE_TRIGGER, 0, NULL);
2062
2063         /*
2064          * Put this on our shadow queue so that we know what we've queued
2065          * to the other thread.
2066          */
2067         STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
2068                 if (buf1->buf_type != CAMDD_BUF_DATA) {
2069                         errx(1, "%s: should have a data buffer, not an "
2070                             "indirect buffer", __func__);
2071                 }
2072                 data = &buf1->buf_type_spec.data;
2073
2074                 /*
2075                  * We only need to send one EOF to the writer, and don't
2076                  * need to continue sending EOFs after that.
2077                  */
2078                 if (buf1->status == CAMDD_STATUS_EOF) {
2079                         if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
2080                                 STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
2081                                     work_links);
2082                                 camdd_release_buf(buf1);
2083                                 retval = 1;
2084                                 continue;
2085                         }
2086                         dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
2087                 }
2088
2089
2090                 STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
2091                 peer_bytes_queued += (data->fill_len - data->resid);
2092                 dev->peer_bytes_queued += (data->fill_len - data->resid);
2093                 dev->num_peer_work_queue++;
2094         }
2095
2096         if (STAILQ_FIRST(&local_queue) == NULL)
2097                 goto bailout;
2098
2099         /*
2100          * Drop our mutex and pick up the other thread's mutex.  We need to
2101          * do this to avoid deadlocks.
2102          */
2103         pthread_mutex_unlock(&dev->mutex);
2104         pthread_mutex_lock(&dev->peer_dev->mutex);
2105
2106         if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
2107                 /*
2108                  * Put the buffers on the other thread's incoming work queue.
2109                  */
2110                 for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2111                      buf1 = STAILQ_FIRST(&local_queue)) {
2112                         STAILQ_REMOVE_HEAD(&local_queue, work_links);
2113                         STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
2114                                            work_links);
2115                 }
2116                 /*
2117                  * Send an event to the other thread's kqueue to let it know
2118                  * that there is something on the work queue.
2119                  */
2120                 retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2121                 if (retval == -1)
2122                         warn("%s: unable to add peer work_queue kevent",
2123                              __func__);
2124                 else
2125                         retval = 0;
2126         } else
2127                 active = 0;
2128
2129         pthread_mutex_unlock(&dev->peer_dev->mutex);
2130         pthread_mutex_lock(&dev->mutex);
2131
2132         /*
2133          * If the other side isn't active, run through the queue and
2134          * release all of the buffers.
2135          */
2136         if (active == 0) {
2137                 for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2138                      buf1 = STAILQ_FIRST(&local_queue)) {
2139                         STAILQ_REMOVE_HEAD(&local_queue, work_links);
2140                         STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
2141                                       links);
2142                         dev->num_peer_work_queue--;
2143                         camdd_release_buf(buf1);
2144                 }
2145                 dev->peer_bytes_queued -= peer_bytes_queued;
2146                 retval = 1;
2147         }
2148
2149 bailout:
2150         return (retval);
2151 }
2152
2153 /*
2154  * Return a buffer to the reader thread when we have completed writing it.
2155  */
2156 int
2157 camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
2158 {
2159         struct kevent ke;
2160         int retval = 0;
2161
2162         /*
2163          * Setup the event to let the other thread know that we have
2164          * completed a buffer.
2165          */
2166         EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0,
2167                NOTE_TRIGGER, 0, NULL);
2168
2169         /*
2170          * Drop our lock and acquire the other thread's lock before
2171          * manipulating its peer done queue.
2172          */
2173         pthread_mutex_unlock(&dev->mutex);
2174         pthread_mutex_lock(&dev->peer_dev->mutex);
2175
2176         /*
2177          * Put the buffer on the reader thread's peer done queue now that
2178          * we have completed it.
2179          */
2180         STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf,
2181                            work_links);
2182         dev->peer_dev->num_peer_done_queue++;
2183
2184         /*
2185          * Send an event to the peer thread to let it know that we've added
2186          * something to its peer done queue.
2187          */
2188         retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2189         if (retval == -1)
2190                 warn("%s: unable to add peer_done_queue kevent", __func__);
2191         else
2192                 retval = 0;
2193
2194         /*
2195          * Drop the other thread's lock and reacquire ours.
2196          */
2197         pthread_mutex_unlock(&dev->peer_dev->mutex);
2198         pthread_mutex_lock(&dev->mutex);
2199
2200         return (retval);
2201 }
2202
2203 /*
2204  * Free a buffer that was written out by the writer thread and returned to
2205  * the reader thread.
2206  */
2207 void
2208 camdd_peer_done(struct camdd_buf *buf)
2209 {
2210         struct camdd_dev *dev;
2211         struct camdd_buf_data *data;
2212
2213         dev = buf->dev;
2214         if (buf->buf_type != CAMDD_BUF_DATA) {
2215                 errx(1, "%s: should have a data buffer, not an "
2216                     "indirect buffer", __func__);
2217         }
2218
2219         data = &buf->buf_type_spec.data;
2220
2221         STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
2222         dev->num_peer_work_queue--;
2223         dev->peer_bytes_queued -= (data->fill_len - data->resid);
2224
2225         if (buf->status == CAMDD_STATUS_EOF)
2226                 dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2227
2228         STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2229 }
2230
2231 /*
2232  * Assumes caller holds the lock for this device.
2233  */
2234 void
2235 camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
2236                    int *error_count)
2237 {
2238         int retval = 0;
2239
2240         /*
2241          * If we're the reader, we need to send the completed I/O
2242          * to the writer.  If we're the writer, we need to just
2243          * free up resources, or let the reader know if we've
2244          * encountered an error.
2245          */
2246         if (dev->write_dev == 0) {
2247                 retval = camdd_queue_peer_buf(dev, buf);
2248                 if (retval != 0)
2249                         (*error_count)++;
2250         } else {
2251                 struct camdd_buf *tmp_buf, *next_buf;
2252
2253                 STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
2254                                     next_buf) {
2255                         struct camdd_buf *src_buf;
2256                         struct camdd_buf_indirect *indirect;
2257
2258                         STAILQ_REMOVE(&buf->src_list, tmp_buf,
2259                                       camdd_buf, src_links);
2260
2261                         tmp_buf->status = buf->status;
2262
2263                         if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
2264                                 camdd_complete_peer_buf(dev, tmp_buf);
2265                                 continue;
2266                         }
2267
2268                         indirect = &tmp_buf->buf_type_spec.indirect;
2269                         src_buf = indirect->src_buf;
2270                         src_buf->refcount--;
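                        /*
                         * Each indirect buffer references a slice of a
                         * source buffer from the reader; once the last
                         * slice has been written, the source buffer can
                         * go back to the reader.
                         */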
2271                         /*
2272                          * XXX KDM we probably need to account for
2273                          * exactly how many bytes we were able to
2274                          * write.  Allocate the residual to the
2275                          * first N buffers?  Or just track the
2276                          * number of bytes written?  Right now the reader
2277                          * doesn't do anything with a residual.
2278                          */
2279                         src_buf->status = buf->status;
2280                         if (src_buf->refcount <= 0)
2281                                 camdd_complete_peer_buf(dev, src_buf);
2282                         STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
2283                                            tmp_buf, links);
2284                 }
2285
2286                 STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2287         }
2288 }
2289
2290 /*
2291  * Fetch all completed commands from the pass(4) device.
2292  *
2293  * Returns the number of commands received, or -1 if any of the commands
2294  * completed with an error.  Returns 0 if no commands are available.
2295  */
2296 int
2297 camdd_pass_fetch(struct camdd_dev *dev)
2298 {
2299         struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2300         union ccb ccb;
2301         int retval = 0, num_fetched = 0, error_count = 0;
2302
2303         pthread_mutex_unlock(&dev->mutex);
2304         /*
2305          * XXX KDM we don't distinguish between EFAULT and ENOENT.
2306          */
2307         while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
2308                 struct camdd_buf *buf;
2309                 struct camdd_buf_data *data;
2310                 cam_status ccb_status;
2311                 union ccb *buf_ccb;
2312
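                /*
                 * The kernel preserves our private CCB fields, so
                 * recover the camdd_buf pointer stashed in the CCB when
                 * it was queued to tie this completion back to its
                 * originating buffer.
                 */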
2313                 buf = ccb.ccb_h.ccb_buf;
2314                 data = &buf->buf_type_spec.data;
2315                 buf_ccb = &data->ccb;
2316
2317                 num_fetched++;
2318
2319                 /*
2320                  * Copy the CCB back out so we get status, sense data, etc.
2321                  */
2322                 bcopy(&ccb, buf_ccb, sizeof(ccb));
2323
2324                 pthread_mutex_lock(&dev->mutex);
2325
2326                 /*
2327                  * We're now done, so take this off the active queue.
2328                  */
2329                 STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
2330                 dev->cur_active_io--;
2331
2332                 ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
2333                 if (ccb_status != CAM_REQ_CMP) {
2334                         cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
2335                                         CAM_EPF_ALL, stderr);
2336                 }
2337
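                /*
                 * SCSI completions report a residual count, so count
                 * only the bytes actually moved; the NVMe path reports
                 * no residual, so assume the full transfer length.
                 */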
2338                 switch (pass_dev->protocol) {
2339                 case PROTO_SCSI:
2340                         data->resid = ccb.csio.resid;
2341                         dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
2342                         break;
2343                 case PROTO_NVME:
2344                         data->resid = 0;
2345                         dev->bytes_transferred += ccb.nvmeio.dxfer_len;
2346                         break;
2347                 default:
2348                         return (-1);
2349                         break;
2350                 }
2351
2352                 if (buf->status == CAMDD_STATUS_NONE)
2353                         buf->status = camdd_ccb_status(&ccb, pass_dev->protocol);
2354                 if (buf->status == CAMDD_STATUS_ERROR)
2355                         error_count++;
2356                 else if (buf->status == CAMDD_STATUS_EOF) {
2357                         /*
2358                          * Once we queue this buffer to our partner thread,
2359                          * he will know that we've hit EOF.
2360                          */
2361                         dev->flags |= CAMDD_DEV_FLAG_EOF;
2362                 }
2363
2364                 camdd_complete_buf(dev, buf, &error_count);
2365
2366                 /*
2367                  * Unlock in preparation for the ioctl call.
2368                  */
2369                 pthread_mutex_unlock(&dev->mutex);
2370         }
2371
2372         pthread_mutex_lock(&dev->mutex);
2373
2374         if (error_count > 0)
2375                 return (-1);
2376         else
2377                 return (num_fetched);
2378 }
2379
2380 /*
2381  * Returns -1 for error, 0 for success/continue, and 1 for resource
2382  * shortage/stop processing.
2383  */
2384 int
2385 camdd_file_run(struct camdd_dev *dev)
2386 {
2387         struct camdd_dev_file *file_dev = &dev->dev_spec.file;
2388         struct camdd_buf_data *data;
2389         struct camdd_buf *buf;
2390         off_t io_offset;
2391         int retval = 0, write_dev = dev->write_dev;
2392         int error_count = 0, no_resources = 0, double_buf_needed = 0;
2393         uint32_t num_sectors = 0, db_len = 0;
2394
2395         buf = STAILQ_FIRST(&dev->run_queue);
2396         if (buf == NULL) {
2397                 no_resources = 1;
2398                 goto bailout;
2399         } else if ((dev->write_dev == 0)
2400                 && (dev->flags & (CAMDD_DEV_FLAG_EOF |
2401                                   CAMDD_DEV_FLAG_EOF_SENT))) {
2402                 STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2403                 dev->num_run_queue--;
2404                 buf->status = CAMDD_STATUS_EOF;
2405                 error_count++;
2406                 goto bailout;
2407         }
2408
2409         /*
2410          * If we're writing, we need to go through the source buffer list
2411          * and create an S/G list.
2412          */
2413         if (write_dev != 0) {
2414                 retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
2415                     dev->sector_size, &num_sectors, &double_buf_needed);
2416                 if (retval != 0) {
2417                         no_resources = 1;
2418                         goto bailout;
2419                 }
2420         }
2421
2422         STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2423         dev->num_run_queue--;
2424
2425         data = &buf->buf_type_spec.data;
2426
2427         /*
2428          * pread(2) and pwrite(2) offsets are byte offsets.
2429          */
2430         io_offset = buf->lba * dev->sector_size;
2431
2432         /*
2433          * Unlock the mutex while we read or write.
2434          */
2435         pthread_mutex_unlock(&dev->mutex);
2436
2437         /*
2438          * Note that we don't need to double buffer if we're the reader
2439          * because in that case, we have allocated a single buffer of
2440          * sufficient size to do the read.  This copy is necessary on
2441          * writes because if one of the components of the S/G list is not
2442          * a sector size multiple, the kernel will reject the write.  This
2443          * is unfortunate but not surprising.  So this will make sure that
2444          * we're using a single buffer that is a multiple of the sector size.
2445          */
2446         if ((double_buf_needed != 0)
2447          && (data->sg_count > 1)
2448          && (write_dev != 0)) {
2449                 uint32_t cur_offset;
2450                 int i;
2451
2452                 if (file_dev->tmp_buf == NULL)
2453                         file_dev->tmp_buf = calloc(dev->blocksize, 1);
2454                 if (file_dev->tmp_buf == NULL) {
2455                         buf->status = CAMDD_STATUS_ERROR;
2456                         error_count++;
2457                         pthread_mutex_lock(&dev->mutex);
2458                         goto bailout;
2459                 }
2460                 for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
2461                         bcopy(data->iovec[i].iov_base,
2462                             &file_dev->tmp_buf[cur_offset],
2463                             data->iovec[i].iov_len);
2464                         cur_offset += data->iovec[i].iov_len;
2465                 }
2466                 db_len = cur_offset;
2467         }
2468
2469         if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
2470                 if (write_dev == 0) {
2471                         /*
2472                          * XXX KDM is there any way we would need a S/G
2473                          * list here?
2474                          */
2475                         retval = pread(file_dev->fd, data->buf,
2476                             buf->len, io_offset);
2477                 } else {
2478                         if (double_buf_needed != 0) {
2479                                 retval = pwrite(file_dev->fd, file_dev->tmp_buf,
2480                                     db_len, io_offset);
2481                         } else if (data->sg_count == 0) {
2482                                 retval = pwrite(file_dev->fd, data->buf,
2483                                     data->fill_len, io_offset);
2484                         } else {
2485                                 retval = pwritev(file_dev->fd, data->iovec,
2486                                     data->sg_count, io_offset);
2487                         }
2488                 }
2489         } else {
2490                 if (write_dev == 0) {
2491                         /*
2492                          * XXX KDM is there any way we would need a S/G
2493                          * list here?
2494                          */
2495                         retval = read(file_dev->fd, data->buf, buf->len);
2496                 } else {
2497                         if (double_buf_needed != 0) {
2498                                 retval = write(file_dev->fd, file_dev->tmp_buf,
2499                                     db_len);
2500                         } else if (data->sg_count == 0) {
2501                                 retval = write(file_dev->fd, data->buf,
2502                                     data->fill_len);
2503                         } else {
2504                                 retval = writev(file_dev->fd, data->iovec,
2505                                     data->sg_count);
2506                         }
2507                 }
2508         }
2509
2510         /* We're done, re-acquire the lock */
2511         pthread_mutex_lock(&dev->mutex);
2512
2513         if (retval >= (ssize_t)data->fill_len) {
2514                 /*
2515          * If the number of bytes transferred is more than the request size,
2516                  * that indicates an overrun, which should only happen at
2517                  * the end of a transfer if we have to round up to a sector
2518                  * boundary.
2519                  */
2520                 if (buf->status == CAMDD_STATUS_NONE)
2521                         buf->status = CAMDD_STATUS_OK;
2522                 data->resid = 0;
2523                 dev->bytes_transferred += retval;
2524         } else if (retval == -1) {
2525                 warn("Error %s %s", (write_dev) ? "writing to" :
2526                     "reading from", file_dev->filename);
2527
2528                 buf->status = CAMDD_STATUS_ERROR;
2529                 data->resid = data->fill_len;
2530                 error_count++;
2531
2532                 if (dev->debug == 0)
2533                         goto bailout;
2534
2535                 if ((double_buf_needed != 0)
2536                  && (write_dev != 0)) {
2537                         fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
2538                             "offset %ju\n", __func__, file_dev->fd,
2539                             file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
2540                             (uintmax_t)io_offset);
2541                 } else if (data->sg_count == 0) {
2542                         fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
2543                             "offset %ju\n", __func__, file_dev->fd, data->buf,
2544                             data->fill_len, (uintmax_t)buf->lba,
2545                             (uintmax_t)io_offset);
2546                 } else {
2547                         int i;
2548
2549                         fprintf(stderr, "%s: fd %d, len %u, lba %ju "
2550                             "offset %ju\n", __func__, file_dev->fd, 
2551                             data->fill_len, (uintmax_t)buf->lba,
2552                             (uintmax_t)io_offset);
2553
2554                         for (i = 0; i < data->sg_count; i++) {
2555                                 fprintf(stderr, "index %d ptr %p len %zu\n",
2556                                     i, data->iovec[i].iov_base,
2557                                     data->iovec[i].iov_len);
2558                         }
2559                 }
2560         } else if (retval == 0) {
2561                 buf->status = CAMDD_STATUS_EOF;
2562                 if (dev->debug != 0)
2563                         printf("%s: got EOF from %s!\n", __func__,
2564                             file_dev->filename);
2565                 data->resid = data->fill_len;
2566                 error_count++;
2567         } else if (retval < (ssize_t)data->fill_len) {
2568                 if (buf->status == CAMDD_STATUS_NONE)
2569                         buf->status = CAMDD_STATUS_SHORT_IO;
2570                 data->resid = data->fill_len - retval;
2571                 dev->bytes_transferred += retval;
2572         }
2573
2574 bailout:
2575         if (buf != NULL) {
2576                 if (buf->status == CAMDD_STATUS_EOF) {
2577                         struct camdd_buf *buf2;
2578                         dev->flags |= CAMDD_DEV_FLAG_EOF;
2579                         STAILQ_FOREACH(buf2, &dev->run_queue, links)
2580                                 buf2->status = CAMDD_STATUS_EOF;
2581                 }
2582
2583                 camdd_complete_buf(dev, buf, &error_count);
2584         }
2585
2586         if (error_count != 0)
2587                 return (-1);
2588         else if (no_resources != 0)
2589                 return (1);
2590         else
2591                 return (0);
2592 }
2593
2594 /*
2595  * Execute one command from the run queue.  Returns 0 for success, 1 for
2596  * stop processing, and -1 for error.
2597  */
2598 int
2599 camdd_pass_run(struct camdd_dev *dev)
2600 {
2601         struct camdd_buf *buf = NULL;
2602         struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2603         struct camdd_buf_data *data;
2604         uint32_t num_blocks, sectors_used = 0;
2605         union ccb *ccb;
2606         int retval = 0, is_write = dev->write_dev;
2607         int double_buf_needed = 0;
2608
2609         buf = STAILQ_FIRST(&dev->run_queue);
2610         if (buf == NULL) {
2611                 retval = 1;
2612                 goto bailout;
2613         }
2614
2615         /*
2616          * If we're writing, we need to go through the source buffer list
2617          * and create an S/G list.
2618          */
2619         if (is_write != 0) {
2620                 retval = camdd_buf_sg_create(buf, /*iovec*/ 0, dev->sector_size,
2621                     &sectors_used, &double_buf_needed);
2622                 if (retval != 0) {
2623                         retval = -1;
2624                         goto bailout;
2625                 }
2626         }
2627
2628         STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2629         dev->num_run_queue--;
2630
2631         data = &buf->buf_type_spec.data;
2632
2633         /*
2634          * In almost every case the number of blocks should be the device
2635          * block size.  The exception may be at the end of an I/O stream
2636          * for a partial block or at the end of a device.
2637          */
2638         if (is_write != 0)
2639                 num_blocks = sectors_used;
2640         else
2641                 num_blocks = data->fill_len / pass_dev->block_len;
2642
2643         ccb = &data->ccb;
2644
2645         switch (pass_dev->protocol) {
2646         case PROTO_SCSI:
2647                 CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
2648
2649                 scsi_read_write(&ccb->csio,
2650                                 /*retries*/ dev->retry_count,
2651                                 /*cbfcnp*/ NULL,
2652                                 /*tag_action*/ MSG_SIMPLE_Q_TAG,
2653                                 /*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
2654                                            SCSI_RW_WRITE,
2655                                 /*byte2*/ 0,
2656                                 /*minimum_cmd_size*/ dev->min_cmd_size,
2657                                 /*lba*/ buf->lba,
2658                                 /*block_count*/ num_blocks,
2659                                 /*data_ptr*/ (data->sg_count != 0) ?
2660                                              (uint8_t *)data->segs : data->buf,
2661                                 /*dxfer_len*/ (num_blocks * pass_dev->block_len),
2662                                 /*sense_len*/ SSD_FULL_SIZE,
2663                                 /*timeout*/ dev->io_timeout);
2664
2665                 if (data->sg_count != 0) {
2666                         ccb->csio.sglist_cnt = data->sg_count;
2667                 }
2668                 break;
2669         case PROTO_NVME:
2670                 CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->nvmeio);
2671
2672                 nvme_read_write(&ccb->nvmeio,
2673                                 /*retries*/ dev->retry_count,
2674                                 /*cbfcnp*/ NULL,
2675                                 /*nsid*/ pass_dev->dev->target_lun & UINT32_MAX,
2676                                 /*readop*/ dev->write_dev == 0,
2677                                 /*lba*/ buf->lba,
2678                                 /*block_count*/ num_blocks,
2679                                 /*data_ptr*/ (data->sg_count != 0) ?
2680                                              (uint8_t *)data->segs : data->buf,
2681                                 /*dxfer_len*/ (num_blocks * pass_dev->block_len),
2682                                 /*timeout*/ dev->io_timeout);
2683
2684                 ccb->nvmeio.sglist_cnt = data->sg_count;
2685                 break;
2686         default:
2687                 retval = -1;
2688                 goto bailout;
2689         }
2690
2691         /* Disable freezing the device queue */
2692         ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
2693
2694         if (dev->retry_count != 0)
2695                 ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
2696
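        /*
         * CAM_DATA_SG tells CAM that data_ptr points at a
         * scatter/gather list rather than a single flat buffer.
         */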
2697         if (data->sg_count != 0) {
2698                 ccb->ccb_h.flags |= CAM_DATA_SG;
2699         }
2700
2701         /*
2702          * Store a pointer to the buffer in the CCB.  The kernel will
2703          * restore this when we get it back, and we'll use it to identify
2704          * the buffer this CCB came from.
2705          */
2706         ccb->ccb_h.ccb_buf = buf;
2707
2708         /*
2709          * Unlock our mutex in preparation for issuing the ioctl.
2710          */
2711         pthread_mutex_unlock(&dev->mutex);
2712         /*
2713          * Queue the CCB to the pass(4) driver.
2714          */
2715         if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
2716                 pthread_mutex_lock(&dev->mutex);
2717
2718                 warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
2719                      pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
2720                 warn("%s: CCB address is %p", __func__, ccb);
2721                 retval = -1;
2722
2723                 STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2724         } else {
2725                 pthread_mutex_lock(&dev->mutex);
2726
2727                 dev->cur_active_io++;
2728                 STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
2729         }
2730
2731 bailout:
2732         return (retval);
2733 }
2734
2735 int
2736 camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
2737 {
2738         uint32_t num_blocks;
2739         int retval = 0;
2740
2741         *lba = dev->next_io_pos_bytes / dev->sector_size;
2742         *len = dev->blocksize;
2743         num_blocks = *len / dev->sector_size;
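        /*
         * For example, a 65536-byte blocksize on a device with 512-byte
         * sectors yields 128 blocks per request.
         */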
2744
2745         /*
2746          * If max_sector is 0, then we have no set limit.  This can happen
2747          * if we're writing to a file in a filesystem, or reading from
2748          * something like /dev/zero.
2749          */
2750         if ((dev->max_sector != 0)
2751          || (dev->sector_io_limit != 0)) {
2752                 uint64_t max_sector;
2753
2754                 if ((dev->max_sector != 0)
2755                  && (dev->sector_io_limit != 0)) 
2756                         max_sector = min(dev->sector_io_limit, dev->max_sector);
2757                 else if (dev->max_sector != 0)
2758                         max_sector = dev->max_sector;
2759                 else
2760                         max_sector = dev->sector_io_limit;
2761
2762
2763                 /*
2764                  * Check to see whether we're starting off past the end of
2765                  * the device.  If so, we need to just send an EOF      
2766                  * notification to the writer.
2767                  */
2768                 if (*lba > max_sector) {
2769                         *len = 0;
2770                         retval = 1;
2771                 } else if (((*lba + num_blocks) > max_sector + 1)
2772                         || ((*lba + num_blocks) < *lba)) {
2773                         /*
2774                          * If we get here (but pass the first check), we
2775                          * can trim the request length down to go to the
2776                          * end of the device.
2777                          */
2778                         num_blocks = (max_sector + 1) - *lba;
2779                         *len = num_blocks * dev->sector_size;
2780                         retval = 1;
2781                 }
2782         }
2783
2784         dev->next_io_pos_bytes += *len;
2785
2786         return (retval);
2787 }
2788
2789 /*
2790  * Returns 0 for success, 1 for EOF detected, and -1 for failure.
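 *
 * For a read device, this queues the next read into a fresh buffer and
 * read_buf is not used.  For a write device, it folds the peer's
 * read_buf into the buffer being composed on the pending queue,
 * splitting it across several write-sized buffers when it holds more
 * data than one block can take.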
2791  */
2792 int
2793 camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
2794 {
2795         struct camdd_buf *buf = NULL;
2796         struct camdd_buf_data *data;
2797         size_t new_len;
2798         struct camdd_buf_data *rb_data;
2799         int is_write = dev->write_dev;
2800         int eof_flush_needed = 0;
2801         int retval = 0;
2802
2803         /*
2804          * If we've gotten EOF or our partner has, we should not continue
2805          * queueing I/O.  If we're a writer, though, we should continue
2806          * to write any buffers that don't have EOF status.
2807          */
2808         if ((dev->flags & CAMDD_DEV_FLAG_EOF)
2809          || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
2810           && (is_write == 0))) {
2811                 /*
2812                  * Tell the worker thread that we have seen EOF.
2813                  */
2814                 retval = 1;
2815
2816                 /*
2817                  * If we're the writer, send the buffer back with EOF status.
2818                  */
2819                 if (is_write) {
2820                         read_buf->status = CAMDD_STATUS_EOF;
2821
2822                         camdd_complete_peer_buf(dev, read_buf);
2823                 }
2824                 goto bailout;
2825         }
2826
2827         if (is_write == 0) {
2828                 buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2829                 if (buf == NULL) {
2830                         retval = -1;
2831                         goto bailout;
2832                 }
2833                 data = &buf->buf_type_spec.data;
2834
2835                 retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
2836                 if (retval != 0) {
2837                         buf->status = CAMDD_STATUS_EOF;
2838
2839                         if ((buf->len == 0)
2840                          && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
2841                              CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
2842                                 camdd_release_buf(buf);
2843                                 goto bailout;
2844                         }
2845                         dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
2846                 }
2847
2848                 data->fill_len = buf->len;
2849                 data->src_start_offset = buf->lba * dev->sector_size;
2850
2851                 /*
2852                  * Put this on the run queue.
2853                  */
2854                 STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2855                 dev->num_run_queue++;
2856
2857                 /* We're done. */
2858                 goto bailout;
2859         }
2860
2861         /*
2862          * Check for new EOF status from the reader.
2863          */
2864         if ((read_buf->status == CAMDD_STATUS_EOF)
2865          || (read_buf->status == CAMDD_STATUS_ERROR)) {
2866                 dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2867                 if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
2868                  && (read_buf->len == 0)) {
2869                         camdd_complete_peer_buf(dev, read_buf);
2870                         retval = 1;
2871                         goto bailout;
2872                 } else
2873                         eof_flush_needed = 1;
2874         }
2875
2876         /*
2877          * See if we have a buffer we're composing with pieces from our
2878          * partner thread.
2879          */
2880         buf = STAILQ_FIRST(&dev->pending_queue);
2881         if (buf == NULL) {
2882                 uint64_t lba;
2883                 ssize_t len;
2884
2885                 retval = camdd_get_next_lba_len(dev, &lba, &len);
2886                 if (retval != 0) {
2887                         read_buf->status = CAMDD_STATUS_EOF;
2888
2889                         if (len == 0) {
2890                                 dev->flags |= CAMDD_DEV_FLAG_EOF;
2891                                 camdd_complete_peer_buf(dev, read_buf);
2892                                 goto bailout;
2893                         }
2894                 }
2895
2896                 /*
2897                  * If we don't have a pending buffer, we need to grab a new
2898                  * one from the free list or allocate another one.
2899                  */
2900                 buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2901                 if (buf == NULL) {
2902                         retval = 1;
2903                         goto bailout;
2904                 }
2905
2906                 buf->lba = lba;
2907                 buf->len = len;
2908
2909                 STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
2910                 dev->num_pending_queue++;
2911         }
2912
2913         data = &buf->buf_type_spec.data;
2914
2915         rb_data = &read_buf->buf_type_spec.data;
2916
2917         if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
2918          && (dev->debug != 0)) {
2919                 printf("%s: WARNING: reader offset %#jx != expected offset "
2920                     "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
2921                     (uintmax_t)dev->next_peer_pos_bytes);
2922         }
2923         dev->next_peer_pos_bytes = rb_data->src_start_offset +
2924             (rb_data->fill_len - rb_data->resid);
2925
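        /*
         * new_len is what the composed buffer's fill will be once this
         * read is folded in: the bytes already accumulated (fill_len)
         * plus the valid portion (fill_len - resid) of the peer's read.
         */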
2926         new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
2927         if (new_len < buf->len) {
2928                 /*
2929                  * There are three cases here:
2930                  * 1. We need more data to fill up a block, so we put 
2931                  *    this I/O on the queue and wait for more I/O.
2932                  * 2. We have a pending buffer in the queue that is
2933                  *    smaller than our blocksize, but we got an EOF.  So we
2934                  *    need to go ahead and flush the write out.
2935                  * 3. We got an error.
2936                  */
2937
2938                 /*
2939                  * Increment our fill length.
2940                  */
2941                 data->fill_len += (rb_data->fill_len - rb_data->resid);
2942
2943                 /*
2944                  * Add the new read buffer to the list for writing.
2945                  */
2946                 STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2947
2948                 /* Increment the count */
2949                 buf->src_count++;
2950
2951                 if (eof_flush_needed == 0) {
2952                         /*
2953                          * We need to exit, because we don't have enough
2954                          * data yet.
2955                          */
2956                         goto bailout;
2957                 } else {
2958                         /*
2959                          * Take the buffer off of the pending queue.
2960                          */
2961                         STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
2962                                       links);
2963                         dev->num_pending_queue--;
2964
2965                         /*
2966                          * If we need an EOF flush, but there is no data
2967                          * to flush, go ahead and return this buffer.
2968                          */
2969                         if (data->fill_len == 0) {
2970                                 camdd_complete_buf(dev, buf, /*error_count*/0);
2971                                 retval = 1;
2972                                 goto bailout;
2973                         }
2974
2975                         /*
2976                          * Put this on the next queue for execution.
2977                          */
2978                         STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2979                         dev->num_run_queue++;
2980                 }
2981         } else if (new_len == buf->len) {
2982                 /*
2983                  * We have enough data to completely fill one block,
2984                  * so we're ready to issue the I/O.
2985                  */
2986
2987                 /*
2988                  * Take the buffer off of the pending queue.
2989                  */
2990                 STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
2991                 dev->num_pending_queue--;
2992
2993                 /*
2994                  * Add the new read buffer to the list for writing.
2995                  */
2996                 STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2997
2998                 /* Increment the count */
2999                 buf->src_count++;
3000
3001                 /*
3002                  * Increment our fill length.
3003                  */
3004                 data->fill_len += (rb_data->fill_len - rb_data->resid);
3005
3006                 /*
3007                  * Put this on the next queue for execution.
3008                  */
3009                 STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
3010                 dev->num_run_queue++;
3011         } else {
3012                 struct camdd_buf *idb;
3013                 struct camdd_buf_indirect *indirect;
3014                 uint32_t len_to_go, cur_offset;
3015
3016
3017                 idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
3018                 if (idb == NULL) {
3019                         retval = 1;
3020                         goto bailout;
3021                 }
3022                 indirect = &idb->buf_type_spec.indirect;
3023                 indirect->src_buf = read_buf;
3024                 read_buf->refcount++;
3025                 indirect->offset = 0;
3026                 indirect->start_ptr = rb_data->buf;
3027                 /*
3028                  * We've already established that there is more
3029                  * data in read_buf than we have room for in our
3030                  * current write request.  So this particular chunk
3031                  * of the request should just be the remainder
3032                  * needed to fill up a block.
3033                  */
3034                 indirect->len = buf->len - (data->fill_len - data->resid);
3035
3036                 camdd_buf_add_child(buf, idb);
3037
3038                 /*
3039                  * This buffer is ready to execute, so we can take
3040                  * it off the pending queue and put it on the run
3041                  * queue.
3042                  */
3043                 STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
3044                               links);
3045                 dev->num_pending_queue--;
3046                 STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
3047                 dev->num_run_queue++;
3048
3049                 cur_offset = indirect->offset + indirect->len;
3050
3051                 /*
3052                  * The resulting I/O would be too large to fit in
3053                  * one block.  We need to split this I/O into
3054                  * multiple pieces.  Allocate as many buffers as needed.
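                 * Each pass carves off min(len_to_go, new_buf->len)
                 * bytes; for instance, with 64K blocks, 150K remaining
                 * becomes chunks of 64K, 64K and 22K.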
3055                  */
3056                 for (len_to_go = rb_data->fill_len - rb_data->resid -
3057                      indirect->len; len_to_go > 0;) {
3058                         struct camdd_buf *new_buf;
3059                         struct camdd_buf_data *new_data;
3060                         uint64_t lba;
3061                         ssize_t len;
3062
3063                         retval = camdd_get_next_lba_len(dev, &lba, &len);
3064                         if ((retval != 0)
3065                          && (len == 0)) {
3066                                 /*
3067                                  * The device has already been marked
3068                                  * as EOF, and there is no space left.
3069                                  */
3070                                 goto bailout;
3071                         }
3072
3073                         new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
3074                         if (new_buf == NULL) {
3075                                 retval = 1;
3076                                 goto bailout;
3077                         }
3078
3079                         new_buf->lba = lba;
3080                         new_buf->len = len;
3081
3082                         idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
3083                         if (idb == NULL) {
3084                                 retval = 1;
3085                                 goto bailout;
3086                         }
3087
3088                         indirect = &idb->buf_type_spec.indirect;
3089
3090                         indirect->src_buf = read_buf;
3091                         read_buf->refcount++;
3092                         indirect->offset = cur_offset;
3093                         indirect->start_ptr = rb_data->buf + cur_offset;
3094                         indirect->len = min(len_to_go, new_buf->len);
3095 #if 0
3096                         if (((indirect->len % dev->sector_size) != 0)
3097                          || ((indirect->offset % dev->sector_size) != 0)) {
3098                                 warnx("offset %ju len %ju not aligned with "
3099                                     "sector size %u", (uintmax_t)indirect->offset,
3100                                     (uintmax_t)indirect->len, dev->sector_size);
3101                         }
3102 #endif
3103                         cur_offset += indirect->len;
3104                         len_to_go -= indirect->len;
3105
3106                         camdd_buf_add_child(new_buf, idb);
3107
3108                         new_data = &new_buf->buf_type_spec.data;
3109
3110                         if ((new_data->fill_len == new_buf->len)
3111                          || (eof_flush_needed != 0)) {
3112                                 STAILQ_INSERT_TAIL(&dev->run_queue,
3113                                                    new_buf, links);
3114                                 dev->num_run_queue++;
3115                         } else if (new_data->fill_len < buf->len) {
3116                                 STAILQ_INSERT_TAIL(&dev->pending_queue,
3117                                                 new_buf, links);
3118                                 dev->num_pending_queue++;
3119                         } else {
3120                                 warnx("%s: too much data in new "
3121                                       "buffer!", __func__);
3122                                 retval = 1;
3123                                 goto bailout;
3124                         }
3125                 }
3126         }
3127
3128 bailout:
3129         return (retval);
3130 }
3131
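/*
 * Report this device's queue depth (active I/Os plus those waiting on
 * the run queue) and the peer's backlog, as both I/O and byte counts.
 */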
3132 void
3133 camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
3134                 uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
3135 {
3136         *our_depth = dev->cur_active_io + dev->num_run_queue;
3137         if (dev->num_peer_work_queue >
3138             dev->num_peer_done_queue)
3139                 *peer_depth = dev->num_peer_work_queue -
3140                               dev->num_peer_done_queue;
3141         else
3142                 *peer_depth = 0;
3143         *our_bytes = *our_depth * dev->blocksize;
3144         *peer_bytes = dev->peer_bytes_queued;
3145 }
3146
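/*
 * SIGINFO requests a status report; any other signal we catch (e.g.
 * SIGINT) requests an exit.  Either way, post the semaphore so the
 * main loop in camdd_rw() wakes up and acts on the flags.
 */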
3147 void
3148 camdd_sig_handler(int sig)
3149 {
3150         if (sig == SIGINFO)
3151                 need_status = 1;
3152         else {
3153                 need_exit = 1;
3154                 error_exit = 1;
3155         }
3156
3157         sem_post(&camdd_sem);
3158 }
3159
3160 void
3161 camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev, 
3162                    struct timespec *start_time)
3163 {
3164         struct timespec done_time;
3165         uint64_t total_ns;
3166         long double mb_sec, total_sec;
3167         int error = 0;
3168
3169         error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
3170         if (error != 0) {
3171                 warn("Unable to get done time");
3172                 return;
3173         }
3174
3175         timespecsub(&done_time, start_time, &done_time);
3176
3177         total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000);
3178         total_sec = total_ns;
3179         total_sec /= 1000000000;
3180
3181         fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
3182                 "%.4Lf seconds elapsed\n",
3183                 (uintmax_t)camdd_dev->bytes_transferred,
3184                 (camdd_dev->write_dev == 0) ?  "read from" : "written to",
3185                 camdd_dev->device_name,
3186                 (uintmax_t)other_dev->bytes_transferred,
3187                 (other_dev->write_dev == 0) ? "read from" : "written to",
3188                 other_dev->device_name, total_sec);
3189
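        /*
         * Throughput is computed over the smaller of the two transfer
         * counts: dividing by 2^20 gives MB, and multiplying by
         * 1e9 / total_ns converts to per-second.  E.g. 1073741824 bytes
         * in 2.0e9 ns yields 1024 * (1e9 / 2e9) = 512 MB/sec.
         */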
3190         mb_sec = min(other_dev->bytes_transferred, camdd_dev->bytes_transferred);
3191         mb_sec /= 1024 * 1024;
3192         mb_sec *= 1000000000;
3193         mb_sec /= total_ns;
3194         fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
3195 }
3196
3197 int
3198 camdd_rw(struct camdd_io_opts *io_opts, camdd_argmask arglist, int num_io_opts,
3199          uint64_t max_io, int retry_count, int timeout)
3200 {
3201         struct cam_device *new_cam_dev = NULL;
3202         struct camdd_dev *devs[2];
3203         struct timespec start_time;
3204         pthread_t threads[2];
3205         int unit = 0;
3206         int error = 0;
3207         int i;
3208
3209         bzero(devs, sizeof(devs));
3210
3211         if (num_io_opts != 2) {
3212                 warnx("Must have one input and one output path");
3213                 error = 1;
3214                 goto bailout;
3215         }
3216
3217         for (i = 0; i < num_io_opts; i++) {
3218                 switch (io_opts[i].dev_type) {
3219                 case CAMDD_DEV_PASS: {
3220                         if (isdigit(io_opts[i].dev_name[0])) {
3221                                 int bus = 0, target = 0, lun = 0;
3222                                 int rv;
3223
3224                                 /* device specified as bus:target[:lun] */
3225                                 rv = parse_btl(io_opts[i].dev_name, &bus,
3226                                     &target, &lun);
3227                                 if (rv < 2) {
3228                                         warnx("numeric device specification "
3229                                              "must be either bus:target, or "
3230                                              "bus:target:lun");
3231                                         error = 1;
3232                                         goto bailout;
3233                                 }
3234                                 /* default to 0 if lun was not specified */
3235                                 if (rv == 2) {
3236                                         lun = 0;
3237                                 }
3238                                 new_cam_dev = cam_open_btl(bus, target, lun,
3239                                     O_RDWR, NULL);
3240                         } else {
3241                                 char name[30];
3242
3243                                 if (cam_get_device(io_opts[i].dev_name, name,
3244                                                    sizeof name, &unit) == -1) {
3245                                         warnx("%s", cam_errbuf);
3246                                         error = 1;
3247                                         goto bailout;
3248                                 }
3249                                 new_cam_dev = cam_open_spec_device(name, unit,
3250                                     O_RDWR, NULL);
3251                         }
3252
3253                         if (new_cam_dev == NULL) {
3254                                 warnx("%s", cam_errbuf);
3255                                 error = 1;
3256                                 goto bailout;
3257                         }
3258
3259                         devs[i] = camdd_probe_pass(new_cam_dev,
3260                             /*io_opts*/ &io_opts[i],
3261                             arglist, 
3262                             /*probe_retry_count*/ 3,
3263                             /*probe_timeout*/ 5000,
3264                             /*io_retry_count*/ retry_count,
3265                             /*io_timeout*/ timeout);
3266                         if (devs[i] == NULL) {
3267                                 warn("Unable to probe device %s%u",
3268                                      new_cam_dev->device_name,
3269                                      new_cam_dev->dev_unit_num);
3270                                 error = 1;
3271                                 goto bailout;
3272                         }
3273                         break;
3274                 }
3275                 case CAMDD_DEV_FILE: {
3276                         int fd = -1;
3277
3278                         if (io_opts[i].dev_name[0] == '-') {
3279                                 if (io_opts[i].write_dev != 0)
3280                                         fd = STDOUT_FILENO;
3281                                 else
3282                                         fd = STDIN_FILENO;
3283                         } else {
3284                                 if (io_opts[i].write_dev != 0) {
3285                                         fd = open(io_opts[i].dev_name,
3286                                             O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
3287                                 } else {
3288                                         fd = open(io_opts[i].dev_name,
3289                                             O_RDONLY);
3290                                 }
3291                         }
3292                         if (fd == -1) {
3293                                 warn("error opening file %s",
3294                                     io_opts[i].dev_name);
3295                                 error = 1;
3296                                 goto bailout;
3297                         }
3298
3299                         devs[i] = camdd_probe_file(fd, &io_opts[i],
3300                             retry_count, timeout);
3301                         if (devs[i] == NULL) {
3302                                 error = 1;
3303                                 goto bailout;
3304                         }
3305
3306                         break;
3307                 }
3308                 default:
3309                         warnx("Unknown device type %d (%s)",
3310                             io_opts[i].dev_type, io_opts[i].dev_name);
3311                         error = 1;
3312                         goto bailout;
3313                         break; /* NOTREACHED */
3314                 }
3315
3316                 devs[i]->write_dev = io_opts[i].write_dev;
3317
3318                 devs[i]->start_offset_bytes = io_opts[i].offset;
3319
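                /*
                 * sector_io_limit is the last sector this device should
                 * touch.  For example, offset=1M and max_io=10M with
                 * 512-byte sectors gives (2048 + 20480) - 1 = 22527.
                 */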
3320                 if (max_io != 0) {
3321                         devs[i]->sector_io_limit =
3322                             (devs[i]->start_offset_bytes /
3323                             devs[i]->sector_size) +
3324                             (max_io / devs[i]->sector_size) - 1;
3325                 }
3326
3327                 devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
3328                 devs[i]->next_completion_pos_bytes = devs[i]->start_offset_bytes;
3329         }
3330
3331         devs[0]->peer_dev = devs[1];
3332         devs[1]->peer_dev = devs[0];
3333         devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
3334         devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
3335
3336         sem_init(&camdd_sem, /*pshared*/ 0, 0);
3337
3338         signal(SIGINFO, camdd_sig_handler);
3339         signal(SIGINT, camdd_sig_handler);
3340
3341         error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
3342         if (error != 0) {
3343                 warn("Unable to get start time");
3344                 goto bailout;
3345         }
3346
3347         for (i = 0; i < num_io_opts; i++) {
3348                 error = pthread_create(&threads[i], NULL, camdd_worker,
3349                                        (void *)devs[i]);
3350                 if (error != 0) {
3351                         warnc(error, "pthread_create() failed");
3352                         goto bailout;
3353                 }
3354         }
3355
3356         for (;;) {
3357                 if ((sem_wait(&camdd_sem) == -1)
3358                  || (need_exit != 0)) {
3359                         struct kevent ke;
3360
3361                         for (i = 0; i < num_io_opts; i++) {
3362                                 EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
3363                                     EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
3364
3365                                 devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
3366
3367                                 error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
3368                                                 NULL);
3369                                 if (error == -1)
3370                                         warn("%s: unable to wake up thread",
3371                                             __func__);
3372                                 error = 0;
3373                         }
3374                         break;
3375                 } else if (need_status != 0) {
3376                         camdd_print_status(devs[0], devs[1], &start_time);
3377                         need_status = 0;
3378                 }
3379         }
3380         for (i = 0; i < num_io_opts; i++) {
3381                 pthread_join(threads[i], NULL);
3382         }
3383
3384         camdd_print_status(devs[0], devs[1], &start_time);
3385
3386 bailout:
3387
3388         for (i = 0; i < num_io_opts; i++)
3389                 camdd_free_dev(devs[i]);
3390
3391         return (error + error_exit);
3392 }
3393
3394 void
3395 usage(void)
3396 {
3397         fprintf(stderr,
3398 "usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
3399 "              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
3400 "              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
3401 "              <-i|-o file=/dev/nsa0,bs=512K>\n"
3402 "              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
3403 "Option descriptions\n"
3404 "-i <arg=val>  Specify input device/file and parameters\n"
3405 "-o <arg=val>  Specify output device/file and parameters\n"
3406 "Input and Output parameters\n"
3407 "pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
3408 "file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
3409 "              or - for stdin/stdout\n"
3410 "bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
3411 "offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
3412 "              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
3413 "depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
3414 "mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
3415 "Optional arguments\n"
3416 "-C retry_cnt  Specify a retry count for pass(4) devices\n"
3417 "-E            Enable CAM error recovery for pass(4) devices\n"
3418 "-m max_io     Specify the maximum amount to be transferred in bytes or\n"
3419 "              using K, M, G, etc. suffixes\n"
3420 "-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
3421 "-v            Enable verbose error recovery\n"
3422 "-h            Print this message\n");
3423 }
3424
3425
3426 int
3427 camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
3428 {
3429         char *tmpstr, *tmpstr2;
3430         char *orig_tmpstr = NULL;
3431         int retval = 0;
3432
3433         io_opts->write_dev = is_write;
3434
3435         tmpstr = strdup(args);
3436         if (tmpstr == NULL) {
3437                 warn("strdup failed");
3438                 retval = 1;
3439                 goto bailout;
3440         }
3441         orig_tmpstr = tmpstr;
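        /*
         * The argument string looks like "pass=pass0,bs=1M,offset=1M,
         * depth=4": split it on commas, then split each piece into a
         * name=value pair.
         */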
3442         while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
3443                 char *name, *value;
3444
3445                 /*
3446                  * If the user creates an empty parameter by putting in two
3447                  * commas, skip over it and look for the next field.
3448                  */
3449                 if (*tmpstr2 == '\0')
3450                         continue;
3451
3452                 name = strsep(&tmpstr2, "=");
3453                 if (*name == '\0') {
3454                         warnx("Got empty I/O parameter name");
3455                         retval = 1;
3456                         goto bailout;
3457                 }
3458                 value = strsep(&tmpstr2, "=");
3459                 if ((value == NULL)
3460                  || (*value == '\0')) {
3461                         warnx("Empty I/O parameter value for %s", name);
3462                         retval = 1;
3463                         goto bailout;
3464                 }
3465                 if (strncasecmp(name, "file", 4) == 0) {
3466                         io_opts->dev_type = CAMDD_DEV_FILE;
3467                         io_opts->dev_name = strdup(value);
3468                         if (io_opts->dev_name == NULL) {
3469                                 warn("Error allocating memory");
3470                                 retval = 1;
3471                                 goto bailout;
3472                         }
3473                 } else if (strncasecmp(name, "pass", 4) == 0) {
3474                         io_opts->dev_type = CAMDD_DEV_PASS;
3475                         io_opts->dev_name = strdup(value);
3476                         if (io_opts->dev_name == NULL) {
3477                                 warn("Error allocating memory");
3478                                 retval = 1;
3479                                 goto bailout;
3480                         }
3481                 } else if ((strncasecmp(name, "bs", 2) == 0)
3482                         || (strncasecmp(name, "blocksize", 9) == 0)) {
3483                         retval = expand_number(value, &io_opts->blocksize);
3484                         if (retval == -1) {
3485                                 warn("expand_number(3) failed on %s=%s", name,
3486                                     value);
3487                                 retval = 1;
3488                                 goto bailout;
3489                         }
3490                 } else if (strncasecmp(name, "depth", 5) == 0) {
3491                         char *endptr;
3492
3493                         io_opts->queue_depth = strtoull(value, &endptr, 0);
3494                         if (*endptr != '\0') {
3495                                 warnx("invalid queue depth %s", value);
3496                                 retval = 1;
3497                                 goto bailout;
3498                         }
3499                 } else if (strncasecmp(name, "mcs", 3) == 0) {
3500                         char *endptr;
3501
3502                         io_opts->min_cmd_size = strtol(value, &endptr, 0);
3503                         if ((*endptr != '\0')
3504                          || ((io_opts->min_cmd_size > 16)
3505                           || (io_opts->min_cmd_size < 0))) {
3506                                 warnx("invalid minimum cmd size %s", value);
3507                                 retval = 1;
3508                                 goto bailout;
3509                         }
3510                 } else if (strncasecmp(name, "offset", 6) == 0) {
3511                         retval = expand_number(value, &io_opts->offset);
3512                         if (retval == -1) {
3513                                 warn("expand_number(3) failed on %s=%s", name,
3514                                     value);
3515                                 retval = 1;
3516                                 goto bailout;
3517                         }
3518                 } else if (strncasecmp(name, "debug", 5) == 0) {
3519                         char *endptr;
3520
3521                         io_opts->debug = strtoull(value, &endptr, 0);
3522                         if (*endptr != '\0') {
3523                                 warnx("invalid debug level %s", value);
3524                                 retval = 1;
3525                                 goto bailout;
3526                         }
3527                 } else {
3528                         warnx("Unrecognized parameter %s=%s", name, value);
3529                 }
3530         }
3531 bailout:
3532         free(orig_tmpstr);
3533
3534         return (retval);
3535 }
3536
3537 int
3538 main(int argc, char **argv)
3539 {
3540         int c;
3541         camdd_argmask arglist = CAMDD_ARG_NONE;
3542         int timeout = 0, retry_count = 1;
3543         int error = 0;
3544         uint64_t max_io = 0;
3545         struct camdd_io_opts *opt_list = NULL;
3546
3547         if (argc == 1) {
3548                 usage();
3549                 exit(1);
3550         }
3551
3552         opt_list = calloc(2, sizeof(struct camdd_io_opts));
3553         if (opt_list == NULL) {
3554                 warn("Unable to allocate option list");
3555                 error = 1;
3556                 goto bailout;
3557         }
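        /*
         * opt_list[0] holds the -i (input) options and opt_list[1] the
         * -o (output) options; camdd_rw() expects exactly this pair.
         */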
3558
3559         while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1) {
3560                 switch (c) {
3561                 case 'C':
3562                         retry_count = strtol(optarg, NULL, 0);
3563                         if (retry_count < 0)
3564                                 errx(1, "retry count %d is < 0",
3565                                      retry_count);
3566                         break;
3567                 case 'E':
3568                         arglist |= CAMDD_ARG_ERR_RECOVER;
3569                         break;
3570                 case 'i':
3571                 case 'o':
3572                         if (((c == 'i')
3573                           && (opt_list[0].dev_type != CAMDD_DEV_NONE))
3574                          || ((c == 'o')
3575                           && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
3576                                 errx(1, "Only one input and output path "
3577                                     "allowed");
3578                         }
3579                         error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
3580                             (c == 'o') ? &opt_list[1] : &opt_list[0]);
3581                         if (error != 0)
3582                                 goto bailout;
3583                         break;
3584                 case 'm':
3585                         error = expand_number(optarg, &max_io);
3586                         if (error == -1) {
3587                                 warn("invalid maximum I/O amount %s", optarg);
3588                                 error = 1;
3589                                 goto bailout;
3590                         }
3591                         break;
3592                 case 't':
3593                         timeout = strtol(optarg, NULL, 0);
3594                         if (timeout < 0)
3595                                 errx(1, "invalid timeout %d", timeout);
3596                         /* Convert the timeout from seconds to ms */
3597                         timeout *= 1000;
3598                         break;
3599                 case 'v':
3600                         arglist |= CAMDD_ARG_VERBOSE;
3601                         break;
3602                 case 'h':
3603                 default:
3604                         usage();
3605                         exit(1);
3606                         break; /* NOTREACHED */
3607                 }
3608         }
3609
3610         if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
3611          || (opt_list[1].dev_type == CAMDD_DEV_NONE))
3612                 errx(1, "Must specify both -i and -o");
3613
3614         /*
3615          * Set the timeout if the user hasn't specified one.
3616          */
3617         if (timeout == 0)
3618                 timeout = CAMDD_PASS_RW_TIMEOUT;
3619
3620         error = camdd_rw(opt_list, arglist, 2, max_io, retry_count, timeout);
3621
3622 bailout:
3623         free(opt_list);
3624
3625         exit(error);
3626 }