/*-
 * Copyright (c) 1997-2007 Kenneth D. Merry
 * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Ken Merry           (Spectra Logic Corporation)
 */

/*
 * This is eventually intended to be:
 * - A basic data transfer/copy utility
 * - A simple benchmark utility
 * - An example of how to use the asynchronous pass(4) driver interface.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/ioctl.h>
#include <sys/stdint.h>
#include <sys/types.h>
#include <sys/endian.h>
#include <sys/param.h>
#include <sys/sbuf.h>
#include <sys/stat.h>
#include <sys/event.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <vm/vm.h>
#include <machine/bus.h>
#include <sys/bus.h>
#include <sys/bus_dma.h>
#include <sys/mtio.h>
#include <sys/conf.h>
#include <sys/disk.h>

#include <stdio.h>
#include <stdlib.h>
#include <semaphore.h>
#include <string.h>
#include <unistd.h>
#include <inttypes.h>
#include <limits.h>
#include <fcntl.h>
#include <ctype.h>
#include <err.h>
#include <libutil.h>
#include <pthread.h>
#include <assert.h>
#include <bsdxml.h>

#include <cam/cam.h>
#include <cam/cam_debug.h>
#include <cam/cam_ccb.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/scsi/scsi_pass.h>
#include <cam/scsi/scsi_message.h>
#include <cam/scsi/smp_all.h>
#include <camlib.h>
#include <mtlib.h>
#include <zlib.h>

typedef enum {
        CAMDD_CMD_NONE          = 0x00000000,
        CAMDD_CMD_HELP          = 0x00000001,
        CAMDD_CMD_WRITE         = 0x00000002,
        CAMDD_CMD_READ          = 0x00000003
} camdd_cmdmask;

typedef enum {
        CAMDD_ARG_NONE          = 0x00000000,
        CAMDD_ARG_VERBOSE       = 0x00000001,
        CAMDD_ARG_DEVICE        = 0x00000002,
        CAMDD_ARG_BUS           = 0x00000004,
        CAMDD_ARG_TARGET        = 0x00000008,
        CAMDD_ARG_LUN           = 0x00000010,
        CAMDD_ARG_UNIT          = 0x00000020,
        CAMDD_ARG_TIMEOUT       = 0x00000040,
        CAMDD_ARG_ERR_RECOVER   = 0x00000080,
        CAMDD_ARG_RETRIES       = 0x00000100
} camdd_argmask;

typedef enum {
        CAMDD_DEV_NONE          = 0x00,
        CAMDD_DEV_PASS          = 0x01,
        CAMDD_DEV_FILE          = 0x02
} camdd_dev_type;

struct camdd_io_opts {
        camdd_dev_type  dev_type;
        char            *dev_name;
        uint64_t        blocksize;
        uint64_t        queue_depth;
        uint64_t        offset;
        int             min_cmd_size;
        int             write_dev;
        uint64_t        debug;
};

typedef enum {
        CAMDD_BUF_NONE,
        CAMDD_BUF_DATA,
        CAMDD_BUF_INDIRECT
} camdd_buf_type;

struct camdd_buf_indirect {
        /*
         * Pointer to the source buffer.
         */
        struct camdd_buf *src_buf;

        /*
         * Offset into the source buffer, in bytes.
         */
        uint64_t          offset;
        /*
         * Pointer to the starting point in the source buffer.
         */
        uint8_t          *start_ptr;

        /*
         * Length of this chunk in bytes.
         */
        size_t            len;
};

struct camdd_buf_data {
        /*
         * Buffer allocated when we allocate this camdd_buf.  Its size
         * should match the blocksize for this device.
         */
        uint8_t                 *buf;

        /*
         * The amount of backing store allocated in buf.  Generally this
         * will be the blocksize of the device.
         */
        uint32_t                 alloc_len;

        /*
         * The amount of data that was put into the buffer (on reads) or
         * the amount of data we have put onto the src_list so far (on
         * writes).
         */
        uint32_t                 fill_len;

        /*
         * The amount of data that was not transferred.
         */
        uint32_t                 resid;

        /*
         * Starting byte offset on the reader.
         */
        uint64_t                 src_start_offset;

        /*
         * CCB used for pass(4) device targets.
         */
        union ccb                ccb;

        /*
         * Number of scatter/gather segments.
         */
        int                      sg_count;

        /*
         * Set if we had to tack on an extra buffer to round the transfer
         * up to a sector size.
         */
        int                      extra_buf;

        /*
         * Scatter/gather list used generally when we're the writer for a
         * pass(4) device.
         */
        bus_dma_segment_t       *segs;

        /*
         * Scatter/gather list used generally when we're the writer for a
         * file or block device.
         */
        struct iovec            *iovec;
};

union camdd_buf_types {
        struct camdd_buf_indirect       indirect;
        struct camdd_buf_data           data;
};

typedef enum {
        CAMDD_STATUS_NONE,
        CAMDD_STATUS_OK,
        CAMDD_STATUS_SHORT_IO,
        CAMDD_STATUS_EOF,
        CAMDD_STATUS_ERROR
} camdd_buf_status;

struct camdd_buf {
        camdd_buf_type           buf_type;
        union camdd_buf_types    buf_type_spec;

        camdd_buf_status         status;

        uint64_t                 lba;
        size_t                   len;

        /*
         * A reference count of how many indirect buffers point to this
         * buffer.
         */
        int                      refcount;

        /*
         * A link back to our parent device.
         */
        struct camdd_dev        *dev;
        STAILQ_ENTRY(camdd_buf)  links;
        STAILQ_ENTRY(camdd_buf)  work_links;

        /*
         * A count of the buffers on the src_list.
         */
        int                      src_count;

        /*
         * List of buffers from our partner thread that are the components
         * of this buffer for the I/O.  Uses src_links.
         */
        STAILQ_HEAD(,camdd_buf)  src_list;
        STAILQ_ENTRY(camdd_buf)  src_links;
};

#define NUM_DEV_TYPES   2

struct camdd_dev_pass {
        int                      scsi_dev_type;
        int                      protocol;
        struct cam_device       *dev;
        uint64_t                 max_sector;
        uint32_t                 block_len;
        uint32_t                 cpi_maxio;
};

typedef enum {
        CAMDD_FILE_NONE,
        CAMDD_FILE_REG,
        CAMDD_FILE_STD,
        CAMDD_FILE_PIPE,
        CAMDD_FILE_DISK,
        CAMDD_FILE_TAPE,
        CAMDD_FILE_TTY,
        CAMDD_FILE_MEM
} camdd_file_type;

typedef enum {
        CAMDD_FF_NONE           = 0x00,
        CAMDD_FF_CAN_SEEK       = 0x01
} camdd_file_flags;

struct camdd_dev_file {
        int                      fd;
        struct stat              sb;
        char                     filename[MAXPATHLEN + 1];
        camdd_file_type          file_type;
        camdd_file_flags         file_flags;
        uint8_t                 *tmp_buf;
};

struct camdd_dev_block {
        int                      fd;
        uint64_t                 size_bytes;
        uint32_t                 block_len;
};

union camdd_dev_spec {
        struct camdd_dev_pass   pass;
        struct camdd_dev_file   file;
        struct camdd_dev_block  block;
};

typedef enum {
        CAMDD_DEV_FLAG_NONE             = 0x00,
        CAMDD_DEV_FLAG_EOF              = 0x01,
        CAMDD_DEV_FLAG_PEER_EOF         = 0x02,
        CAMDD_DEV_FLAG_ACTIVE           = 0x04,
        CAMDD_DEV_FLAG_EOF_SENT         = 0x08,
        CAMDD_DEV_FLAG_EOF_QUEUED       = 0x10
} camdd_dev_flags;

struct camdd_dev {
        camdd_dev_type           dev_type;
        union camdd_dev_spec     dev_spec;
        camdd_dev_flags          flags;
        char                     device_name[MAXPATHLEN+1];
        uint32_t                 blocksize;
        uint32_t                 sector_size;
        uint64_t                 max_sector;
        uint64_t                 sector_io_limit;
        int                      min_cmd_size;
        int                      write_dev;
        int                      retry_count;
        int                      io_timeout;
        int                      debug;
        uint64_t                 start_offset_bytes;
        uint64_t                 next_io_pos_bytes;
        uint64_t                 next_peer_pos_bytes;
        uint64_t                 next_completion_pos_bytes;
        uint64_t                 peer_bytes_queued;
        uint64_t                 bytes_transferred;
        uint32_t                 target_queue_depth;
        uint32_t                 cur_active_io;
        uint8_t                 *extra_buf;
        uint32_t                 extra_buf_len;
        struct camdd_dev        *peer_dev;
        pthread_mutex_t          mutex;
        pthread_cond_t           cond;
        int                      kq;

        int                      (*run)(struct camdd_dev *dev);
        int                      (*fetch)(struct camdd_dev *dev);

        /*
         * Buffers that are available for I/O.  Uses links.
         */
        STAILQ_HEAD(,camdd_buf)  free_queue;

        /*
         * Free indirect buffers.  These are used for breaking a large
         * buffer into multiple pieces.
         */
        STAILQ_HEAD(,camdd_buf)  free_indirect_queue;

        /*
         * Buffers that have been queued to the kernel.  Uses links.
         */
        STAILQ_HEAD(,camdd_buf)  active_queue;

        /*
         * Will generally contain one of our buffers that is waiting for
         * enough I/O from our partner thread to be able to execute.  This
         * will generally happen when our per-I/O size is larger than the
         * partner thread's per-I/O size.  Uses links.
         */
        STAILQ_HEAD(,camdd_buf)  pending_queue;

        /*
         * Number of buffers on the pending queue.
         */
        int                      num_pending_queue;

        /*
         * Buffers that are filled and ready to execute.  This is used when
         * our partner (reader) thread sends us blocks that are larger than
         * our blocksize, and so we have to split them into multiple pieces.
         */
        STAILQ_HEAD(,camdd_buf)  run_queue;

        /*
         * Number of buffers on the run queue.
         */
        int                      num_run_queue;

        STAILQ_HEAD(,camdd_buf)  reorder_queue;

        int                      num_reorder_queue;

        /*
         * Buffers that have been queued to us by our partner thread
         * (generally the reader thread) to be written out.  Uses
         * work_links.
         */
        STAILQ_HEAD(,camdd_buf)  work_queue;

        /*
         * Buffers that have been completed by our partner thread.  Uses
         * work_links.
         */
        STAILQ_HEAD(,camdd_buf)  peer_done_queue;

        /*
         * Number of buffers on the peer done queue.
         */
        uint32_t                 num_peer_done_queue;

        /*
         * A list of buffers that we have queued to our peer thread.  Uses
         * links.
         */
        STAILQ_HEAD(,camdd_buf)  peer_work_queue;

        /*
         * Number of buffers on the peer work queue.
         */
        uint32_t                 num_peer_work_queue;
};

static sem_t camdd_sem;
static sig_atomic_t need_exit = 0;
static sig_atomic_t error_exit = 0;
static sig_atomic_t need_status = 0;

#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif
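
/*
 * The parentheses in the definition above matter once the macro appears
 * inside a larger expression: an unparenthesized (a < b) ? a : b body
 * would make min(x, y) + 1 expand and parse as (x < y) ? x : (y + 1),
 * silently yielding the wrong value whenever x >= y.
 */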

/*
 * XXX KDM private copy of timespecsub().  This is normally defined in
 * sys/time.h, but is only enabled in the kernel.  If that definition is
 * enabled in userland, it breaks the build of libnetbsd.
 */
#ifndef timespecsub
#define timespecsub(vvp, uvp)                                           \
        do {                                                            \
                (vvp)->tv_sec -= (uvp)->tv_sec;                         \
                (vvp)->tv_nsec -= (uvp)->tv_nsec;                       \
                if ((vvp)->tv_nsec < 0) {                               \
                        (vvp)->tv_sec--;                                \
                        (vvp)->tv_nsec += 1000000000;                   \
                }                                                       \
        } while (0)
#endif
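
/*
 * Illustrative usage (editor's sketch, not from the original source):
 * the macro subtracts uvp from vvp in place, so elapsed time can be
 * computed as:
 *
 *        struct timespec start, now;
 *
 *        clock_gettime(CLOCK_MONOTONIC, &start);
 *        ... do work ...
 *        clock_gettime(CLOCK_MONOTONIC, &now);
 *        timespecsub(&now, &start);
 *
 * after which "now" holds the elapsed time.
 */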

/* Generically useful offsets into the peripheral private area */
#define ppriv_ptr0 periph_priv.entries[0].ptr
#define ppriv_ptr1 periph_priv.entries[1].ptr
#define ppriv_field0 periph_priv.entries[0].field
#define ppriv_field1 periph_priv.entries[1].field

#define ccb_buf ppriv_ptr0

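/*
 * Example (editor's sketch): with the ccb_buf alias above, the pass(4)
 * I/O paths can stash the camdd_buf pointer for a request in the CCB's
 * peripheral private area on submission:
 *
 *        ccb->ccb_h.ccb_buf = buf;
 *
 * and recover it when the completed CCB is fetched back from the kernel:
 *
 *        buf = ccb->ccb_h.ccb_buf;
 */
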
#define CAMDD_FILE_DEFAULT_BLOCK        524288
#define CAMDD_FILE_DEFAULT_DEPTH        1
#define CAMDD_PASS_MAX_BLOCK            1048576
#define CAMDD_PASS_DEFAULT_DEPTH        6
#define CAMDD_PASS_RW_TIMEOUT           (60 * 1000)     /* 60s, in ms */

static int parse_btl(char *tstr, int *bus, int *target, int *lun,
                     camdd_argmask *arglst);
void camdd_free_dev(struct camdd_dev *dev);
struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
                                  struct kevent *new_ke, int num_ke,
                                  int retry_count, int timeout);
static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
                                         camdd_buf_type buf_type);
void camdd_release_buf(struct camdd_buf *buf);
struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
                        uint32_t sector_size, uint32_t *num_sectors_used,
                        int *double_buf_needed);
uint32_t camdd_buf_get_len(struct camdd_buf *buf);
void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
                     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
         camdd_argmask arglist, int probe_retry_count,
         int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
                                   int retry_count, int timeout);
struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
                                   struct camdd_io_opts *io_opts,
                                   camdd_argmask arglist, int probe_retry_count,
                                   int probe_timeout, int io_retry_count,
                                   int io_timeout);
void *camdd_file_worker(void *arg);
camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol);
int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd);
int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
void camdd_peer_done(struct camdd_buf *buf);
void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
                        int *error_count);
int camdd_pass_fetch(struct camdd_dev *dev);
int camdd_file_run(struct camdd_dev *dev);
int camdd_pass_run(struct camdd_dev *dev);
int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
                     uint32_t *peer_depth, uint32_t *our_bytes,
                     uint32_t *peer_bytes);
void *camdd_worker(void *arg);
void camdd_sig_handler(int sig);
void camdd_print_status(struct camdd_dev *camdd_dev,
                        struct camdd_dev *other_dev,
                        struct timespec *start_time);
int camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts,
             uint64_t max_io, int retry_count, int timeout);
int camdd_parse_io_opts(char *args, int is_write,
                        struct camdd_io_opts *io_opts);
void usage(void);

/*
 * Parse out a bus, or a bus, target and lun in the following
 * format:
 * bus
 * bus:target
 * bus:target:lun
 *
 * Returns the number of parsed components, or 0.
 */
static int
parse_btl(char *tstr, int *bus, int *target, int *lun, camdd_argmask *arglst)
{
        char *tmpstr;
        int convs = 0;

        while (isspace((unsigned char)*tstr) && (*tstr != '\0'))
                tstr++;

        tmpstr = strtok(tstr, ":");
        if ((tmpstr != NULL) && (*tmpstr != '\0')) {
                *bus = strtol(tmpstr, NULL, 0);
                *arglst |= CAMDD_ARG_BUS;
                convs++;
                tmpstr = strtok(NULL, ":");
                if ((tmpstr != NULL) && (*tmpstr != '\0')) {
                        *target = strtol(tmpstr, NULL, 0);
                        *arglst |= CAMDD_ARG_TARGET;
                        convs++;
                        tmpstr = strtok(NULL, ":");
                        if ((tmpstr != NULL) && (*tmpstr != '\0')) {
                                *lun = strtol(tmpstr, NULL, 0);
                                *arglst |= CAMDD_ARG_LUN;
                                convs++;
                        }
                }
        }

        return convs;
}
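
/*
 * Illustrative use (editor's sketch, not from the original source).
 * parse_btl() modifies its input via strtok(), so the string must be
 * writable:
 *
 *        int bus, target, lun;
 *        camdd_argmask args = CAMDD_ARG_NONE;
 *        char spec[] = "0:1:0";
 *        int convs;
 *
 *        convs = parse_btl(spec, &bus, &target, &lun, &args);
 *
 * On return convs == 3, bus == 0, target == 1, lun == 0, and args has
 * CAMDD_ARG_BUS, CAMDD_ARG_TARGET and CAMDD_ARG_LUN set.
 */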

/*
 * XXX KDM clean up and free all of the buffers on the queue!
 */
void
camdd_free_dev(struct camdd_dev *dev)
{
        if (dev == NULL)
                return;

        switch (dev->dev_type) {
        case CAMDD_DEV_FILE: {
                struct camdd_dev_file *file_dev = &dev->dev_spec.file;

                if (file_dev->fd != -1)
                        close(file_dev->fd);
                free(file_dev->tmp_buf);
                break;
        }
        case CAMDD_DEV_PASS: {
                struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;

                if (pass_dev->dev != NULL)
                        cam_close_device(pass_dev->dev);
                break;
        }
        default:
                break;
        }

        free(dev);
}

struct camdd_dev *
camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
                int retry_count, int timeout)
{
        struct camdd_dev *dev = NULL;
        struct kevent *ke = NULL;
        size_t ke_size;
        int retval = 0;

        dev = malloc(sizeof(*dev));
        if (dev == NULL) {
                warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
                goto bailout;
        }

        bzero(dev, sizeof(*dev));

        dev->dev_type = dev_type;
        dev->io_timeout = timeout;
        dev->retry_count = retry_count;
        STAILQ_INIT(&dev->free_queue);
        STAILQ_INIT(&dev->free_indirect_queue);
        STAILQ_INIT(&dev->active_queue);
        STAILQ_INIT(&dev->pending_queue);
        STAILQ_INIT(&dev->run_queue);
        STAILQ_INIT(&dev->reorder_queue);
        STAILQ_INIT(&dev->work_queue);
        STAILQ_INIT(&dev->peer_done_queue);
        STAILQ_INIT(&dev->peer_work_queue);
        retval = pthread_mutex_init(&dev->mutex, NULL);
        if (retval != 0) {
                warnc(retval, "%s: failed to initialize mutex", __func__);
                goto bailout;
        }

        retval = pthread_cond_init(&dev->cond, NULL);
        if (retval != 0) {
                warnc(retval, "%s: failed to initialize condition variable",
                      __func__);
                goto bailout;
        }

        dev->kq = kqueue();
        if (dev->kq == -1) {
                warn("%s: Unable to create kqueue", __func__);
                goto bailout;
        }

        ke_size = sizeof(struct kevent) * (num_ke + 4);
        ke = malloc(ke_size);
        if (ke == NULL) {
                warn("%s: unable to malloc %zu bytes", __func__, ke_size);
                goto bailout;
        }
        bzero(ke, ke_size);
        if (num_ke > 0)
                bcopy(new_ke, ke, num_ke * sizeof(struct kevent));

        EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
               EV_ADD|EV_ENABLE|EV_CLEAR, 0, 0, 0);
        EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
               EV_ADD|EV_ENABLE|EV_CLEAR, 0, 0, 0);
        EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
        EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);

        retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
        if (retval == -1) {
                warn("%s: Unable to register kevents", __func__);
                goto bailout;
        }

        /* kevent(2) copies the changelist, so the array can be freed now. */
        free(ke);

        return (dev);

bailout:
        free(ke);
        free(dev);

        return (NULL);
}
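
/*
 * The two EVFILT_USER events registered above give other threads a way to
 * wake this device's worker.  A peer would post one with NOTE_TRIGGER
 * (editor's sketch, assuming standard kqueue semantics):
 *
 *        struct kevent ke;
 *
 *        EV_SET(&ke, (uintptr_t)&dev->work_queue, EVFILT_USER, 0,
 *               NOTE_TRIGGER, 0, NULL);
 *        kevent(dev->kq, &ke, 1, NULL, 0, NULL);
 */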

static struct camdd_buf *
camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
{
        struct camdd_buf *buf = NULL;
        uint8_t *data_ptr = NULL;

        /*
         * We only need to allocate data space for data buffers.
         */
        switch (buf_type) {
        case CAMDD_BUF_DATA:
                data_ptr = malloc(dev->blocksize);
                if (data_ptr == NULL) {
                        warn("unable to allocate %u bytes", dev->blocksize);
                        goto bailout_error;
                }
                break;
        default:
                break;
        }

        buf = malloc(sizeof(*buf));
        if (buf == NULL) {
                warn("unable to allocate %zu bytes", sizeof(*buf));
                goto bailout_error;
        }

        bzero(buf, sizeof(*buf));
        buf->buf_type = buf_type;
        buf->dev = dev;
        switch (buf_type) {
        case CAMDD_BUF_DATA: {
                struct camdd_buf_data *data;

                data = &buf->buf_type_spec.data;

                data->alloc_len = dev->blocksize;
                data->buf = data_ptr;
                break;
        }
        case CAMDD_BUF_INDIRECT:
                break;
        default:
                break;
        }
        STAILQ_INIT(&buf->src_list);

        return (buf);

bailout_error:
        free(data_ptr);

        return (NULL);
}

void
camdd_release_buf(struct camdd_buf *buf)
{
        struct camdd_dev *dev;

        dev = buf->dev;

        switch (buf->buf_type) {
        case CAMDD_BUF_DATA: {
                struct camdd_buf_data *data;

                data = &buf->buf_type_spec.data;

                if (data->segs != NULL) {
                        if (data->extra_buf != 0) {
                                void *extra_buf;

                                extra_buf = (void *)
                                    data->segs[data->sg_count - 1].ds_addr;
                                free(extra_buf);
                                data->extra_buf = 0;
                        }
                        free(data->segs);
                        data->segs = NULL;
                        data->sg_count = 0;
                } else if (data->iovec != NULL) {
                        if (data->extra_buf != 0) {
                                free(data->iovec[data->sg_count - 1].iov_base);
                                data->extra_buf = 0;
                        }
                        free(data->iovec);
                        data->iovec = NULL;
                        data->sg_count = 0;
                }
                STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
                break;
        }
        case CAMDD_BUF_INDIRECT:
                STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
                break;
        default:
                errx(1, "%s: Invalid buffer type %d for released buffer",
                    __func__, buf->buf_type);
                break;
        }
}

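/*
 * camdd_get_buf() pairs with camdd_release_buf() above to form a simple
 * free-list cache: released buffers are parked on free_queue (or
 * free_indirect_queue) and recycled here, so steady-state I/O avoids a
 * malloc()/free() round trip for every block.
 */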
struct camdd_buf *
camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
{
        struct camdd_buf *buf = NULL;

        switch (buf_type) {
        case CAMDD_BUF_DATA:
                buf = STAILQ_FIRST(&dev->free_queue);
                if (buf != NULL) {
                        struct camdd_buf_data *data;
                        uint8_t *data_ptr;
                        uint32_t alloc_len;

                        STAILQ_REMOVE_HEAD(&dev->free_queue, links);
                        data = &buf->buf_type_spec.data;
                        data_ptr = data->buf;
                        alloc_len = data->alloc_len;
                        bzero(buf, sizeof(*buf));
                        data->buf = data_ptr;
                        data->alloc_len = alloc_len;
                }
                break;
        case CAMDD_BUF_INDIRECT:
                buf = STAILQ_FIRST(&dev->free_indirect_queue);
                if (buf != NULL) {
                        STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);

                        bzero(buf, sizeof(*buf));
                }
                break;
        default:
                warnx("Unknown buffer type %d requested", buf_type);
                break;
        }

        if (buf == NULL)
                return (camdd_alloc_buf(dev, buf_type));
        else {
                STAILQ_INIT(&buf->src_list);
                buf->dev = dev;
                buf->buf_type = buf_type;

                return (buf);
        }
}

int
camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
                    uint32_t *num_sectors_used, int *double_buf_needed)
{
        struct camdd_buf *tmp_buf;
        struct camdd_buf_data *data;
        uint8_t *extra_buf = NULL;
        size_t extra_buf_len = 0;
        int extra_buf_attached = 0;
        int i, retval = 0;

        data = &buf->buf_type_spec.data;

        data->sg_count = buf->src_count;
        /*
         * Compose a scatter/gather list from all of the buffers in the list.
         * If the length of the buffer isn't a multiple of the sector size,
         * we'll have to add an extra buffer.  This should only happen
         * at the end of a transfer.
         */
        if ((data->fill_len % sector_size) != 0) {
                extra_buf_len = sector_size - (data->fill_len % sector_size);
                extra_buf = calloc(extra_buf_len, 1);
                if (extra_buf == NULL) {
                        warn("%s: unable to allocate %zu bytes for extra "
                            "buffer space", __func__, extra_buf_len);
                        retval = 1;
                        goto bailout;
                }
                data->extra_buf = 1;
                data->sg_count++;
        }
        if (iovec == 0) {
                data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
                if (data->segs == NULL) {
                        warn("%s: unable to allocate %zu bytes for S/G list",
                            __func__, sizeof(bus_dma_segment_t) *
                            data->sg_count);
                        retval = 1;
                        goto bailout;
                }

        } else {
                data->iovec = calloc(data->sg_count, sizeof(struct iovec));
                if (data->iovec == NULL) {
                        warn("%s: unable to allocate %zu bytes for S/G list",
                            __func__, sizeof(struct iovec) * data->sg_count);
                        retval = 1;
                        goto bailout;
                }
        }

        for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
             i < buf->src_count && tmp_buf != NULL; i++,
             tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {

                if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
                        struct camdd_buf_data *tmp_data;

                        tmp_data = &tmp_buf->buf_type_spec.data;
                        if (iovec == 0) {
                                data->segs[i].ds_addr =
                                    (bus_addr_t) tmp_data->buf;
                                data->segs[i].ds_len = tmp_data->fill_len -
                                    tmp_data->resid;
                        } else {
                                data->iovec[i].iov_base = tmp_data->buf;
                                data->iovec[i].iov_len = tmp_data->fill_len -
                                    tmp_data->resid;
                        }
                        if (((tmp_data->fill_len - tmp_data->resid) %
                             sector_size) != 0)
                                *double_buf_needed = 1;
                } else {
                        struct camdd_buf_indirect *tmp_ind;

                        tmp_ind = &tmp_buf->buf_type_spec.indirect;
                        if (iovec == 0) {
                                data->segs[i].ds_addr =
                                    (bus_addr_t)tmp_ind->start_ptr;
                                data->segs[i].ds_len = tmp_ind->len;
                        } else {
                                data->iovec[i].iov_base = tmp_ind->start_ptr;
                                data->iovec[i].iov_len = tmp_ind->len;
                        }
                        if ((tmp_ind->len % sector_size) != 0)
                                *double_buf_needed = 1;
                }
        }

        if (extra_buf != NULL) {
                if (iovec == 0) {
                        data->segs[i].ds_addr = (bus_addr_t)extra_buf;
                        data->segs[i].ds_len = extra_buf_len;
                } else {
                        data->iovec[i].iov_base = extra_buf;
                        data->iovec[i].iov_len = extra_buf_len;
                }
                extra_buf_attached = 1;
                i++;
        }
        if ((tmp_buf != NULL) || (i != data->sg_count)) {
                warnx("buffer source count does not match "
                      "number of buffers in list!");
                retval = 1;
                goto bailout;
        }

bailout:
        if (retval == 0) {
                *num_sectors_used = (data->fill_len + extra_buf_len) /
                    sector_size;
        } else if (extra_buf_attached == 0) {
                /*
                 * If extra_buf isn't attached yet, we need to free it
                 * to avoid leaking.
                 */
                free(extra_buf);
                data->extra_buf = 0;
                data->sg_count--;
        }
        return (retval);
}
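
/*
 * Worked example (illustrative): with sector_size == 512 and
 * data->fill_len == 1000, the remainder is 488, so a zeroed pad segment
 * of extra_buf_len == 512 - 488 == 24 bytes is appended and
 * *num_sectors_used ends up as (1000 + 24) / 512 == 2.
 */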

uint32_t
camdd_buf_get_len(struct camdd_buf *buf)
{
        uint32_t len = 0;

        if (buf->buf_type != CAMDD_BUF_DATA) {
                struct camdd_buf_indirect *indirect;

                indirect = &buf->buf_type_spec.indirect;
                len = indirect->len;
        } else {
                struct camdd_buf_data *data;

                data = &buf->buf_type_spec.data;
                len = data->fill_len;
        }

        return (len);
}

void
camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
{
        struct camdd_buf_data *data;

        assert(buf->buf_type == CAMDD_BUF_DATA);

        data = &buf->buf_type_spec.data;

        STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
        buf->src_count++;

        data->fill_len += camdd_buf_get_len(child_buf);
}
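
/*
 * Worked example (illustrative): if the reader produces 65536-byte
 * buffers and the writer's blocksize is 524288, eight children are added
 * here before data->fill_len reaches a full 524288-byte block.
 */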

typedef enum {
        CAMDD_TS_MAX_BLK,
        CAMDD_TS_MIN_BLK,
        CAMDD_TS_BLK_GRAN,
        CAMDD_TS_EFF_IOSIZE
} camdd_status_item_index;

static struct camdd_status_items {
        const char *name;
        struct mt_status_entry *entry;
} req_status_items[] = {
        { "max_blk", NULL },
        { "min_blk", NULL },
        { "blk_gran", NULL },
        { "max_effective_iosize", NULL }
};

int
camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
                 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
{
        struct mt_status_data status_data;
        char *xml_str = NULL;
        unsigned int i;
        int retval = 0;

        retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str);
        if (retval != 0)
                err(1, "Couldn't get XML string from %s", filename);

        retval = mt_get_status(xml_str, &status_data);
        if (retval != XML_STATUS_OK) {
                warn("couldn't get status for %s", filename);
                retval = 1;
                goto bailout;
        } else
                retval = 0;

        if (status_data.error != 0) {
                warnx("%s", status_data.error_str);
                retval = 1;
                goto bailout;
        }

        for (i = 0; i < nitems(req_status_items); i++) {
                char *name;

                name = __DECONST(char *, req_status_items[i].name);
                req_status_items[i].entry = mt_status_entry_find(&status_data,
                    name);
                if (req_status_items[i].entry == NULL) {
                        errx(1, "Cannot find status entry %s",
                            req_status_items[i].name);
                }
        }

        *max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
        *max_blk = req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
        *min_blk = req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
        *blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;

bailout:
        free(xml_str);
        mt_status_free(&status_data);

        return (retval);
}

struct camdd_dev *
camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
    int timeout)
{
        struct camdd_dev *dev = NULL;
        struct camdd_dev_file *file_dev;
        uint64_t blocksize = io_opts->blocksize;

        dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
        if (dev == NULL)
                goto bailout;

        file_dev = &dev->dev_spec.file;
        file_dev->fd = fd;
        strlcpy(file_dev->filename, io_opts->dev_name,
            sizeof(file_dev->filename));
        strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
        if (blocksize == 0)
                dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
        else
                dev->blocksize = blocksize;

        if ((io_opts->queue_depth != 0)
         && (io_opts->queue_depth != 1)) {
                warnx("Queue depth %ju for %s ignored, only 1 outstanding "
                    "command supported", (uintmax_t)io_opts->queue_depth,
                    io_opts->dev_name);
        }
        dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
        dev->run = camdd_file_run;
        dev->fetch = NULL;

        /*
         * We can effectively access files on byte boundaries.  We'll reset
         * this for devices like disks that can be accessed on sector
         * boundaries.
         */
        dev->sector_size = 1;

        if ((fd != STDIN_FILENO)
         && (fd != STDOUT_FILENO)) {
                int retval;

                retval = fstat(fd, &file_dev->sb);
                if (retval != 0) {
                        warn("Cannot stat %s", dev->device_name);
                        goto bailout_error;
                }
                if (S_ISREG(file_dev->sb.st_mode)) {
                        file_dev->file_type = CAMDD_FILE_REG;
                } else if (S_ISCHR(file_dev->sb.st_mode)) {
                        int type;

                        if (ioctl(fd, FIODTYPE, &type) == -1)
                                err(1, "FIODTYPE ioctl failed on %s",
                                    dev->device_name);
                        else {
                                if (type & D_TAPE)
                                        file_dev->file_type = CAMDD_FILE_TAPE;
                                else if (type & D_DISK)
                                        file_dev->file_type = CAMDD_FILE_DISK;
                                else if (type & D_MEM)
                                        file_dev->file_type = CAMDD_FILE_MEM;
                                else if (type & D_TTY)
                                        file_dev->file_type = CAMDD_FILE_TTY;
                        }
                } else if (S_ISDIR(file_dev->sb.st_mode)) {
                        errx(1, "cannot operate on directory %s",
                            dev->device_name);
                } else if (S_ISFIFO(file_dev->sb.st_mode)) {
                        file_dev->file_type = CAMDD_FILE_PIPE;
                } else
                        errx(1, "Cannot determine file type for %s",
                            dev->device_name);

                switch (file_dev->file_type) {
                case CAMDD_FILE_REG:
                        if (file_dev->sb.st_size != 0)
                                dev->max_sector = file_dev->sb.st_size - 1;
                        else
                                dev->max_sector = 0;
                        file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
                        break;
                case CAMDD_FILE_TAPE: {
                        uint64_t max_iosize, max_blk, min_blk, blk_gran;
                        /*
                         * Check block limits and maximum effective iosize.
                         * Make sure the blocksize is within the block
                         * limits (and a multiple of the minimum blocksize)
                         * and that the blocksize is <= maximum effective
                         * iosize.
                         */
                        retval = camdd_probe_tape(fd, dev->device_name,
                            &max_iosize, &max_blk, &min_blk, &blk_gran);
                        if (retval != 0)
                                errx(1, "Unable to probe tape %s",
                                    dev->device_name);

                        /*
                         * The blocksize needs to be <= the maximum
                         * effective I/O size of the tape device.  Note
                         * that this also takes into account the maximum
                         * blocksize reported by READ BLOCK LIMITS.
                         */
                        if (dev->blocksize > max_iosize) {
                                warnx("Blocksize %u too big for %s, limiting "
                                    "to %ju", dev->blocksize, dev->device_name,
                                    (uintmax_t)max_iosize);
                                dev->blocksize = max_iosize;
                        }

                        /*
                         * The blocksize needs to be at least min_blk.
                         */
                        if (dev->blocksize < min_blk) {
                                warnx("Blocksize %u too small for %s, "
                                    "increasing to %ju", dev->blocksize,
                                    dev->device_name, (uintmax_t)min_blk);
                                dev->blocksize = min_blk;
                        }

                        /*
                         * And the blocksize needs to be a multiple of
                         * the block granularity.
                         */
                        if ((blk_gran != 0)
                         && (dev->blocksize % (1 << blk_gran))) {
                                warnx("Blocksize %u for %s not a multiple of "
                                    "%d, adjusting to %d", dev->blocksize,
                                    dev->device_name, (1 << blk_gran),
                                    dev->blocksize & ~((1 << blk_gran) - 1));
                                dev->blocksize &= ~((1 << blk_gran) - 1);
                        }
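
                        /*
                         * Worked example (illustrative): blk_gran is an
                         * exponent, so blk_gran == 2 means a 4-byte
                         * granularity; a blocksize of 1048575 would be
                         * trimmed to 1048572 by the mask above.
                         */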

                        if (dev->blocksize == 0) {
                                errx(1, "Unable to derive valid blocksize for "
                                    "%s", dev->device_name);
                        }

                        /*
                         * For tape drives, set the sector size to the
                         * blocksize so that we make sure not to write
                         * less than the blocksize out to the drive.
                         */
                        dev->sector_size = dev->blocksize;
                        break;
                }
                case CAMDD_FILE_DISK: {
                        off_t media_size;
                        unsigned int sector_size;

                        file_dev->file_flags |= CAMDD_FF_CAN_SEEK;

                        if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
                                err(1, "DIOCGSECTORSIZE ioctl failed on %s",
                                    dev->device_name);
                        }

                        if (sector_size == 0) {
                                errx(1, "DIOCGSECTORSIZE ioctl returned "
                                    "invalid sector size %u for %s",
                                    sector_size, dev->device_name);
                        }

                        if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
                                err(1, "DIOCGMEDIASIZE ioctl failed on %s",
                                    dev->device_name);
                        }

                        if (media_size == 0) {
                                errx(1, "DIOCGMEDIASIZE ioctl returned "
                                    "invalid media size %ju for %s",
                                    (uintmax_t)media_size, dev->device_name);
                        }

                        if (dev->blocksize % sector_size) {
                                errx(1, "%s blocksize %u not a multiple of "
                                    "sector size %u", dev->device_name,
                                    dev->blocksize, sector_size);
                        }

                        dev->sector_size = sector_size;
                        dev->max_sector = (media_size / sector_size) - 1;
                        break;
                }
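                /*
                 * Worked example (illustrative): a 1 GiB disk
                 * (media_size == 1073741824) with 512-byte sectors has
                 * 2097152 sectors, so max_sector == 2097151.
                 */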
                case CAMDD_FILE_MEM:
                        file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
                        break;
                default:
                        break;
                }
        }

        if ((io_opts->offset != 0)
         && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
                warnx("Offset %ju specified for %s, but we cannot seek on %s",
                    (uintmax_t)io_opts->offset, io_opts->dev_name,
                    io_opts->dev_name);
                goto bailout_error;
        }
#if 0
        else if ((io_opts->offset != 0)
                && ((io_opts->offset % dev->sector_size) != 0)) {
                warnx("Offset %ju for %s is not a multiple of the "
                      "sector size %u", io_opts->offset,
                      io_opts->dev_name, dev->sector_size);
                goto bailout_error;
        } else {
                dev->start_offset_bytes = io_opts->offset;
        }
#endif

bailout:
        return (dev);

bailout_error:
        camdd_free_dev(dev);
        return (NULL);
}
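
/*
 * Illustrative call (editor's sketch): probing standard input as the
 * read side, assuming io_opts has already been filled in (e.g. by
 * camdd_parse_io_opts()):
 *
 *        struct camdd_dev *dev;
 *
 *        dev = camdd_probe_file(STDIN_FILENO, &io_opts, retry_count,
 *            timeout);
 *        if (dev == NULL)
 *                errx(1, "cannot probe file device");
 */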

/*
 * Issue a get device (XPT_GDEV_TYPE) CCB for the specified device and
 * copy out the result.
 */
int
camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd)
{
        union ccb *ccb;
        int retval = 0;

        ccb = cam_getccb(device);

        if (ccb == NULL) {
                warnx("%s: couldn't allocate CCB", __func__);
                return -1;
        }

        CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cgd);

        ccb->ccb_h.func_code = XPT_GDEV_TYPE;

        if (cam_send_ccb(device, ccb) < 0) {
                warn("%s: error sending Get Device Information CCB", __func__);
                cam_error_print(device, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);
                retval = -1;
                goto bailout;
        }

        if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
                cam_error_print(device, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);
                retval = -1;
                goto bailout;
        }

        bcopy(&ccb->cgd, cgd, sizeof(struct ccb_getdev));

bailout:
        cam_freeccb(ccb);

        return retval;
}

int
camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
                 camdd_argmask arglist, int probe_retry_count,
                 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
{
        struct scsi_read_capacity_data rcap;
        struct scsi_read_capacity_data_long rcaplong;
        int retval = -1;

        if (ccb == NULL) {
                warnx("%s: error passed ccb is NULL", __func__);
                goto bailout;
        }

        CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);

        scsi_read_capacity(&ccb->csio,
                           /*retries*/ probe_retry_count,
                           /*cbfcnp*/ NULL,
                           /*tag_action*/ MSG_SIMPLE_Q_TAG,
                           &rcap,
                           SSD_FULL_SIZE,
                           /*timeout*/ probe_timeout ? probe_timeout : 5000);

        /* Disable freezing the device queue */
        ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;

        if (arglist & CAMDD_ARG_ERR_RECOVER)
                ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;

        if (cam_send_ccb(cam_dev, ccb) < 0) {
                warn("error sending READ CAPACITY command");

                cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);

                goto bailout;
        }

        if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
                cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
                goto bailout;
        }

        *maxsector = scsi_4btoul(rcap.addr);
        *block_len = scsi_4btoul(rcap.length);

        /*
         * A last block of 2^32-1 means that the true capacity is over 2TB,
         * and we need to issue the long READ CAPACITY to get the real
         * capacity.  Otherwise, we're all set.
         */
        if (*maxsector != 0xffffffff) {
                retval = 0;
                goto bailout;
        }

        scsi_read_capacity_16(&ccb->csio,
                              /*retries*/ probe_retry_count,
                              /*cbfcnp*/ NULL,
                              /*tag_action*/ MSG_SIMPLE_Q_TAG,
                              /*lba*/ 0,
                              /*reladdr*/ 0,
                              /*pmi*/ 0,
                              (uint8_t *)&rcaplong,
                              sizeof(rcaplong),
                              /*sense_len*/ SSD_FULL_SIZE,
                              /*timeout*/ probe_timeout ? probe_timeout : 5000);

        /* Disable freezing the device queue */
        ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;

        if (arglist & CAMDD_ARG_ERR_RECOVER)
                ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;

        if (cam_send_ccb(cam_dev, ccb) < 0) {
                warn("error sending READ CAPACITY (16) command");
                cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
                                CAM_EPF_ALL, stderr);
                goto bailout;
        }

        if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
                cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
                goto bailout;
        }

        *maxsector = scsi_8btou64(rcaplong.addr);
        *block_len = scsi_4btoul(rcaplong.length);

        retval = 0;

bailout:
        return retval;
}
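
/*
 * Worked example (illustrative): a device with 512-byte blocks and a
 * capacity of exactly 2 TiB has 4294967296 blocks, so its last LBA is
 * 4294967295 (0xffffffff).  That collides with the sentinel checked
 * above, which is why READ CAPACITY (16) is needed to disambiguate.
 */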

/*
 * Need to implement this.  Do a basic probe:
 * - Check the inquiry data, make sure we're talking to a device that we
 *   can reasonably expect to talk to -- direct, RBC, CD, WORM.
 * - Send a test unit ready, make sure the device is available.
 * - Get the capacity and block size.
 */
struct camdd_dev *
camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
                 camdd_argmask arglist, int probe_retry_count,
                 int probe_timeout, int io_retry_count, int io_timeout)
{
        union ccb *ccb;
        uint64_t maxsector = 0;
        uint32_t cpi_maxio, max_iosize, pass_numblocks;
        uint32_t block_len = 0;
        struct camdd_dev *dev = NULL;
        struct camdd_dev_pass *pass_dev;
        struct kevent ke;
        struct ccb_getdev cgd;
        int retval;
        int scsi_dev_type;

        if ((retval = camdd_get_cgd(cam_dev, &cgd)) != 0) {
                warnx("%s: error retrieving CGD", __func__);
                return NULL;
        }

        ccb = cam_getccb(cam_dev);

        if (ccb == NULL) {
                warnx("%s: error allocating ccb", __func__);
                goto bailout;
        }

        switch (cgd.protocol) {
        case PROTO_SCSI:
                scsi_dev_type = SID_TYPE(&cam_dev->inq_data);

                /*
                 * For devices that support READ CAPACITY, we'll attempt
                 * to get the capacity.  Otherwise, we really don't support
                 * tape or other devices via SCSI passthrough, so just
                 * return an error in that case.
                 */
                switch (scsi_dev_type) {
                case T_DIRECT:
                case T_WORM:
                case T_CDROM:
                case T_OPTICAL:
                case T_RBC:
                case T_ZBC_HM:
                        break;
                default:
                        errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
                        break; /*NOTREACHED*/
                }

                if ((retval = camdd_probe_pass_scsi(cam_dev, ccb, arglist,
                                                probe_retry_count,
                                                probe_timeout, &maxsector,
                                                &block_len))) {
                        goto bailout;
                }
                break;
        default:
                errx(1, "Unsupported PROTO type %d", cgd.protocol);
                break; /*NOTREACHED*/
        }
1472         if (block_len == 0) {
1473                 warnx("Sector size for %s%u is 0, cannot continue",
1474                     cam_dev->device_name, cam_dev->dev_unit_num);
1475                 goto bailout_error;
1476         }
1477
1478         CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cpi);
1479
1480         ccb->ccb_h.func_code = XPT_PATH_INQ;
1481         ccb->ccb_h.flags = CAM_DIR_NONE;
1482         ccb->ccb_h.retry_count = 1;
1483         
1484         if (cam_send_ccb(cam_dev, ccb) < 0) {
1485                 warn("error sending XPT_PATH_INQ CCB");
1486
1487                 cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1488                                 CAM_EPF_ALL, stderr);
1489                 goto bailout;
1490         }
1491
1492         EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1493
1494         dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
1495                               io_timeout);
1496         if (dev == NULL)
1497                 goto bailout;
1498
1499         pass_dev = &dev->dev_spec.pass;
1500         pass_dev->scsi_dev_type = scsi_dev_type;
1501         pass_dev->protocol = cgd.protocol;
1502         pass_dev->dev = cam_dev;
1503         pass_dev->max_sector = maxsector;
1504         pass_dev->block_len = block_len;
1505         pass_dev->cpi_maxio = ccb->cpi.maxio;
1506         snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
1507                  pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
1508         dev->sector_size = block_len;
1509         dev->max_sector = maxsector;
1510         
1511
1512         /*
1513          * Determine the optimal blocksize to use for this device.
1514          */
1515
1516         /*
1517          * If the controller has not specified a maximum I/O size,
1518          * just go with 128K as a somewhat conservative value.
1519          */
1520         if (pass_dev->cpi_maxio == 0)
1521                 cpi_maxio = 131072;
1522         else
1523                 cpi_maxio = pass_dev->cpi_maxio;
1524
1525         /*
1526          * If the controller has a large maximum I/O size, limit it
1527          * to something smaller so that the kernel doesn't have trouble
1528          * allocating buffers to copy data in and out for us.
1529          * XXX KDM this is until we have unmapped I/O support in the kernel.
1530          */
1531         max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
1532
1533         /*
1534          * If we weren't able to get a block size for some reason,
1535          * default to 512 bytes.
1536          */
1537         block_len = pass_dev->block_len;
1538         if (block_len == 0)
1539                 block_len = 512;
1540
1541         /*
1542          * Figure out how many blocksize chunks will fit in the
1543          * maximum I/O size.
1544          */
1545         pass_numblocks = max_iosize / block_len;
1546
1547         /*
1548          * And finally, multiply the number of blocks by the block
1549          * length to get our maximum blocksize.
1550          */
1551         dev->blocksize = pass_numblocks * block_len;
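        /*
         * Worked example with illustrative values: if the controller reports
         * cpi_maxio == 0, we use the 131072-byte fallback above (assuming
         * CAMDD_PASS_MAX_BLOCK is at least that large).  With a 4096-byte
         * block length, pass_numblocks = 131072 / 4096 = 32, so
         * dev->blocksize becomes 32 * 4096 = 131072 bytes per I/O; a
         * 512-byte block length would give 256 sectors per I/O instead.
         */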
1552
1553         if (io_opts->blocksize != 0) {
1554                 if ((io_opts->blocksize % dev->sector_size) != 0) {
1555                         warnx("Blocksize %ju for %s is not a multiple of "
1556                               "sector size %u", (uintmax_t)io_opts->blocksize, 
1557                               dev->device_name, dev->sector_size);
1558                         goto bailout_error;
1559                 }
1560                 dev->blocksize = io_opts->blocksize;
1561         }
1562         dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
1563         if (io_opts->queue_depth != 0)
1564                 dev->target_queue_depth = io_opts->queue_depth;
1565
1566         if (io_opts->offset != 0) {
1567                 if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
1568                         warnx("Offset %ju is past the end of device %s",
1569                             (uintmax_t)io_opts->offset, dev->device_name);
1570                         goto bailout_error;
1571                 }
1572 #if 0
1573                 else if ((io_opts->offset % dev->sector_size) != 0) {
1574                         warnx("Offset %ju for %s is not a multiple of the "
1575                               "sector size %u", io_opts->offset, 
1576                               dev->device_name, dev->sector_size);
1577                         goto bailout_error;
1578                 }
1579                 dev->start_offset_bytes = io_opts->offset;
1580 #endif
1581         }
1582
1583         dev->min_cmd_size = io_opts->min_cmd_size;
1584
1585         dev->run = camdd_pass_run;
1586         dev->fetch = camdd_pass_fetch;
1587
1588 bailout:
1589         cam_freeccb(ccb);
1590
1591         return (dev);
1592
1593 bailout_error:
1594         cam_freeccb(ccb);
1595
1596         camdd_free_dev(dev);
1597
1598         return (NULL);
1599 }
1600
1601 void *
1602 camdd_worker(void *arg)
1603 {
1604         struct camdd_dev *dev = arg;
1605         struct camdd_buf *buf;
1606         struct timespec ts, *kq_ts;
1607
1608         ts.tv_sec = 0;
1609         ts.tv_nsec = 0;
1610
1611         pthread_mutex_lock(&dev->mutex);
1612
1613         dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
1614
1615         for (;;) {
1616                 struct kevent ke;
1617                 int retval = 0;
1618
1619                 /*
1620                  * XXX KDM check the reorder queue depth?
1621                  */
1622                 if (dev->write_dev == 0) {
1623                         uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
1624                         uint32_t target_depth = dev->target_queue_depth;
1625                         uint32_t peer_target_depth =
1626                             dev->peer_dev->target_queue_depth;
1627                         uint32_t peer_blocksize = dev->peer_dev->blocksize;
1628
1629                         camdd_get_depth(dev, &our_depth, &peer_depth,
1630                                         &our_bytes, &peer_bytes);
1631
1632 #if 0
1633                         while (((our_depth < target_depth)
1634                              && (peer_depth < peer_target_depth))
1635                             || ((peer_bytes + our_bytes) <
1636                                  (peer_blocksize * 2))) {
1637 #endif
1638                         while (((our_depth + peer_depth) <
1639                                 (target_depth + peer_target_depth))
1640                             || ((peer_bytes + our_bytes) <
1641                                 (peer_blocksize * 3))) {
1642
1643                                 retval = camdd_queue(dev, NULL);
1644                                 if (retval == 1)
1645                                         break;
1646                                 else if (retval != 0) {
1647                                         error_exit = 1;
1648                                         goto bailout;
1649                                 }
1650
1651                                 camdd_get_depth(dev, &our_depth, &peer_depth,
1652                                                 &our_bytes, &peer_bytes);
1653                         }
1654                 }
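                /*
                 * Illustrative numbers for the loop above: with a target
                 * depth of 6 on each side, the reader keeps queueing until
                 * twelve I/Os are outstanding between the two threads, and
                 * independently of depth it keeps at least three peer
                 * blocksizes' worth of bytes queued, so that a writer with
                 * a larger blocksize has enough data to fill a block.
                 */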
1655                 /*
1656                  * See if we have any I/O that is ready to execute.
1657                  */
1658                 buf = STAILQ_FIRST(&dev->run_queue);
1659                 if (buf != NULL) {
1660                         while (dev->target_queue_depth > dev->cur_active_io) {
1661                                 retval = dev->run(dev);
1662                                 if (retval == -1) {
1663                                         dev->flags |= CAMDD_DEV_FLAG_EOF;
1664                                         error_exit = 1;
1665                                         break;
1666                                 } else if (retval != 0) {
1667                                         break;
1668                                 }
1669                         }
1670                 }
1671
1672                 /*
1673                  * We've reached EOF, or our partner has reached EOF.
1674                  */
1675                 if ((dev->flags & CAMDD_DEV_FLAG_EOF)
1676                  || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
1677                         if (dev->write_dev != 0) {
1678                                 if ((STAILQ_EMPTY(&dev->work_queue))
1679                                  && (dev->num_run_queue == 0)
1680                                  && (dev->cur_active_io == 0)) {
1681                                         goto bailout;
1682                                 }
1683                         } else {
1684                                 /*
1685                                  * If we're the reader, and the writer
1686                                  * got EOF, it is already done.  If we got
1687                                  * the EOF, then we need to wait until
1688                                  * everything is flushed out to the writer.
1689                                  */
1690                                 if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
1691                                         goto bailout;
1692                                 } else if ((dev->num_peer_work_queue == 0)
1693                                         && (dev->num_peer_done_queue == 0)
1694                                         && (dev->cur_active_io == 0)
1695                                         && (dev->num_run_queue == 0)) {
1696                                         goto bailout;
1697                                 }
1698                         }
1699                         /*
1700                          * XXX KDM need to do something about the pending
1701                          * queue and cleanup resources.
1702                          */
1703                 } 
1704
1705                 if ((dev->write_dev == 0)
1706                  && (dev->cur_active_io == 0)
1707                  && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
1708                         kq_ts = &ts;
1709                 else
1710                         kq_ts = NULL;
1711
1712                 /*
1713                  * Run kevent to see if there are events to process.
1714                  */
1715                 pthread_mutex_unlock(&dev->mutex);
1716                 retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
1717                 pthread_mutex_lock(&dev->mutex);
1718                 if (retval == -1) {
1719                         warn("%s: error returned from kevent",__func__);
1720                         goto bailout;
1721                 } else if (retval != 0) {
1722                         switch (ke.filter) {
1723                         case EVFILT_READ:
1724                                 if (dev->fetch != NULL) {
1725                                         retval = dev->fetch(dev);
1726                                         if (retval == -1) {
1727                                                 error_exit = 1;
1728                                                 goto bailout;
1729                                         }
1730                                 }
1731                                 break;
1732                         case EVFILT_SIGNAL:
1733                                 /*
1734                                  * We register for this so we don't get
1735                                  * an error as a result of a SIGINFO or a
1736                                  * SIGINT.  It will actually get handled
1737                                  * by the signal handler.  If we get a
1738                                  * SIGINT, bail out without printing an
1739                                  * error message.  Any other signals 
1740                                  * will result in the error message above.
1741                                  */
1742                                 if (ke.ident == SIGINT)
1743                                         goto bailout;
1744                                 break;
1745                         case EVFILT_USER:
1746                                 retval = 0;
1747                                 /*
1748                                  * Check to see if the other thread has
1749                                  * queued any I/O for us to do.  (In this
1750                                  * case we're the writer.)
1751                                  */
1752                                 for (buf = STAILQ_FIRST(&dev->work_queue);
1753                                      buf != NULL;
1754                                      buf = STAILQ_FIRST(&dev->work_queue)) {
1755                                         STAILQ_REMOVE_HEAD(&dev->work_queue,
1756                                                            work_links);
1757                                         retval = camdd_queue(dev, buf);
1758                                         /*
1759                                          * We keep going unless we get an
1760                                          * actual error.  If we get EOF, we
1761                                          * still want to remove the buffers
1762                                          * from the queue and send them back
1763                                          * to the reader thread.
1764                                          */
1765                                         if (retval == -1) {
1766                                                 error_exit = 1;
1767                                                 goto bailout;
1768                                         } else
1769                                                 retval = 0;
1770                                 }
1771
1772                                 /*
1773                                  * Next check to see if the other thread has
1774                                  * queued any completed buffers back to us.
1775                                  * (In this case we're the reader.)
1776                                  */
1777                                 for (buf = STAILQ_FIRST(&dev->peer_done_queue);
1778                                      buf != NULL;
1779                                      buf = STAILQ_FIRST(&dev->peer_done_queue)){
1780                                         STAILQ_REMOVE_HEAD(
1781                                             &dev->peer_done_queue, work_links);
1782                                         dev->num_peer_done_queue--;
1783                                         camdd_peer_done(buf);
1784                                 }
1785                                 break;
1786                         default:
1787                                 warnx("%s: unknown kevent filter %d",
1788                                       __func__, ke.filter);
1789                                 break;
1790                         }
1791                 }
1792         }
1793
1794 bailout:
1795
1796         dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
1797
1798         /* XXX KDM cleanup resources here? */
1799
1800         pthread_mutex_unlock(&dev->mutex);
1801
1802         need_exit = 1;
1803         sem_post(&camdd_sem);
1804
1805         return (NULL);
1806 }
1807
1808 /*
1809  * Simplistic translation of CCB status to our local status.
1810  */
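/*
 * For the SCSI case in the function below, this works out to: CAM_REQ_CMP
 * with no residual is CAMDD_STATUS_OK, a partial transfer
 * (resid < dxfer_len) is CAMDD_STATUS_SHORT_IO, and a full residual
 * (nothing transferred) is treated as CAMDD_STATUS_EOF.  Benign SCSI
 * statuses (OK, condition met, intermediate) also map to OK; anything else
 * is CAMDD_STATUS_ERROR.
 */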
1811 camdd_buf_status
1812 camdd_ccb_status(union ccb *ccb, int protocol)
1813 {
1814         camdd_buf_status status = CAMDD_STATUS_NONE;
1815         cam_status ccb_status;
1816
1817         ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
1818
1819         switch (protocol) {
1820         case PROTO_SCSI:
1821                 switch (ccb_status) {
1822                 case CAM_REQ_CMP: {
1823                         if (ccb->csio.resid == 0) {
1824                                 status = CAMDD_STATUS_OK;
1825                         } else if (ccb->csio.dxfer_len > ccb->csio.resid) {
1826                                 status = CAMDD_STATUS_SHORT_IO;
1827                         } else {
1828                                 status = CAMDD_STATUS_EOF;
1829                         }
1830                         break;
1831                 }
1832                 case CAM_SCSI_STATUS_ERROR: {
1833                         switch (ccb->csio.scsi_status) {
1834                         case SCSI_STATUS_OK:
1835                         case SCSI_STATUS_COND_MET:
1836                         case SCSI_STATUS_INTERMED:
1837                         case SCSI_STATUS_INTERMED_COND_MET:
1838                                 status = CAMDD_STATUS_OK;
1839                                 break;
1840                         case SCSI_STATUS_CMD_TERMINATED:
1841                         case SCSI_STATUS_CHECK_COND:
1842                         case SCSI_STATUS_QUEUE_FULL:
1843                         case SCSI_STATUS_BUSY:
1844                         case SCSI_STATUS_RESERV_CONFLICT:
1845                         default:
1846                                 status = CAMDD_STATUS_ERROR;
1847                                 break;
1848                         }
1849                         break;
1850                 }
1851                 default:
1852                         status = CAMDD_STATUS_ERROR;
1853                         break;
1854                 }
1855                 break;
1856         default:
1857                 status = CAMDD_STATUS_ERROR;
1858                 break;
1859         }
1860
1861         return (status);
1862 }
1863
1864 /*
1865  * Queue a buffer to our peer's work thread for writing.
1866  *
1867  * Returns 0 for success, -1 for failure, 1 if the other thread exited.
1868  */
1869 int
1870 camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
1871 {
1872         struct kevent ke;
1873         STAILQ_HEAD(, camdd_buf) local_queue;
1874         struct camdd_buf *buf1, *buf2;
1875         struct camdd_buf_data *data = NULL;
1876         uint64_t peer_bytes_queued = 0;
1877         int active = 1;
1878         int retval = 0;
1879
1880         STAILQ_INIT(&local_queue);
1881
1882         /*
1883          * Since we're the reader, we need to queue our I/O to the writer
1884          * in sequential order so that it gets written out in sequential
1885          * order.
1886          *
1887          * Check the next expected I/O starting offset.  If this doesn't
1888          * match, put it on the reorder queue.
1889          */
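        /*
         * For example, if 64-sector reads complete out of order as LBA 0,
         * 128, 64, the buffer for LBA 128 is parked on the reorder queue;
         * when the LBA 64 buffer arrives at the expected completion
         * position, both it and the LBA 128 buffer are drained to the
         * writer below in ascending LBA order.
         */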
1890         if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
1891
1892                 /*
1893                  * If there is nothing on the queue, there is no sorting
1894                  * needed.
1895                  */
1896                 if (STAILQ_EMPTY(&dev->reorder_queue)) {
1897                         STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
1898                         dev->num_reorder_queue++;
1899                         goto bailout;
1900                 }
1901
1902                 /*
1903                  * Sort in ascending order by starting LBA.  There should
1904                  * be no identical LBAs.
1905                  */
1906                 for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1907                      buf1 = buf2) {
1908                         buf2 = STAILQ_NEXT(buf1, links);
1909                         if (buf->lba < buf1->lba) {
1910                                 /*
1911                                  * If we're less than the first one, then
1912                                  * we insert at the head of the list
1913                                  * because this has to be the first element
1914                                  * on the list.
1915                                  */
1916                                 STAILQ_INSERT_HEAD(&dev->reorder_queue,
1917                                                    buf, links);
1918                                 dev->num_reorder_queue++;
1919                                 break;
1920                         } else if (buf->lba > buf1->lba) {
1921                                 if (buf2 == NULL) {
1922                                         STAILQ_INSERT_TAIL(&dev->reorder_queue, 
1923                                             buf, links);
1924                                         dev->num_reorder_queue++;
1925                                         break;
1926                                 } else if (buf->lba < buf2->lba) {
1927                                         STAILQ_INSERT_AFTER(&dev->reorder_queue,
1928                                             buf1, buf, links);
1929                                         dev->num_reorder_queue++;
1930                                         break;
1931                                 }
1932                         } else {
1933                                 errx(1, "Found buffers with duplicate LBA %ju!",
1934                                      (uintmax_t)buf->lba);
1935                         }
1936                 }
1937                 goto bailout;
1938         } else {
1939
1940                 /*
1941                  * We're the next expected I/O completion, so put ourselves
1942                  * on the local queue to be sent to the writer.  We use
1943                  * work_links here so that we can queue this to the 
1944                  * peer_work_queue before taking the buffer off of the
1945                  * local_queue.
1946                  */
1947                 dev->next_completion_pos_bytes += buf->len;
1948                 STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
1949
1950                 /*
1951                  * Go through the reorder queue looking for more sequential
1952                  * I/O and add it to the local queue.
1953                  */
1954                 for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1955                      buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
1956                         /*
1957                          * As soon as we see an I/O that is out of sequence,
1958                          * we're done.
1959                          */
1960                         if ((buf1->lba * dev->sector_size) !=
1961                              dev->next_completion_pos_bytes)
1962                                 break;
1963
1964                         STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
1965                         dev->num_reorder_queue--;
1966                         STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
1967                         dev->next_completion_pos_bytes += buf1->len;
1968                 }
1969         }
1970
1971         /*
1972          * Set up the event to let the other thread know that it has work
1973          * pending.
1974          */
1975         EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
1976                NOTE_TRIGGER, 0, NULL);
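        /*
         * EVFILT_USER events are identified by an arbitrary user-chosen
         * ident -- here the address of the peer's work queue -- and
         * NOTE_TRIGGER wakes any kevent() call waiting on the peer's kqueue.
         * That is how the EVFILT_USER case in camdd_worker() learns that
         * new buffers are waiting for it.
         */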
1977
1978         /*
1979          * Put this on our shadow queue so that we know what we've queued
1980          * to the other thread.
1981          */
1982         STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
1983                 if (buf1->buf_type != CAMDD_BUF_DATA) {
1984                         errx(1, "%s: should have a data buffer, not an "
1985                             "indirect buffer", __func__);
1986                 }
1987                 data = &buf1->buf_type_spec.data;
1988
1989                 /*
1990                  * We only need to send one EOF to the writer, and don't
1991                  * need to continue sending EOFs after that.
1992                  */
1993                 if (buf1->status == CAMDD_STATUS_EOF) {
1994                         if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
1995                                 STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
1996                                     work_links);
1997                                 camdd_release_buf(buf1);
1998                                 retval = 1;
1999                                 continue;
2000                         }
2001                         dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
2002                 }
2003
2004
2005                 STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
2006                 peer_bytes_queued += (data->fill_len - data->resid);
2007                 dev->peer_bytes_queued += (data->fill_len - data->resid);
2008                 dev->num_peer_work_queue++;
2009         }
2010
2011         if (STAILQ_FIRST(&local_queue) == NULL)
2012                 goto bailout;
2013
2014         /*
2015          * Drop our mutex and pick up the other thread's mutex; never holding
2016          * both at once keeps the two worker threads from deadlocking.
2017          */
2018         pthread_mutex_unlock(&dev->mutex);
2019         pthread_mutex_lock(&dev->peer_dev->mutex);
2020
2021         if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
2022                 /*
2023                  * Put the buffers on the other thread's incoming work queue.
2024                  */
2025                 for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2026                      buf1 = STAILQ_FIRST(&local_queue)) {
2027                         STAILQ_REMOVE_HEAD(&local_queue, work_links);
2028                         STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
2029                                            work_links);
2030                 }
2031                 /*
2032                  * Send an event to the other thread's kqueue to let it know
2033                  * that there is something on the work queue.
2034                  */
2035                 retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2036                 if (retval == -1)
2037                         warn("%s: unable to add peer work_queue kevent",
2038                              __func__);
2039                 else
2040                         retval = 0;
2041         } else
2042                 active = 0;
2043
2044         pthread_mutex_unlock(&dev->peer_dev->mutex);
2045         pthread_mutex_lock(&dev->mutex);
2046
2047         /*
2048          * If the other side isn't active, run through the queue and
2049          * release all of the buffers.
2050          */
2051         if (active == 0) {
2052                 for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2053                      buf1 = STAILQ_FIRST(&local_queue)) {
2054                         STAILQ_REMOVE_HEAD(&local_queue, work_links);
2055                         STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
2056                                       links);
2057                         dev->num_peer_work_queue--;
2058                         camdd_release_buf(buf1);
2059                 }
2060                 dev->peer_bytes_queued -= peer_bytes_queued;
2061                 retval = 1;
2062         }
2063
2064 bailout:
2065         return (retval);
2066 }
2067
2068 /*
2069  * Return a buffer to the reader thread when we have completed writing it.
2070  */
2071 int
2072 camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
2073 {
2074         struct kevent ke;
2075         int retval = 0;
2076
2077         /*
2078          * Set up the event to let the other thread know that we have
2079          * completed a buffer.
2080          */
2081         EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0,
2082                NOTE_TRIGGER, 0, NULL);
2083
2084         /*
2085          * Drop our lock and acquire the other thread's lock before
2086          * manipulating its done queue.
2087          */
2088         pthread_mutex_unlock(&dev->mutex);
2089         pthread_mutex_lock(&dev->peer_dev->mutex);
2090
2091         /*
2092          * Put the buffer on the reader thread's peer done queue now that
2093          * we have completed it.
2094          */
2095         STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf,
2096                            work_links);
2097         dev->peer_dev->num_peer_done_queue++;
2098
2099         /*
2100          * Send an event to the peer thread to let it know that we've added
2101          * something to its peer done queue.
2102          */
2103         retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2104         if (retval == -1)
2105                 warn("%s: unable to add peer_done_queue kevent", __func__);
2106         else
2107                 retval = 0;
2108
2109         /*
2110          * Drop the other thread's lock and reacquire ours.
2111          */
2112         pthread_mutex_unlock(&dev->peer_dev->mutex);
2113         pthread_mutex_lock(&dev->mutex);
2114
2115         return (retval);
2116 }
2117
2118 /*
2119  * Free a buffer that was written out by the writer thread and returned to
2120  * the reader thread.
2121  */
2122 void
2123 camdd_peer_done(struct camdd_buf *buf)
2124 {
2125         struct camdd_dev *dev;
2126         struct camdd_buf_data *data;
2127
2128         dev = buf->dev;
2129         if (buf->buf_type != CAMDD_BUF_DATA) {
2130                 errx(1, "%s: should have a data buffer, not an "
2131                     "indirect buffer", __func__);
2132         }
2133
2134         data = &buf->buf_type_spec.data;
2135
2136         STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
2137         dev->num_peer_work_queue--;
2138         dev->peer_bytes_queued -= (data->fill_len - data->resid);
2139
2140         if (buf->status == CAMDD_STATUS_EOF)
2141                 dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2142
2143         STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2144 }
2145
2146 /*
2147  * Assumes caller holds the lock for this device.
2148  */
2149 void
2150 camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
2151                    int *error_count)
2152 {
2153         int retval = 0;
2154
2155         /*
2156          * If we're the reader, we need to send the completed I/O
2157          * to the writer.  If we're the writer, we need to just
2158          * free up resources, or let the reader know if we've
2159          * encountered an error.
2160          */
2161         if (dev->write_dev == 0) {
2162                 retval = camdd_queue_peer_buf(dev, buf);
2163                 if (retval != 0)
2164                         (*error_count)++;
2165         } else {
2166                 struct camdd_buf *tmp_buf, *next_buf;
2167
2168                 STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
2169                                     next_buf) {
2170                         struct camdd_buf *src_buf;
2171                         struct camdd_buf_indirect *indirect;
2172
2173                         STAILQ_REMOVE(&buf->src_list, tmp_buf,
2174                                       camdd_buf, src_links);
2175
2176                         tmp_buf->status = buf->status;
2177
2178                         if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
2179                                 camdd_complete_peer_buf(dev, tmp_buf);
2180                                 continue;
2181                         }
2182
2183                         indirect = &tmp_buf->buf_type_spec.indirect;
2184                         src_buf = indirect->src_buf;
2185                         src_buf->refcount--;
2186                         /*
2187                          * XXX KDM we probably need to account for
2188                          * exactly how many bytes we were able to
2189                          * write.  Allocate the residual to the
2190                          * first N buffers?  Or just track the
2191                          * number of bytes written?  Right now the reader
2192                          * doesn't do anything with a residual.
2193                          */
2194                         src_buf->status = buf->status;
2195                         if (src_buf->refcount <= 0)
2196                                 camdd_complete_peer_buf(dev, src_buf);
2197                         STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
2198                                            tmp_buf, links);
2199                 }
2200
2201                 STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2202         }
2203 }
2204
2205 /*
2206  * Fetch all completed commands from the pass(4) device.
2207  *
2208  * Returns the number of commands received, or -1 if any of the commands
2209  * completed with an error.  Returns 0 if no commands are available.
2210  */
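/*
 * This is the completion half of the asynchronous pass(4) interface: CCBs
 * queued with the CAMIOQUEUE ioctl in camdd_pass_run() are collected here
 * via CAMIOGET once kevent() reports the pass(4) file descriptor readable
 * (the EVFILT_READ filter registered in camdd_probe_pass()).
 */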
2211 int
2212 camdd_pass_fetch(struct camdd_dev *dev)
2213 {
2214         struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2215         union ccb ccb;
2216         int retval = 0, num_fetched = 0, error_count = 0;
2217
2218         pthread_mutex_unlock(&dev->mutex);
2219         /*
2220          * XXX KDM we don't distinguish between EFAULT and ENOENT.
2221          */
2222         while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
2223                 struct camdd_buf *buf;
2224                 struct camdd_buf_data *data;
2225                 cam_status ccb_status;
2226                 union ccb *buf_ccb;
2227
2228                 buf = ccb.ccb_h.ccb_buf;
2229                 data = &buf->buf_type_spec.data;
2230                 buf_ccb = &data->ccb;
2231
2232                 num_fetched++;
2233
2234                 /*
2235                  * Copy the CCB back out so we get status, sense data, etc.
2236                  */
2237                 bcopy(&ccb, buf_ccb, sizeof(ccb));
2238
2239                 pthread_mutex_lock(&dev->mutex);
2240
2241                 /*
2242                  * We're now done, so take this off the active queue.
2243                  */
2244                 STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
2245                 dev->cur_active_io--;
2246
2247                 ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
2248                 if (ccb_status != CAM_REQ_CMP) {
2249                         cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
2250                                         CAM_EPF_ALL, stderr);
2251                 }
2252
2253                 switch (pass_dev->protocol) {
2254                 case PROTO_SCSI:
2255                         data->resid = ccb.csio.resid;
2256                         dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
2257                         break;
2258                 default:
2259                         return (-1);
2260                         break; /*NOTREACHED*/
2261                 }
2262
2263                 if (buf->status == CAMDD_STATUS_NONE)
2264                         buf->status = camdd_ccb_status(&ccb, pass_dev->protocol);
2265                 if (buf->status == CAMDD_STATUS_ERROR)
2266                         error_count++;
2267                 else if (buf->status == CAMDD_STATUS_EOF) {
2268                         /*
2269                          * Once we queue this buffer to our partner thread,
2270                          * he will know that we've hit EOF.
2271                          */
2272                         dev->flags |= CAMDD_DEV_FLAG_EOF;
2273                 }
2274
2275                 camdd_complete_buf(dev, buf, &error_count);
2276
2277                 /*
2278                  * Unlock in preparation for the ioctl call.
2279                  */
2280                 pthread_mutex_unlock(&dev->mutex);
2281         }
2282
2283         pthread_mutex_lock(&dev->mutex);
2284
2285         if (error_count > 0)
2286                 return (-1);
2287         else
2288                 return (num_fetched);
2289 }
2290
2291 /*
2292  * Returns -1 for error, 0 for success/continue, and 1 for resource
2293  * shortage/stop processing.
2294  */
2295 int
2296 camdd_file_run(struct camdd_dev *dev)
2297 {
2298         struct camdd_dev_file *file_dev = &dev->dev_spec.file;
2299         struct camdd_buf_data *data;
2300         struct camdd_buf *buf;
2301         off_t io_offset;
2302         int retval = 0, write_dev = dev->write_dev;
2303         int error_count = 0, no_resources = 0, double_buf_needed = 0;
2304         uint32_t num_sectors = 0, db_len = 0;
2305
2306         buf = STAILQ_FIRST(&dev->run_queue);
2307         if (buf == NULL) {
2308                 no_resources = 1;
2309                 goto bailout;
2310         } else if ((dev->write_dev == 0)
2311                 && (dev->flags & (CAMDD_DEV_FLAG_EOF |
2312                                   CAMDD_DEV_FLAG_EOF_SENT))) {
2313                 STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2314                 dev->num_run_queue--;
2315                 buf->status = CAMDD_STATUS_EOF;
2316                 error_count++;
2317                 goto bailout;
2318         }
2319
2320         /*
2321          * If we're writing, we need to go through the source buffer list
2322          * and create an S/G list.
2323          */
2324         if (write_dev != 0) {
2325                 retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
2326                     dev->sector_size, &num_sectors, &double_buf_needed);
2327                 if (retval != 0) {
2328                         no_resources = 1;
2329                         goto bailout;
2330                 }
2331         }
2332
2333         STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2334         dev->num_run_queue--;
2335
2336         data = &buf->buf_type_spec.data;
2337
2338         /*
2339          * pread(2) and pwrite(2) offsets are byte offsets.
2340          */
2341         io_offset = buf->lba * dev->sector_size;
2342
2343         /*
2344          * Unlock the mutex while we read or write.
2345          */
2346         pthread_mutex_unlock(&dev->mutex);
2347
2348         /*
2349          * Note that we don't need to double buffer if we're the reader
2350          * because in that case, we have allocated a single buffer of
2351          * sufficient size to do the read.  This copy is necessary on
2352          * writes because if one of the components of the S/G list is not
2353          * a sector size multiple, the kernel will reject the write.  This
2354          * is unfortunate but not surprising.  So this will make sure that
2355          * we're using a single buffer that is a multiple of the sector size.
2356          */
2357         if ((double_buf_needed != 0)
2358          && (data->sg_count > 1)
2359          && (write_dev != 0)) {
2360                 uint32_t cur_offset;
2361                 int i;
2362
2363                 if (file_dev->tmp_buf == NULL)
2364                         file_dev->tmp_buf = calloc(dev->blocksize, 1);
2365                 if (file_dev->tmp_buf == NULL) {
2366                         buf->status = CAMDD_STATUS_ERROR;
2367                         error_count++;
2368                         pthread_mutex_lock(&dev->mutex);
2369                         goto bailout;
2370                 }
2371                 for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
2372                         bcopy(data->iovec[i].iov_base,
2373                             &file_dev->tmp_buf[cur_offset],
2374                             data->iovec[i].iov_len);
2375                         cur_offset += data->iovec[i].iov_len;
2376                 }
2377                 db_len = cur_offset;
2378         }
2379
2380         if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
2381                 if (write_dev == 0) {
2382                         /*
2383                          * XXX KDM is there any way we would need a S/G
2384                          * list here?
2385                          */
2386                         retval = pread(file_dev->fd, data->buf,
2387                             buf->len, io_offset);
2388                 } else {
2389                         if (double_buf_needed != 0) {
2390                                 retval = pwrite(file_dev->fd, file_dev->tmp_buf,
2391                                     db_len, io_offset);
2392                         } else if (data->sg_count == 0) {
2393                                 retval = pwrite(file_dev->fd, data->buf,
2394                                     data->fill_len, io_offset);
2395                         } else {
2396                                 retval = pwritev(file_dev->fd, data->iovec,
2397                                     data->sg_count, io_offset);
2398                         }
2399                 }
2400         } else {
2401                 if (write_dev == 0) {
2402                         /*
2403                          * XXX KDM is there any way we would need a S/G
2404                          * list here?
2405                          */
2406                         retval = read(file_dev->fd, data->buf, buf->len);
2407                 } else {
2408                         if (double_buf_needed != 0) {
2409                                 retval = write(file_dev->fd, file_dev->tmp_buf,
2410                                     db_len);
2411                         } else if (data->sg_count == 0) {
2412                                 retval = write(file_dev->fd, data->buf,
2413                                     data->fill_len);
2414                         } else {
2415                                 retval = writev(file_dev->fd, data->iovec,
2416                                     data->sg_count);
2417                         }
2418                 }
2419         }
2420
2421         /* We're done, re-acquire the lock */
2422         pthread_mutex_lock(&dev->mutex);
2423
2424         if (retval >= (ssize_t)data->fill_len) {
2425                 /*
2426                  * If the number of bytes transferred is more than the request size,
2427                  * that indicates an overrun, which should only happen at
2428                  * the end of a transfer if we have to round up to a sector
2429                  * boundary.
2430                  */
2431                 if (buf->status == CAMDD_STATUS_NONE)
2432                         buf->status = CAMDD_STATUS_OK;
2433                 data->resid = 0;
2434                 dev->bytes_transferred += retval;
2435         } else if (retval == -1) {
2436                 warn("Error %s %s", (write_dev) ? "writing to" :
2437                     "reading from", file_dev->filename);
2438
2439                 buf->status = CAMDD_STATUS_ERROR;
2440                 data->resid = data->fill_len;
2441                 error_count++;
2442
2443                 if (dev->debug == 0)
2444                         goto bailout;
2445
2446                 if ((double_buf_needed != 0)
2447                  && (write_dev != 0)) {
2448                         fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
2449                             "offset %ju\n", __func__, file_dev->fd,
2450                             file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
2451                             (uintmax_t)io_offset);
2452                 } else if (data->sg_count == 0) {
2453                         fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
2454                             "offset %ju\n", __func__, file_dev->fd, data->buf,
2455                             data->fill_len, (uintmax_t)buf->lba,
2456                             (uintmax_t)io_offset);
2457                 } else {
2458                         int i;
2459
2460                         fprintf(stderr, "%s: fd %d, len %u, lba %ju "
2461                             "offset %ju\n", __func__, file_dev->fd, 
2462                             data->fill_len, (uintmax_t)buf->lba,
2463                             (uintmax_t)io_offset);
2464
2465                         for (i = 0; i < data->sg_count; i++) {
2466                                 fprintf(stderr, "index %d ptr %p len %zu\n",
2467                                     i, data->iovec[i].iov_base,
2468                                     data->iovec[i].iov_len);
2469                         }
2470                 }
2471         } else if (retval == 0) {
2472                 buf->status = CAMDD_STATUS_EOF;
2473                 if (dev->debug != 0)
2474                         printf("%s: got EOF from %s!\n", __func__,
2475                             file_dev->filename);
2476                 data->resid = data->fill_len;
2477                 error_count++;
2478         } else if (retval < (ssize_t)data->fill_len) {
2479                 if (buf->status == CAMDD_STATUS_NONE)
2480                         buf->status = CAMDD_STATUS_SHORT_IO;
2481                 data->resid = data->fill_len - retval;
2482                 dev->bytes_transferred += retval;
2483         }
2484
2485 bailout:
2486         if (buf != NULL) {
2487                 if (buf->status == CAMDD_STATUS_EOF) {
2488                         struct camdd_buf *buf2;
2489                         dev->flags |= CAMDD_DEV_FLAG_EOF;
2490                         STAILQ_FOREACH(buf2, &dev->run_queue, links)
2491                                 buf2->status = CAMDD_STATUS_EOF;
2492                 }
2493
2494                 camdd_complete_buf(dev, buf, &error_count);
2495         }
2496
2497         if (error_count != 0)
2498                 return (-1);
2499         else if (no_resources != 0)
2500                 return (1);
2501         else
2502                 return (0);
2503 }
2504
2505 /*
2506  * Execute one command from the run queue.  Returns 0 for success, 1 for
2507  * stop processing, and -1 for error.
2508  */
2509 int
2510 camdd_pass_run(struct camdd_dev *dev)
2511 {
2512         struct camdd_buf *buf = NULL;
2513         struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2514         struct camdd_buf_data *data;
2515         uint32_t num_blocks, sectors_used = 0;
2516         union ccb *ccb;
2517         int retval = 0, is_write = dev->write_dev;
2518         int double_buf_needed = 0;
2519
2520         buf = STAILQ_FIRST(&dev->run_queue);
2521         if (buf == NULL) {
2522                 retval = 1;
2523                 goto bailout;
2524         }
2525
2526         /*
2527          * If we're writing, we need to go through the source buffer list
2528          * and create an S/G list.
2529          */
2530         if (is_write != 0) {
2531                 retval = camdd_buf_sg_create(buf, /*iovec*/ 0,dev->sector_size,
2532                     &sectors_used, &double_buf_needed);
2533                 if (retval != 0) {
2534                         retval = -1;
2535                         goto bailout;
2536                 }
2537         }
2538
2539         STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2540         dev->num_run_queue--;
2541
2542         data = &buf->buf_type_spec.data;
2543
2544         /*
2545          * In almost every case the transfer size should be the device
2546          * blocksize.  The exception may be at the end of an I/O stream
2547          * for a partial block, or at the end of the device.
2548          */
2549         if (is_write != 0)
2550                 num_blocks = sectors_used;
2551         else
2552                 num_blocks = data->fill_len / pass_dev->block_len;
2553
2554         ccb = &data->ccb;
2555
2556         switch (pass_dev->protocol) {
2557         case PROTO_SCSI:
2558                 CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
2559
2560                 scsi_read_write(&ccb->csio,
2561                                 /*retries*/ dev->retry_count,
2562                                 /*cbfcnp*/ NULL,
2563                                 /*tag_action*/ MSG_SIMPLE_Q_TAG,
2564                                 /*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
2565                                            SCSI_RW_WRITE,
2566                                 /*byte2*/ 0,
2567                                 /*minimum_cmd_size*/ dev->min_cmd_size,
2568                                 /*lba*/ buf->lba,
2569                                 /*block_count*/ num_blocks,
2570                                 /*data_ptr*/ (data->sg_count != 0) ?
2571                                              (uint8_t *)data->segs : data->buf,
2572                                 /*dxfer_len*/ (num_blocks * pass_dev->block_len),
2573                                 /*sense_len*/ SSD_FULL_SIZE,
2574                                 /*timeout*/ dev->io_timeout);
2575
2576                 if (data->sg_count != 0) {
2577                         ccb->csio.sglist_cnt = data->sg_count;
2578                 }
2579                 break;
2580         default:
2581                 retval = -1;
2582                 goto bailout;
2583         }
2584
2585         /* Disable freezing the device queue */
2586         ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
2587
2588         if (dev->retry_count != 0)
2589                 ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
2590
2591         if (data->sg_count != 0) {
2592                 ccb->ccb_h.flags |= CAM_DATA_SG;
2593         }
2594
2595         /*
2596          * Store a pointer to the buffer in the CCB.  The kernel will
2597          * restore this when we get it back, and we'll use it to identify
2598          * the buffer this CCB came from.
2599          */
2600         ccb->ccb_h.ccb_buf = buf;
2601
2602         /*
2603          * Unlock our mutex in preparation for issuing the ioctl.
2604          */
2605         pthread_mutex_unlock(&dev->mutex);
2606         /*
2607          * Queue the CCB to the pass(4) driver.
2608          */
2609         if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
2610                 pthread_mutex_lock(&dev->mutex);
2611
2612                 warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
2613                      pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
2614                 warn("%s: CCB address is %p", __func__, ccb);
2615                 retval = -1;
2616
2617                 STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2618         } else {
2619                 pthread_mutex_lock(&dev->mutex);
2620
2621                 dev->cur_active_io++;
2622                 STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
2623         }
2624
2625 bailout:
2626         return (retval);
2627 }
2628
2629 int
2630 camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
2631 {
2632         struct camdd_dev_pass *pass_dev;
2633         uint32_t num_blocks;
2634         int retval = 0;
2635
2636         pass_dev = &dev->dev_spec.pass;
2637
2638         *lba = dev->next_io_pos_bytes / dev->sector_size;
2639         *len = dev->blocksize;
2640         num_blocks = *len / dev->sector_size;
2641
2642         /*
2643          * If max_sector is 0, then we have no set limit.  This can happen
2644          * if we're writing to a file in a filesystem, or reading from
2645          * something like /dev/zero.
2646          */
2647         if ((dev->max_sector != 0)
2648          || (dev->sector_io_limit != 0)) {
2649                 uint64_t max_sector;
2650
2651                 if ((dev->max_sector != 0)
2652                  && (dev->sector_io_limit != 0)) 
2653                         max_sector = min(dev->sector_io_limit, dev->max_sector);
2654                 else if (dev->max_sector != 0)
2655                         max_sector = dev->max_sector;
2656                 else
2657                         max_sector = dev->sector_io_limit;
2658
2659
2660                 /*
2661                  * Check to see whether we're starting off past the end of
2662                  * the device.  If so, we need to just send an EOF
2663                  * notification to the writer.
2664                  */
2665                 if (*lba > max_sector) {
2666                         *len = 0;
2667                         retval = 1;
2668                 } else if (((*lba + num_blocks) > max_sector + 1)
2669                         || ((*lba + num_blocks) < *lba)) {
2670                         /*
2671                          * If we get here (but pass the first check), we
2672                          * can trim the request length down to go to the
2673                          * end of the device.
2674                          */
2675                         num_blocks = (max_sector + 1) - *lba;
2676                         *len = num_blocks * dev->sector_size;
2677                         retval = 1;
2678                 }
2679         }
2680
2681         dev->next_io_pos_bytes += *len;
2682
2683         return (retval);
2684 }
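/*
 * Worked example of the clamping above, with illustrative numbers: given a
 * 512-byte sector size, a 65536-byte blocksize (128 sectors), and
 * max_sector 1023, a request starting at LBA 1000 would run past the end
 * of the device, so num_blocks is trimmed to (1023 + 1) - 1000 = 24
 * sectors, *len becomes 24 * 512 = 12288 bytes, and 1 is returned to
 * signal EOF.
 */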
2685
2686 /*
2687  * Returns 0 for success, 1 for EOF detected, and -1 for failure.
2688  */
2689 int
2690 camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
2691 {
2692         struct camdd_buf *buf = NULL;
2693         struct camdd_buf_data *data;
2694         struct camdd_dev_pass *pass_dev;
2695         size_t new_len;
2696         struct camdd_buf_data *rb_data;
2697         int is_write = dev->write_dev;
2698         int eof_flush_needed = 0;
2699         int retval = 0;
2700         int error;
2701
2702         pass_dev = &dev->dev_spec.pass;
2703
2704         /*
2705          * If we've gotten EOF or our partner has, we should not continue
2706          * queueing I/O.  If we're a writer, though, we should continue
2707          * to write any buffers that don't have EOF status.
2708          */
2709         if ((dev->flags & CAMDD_DEV_FLAG_EOF)
2710          || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
2711           && (is_write == 0))) {
2712                 /*
2713                  * Tell the worker thread that we have seen EOF.
2714                  */
2715                 retval = 1;
2716
2717                 /*
2718                  * If we're the writer, send the buffer back with EOF status.
2719                  */
2720                 if (is_write) {
2721                         read_buf->status = CAMDD_STATUS_EOF;
2722                         
2723                         error = camdd_complete_peer_buf(dev, read_buf);
2724                 }
2725                 goto bailout;
2726         }
2727
2728         if (is_write == 0) {
2729                 buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2730                 if (buf == NULL) {
2731                         retval = -1;
2732                         goto bailout;
2733                 }
2734                 data = &buf->buf_type_spec.data;
2735
2736                 retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
2737                 if (retval != 0) {
2738                         buf->status = CAMDD_STATUS_EOF;
2739
2740                         if ((buf->len == 0)
2741                          && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
2742                              CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
2743                                 camdd_release_buf(buf);
2744                                 goto bailout;
2745                         }
2746                         dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
2747                 }
2748
2749                 data->fill_len = buf->len;
2750                 data->src_start_offset = buf->lba * dev->sector_size;
2751
2752                 /*
2753                  * Put this on the run queue.
2754                  */
2755                 STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2756                 dev->num_run_queue++;
2757
2758                 /* We're done. */
2759                 goto bailout;
2760         }
2761
2762         /*
2763          * Check for new EOF status from the reader.
2764          */
2765         if ((read_buf->status == CAMDD_STATUS_EOF)
2766          || (read_buf->status == CAMDD_STATUS_ERROR)) {
2767                 dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2768                 if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
2769                  && (read_buf->len == 0)) {
2770                         camdd_complete_peer_buf(dev, read_buf);
2771                         retval = 1;
2772                         goto bailout;
2773                 } else
2774                         eof_flush_needed = 1;
2775         }
2776
2777         /*
2778          * See if we have a buffer we're composing with pieces from our
2779          * partner thread.
2780          */
2781         buf = STAILQ_FIRST(&dev->pending_queue);
2782         if (buf == NULL) {
2783                 uint64_t lba;
2784                 ssize_t len;
2785
2786                 retval = camdd_get_next_lba_len(dev, &lba, &len);
2787                 if (retval != 0) {
2788                         read_buf->status = CAMDD_STATUS_EOF;
2789
2790                         if (len == 0) {
2791                                 dev->flags |= CAMDD_DEV_FLAG_EOF;
2792                                 error = camdd_complete_peer_buf(dev, read_buf);
2793                                 goto bailout;
2794                         }
2795                 }
2796
2797                 /*
2798                  * If we don't have a pending buffer, we need to grab a new
2799                  * one from the free list or allocate another one.
2800                  */
2801                 buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2802                 if (buf == NULL) {
2803                         retval = 1;
2804                         goto bailout;
2805                 }
2806
2807                 buf->lba = lba;
2808                 buf->len = len;
2809
2810                 STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
2811                 dev->num_pending_queue++;
2812         }
2813
2814         data = &buf->buf_type_spec.data;
2815
2816         rb_data = &read_buf->buf_type_spec.data;
2817
2818         if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
2819          && (dev->debug != 0)) {
2820                 printf("%s: WARNING: reader offset %#jx != expected offset "
2821                     "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
2822                     (uintmax_t)dev->next_peer_pos_bytes);
2823         }
2824         dev->next_peer_pos_bytes = rb_data->src_start_offset +
2825             (rb_data->fill_len - rb_data->resid);
2826
2827         new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
2828         if (new_len < buf->len) {
2829                 /*
2830                  * There are three cases here:
2831                  * 1. We need more data to fill up a block, so we put 
2832                  *    this I/O on the queue and wait for more I/O.
2833                  * 2. We have a pending buffer in the queue that is
2834                  *    smaller than our blocksize, but we got an EOF.  So we
2835                  *    need to go ahead and flush the write out.
2836                  * 3. We got an error.
2837                  */
2838
2839                 /*
2840                  * Increment our fill length.
2841                  */
2842                 data->fill_len += (rb_data->fill_len - rb_data->resid);
2843
2844                 /*
2845                  * Add the new read buffer to the list for writing.
2846                  */
2847                 STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2848
2850                 /* Increment the source buffer count. */
2850                 buf->src_count++;
2851
2852                 if (eof_flush_needed == 0) {
2853                         /*
2854                          * We need to exit, because we don't have enough
2855                          * data yet.
2856                          */
2857                         goto bailout;
2858                 } else {
2859                         /*
2860                          * Take the buffer off of the pending queue.
2861                          */
2862                         STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
2863                                       links);
2864                         dev->num_pending_queue--;
2865
2866                         /*
2867                          * If we need an EOF flush, but there is no data
2868                          * to flush, go ahead and return this buffer.
2869                          */
2870                         if (data->fill_len == 0) {
2871                                 camdd_complete_buf(dev, buf, /*error_count*/0);
2872                                 retval = 1;
2873                                 goto bailout;
2874                         }
2875
2876                         /*
2877                          * Put this on the next queue for execution.
2878                  * Put this on the run queue for execution.
2879                         STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2880                         dev->num_run_queue++;
2881                 }
2882         } else if (new_len == buf->len) {
2883                 /*
2884          * We have enough data to completely fill one block,
2885                  * so we're ready to issue the I/O.
2886                  */
2887
2888                 /*
2889                  * Take the buffer off of the pending queue.
2890                  */
2891                 STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
2892                 dev->num_pending_queue--;
2893
2894                 /*
2895                  * Add the new read buffer to the list for writing.
2896                  */
2897                 STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2898
2900                 /* Increment the source buffer count. */
2900                 buf->src_count++;
2901
2902                 /*
2903                  * Increment our fill length.
2904                  */
2905                 data->fill_len += (rb_data->fill_len - rb_data->resid);
2906
2907                 /*
2908          * Put this on the run queue for execution.
2909                  */
2910                 STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2911                 dev->num_run_queue++;
2912         } else {
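                     /*
                      * new_len > buf->len: the incoming read data spills past
                      * the end of the current write buffer.  Top off the
                      * current buffer with an indirect chunk, then carve the
                      * remainder into additional write buffers below.
                      */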
2913                 struct camdd_buf *idb;
2914                 struct camdd_buf_indirect *indirect;
2915                 uint32_t len_to_go, cur_offset;
2916
2917
2918                 idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
2919                 if (idb == NULL) {
2920                         retval = 1;
2921                         goto bailout;
2922                 }
2923                 indirect = &idb->buf_type_spec.indirect;
2924                 indirect->src_buf = read_buf;
2925                 read_buf->refcount++;
2926                 indirect->offset = 0;
2927                 indirect->start_ptr = rb_data->buf;
2928                 /*
2929                  * We've already established that there is more
2930                  * data in read_buf than we have room for in our
2931                  * current write request.  So this particular chunk
2932                  * of the request should just be the remainder
2933                  * needed to fill up a block.
2934                  */
2935                 indirect->len = buf->len - (data->fill_len - data->resid);
2936
2937                 camdd_buf_add_child(buf, idb);
2938
2939                 /*
2940                  * This buffer is ready to execute, so we can take
2941                  * it off the pending queue and put it on the run
2942                  * queue.
2943                  */
2944                 STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
2945                               links);
2946                 dev->num_pending_queue--;
2947                 STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2948                 dev->num_run_queue++;
2949
2950                 cur_offset = indirect->offset + indirect->len;
2951
2952                 /*
2953                  * The resulting I/O would be too large to fit in
2954                  * one block.  We need to split this I/O into
2955                  * multiple pieces.  Allocate as many buffers as needed.
2956                  */
2957                 for (len_to_go = rb_data->fill_len - rb_data->resid -
2958                      indirect->len; len_to_go > 0;) {
2959                         struct camdd_buf *new_buf;
2960                         struct camdd_buf_data *new_data;
2961                         uint64_t lba;
2962                         ssize_t len;
2963
2964                         retval = camdd_get_next_lba_len(dev, &lba, &len);
2965                         if ((retval != 0)
2966                          && (len == 0)) {
2967                                 /*
2968                                  * The device has already been marked
2969                                  * as EOF, and there is no space left.
2970                                  */
2971                                 goto bailout;
2972                         }
2973
2974                         new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2975                         if (new_buf == NULL) {
2976                                 retval = 1;
2977                                 goto bailout;
2978                         }
2979
2980                         new_buf->lba = lba;
2981                         new_buf->len = len;
2982
2983                         idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
2984                         if (idb == NULL) {
2985                                 retval = 1;
2986                                 goto bailout;
2987                         }
2988
2989                         indirect = &idb->buf_type_spec.indirect;
2990
2991                         indirect->src_buf = read_buf;
2992                         read_buf->refcount++;
2993                         indirect->offset = cur_offset;
2994                         indirect->start_ptr = rb_data->buf + cur_offset;
2995                         indirect->len = min(len_to_go, new_buf->len);
2996 #if 0
2997                         if (((indirect->len % dev->sector_size) != 0)
2998                          || ((indirect->offset % dev->sector_size) != 0)) {
2999                                 warnx("offset %ju len %ju not aligned with "
3000                                     "sector size %u", (uintmax_t)indirect->offset,
3001                                     (uintmax_t)indirect->len, dev->sector_size);
3002                         }
3003 #endif
3004                         cur_offset += indirect->len;
3005                         len_to_go -= indirect->len;
3006
3007                         camdd_buf_add_child(new_buf, idb);
3008
3009                         new_data = &new_buf->buf_type_spec.data;
3010
3011                         if ((new_data->fill_len == new_buf->len)
3012                          || (eof_flush_needed != 0)) {
3013                                 STAILQ_INSERT_TAIL(&dev->run_queue,
3014                                                    new_buf, links);
3015                                 dev->num_run_queue++;
3016                         } else if (new_data->fill_len < buf->len) {
3017                                 STAILQ_INSERT_TAIL(&dev->pending_queue,
3018                                                 new_buf, links);
3019                                 dev->num_pending_queue++;
3020                         } else {
3021                                 warnx("%s: too much data in new "
3022                                       "buffer!", __func__);
3023                                 retval = 1;
3024                                 goto bailout;
3025                         }
3026                 }
3027         }
3028
3029 bailout:
3030         return (retval);
3031 }
3032
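     /*
      * Report this device's queue depth (active I/Os plus buffers on the
      * run queue) and the depth of work queued to us by our peer, along
      * with the equivalent byte counts.
      */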
3033 void
3034 camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
3035                 uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
3036 {
3037         *our_depth = dev->cur_active_io + dev->num_run_queue;
3038         if (dev->num_peer_work_queue >
3039             dev->num_peer_done_queue)
3040                 *peer_depth = dev->num_peer_work_queue -
3041                               dev->num_peer_done_queue;
3042         else
3043                 *peer_depth = 0;
3044         *our_bytes = *our_depth * dev->blocksize;
3045         *peer_bytes = dev->peer_bytes_queued;
3046 }
3047
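     /*
      * Signal handler: SIGINFO requests a status report, anything else we
      * catch (e.g. SIGINT) requests an error exit.  Either way, post the
      * semaphore so the main loop in camdd_rw() wakes up and acts on the
      * flags.
      */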
3048 void
3049 camdd_sig_handler(int sig)
3050 {
3051         if (sig == SIGINFO)
3052                 need_status = 1;
3053         else {
3054                 need_exit = 1;
3055                 error_exit = 1;
3056         }
3057
3058         sem_post(&camdd_sem);
3059 }
3060
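     /*
      * Print the number of bytes read and written by each device, the
      * elapsed time since start_time, and the throughput in MB/sec based
      * on the smaller of the two byte counts.
      */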
3061 void
3062 camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev, 
3063                    struct timespec *start_time)
3064 {
3065         struct timespec done_time;
3066         uint64_t total_ns;
3067         long double mb_sec, total_sec;
3068         int error = 0;
3069
3070         error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
3071         if (error != 0) {
3072                 warn("Unable to get done time");
3073                 return;
3074         }
3075
3076         timespecsub(&done_time, start_time);
3077
3078         total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000);
3079         total_sec = total_ns;
3080         total_sec /= 1000000000;
3081
3082         fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
3083                 "%.4Lf seconds elapsed\n",
3084                 (uintmax_t)camdd_dev->bytes_transferred,
3085                 (camdd_dev->write_dev == 0) ?  "read from" : "written to",
3086                 camdd_dev->device_name,
3087                 (uintmax_t)other_dev->bytes_transferred,
3088                 (other_dev->write_dev == 0) ? "read from" : "written to",
3089                 other_dev->device_name, total_sec);
3090
3091         mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred);
3092         mb_sec /= 1024 * 1024;
3093         mb_sec *= 1000000000;
3094         mb_sec /= total_ns;
3095         fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
3096 }
3097
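     /*
      * Open and probe the input and output devices (pass(4) or file),
      * link them as peers, start one worker thread per device, and then
      * wait for completion, an exit request, or a SIGINFO status request.
      */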
3098 int
3099 camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts, uint64_t max_io,
3100          int retry_count, int timeout)
3101 {
3102         struct cam_device *new_cam_dev = NULL;
3103         struct camdd_dev *devs[2];
3104         struct timespec start_time;
3105         pthread_t threads[2];
3106         int unit = 0;
3107         int error = 0;
3108         int i;
3109
3110         if (num_io_opts != 2) {
3111                 warnx("Must have one input and one output path");
3112                 error = 1;
3113                 goto bailout;
3114         }
3115
3116         bzero(devs, sizeof(devs));
3117
3118         for (i = 0; i < num_io_opts; i++) {
3119                 switch (io_opts[i].dev_type) {
3120                 case CAMDD_DEV_PASS: {
3121                         if (isdigit(io_opts[i].dev_name[0])) {
3122                                 camdd_argmask new_arglist = CAMDD_ARG_NONE;
3123                                 int bus = 0, target = 0, lun = 0;
3124                                 int rv;
3125
3126                                 /* device specified as bus:target[:lun] */
3127                                 rv = parse_btl(io_opts[i].dev_name, &bus,
3128                                     &target, &lun, &new_arglist);
3129                                 if (rv < 2) {
3130                                         warnx("numeric device specification "
3131                                              "must be either bus:target, or "
3132                                              "bus:target:lun");
3133                                         error = 1;
3134                                         goto bailout;
3135                                 }
3136                                 /* default to 0 if lun was not specified */
3137                                 if ((new_arglist & CAMDD_ARG_LUN) == 0) {
3138                                         lun = 0;
3139                                         new_arglist |= CAMDD_ARG_LUN;
3140                                 }
3141                                 new_cam_dev = cam_open_btl(bus, target, lun,
3142                                     O_RDWR, NULL);
3143                         } else {
3144                                 char name[30];
3145
3146                                 if (cam_get_device(io_opts[i].dev_name, name,
3147                                                    sizeof name, &unit) == -1) {
3148                                         warnx("%s", cam_errbuf);
3149                                         error = 1;
3150                                         goto bailout;
3151                                 }
3152                                 new_cam_dev = cam_open_spec_device(name, unit,
3153                                     O_RDWR, NULL);
3154                         }
3155
3156                         if (new_cam_dev == NULL) {
3157                                 warnx("%s", cam_errbuf);
3158                                 error = 1;
3159                                 goto bailout;
3160                         }
3161
3162                         devs[i] = camdd_probe_pass(new_cam_dev,
3163                             /*io_opts*/ &io_opts[i],
3164                             CAMDD_ARG_ERR_RECOVER, 
3165                             /*probe_retry_count*/ 3,
3166                             /*probe_timeout*/ 5000,
3167                             /*io_retry_count*/ retry_count,
3168                             /*io_timeout*/ timeout);
3169                         if (devs[i] == NULL) {
3170                                 warn("Unable to probe device %s%u",
3171                                      new_cam_dev->device_name,
3172                                      new_cam_dev->dev_unit_num);
3173                                 error = 1;
3174                                 goto bailout;
3175                         }
3176                         break;
3177                 }
3178                 case CAMDD_DEV_FILE: {
3179                         int fd = -1;
3180
3181                         if (io_opts[i].dev_name[0] == '-') {
3182                                 if (io_opts[i].write_dev != 0)
3183                                         fd = STDOUT_FILENO;
3184                                 else
3185                                         fd = STDIN_FILENO;
3186                         } else {
3187                                 if (io_opts[i].write_dev != 0) {
3188                                         fd = open(io_opts[i].dev_name,
3189                                             O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
3190                                 } else {
3191                                         fd = open(io_opts[i].dev_name,
3192                                             O_RDONLY);
3193                                 }
3194                         }
3195                         if (fd == -1) {
3196                                 warn("error opening file %s",
3197                                     io_opts[i].dev_name);
3198                                 error = 1;
3199                                 goto bailout;
3200                         }
3201
3202                         devs[i] = camdd_probe_file(fd, &io_opts[i],
3203                             retry_count, timeout);
3204                         if (devs[i] == NULL) {
3205                                 error = 1;
3206                                 goto bailout;
3207                         }
3208
3209                         break;
3210                 }
3211                 default:
3212                         warnx("Unknown device type %d (%s)",
3213                             io_opts[i].dev_type, io_opts[i].dev_name);
3214                         error = 1;
3215                         goto bailout;
3216                         break; /*NOTREACHED*/
3217                 }
3218
3219                 devs[i]->write_dev = io_opts[i].write_dev;
3220
3221                 devs[i]->start_offset_bytes = io_opts[i].offset;
3222
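                     /*
                      * -m: translate the byte limit into the last sector
                      * this device may touch, relative to its starting
                      * offset.
                      */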
3223                 if (max_io != 0) {
3224                         devs[i]->sector_io_limit =
3225                             (devs[i]->start_offset_bytes /
3226                             devs[i]->sector_size) +
3227                             (max_io / devs[i]->sector_size) - 1;
3228                 }
3229
3230                 devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
3231         devs[i]->next_completion_pos_bytes = devs[i]->start_offset_bytes;
3232         }
3233
3234         devs[0]->peer_dev = devs[1];
3235         devs[1]->peer_dev = devs[0];
3236         devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
3237         devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
3238
3239         sem_init(&camdd_sem, /*pshared*/ 0, 0);
3240
3241         signal(SIGINFO, camdd_sig_handler);
3242         signal(SIGINT, camdd_sig_handler);
3243
3244         error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
3245         if (error != 0) {
3246                 warn("Unable to get start time");
3247                 goto bailout;
3248         }
3249
3250         for (i = 0; i < num_io_opts; i++) {
3251                 error = pthread_create(&threads[i], NULL, camdd_worker,
3252                                        (void *)devs[i]);
3253                 if (error != 0) {
3254                         warnc(error, "pthread_create() failed");
3255                         goto bailout;
3256                 }
3257         }
3258
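             /*
              * Sleep on the semaphore until a worker thread or the signal
              * handler posts it.  On an exit request, mark both devices EOF
              * and fire an EVFILT_USER event at each worker's kqueue so the
              * threads wake up and shut down; on SIGINFO, print status and
              * keep waiting.
              */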
3259         for (;;) {
3260                 if ((sem_wait(&camdd_sem) == -1)
3261                  || (need_exit != 0)) {
3262                         struct kevent ke;
3263
3264                         for (i = 0; i < num_io_opts; i++) {
3265                                 EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
3266                                     EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
3267
3268                                 devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
3269
3270                                 error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
3271                                                 NULL);
3272                                 if (error == -1)
3273                                         warn("%s: unable to wake up thread",
3274                                             __func__);
3275                                 error = 0;
3276                         }
3277                         break;
3278                 } else if (need_status != 0) {
3279                         camdd_print_status(devs[0], devs[1], &start_time);
3280                         need_status = 0;
3281                 }
3282         }
3283         for (i = 0; i < num_io_opts; i++) {
3284                 pthread_join(threads[i], NULL);
3285         }
3286
3287         camdd_print_status(devs[0], devs[1], &start_time);
3288
3289 bailout:
3290
3291         for (i = 0; i < num_io_opts; i++)
3292                 camdd_free_dev(devs[i]);
3293
3294         return (error + error_exit);
3295 }
3296
3297 void
3298 usage(void)
3299 {
3300         fprintf(stderr,
3301 "usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
3302 "              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
3303 "              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
3304 "              <-i|-o file=/dev/nsa0,bs=512K>\n"
3305 "              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
3306 "Option description\n"
3307 "-i <arg=val>  Specify input device/file and parameters\n"
3308 "-o <arg=val>  Specify output device/file and parameters\n"
3309 "Input and Output parameters\n"
3310 "pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
3311 "file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
3312 "              or - for stdin/stdout\n"
3313 "bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
3314 "offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
3315 "              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
3316 "depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
3317 "mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
3318 "Optional arguments\n"
3319 "-C retry_cnt  Specify a retry count for pass(4) devices\n"
3320 "-E            Enable CAM error recovery for pass(4) devices\n"
3321 "-m max_io     Specify the maximum amount to be transferred in bytes or\n"
3322 "              using K, G, M, etc. suffixes\n"
3323 "-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
3324 "-v            Enable verbose error recovery\n"
3325 "-h            Print this message\n");
3326 }
3327
3328
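     /*
      * Parse a comma-separated list of name=value I/O parameters (file=,
      * pass=, bs=/blocksize=, depth=, mcs=, offset=, debug=) into *io_opts.
      * Empty fields produced by doubled commas are skipped; unrecognized
      * names only draw a warning.  As a sketch of the expected input, the
      * -i argument in a hypothetical invocation such as
      *
      *   camdd -i file=/dev/da0,bs=512K,offset=1M -o file=/tmp/da0.img
      *
      * would yield dev_type CAMDD_DEV_FILE, dev_name "/dev/da0", a
      * 524288-byte blocksize, and a 1048576-byte offset.
      */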
3329 int
3330 camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
3331 {
3332         char *tmpstr, *tmpstr2;
3333         char *orig_tmpstr = NULL;
3334         int retval = 0;
3335
3336         io_opts->write_dev = is_write;
3337
3338         tmpstr = strdup(args);
3339         if (tmpstr == NULL) {
3340                 warn("strdup failed");
3341                 retval = 1;
3342                 goto bailout;
3343         }
3344         orig_tmpstr = tmpstr;
3345         while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
3346                 char *name, *value;
3347
3348                 /*
3349                  * If the user creates an empty parameter by putting in two
3350                  * commas, skip over it and look for the next field.
3351                  */
3352                 if (*tmpstr2 == '\0')
3353                         continue;
3354
3355                 name = strsep(&tmpstr2, "=");
3356                 if (*name == '\0') {
3357                         warnx("Got empty I/O parameter name");
3358                         retval = 1;
3359                         goto bailout;
3360                 }
3361                 value = strsep(&tmpstr2, "=");
3362                 if ((value == NULL)
3363                  || (*value == '\0')) {
3364                         warnx("Empty I/O parameter value for %s", name);
3365                         retval = 1;
3366                         goto bailout;
3367                 }
3368                 if (strncasecmp(name, "file", 4) == 0) {
3369                         io_opts->dev_type = CAMDD_DEV_FILE;
3370                         io_opts->dev_name = strdup(value);
3371                         if (io_opts->dev_name == NULL) {
3372                                 warn("Error allocating memory");
3373                                 retval = 1;
3374                                 goto bailout;
3375                         }
3376                 } else if (strncasecmp(name, "pass", 4) == 0) {
3377                         io_opts->dev_type = CAMDD_DEV_PASS;
3378                         io_opts->dev_name = strdup(value);
3379                         if (io_opts->dev_name == NULL) {
3380                                 warn("Error allocating memory");
3381                                 retval = 1;
3382                                 goto bailout;
3383                         }
3384                 } else if ((strncasecmp(name, "bs", 2) == 0)
3385                         || (strncasecmp(name, "blocksize", 9) == 0)) {
3386                         retval = expand_number(value, &io_opts->blocksize);
3387                         if (retval == -1) {
3388                                 warn("expand_number(3) failed on %s=%s", name,
3389                                     value);
3390                                 retval = 1;
3391                                 goto bailout;
3392                         }
3393                 } else if (strncasecmp(name, "depth", 5) == 0) {
3394                         char *endptr;
3395
3396                         io_opts->queue_depth = strtoull(value, &endptr, 0);
3397                         if (*endptr != '\0') {
3398                                 warnx("invalid queue depth %s", value);
3399                                 retval = 1;
3400                                 goto bailout;
3401                         }
3402                 } else if (strncasecmp(name, "mcs", 3) == 0) {
3403                         char *endptr;
3404
3405                         io_opts->min_cmd_size = strtol(value, &endptr, 0);
3406                         if ((*endptr != '\0')
3407                          || ((io_opts->min_cmd_size > 16)
3408                           || (io_opts->min_cmd_size < 0))) {
3409                                 warnx("invalid minimum cmd size %s", value);
3410                                 retval = 1;
3411                                 goto bailout;
3412                         }
3413                 } else if (strncasecmp(name, "offset", 6) == 0) {
3414                         retval = expand_number(value, &io_opts->offset);
3415                         if (retval == -1) {
3416                                 warn("expand_number(3) failed on %s=%s", name,
3417                                     value);
3418                                 retval = 1;
3419                                 goto bailout;
3420                         }
3421                 } else if (strncasecmp(name, "debug", 5) == 0) {
3422                         char *endptr;
3423
3424                         io_opts->debug = strtoull(value, &endptr, 0);
3425                         if (*endptr != '\0') {
3426                                 warnx("invalid debug level %s", value);
3427                                 retval = 1;
3428                                 goto bailout;
3429                         }
3430                 } else {
3431                         warnx("Unrecognized parameter %s=%s", name, value);
3432                 }
3433         }
3434 bailout:
3435         free(orig_tmpstr);
3436
3437         return (retval);
3438 }
3439
3440 int
3441 main(int argc, char **argv)
3442 {
3443         int c;
3444         camdd_argmask arglist = CAMDD_ARG_NONE;
3445         int timeout = 0, retry_count = 1;
3446         int error = 0;
3447         uint64_t max_io = 0;
3448         struct camdd_io_opts *opt_list = NULL;
3449
3450         if (argc == 1) {
3451                 usage();
3452                 exit(1);
3453         }
3454
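             /*
              * Slot 0 holds the -i (input) options and slot 1 the -o
              * (output) options; camdd_rw() requires exactly one of each.
              */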
3455         opt_list = calloc(2, sizeof(struct camdd_io_opts));
3456         if (opt_list == NULL) {
3457                 warn("Unable to allocate option list");
3458                 error = 1;
3459                 goto bailout;
3460         }
3461
3462         while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1) {
3463                 switch (c) {
3464                 case 'C':
3465                         retry_count = strtol(optarg, NULL, 0);
3466                         if (retry_count < 0)
3467                                 errx(1, "retry count %d is < 0",
3468                                      retry_count);
3469                         arglist |= CAMDD_ARG_RETRIES;
3470                         break;
3471                 case 'E':
3472                         arglist |= CAMDD_ARG_ERR_RECOVER;
3473                         break;
3474                 case 'i':
3475                 case 'o':
3476                         if (((c == 'i')
3477                           && (opt_list[0].dev_type != CAMDD_DEV_NONE))
3478                          || ((c == 'o')
3479                           && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
3480                                 errx(1, "Only one input and output path "
3481                                     "allowed");
3482                         }
3483                         error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
3484                             (c == 'o') ? &opt_list[1] : &opt_list[0]);
3485                         if (error != 0)
3486                                 goto bailout;
3487                         break;
3488                 case 'm':
3489                         error = expand_number(optarg, &max_io);
3490                         if (error == -1) {
3491                                 warn("invalid maximum I/O amount %s", optarg);
3492                                 error = 1;
3493                                 goto bailout;
3494                         }
3495                         break;
3496                 case 't':
3497                         timeout = strtol(optarg, NULL, 0);
3498                         if (timeout < 0)
3499                                 errx(1, "invalid timeout %d", timeout);
3500                         /* Convert the timeout from seconds to ms */
3501                         timeout *= 1000;
3502                         arglist |= CAMDD_ARG_TIMEOUT;
3503                         break;
3504                 case 'v':
3505                         arglist |= CAMDD_ARG_VERBOSE;
3506                         break;
3507                 case 'h':
3508                 default:
3509                         usage();
3510                         exit(1);
3511                         break; /*NOTREACHED*/
3512                 }
3513         }
3514
3515         if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
3516          || (opt_list[1].dev_type == CAMDD_DEV_NONE))
3517                 errx(1, "Must specify both -i and -o");
3518
3519         /*
3520          * Set the timeout if the user hasn't specified one.
3521          */
3522         if (timeout == 0)
3523                 timeout = CAMDD_PASS_RW_TIMEOUT;
3524
3525         error = camdd_rw(opt_list, 2, max_io, retry_count, timeout);
3526
3527 bailout:
3528         free(opt_list);
3529
3530         exit(error);
3531 }