sys/cam/nvme/nvme_da.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2015 Netflix, Inc
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer,
12  *    without modification, immediately at the beginning of the file.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * Derived from ata_da.c:
29  * Copyright (c) 2009 Alexander Motin <mav@FreeBSD.org>
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <sys/param.h>
36
37 #ifdef _KERNEL
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/bio.h>
41 #include <sys/sysctl.h>
42 #include <sys/taskqueue.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/conf.h>
46 #include <sys/devicestat.h>
47 #include <sys/eventhandler.h>
48 #include <sys/malloc.h>
49 #include <sys/cons.h>
50 #include <sys/proc.h>
51 #include <sys/reboot.h>
52 #include <geom/geom_disk.h>
53 #endif /* _KERNEL */
54
55 #ifndef _KERNEL
56 #include <stdio.h>
57 #include <string.h>
58 #endif /* _KERNEL */
59
60 #include <cam/cam.h>
61 #include <cam/cam_ccb.h>
62 #include <cam/cam_periph.h>
63 #include <cam/cam_xpt_periph.h>
64 #include <cam/cam_sim.h>
65 #include <cam/cam_iosched.h>
66
67 #include <cam/nvme/nvme_all.h>
68
69 typedef enum {
70         NDA_STATE_NORMAL
71 } nda_state;
72
73 typedef enum {
74         NDA_FLAG_OPEN           = 0x0001,
75         NDA_FLAG_DIRTY          = 0x0002,
76         NDA_FLAG_SCTX_INIT      = 0x0004,
77 } nda_flags;
78
79 typedef enum {
80         NDA_Q_4K   = 0x01,
81         NDA_Q_NONE = 0x00,
82 } nda_quirks;
83         
84 #define NDA_Q_BIT_STRING        \
85         "\020"                  \
86         "\001Bit 0"
87
88 typedef enum {
89         NDA_CCB_BUFFER_IO       = 0x01,
90         NDA_CCB_DUMP            = 0x02,
91         NDA_CCB_TRIM            = 0x03,
92         NDA_CCB_TYPE_MASK       = 0x0F,
93 } nda_ccb_state;
94
95 /* Offsets into our private area for storing information */
96 #define ccb_state       ppriv_field0
97 #define ccb_bp          ppriv_ptr1
98
99 struct trim_request {
100         TAILQ_HEAD(, bio) bps;
101 };
102 struct nda_softc {
103         struct   cam_iosched_softc *cam_iosched;
104         int      outstanding_cmds;      /* Number of active commands */
105         int      refcount;              /* Active xpt_action() calls */
106         nda_state state;
107         nda_flags flags;
108         nda_quirks quirks;
109         int      unmappedio;
110         uint32_t  nsid;                 /* Namespace ID for this nda device */
111         struct disk *disk;
112         struct task             sysctl_task;
113         struct sysctl_ctx_list  sysctl_ctx;
114         struct sysctl_oid       *sysctl_tree;
115         struct trim_request     trim_req;
116 #ifdef CAM_IO_STATS
117         struct sysctl_ctx_list  sysctl_stats_ctx;
118         struct sysctl_oid       *sysctl_stats_tree;
119         u_int   timeouts;
120         u_int   errors;
121         u_int   invalidations;
122 #endif
123 };
124
125 /* Need quirk table */
126
127 static  disk_strategy_t ndastrategy;
128 static  dumper_t        ndadump;
129 static  periph_init_t   ndainit;
130 static  void            ndaasync(void *callback_arg, u_int32_t code,
131                                 struct cam_path *path, void *arg);
132 static  void            ndasysctlinit(void *context, int pending);
133 static  periph_ctor_t   ndaregister;
134 static  periph_dtor_t   ndacleanup;
135 static  periph_start_t  ndastart;
136 static  periph_oninv_t  ndaoninvalidate;
137 static  void            ndadone(struct cam_periph *periph,
138                                union ccb *done_ccb);
139 static  int             ndaerror(union ccb *ccb, u_int32_t cam_flags,
140                                 u_int32_t sense_flags);
141 static void             ndashutdown(void *arg, int howto);
142 static void             ndasuspend(void *arg);
143
144 #ifndef NDA_DEFAULT_SEND_ORDERED
145 #define NDA_DEFAULT_SEND_ORDERED        1
146 #endif
147 #ifndef NDA_DEFAULT_TIMEOUT
148 #define NDA_DEFAULT_TIMEOUT 30  /* Timeout in seconds */
149 #endif
150 #ifndef NDA_DEFAULT_RETRY
151 #define NDA_DEFAULT_RETRY       4
152 #endif
153
154
155 //static int nda_retry_count = NDA_DEFAULT_RETRY;
156 static int nda_send_ordered = NDA_DEFAULT_SEND_ORDERED;
157 static int nda_default_timeout = NDA_DEFAULT_TIMEOUT;
158
159 /*
160  * All NVMe media is non-rotational, so all nvme device instances
161  * share this to implement the sysctl.
162  */
163 static int nda_rotating_media = 0;
164
165 static SYSCTL_NODE(_kern_cam, OID_AUTO, nda, CTLFLAG_RD, 0,
166             "CAM Direct Access Disk driver");
167
168 static struct periph_driver ndadriver =
169 {
170         ndainit, "nda",
171         TAILQ_HEAD_INITIALIZER(ndadriver.units), /* generation */ 0
172 };
173
174 PERIPHDRIVER_DECLARE(nda, ndadriver);
175
176 static MALLOC_DEFINE(M_NVMEDA, "nvme_da", "nvme_da buffers");
177
178 /*
179  * Nice wrappers. Maybe these belong in nvme_all.c instead of
180  * here, but this is the only place that uses them. Should
181  * we ever grow another NVMe periph, we should move them
182  * there wholesale.
183  */
184
185 static void
186 nda_nvme_flush(struct nda_softc *softc, struct ccb_nvmeio *nvmeio)
187 {
188         cam_fill_nvmeio(nvmeio,
189             0,                  /* retries */
190             ndadone,            /* cbfcnp */
191             CAM_DIR_NONE,       /* flags */
192             NULL,               /* data_ptr */
193             0,                  /* dxfer_len */
194             nda_default_timeout * 1000); /* timeout in ms (default 30s) */
195         nvme_ns_flush_cmd(&nvmeio->cmd, softc->nsid);
196 }
197
198 static void
199 nda_nvme_trim(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
200     void *payload, uint32_t num_ranges)
201 {
202         cam_fill_nvmeio(nvmeio,
203             0,                  /* retries */
204             ndadone,            /* cbfcnp */
205             CAM_DIR_OUT,        /* flags */
206             payload,            /* data_ptr */
207             num_ranges * sizeof(struct nvme_dsm_range), /* dxfer_len */
208             nda_default_timeout * 1000); /* timeout in ms (default 30s) */
209         nvme_ns_trim_cmd(&nvmeio->cmd, softc->nsid, num_ranges);
210 }
211
212 static void
213 nda_nvme_write(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
214     void *payload, uint64_t lba, uint32_t len, uint32_t count)
215 {
216         cam_fill_nvmeio(nvmeio,
217             0,                  /* retries */
218             ndadone,            /* cbfcnp */
219             CAM_DIR_OUT,        /* flags */
220             payload,            /* data_ptr */
221             len,                /* dxfer_len */
222             nda_default_timeout * 1000); /* timeout in ms (default 30s) */
223         nvme_ns_write_cmd(&nvmeio->cmd, softc->nsid, lba, count);
224 }
225
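/*
 * Convert a struct bio into an NVMe read or write command.  For unmapped
 * bios we hand the bio itself to the SIM (CAM_DATA_BIO); otherwise we pass
 * the mapped data pointer.  The LBA and block count are derived from the
 * bio and the disk's sector size.
 */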
226 static void
227 nda_nvme_rw_bio(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
228     struct bio *bp, uint32_t rwcmd)
229 {
230         int flags = rwcmd == NVME_OPC_READ ? CAM_DIR_IN : CAM_DIR_OUT;
231         void *payload;
232         uint64_t lba;
233         uint32_t count;
234
235         if (bp->bio_flags & BIO_UNMAPPED) {
236                 flags |= CAM_DATA_BIO;
237                 payload = bp;
238         } else {
239                 payload = bp->bio_data;
240         }
241
242         lba = bp->bio_pblkno;
243         count = bp->bio_bcount / softc->disk->d_sectorsize;
244
245         cam_fill_nvmeio(nvmeio,
246             0,                  /* retries */
247             ndadone,            /* cbfcnp */
248             flags,              /* flags */
249             payload,            /* data_ptr */
250             bp->bio_bcount,     /* dxfer_len */
251             nda_default_timeout * 1000); /* timeout in ms (default 30s) */
252         nvme_ns_rw_cmd(&nvmeio->cmd, rwcmd, softc->nsid, lba, count);
253 }
254
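/*
 * GEOM d_open method: take a reference on the periph (dropped in
 * ndaclose()) and mark the namespace open.
 */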
255 static int
256 ndaopen(struct disk *dp)
257 {
258         struct cam_periph *periph;
259         struct nda_softc *softc;
260         int error;
261
262         periph = (struct cam_periph *)dp->d_drv1;
263         if (cam_periph_acquire(periph) != 0) {
264                 return(ENXIO);
265         }
266
267         cam_periph_lock(periph);
268         if ((error = cam_periph_hold(periph, PRIBIO|PCATCH)) != 0) {
269                 cam_periph_unlock(periph);
270                 cam_periph_release(periph);
271                 return (error);
272         }
273
274         CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH,
275             ("ndaopen\n"));
276
277         softc = (struct nda_softc *)periph->softc;
278         softc->flags |= NDA_FLAG_OPEN;
279
280         cam_periph_unhold(periph);
281         cam_periph_unlock(periph);
282         return (0);
283 }
284
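/*
 * GEOM d_close method: if the namespace has been written to and is still
 * valid, flush the volatile write cache, then clear the open flag, wait
 * for in-flight xpt_action() calls to drain and drop the reference taken
 * in ndaopen().
 */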
285 static int
286 ndaclose(struct disk *dp)
287 {
288         struct  cam_periph *periph;
289         struct  nda_softc *softc;
290         union ccb *ccb;
291         int error;
292
293         periph = (struct cam_periph *)dp->d_drv1;
294         softc = (struct nda_softc *)periph->softc;
295         cam_periph_lock(periph);
296
297         CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH,
298             ("ndaclose\n"));
299
300         if ((softc->flags & NDA_FLAG_DIRTY) != 0 &&
301             (periph->flags & CAM_PERIPH_INVALID) == 0 &&
302             cam_periph_hold(periph, PRIBIO) == 0) {
303
304                 ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL);
305                 nda_nvme_flush(softc, &ccb->nvmeio);
306                 error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0,
307                     /*sense_flags*/0, softc->disk->d_devstat);
308
309                 if (error != 0)
310                         xpt_print(periph->path, "Synchronize cache failed\n");
311                 else
312                         softc->flags &= ~NDA_FLAG_DIRTY;
313                 xpt_release_ccb(ccb);
314                 cam_periph_unhold(periph);
315         }
316
317         softc->flags &= ~NDA_FLAG_OPEN;
318
319         while (softc->refcount != 0)
320                 cam_periph_sleep(periph, &softc->refcount, PRIBIO, "ndaclose", 1);
321         cam_periph_unlock(periph);
322         cam_periph_release(periph);
323         return (0);     
324 }
325
326 static void
327 ndaschedule(struct cam_periph *periph)
328 {
329         struct nda_softc *softc = (struct nda_softc *)periph->softc;
330
331         if (softc->state != NDA_STATE_NORMAL)
332                 return;
333
334         cam_iosched_schedule(softc->cam_iosched, periph);
335 }
336
337 /*
338  * Actually translate the requested transfer into one the physical driver
339  * can understand.  The transfer is described by a bio and will include
340  * only one physical transfer.
341  */
342 static void
343 ndastrategy(struct bio *bp)
344 {
345         struct cam_periph *periph;
346         struct nda_softc *softc;
347         
348         periph = (struct cam_periph *)bp->bio_disk->d_drv1;
349         softc = (struct nda_softc *)periph->softc;
350
351         cam_periph_lock(periph);
352
353         CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastrategy(%p)\n", bp));
354
355         /*
356          * If the device has been made invalid, error out
357          */
358         if ((periph->flags & CAM_PERIPH_INVALID) != 0) {
359                 cam_periph_unlock(periph);
360                 biofinish(bp, NULL, ENXIO);
361                 return;
362         }
363         
364         /*
365          * Place it in the queue of disk activities for this disk
366          */
367         cam_iosched_queue_work(softc->cam_iosched, bp);
368
369         /*
370          * Schedule ourselves for performing the work.
371          */
372         ndaschedule(periph);
373         cam_periph_unlock(periph);
374
375         return;
376 }
377
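/*
 * GEOM d_dump method used for kernel crash dumps.  Calls with a non-zero
 * length write dump data directly; the final zero-length call flushes the
 * namespace's write cache.
 */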
378 static int
379 ndadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length)
380 {
381         struct      cam_periph *periph;
382         struct      nda_softc *softc;
383         u_int       secsize;
384         struct ccb_nvmeio nvmeio;
385         struct      disk *dp;
386         uint64_t    lba;
387         uint32_t    count;
388         int         error = 0;
389
390         dp = arg;
391         periph = dp->d_drv1;
392         softc = (struct nda_softc *)periph->softc;
393         secsize = softc->disk->d_sectorsize;
394         lba = offset / secsize;
395         count = length / secsize;
396         
397         if ((periph->flags & CAM_PERIPH_INVALID) != 0)
398                 return (ENXIO);
399
400         /* xpt_get_ccb returns a zero'd allocation for the ccb, mimic that here */
401         memset(&nvmeio, 0, sizeof(nvmeio));
402         if (length > 0) {
403                 xpt_setup_ccb(&nvmeio.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
404                 nvmeio.ccb_h.ccb_state = NDA_CCB_DUMP;
405                 nda_nvme_write(softc, &nvmeio, virtual, lba, length, count);
406                 error = cam_periph_runccb((union ccb *)&nvmeio, cam_periph_error,
407                     0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
408                 if (error != 0)
409                         printf("Aborting dump due to I/O error %d.\n", error);
410
411                 return (error);
412         }
413         
414         /* Flush */
415         xpt_setup_ccb(&nvmeio.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
416
417         nvmeio.ccb_h.ccb_state = NDA_CCB_DUMP;
418         nda_nvme_flush(softc, &nvmeio);
419         error = cam_periph_runccb((union ccb *)&nvmeio, cam_periph_error,
420             0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
421         if (error != 0)
422                 xpt_print(periph->path, "flush cmd failed\n");
423         return (error);
424 }
425
426 static void
427 ndainit(void)
428 {
429         cam_status status;
430
431         /*
432          * Install a global async callback.  This callback will
433          * receive async callbacks like "new device found".
434          */
435         status = xpt_register_async(AC_FOUND_DEVICE, ndaasync, NULL, NULL);
436
437         if (status != CAM_REQ_CMP) {
438                 printf("nda: Failed to attach master async callback "
439                        "due to status 0x%x!\n", status);
440         } else if (nda_send_ordered) {
441
442                 /* Register our event handlers */
443                 if ((EVENTHANDLER_REGISTER(power_suspend, ndasuspend,
444                                            NULL, EVENTHANDLER_PRI_LAST)) == NULL)
445                     printf("ndainit: power event registration failed!\n");
446                 if ((EVENTHANDLER_REGISTER(shutdown_post_sync, ndashutdown,
447                                            NULL, SHUTDOWN_PRI_DEFAULT)) == NULL)
448                     printf("ndainit: shutdown event registration failed!\n");
449         }
450 }
451
452 /*
453  * Callback from GEOM, called when it has finished cleaning up its
454  * resources.
455  */
456 static void
457 ndadiskgonecb(struct disk *dp)
458 {
459         struct cam_periph *periph;
460
461         periph = (struct cam_periph *)dp->d_drv1;
462
463         cam_periph_release(periph);
464 }
465
466 static void
467 ndaoninvalidate(struct cam_periph *periph)
468 {
469         struct nda_softc *softc;
470
471         softc = (struct nda_softc *)periph->softc;
472
473         /*
474          * De-register any async callbacks.
475          */
476         xpt_register_async(0, ndaasync, periph, periph->path);
477 #ifdef CAM_IO_STATS
478         softc->invalidations++;
479 #endif
480
481         /*
482          * Return all queued I/O with ENXIO.
483          * XXX Handle any transactions queued to the card
484          *     with XPT_ABORT_CCB.
485          */
486         cam_iosched_flush(softc->cam_iosched, NULL, ENXIO);
487
488         disk_gone(softc->disk);
489 }
490
491 static void
492 ndacleanup(struct cam_periph *periph)
493 {
494         struct nda_softc *softc;
495
496         softc = (struct nda_softc *)periph->softc;
497
498         cam_periph_unlock(periph);
499
500         cam_iosched_fini(softc->cam_iosched);
501
502         /*
503          * If we can't free the sysctl tree, oh well...
504          */
505         if ((softc->flags & NDA_FLAG_SCTX_INIT) != 0) {
506 #ifdef CAM_IO_STATS
507                 if (sysctl_ctx_free(&softc->sysctl_stats_ctx) != 0)
508                         xpt_print(periph->path,
509                             "can't remove sysctl stats context\n");
510 #endif
511                 if (sysctl_ctx_free(&softc->sysctl_ctx) != 0)
512                         xpt_print(periph->path,
513                             "can't remove sysctl context\n");
514         }
515
516         disk_destroy(softc->disk);
517         free(softc, M_DEVBUF);
518         cam_periph_lock(periph);
519 }
520
521 static void
522 ndaasync(void *callback_arg, u_int32_t code,
523         struct cam_path *path, void *arg)
524 {
525         struct cam_periph *periph;
526
527         periph = (struct cam_periph *)callback_arg;
528         switch (code) {
529         case AC_FOUND_DEVICE:
530         {
531                 struct ccb_getdev *cgd;
532                 cam_status status;
533  
534                 cgd = (struct ccb_getdev *)arg;
535                 if (cgd == NULL)
536                         break;
537
538                 if (cgd->protocol != PROTO_NVME)
539                         break;
540
541                 /*
542                  * Allocate a peripheral instance for
543                  * this device and start the probe
544                  * process.
545                  */
546                 status = cam_periph_alloc(ndaregister, ndaoninvalidate,
547                                           ndacleanup, ndastart,
548                                           "nda", CAM_PERIPH_BIO,
549                                           path, ndaasync,
550                                           AC_FOUND_DEVICE, cgd);
551
552                 if (status != CAM_REQ_CMP
553                  && status != CAM_REQ_INPROG)
554                         printf("ndaasync: Unable to attach to new device "
555                                 "due to status 0x%x\n", status);
556                 break;
557         }
558         case AC_ADVINFO_CHANGED:
559         {
560                 uintptr_t buftype;
561
562                 buftype = (uintptr_t)arg;
563                 if (buftype == CDAI_TYPE_PHYS_PATH) {
564                         struct nda_softc *softc;
565
566                         softc = periph->softc;
567                         disk_attr_changed(softc->disk, "GEOM::physpath",
568                                           M_NOWAIT);
569                 }
570                 break;
571         }
572         case AC_LOST_DEVICE:
573         default:
574                 cam_periph_async(periph, code, path, arg);
575                 break;
576         }
577 }
578
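/*
 * Create the per-unit sysctl tree from a taskqueue context.  For unit 0,
 * for example, this yields kern.cam.nda.0.unmapped_io and
 * kern.cam.nda.0.rotating, plus a kern.cam.nda.0.stats subtree when
 * CAM_IO_STATS is defined, and it hooks in the I/O scheduler's sysctls.
 */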
579 static void
580 ndasysctlinit(void *context, int pending)
581 {
582         struct cam_periph *periph;
583         struct nda_softc *softc;
584         char tmpstr[32], tmpstr2[16];
585
586         periph = (struct cam_periph *)context;
587
588         /* periph was held for us when this task was enqueued */
589         if ((periph->flags & CAM_PERIPH_INVALID) != 0) {
590                 cam_periph_release(periph);
591                 return;
592         }
593
594         softc = (struct nda_softc *)periph->softc;
595         snprintf(tmpstr, sizeof(tmpstr), "CAM NDA unit %d", periph->unit_number);
596         snprintf(tmpstr2, sizeof(tmpstr2), "%d", periph->unit_number);
597
598         sysctl_ctx_init(&softc->sysctl_ctx);
599         softc->flags |= NDA_FLAG_SCTX_INIT;
600         softc->sysctl_tree = SYSCTL_ADD_NODE_WITH_LABEL(&softc->sysctl_ctx,
601                 SYSCTL_STATIC_CHILDREN(_kern_cam_nda), OID_AUTO, tmpstr2,
602                 CTLFLAG_RD, 0, tmpstr, "device_index");
603         if (softc->sysctl_tree == NULL) {
604                 printf("ndasysctlinit: unable to allocate sysctl tree\n");
605                 cam_periph_release(periph);
606                 return;
607         }
608
609         SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
610                 OID_AUTO, "unmapped_io", CTLFLAG_RD | CTLFLAG_MPSAFE,
611                 &softc->unmappedio, 0, "Unmapped I/O leaf");
612
613         SYSCTL_ADD_INT(&softc->sysctl_ctx,
614                        SYSCTL_CHILDREN(softc->sysctl_tree),
615                        OID_AUTO,
616                        "rotating",
617                        CTLFLAG_RD | CTLFLAG_MPSAFE, 
618                        &nda_rotating_media,
619                        0,
620                        "Rotating media");
621
622 #ifdef CAM_IO_STATS
623         softc->sysctl_stats_tree = SYSCTL_ADD_NODE(&softc->sysctl_stats_ctx,
624                 SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "stats",
625                 CTLFLAG_RD, 0, "Statistics");
626         if (softc->sysctl_stats_tree == NULL) {
627                 printf("ndasysctlinit: unable to allocate sysctl tree for stats\n");
628                 cam_periph_release(periph);
629                 return;
630         }
631         SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
632                 SYSCTL_CHILDREN(softc->sysctl_stats_tree),
633                 OID_AUTO, "timeouts", CTLFLAG_RD | CTLFLAG_MPSAFE,
634                 &softc->timeouts, 0,
635                 "Device timeouts reported by the SIM");
636         SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
637                 SYSCTL_CHILDREN(softc->sysctl_stats_tree),
638                 OID_AUTO, "errors", CTLFLAG_RD | CTLFLAG_MPSAFE,
639                 &softc->errors, 0,
640                 "Transport errors reported by the SIM.");
641         SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
642                 SYSCTL_CHILDREN(softc->sysctl_stats_tree),
643                 OID_AUTO, "pack_invalidations", CTLFLAG_RD | CTLFLAG_MPSAFE,
644                 &softc->invalidations, 0,
645                 "Device pack invalidations.");
646 #endif
647
648         cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx,
649             softc->sysctl_tree);
650
651         cam_periph_release(periph);
652 }
653
654 static int
655 ndagetattr(struct bio *bp)
656 {
657         int ret;
658         struct cam_periph *periph;
659
660         periph = (struct cam_periph *)bp->bio_disk->d_drv1;
661         cam_periph_lock(periph);
662         ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute,
663             periph->path);
664         cam_periph_unlock(periph);
665         if (ret == 0)
666                 bp->bio_completed = bp->bio_length;
667         return ret;
668 }
669
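/*
 * Periph constructor: look up the namespace and controller identify data,
 * allocate the softc and I/O scheduler, record the namespace ID, then
 * create and announce the GEOM disk that fronts this namespace.
 */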
670 static cam_status
671 ndaregister(struct cam_periph *periph, void *arg)
672 {
673         struct nda_softc *softc;
674         struct disk *disk;
675         struct ccb_pathinq cpi;
676         const struct nvme_namespace_data *nsd;
677         const struct nvme_controller_data *cd;
678         char   announce_buf[80];
679         u_int maxio;
680         int quirks;
681
682         nsd = nvme_get_identify_ns(periph);
683         cd = nvme_get_identify_cntrl(periph);
684
685         softc = (struct nda_softc *)malloc(sizeof(*softc), M_DEVBUF,
686             M_NOWAIT | M_ZERO);
687
688         if (softc == NULL) {
689                 printf("ndaregister: Unable to probe new device. "
690                     "Unable to allocate softc\n");
691                 return(CAM_REQ_CMP_ERR);
692         }
693
694         if (cam_iosched_init(&softc->cam_iosched, periph) != 0) {
695                 printf("ndaregister: Unable to probe new device. "
696                        "Unable to allocate iosched memory\n");
                free(softc, M_DEVBUF); /* don't leak the softc allocated above */
697                 return(CAM_REQ_CMP_ERR);
698         }
699
700         /* ident_data parsing */
701
702         periph->softc = softc;
703
704         softc->quirks = NDA_Q_NONE;
705
706         xpt_path_inq(&cpi, periph->path);
707
708         TASK_INIT(&softc->sysctl_task, 0, ndasysctlinit, periph);
709
710         /*
711          * The namespace ID is the LUN; save it for later I/O.
712          */
713         softc->nsid = (uint32_t)xpt_path_lun_id(periph->path);
714
715         /*
716          * Register this media as a disk
717          */
718         (void)cam_periph_hold(periph, PRIBIO);
719         cam_periph_unlock(periph);
720         snprintf(announce_buf, sizeof(announce_buf),
721             "kern.cam.nda.%d.quirks", periph->unit_number);
722         quirks = softc->quirks;
723         TUNABLE_INT_FETCH(announce_buf, &quirks);
724         softc->quirks = quirks;
725         cam_iosched_set_sort_queue(softc->cam_iosched, 0);
726         softc->disk = disk = disk_alloc();
727         strlcpy(softc->disk->d_descr, cd->mn,
728             MIN(sizeof(softc->disk->d_descr), sizeof(cd->mn)));
729         strlcpy(softc->disk->d_ident, cd->sn,
730             MIN(sizeof(softc->disk->d_ident), sizeof(cd->sn)));
731         disk->d_rotation_rate = DISK_RR_NON_ROTATING;
732         disk->d_open = ndaopen;
733         disk->d_close = ndaclose;
734         disk->d_strategy = ndastrategy;
735         disk->d_getattr = ndagetattr;
736         disk->d_dump = ndadump;
737         disk->d_gone = ndadiskgonecb;
738         disk->d_name = "nda";
739         disk->d_drv1 = periph;
740         disk->d_unit = periph->unit_number;
741         maxio = cpi.maxio;              /* Honor max I/O size of SIM */
742         if (maxio == 0)
743                 maxio = DFLTPHYS;       /* traditional default */
744         else if (maxio > MAXPHYS)
745                 maxio = MAXPHYS;        /* for safety */
746         disk->d_maxsize = maxio;
747         disk->d_sectorsize = 1 << nsd->lbaf[nsd->flbas.format].lbads;
748         disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze);
749         disk->d_delmaxsize = disk->d_mediasize;
750         disk->d_flags = DISKFLAG_DIRECT_COMPLETION;
751 //      if (cd->oncs.dsm) // XXX broken?
752                 disk->d_flags |= DISKFLAG_CANDELETE;
753         if (cd->vwc.present)
754                 disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
755         if ((cpi.hba_misc & PIM_UNMAPPED) != 0) {
756                 disk->d_flags |= DISKFLAG_UNMAPPED_BIO;
757                 softc->unmappedio = 1;
758         }
759         /*
760          * d_ident and d_descr are both far bigger than the length of either
761          *  the serial or model number strings.
762          */
763         nvme_strvis(disk->d_descr, cd->mn,
764             sizeof(disk->d_descr), NVME_MODEL_NUMBER_LENGTH);
765         nvme_strvis(disk->d_ident, cd->sn,
766             sizeof(disk->d_ident), NVME_SERIAL_NUMBER_LENGTH);
767         disk->d_hba_vendor = cpi.hba_vendor;
768         disk->d_hba_device = cpi.hba_device;
769         disk->d_hba_subvendor = cpi.hba_subvendor;
770         disk->d_hba_subdevice = cpi.hba_subdevice;
771         disk->d_stripesize = disk->d_sectorsize;
772         disk->d_stripeoffset = 0;
773         disk->d_devstat = devstat_new_entry(periph->periph_name,
774             periph->unit_number, disk->d_sectorsize,
775             DEVSTAT_ALL_SUPPORTED,
776             DEVSTAT_TYPE_DIRECT | XPORT_DEVSTAT_TYPE(cpi.transport),
777             DEVSTAT_PRIORITY_DISK);
778         /*
779          * Add alias for older nvd drives to ease transition.
780          */
781         /* disk_add_alias(disk, "nvd"); Have reports of this causing problems */
782
783         /*
784          * Acquire a reference to the periph before we register with GEOM.
785          * We'll release this reference once GEOM calls us back (via
786          * ndadiskgonecb()) telling us that our provider has been freed.
787          */
788         if (cam_periph_acquire(periph) != 0) {
789                 xpt_print(periph->path, "%s: lost periph during "
790                           "registration!\n", __func__);
791                 cam_periph_lock(periph);
792                 return (CAM_REQ_CMP_ERR);
793         }
794         disk_create(softc->disk, DISK_VERSION);
795         cam_periph_lock(periph);
796         cam_periph_unhold(periph);
797
798         snprintf(announce_buf, sizeof(announce_buf),
799                 "%juMB (%ju %u byte sectors)",
800             (uintmax_t)((uintmax_t)disk->d_mediasize / (1024*1024)),
801                 (uintmax_t)disk->d_mediasize / disk->d_sectorsize,
802                 disk->d_sectorsize);
803         xpt_announce_periph(periph, announce_buf);
804         xpt_announce_quirks(periph, softc->quirks, NDA_Q_BIT_STRING);
805
806         /*
807          * Create our sysctl variables, now that we know
808          * we have successfully attached.
809          */
810         if (cam_periph_acquire(periph) == 0)
811                 taskqueue_enqueue(taskqueue_thread, &softc->sysctl_task);
812
813         /*
814          * Register for device going away and info about the drive
815          * changing (though with NVMe, it can't)
816          */
817         xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED,
818             ndaasync, periph, periph->path);
819
820         softc->state = NDA_STATE_NORMAL;
821         return(CAM_REQ_CMP);
822 }
823
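/*
 * Periph start routine, invoked with a freshly allocated CCB whenever work
 * has been scheduled: pull the next bio from the I/O scheduler and turn it
 * into an NVMe read, write, deallocate (trim) or flush command.
 */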
824 static void
825 ndastart(struct cam_periph *periph, union ccb *start_ccb)
826 {
827         struct nda_softc *softc = (struct nda_softc *)periph->softc;
828         struct ccb_nvmeio *nvmeio = &start_ccb->nvmeio;
829
830         CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart\n"));
831
832         switch (softc->state) {
833         case NDA_STATE_NORMAL:
834         {
835                 struct bio *bp;
836
837                 bp = cam_iosched_next_bio(softc->cam_iosched);
838                 CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart: bio %p\n", bp));
839                 if (bp == NULL) {
840                         xpt_release_ccb(start_ccb);
841                         break;
842                 }
843
844                 switch (bp->bio_cmd) {
845                 case BIO_WRITE:
846                         softc->flags |= NDA_FLAG_DIRTY;
847                         /* FALLTHROUGH */
848                 case BIO_READ:
849                 {
850 #ifdef NDA_TEST_FAILURE
851                         int fail = 0;
852
853                         /*
854                          * Support the failure ioctls.  If the command is a
855                          * read and there are pending forced read errors, or if it is
856                          * a write and there are pending write errors, then fail this
857                          * operation with EIO.  This is useful for testing
858                          * purposes.  Also, support having every Nth read fail.
859                          *
860                          * This is a rather blunt tool.
861                          */
862                         if (bp->bio_cmd == BIO_READ) {
863                                 if (softc->force_read_error) {
864                                         softc->force_read_error--;
865                                         fail = 1;
866                                 }
867                                 if (softc->periodic_read_error > 0) {
868                                         if (++softc->periodic_read_count >=
869                                             softc->periodic_read_error) {
870                                                 softc->periodic_read_count = 0;
871                                                 fail = 1;
872                                         }
873                                 }
874                         } else {
875                                 if (softc->force_write_error) {
876                                         softc->force_write_error--;
877                                         fail = 1;
878                                 }
879                         }
880                         if (fail) {
881                                 biofinish(bp, NULL, EIO);
882                                 xpt_release_ccb(start_ccb);
883                                 ndaschedule(periph);
884                                 return;
885                         }
886 #endif
887                         KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 ||
888                             round_page(bp->bio_bcount + bp->bio_ma_offset) /
889                             PAGE_SIZE == bp->bio_ma_n,
890                             ("Short bio %p", bp));
891                         nda_nvme_rw_bio(softc, &start_ccb->nvmeio, bp, bp->bio_cmd == BIO_READ ?
892                             NVME_OPC_READ : NVME_OPC_WRITE);
893                         break;
894                 }
895                 case BIO_DELETE:
896                 {
897                         struct nvme_dsm_range *dsm_range;
898
899                         dsm_range =
900                             malloc(sizeof(*dsm_range), M_NVMEDA, M_ZERO | M_NOWAIT);
901                         if (dsm_range == NULL) {
902                                 biofinish(bp, NULL, ENOMEM);
903                                 xpt_release_ccb(start_ccb);
904                                 ndaschedule(periph);
905                                 return;
906                         }
907                         dsm_range->length =
908                             bp->bio_bcount / softc->disk->d_sectorsize;
909                         dsm_range->starting_lba =
910                             bp->bio_offset / softc->disk->d_sectorsize;
911                         bp->bio_driver2 = dsm_range;
912                         nda_nvme_trim(softc, &start_ccb->nvmeio, dsm_range, 1);
913                         start_ccb->ccb_h.ccb_state = NDA_CCB_TRIM;
914                         start_ccb->ccb_h.flags |= CAM_UNLOCKED;
915                         /*
916                          * Note: We can have multiple TRIMs in flight, so we don't call
917                          * cam_iosched_submit_trim(softc->cam_iosched);
918                          * since that forces the I/O scheduler to only schedule one at a time.
919                          * On NVMe drives, this is a performance disaster.
920                          */
921                         goto out;
922                 }
923                 case BIO_FLUSH:
924                         nda_nvme_flush(softc, nvmeio);
925                         break;
                default:
                        /* Unexpected bio command; fail it rather than build a bogus CCB. */
                        biofinish(bp, NULL, EOPNOTSUPP);
                        xpt_release_ccb(start_ccb);
                        ndaschedule(periph);
                        return;
926                 }
927                 start_ccb->ccb_h.ccb_state = NDA_CCB_BUFFER_IO;
928                 start_ccb->ccb_h.flags |= CAM_UNLOCKED;
929 out:
930                 start_ccb->ccb_h.ccb_bp = bp;
931                 softc->outstanding_cmds++;
932                 softc->refcount++;
933                 cam_periph_unlock(periph);
934                 xpt_action(start_ccb);
935                 cam_periph_lock(periph);
936                 softc->refcount--;
937
938                 /* May have more work to do, so ensure we stay scheduled */
939                 ndaschedule(periph);
940                 break;
941                 }
942         }
943 }
944
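/*
 * Command completion handler: translate the CCB status back onto the
 * originating bio, let the I/O scheduler account for the completion, and
 * reschedule the periph so queued work keeps flowing.
 */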
945 static void
946 ndadone(struct cam_periph *periph, union ccb *done_ccb)
947 {
948         struct nda_softc *softc;
949         struct ccb_nvmeio *nvmeio = &done_ccb->nvmeio;
950         struct cam_path *path;
951         int state;
952
953         softc = (struct nda_softc *)periph->softc;
954         path = done_ccb->ccb_h.path;
955
956         CAM_DEBUG(path, CAM_DEBUG_TRACE, ("ndadone\n"));
957
958         state = nvmeio->ccb_h.ccb_state & NDA_CCB_TYPE_MASK;
959         switch (state) {
960         case NDA_CCB_BUFFER_IO:
961         case NDA_CCB_TRIM:
962         {
963                 struct bio *bp;
964                 int error;
965
966                 cam_periph_lock(periph);
967                 bp = (struct bio *)done_ccb->ccb_h.ccb_bp;
968                 if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
969                         error = ndaerror(done_ccb, 0, 0);
970                         if (error == ERESTART) {
971                                 /* A retry was scheduled, so just return. */
972                                 cam_periph_unlock(periph);
973                                 return;
974                         }
975                         if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
976                                 cam_release_devq(path,
977                                                  /*relsim_flags*/0,
978                                                  /*reduction*/0,
979                                                  /*timeout*/0,
980                                                  /*getcount_only*/0);
981                 } else {
982                         if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
983                                 panic("REQ_CMP with QFRZN");
984                         error = 0;
985                 }
986                 bp->bio_error = error;
987                 if (error != 0) {
988                         bp->bio_resid = bp->bio_bcount;
989                         bp->bio_flags |= BIO_ERROR;
990                 } else {
991                         bp->bio_resid = 0;
992                 }
993                 if (state == NDA_CCB_TRIM)
994                         free(bp->bio_driver2, M_NVMEDA);
995                 softc->outstanding_cmds--;
996
997                 /*
998                  * We need to call cam_iosched_bio_complete() before biodone() so that we
999                  * don't measure any activity that happens in the completion
1000                  * routine, which in the case of sendfile can be quite
1001                  * extensive.
1002                  */
1003                 cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb);
1004                 xpt_release_ccb(done_ccb);
1005                 if (state == NDA_CCB_TRIM) {
1006 #ifdef notyet
1007                         TAILQ_HEAD(, bio) queue;
1008                         struct bio *bp1;
1009
1010                         TAILQ_INIT(&queue);
1011                         TAILQ_CONCAT(&queue, &softc->trim_req.bps, bio_queue);
1012 #endif
1013                         /*
1014                          * Since we can have multiple trims in flight, we don't
1015                          * need to call this here.
1016                          * cam_iosched_trim_done(softc->cam_iosched);
1017                          */
1018                         ndaschedule(periph);
1019                         cam_periph_unlock(periph);
1020 #ifdef notyet
1021 /* Not yet collapsing several BIO_DELETE requests into one TRIM */
1022                         while ((bp1 = TAILQ_FIRST(&queue)) != NULL) {
1023                                 TAILQ_REMOVE(&queue, bp1, bio_queue);
1024                                 bp1->bio_error = error;
1025                                 if (error != 0) {
1026                                         bp1->bio_flags |= BIO_ERROR;
1027                                         bp1->bio_resid = bp1->bio_bcount;
1028                                 } else
1029                                         bp1->bio_resid = 0;
1030                                 biodone(bp1);
1031                         }
1032 #else
1033                         biodone(bp);
1034 #endif
1035                 } else {
1036                         ndaschedule(periph);
1037                         cam_periph_unlock(periph);
1038                         biodone(bp);
1039                 }
1040                 return;
1041         }
1042         case NDA_CCB_DUMP:
1043                 /* No-op.  We're polling */
1044                 return;
1045         default:
1046                 break;
1047         }
1048         xpt_release_ccb(done_ccb);
1049 }
1050
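/*
 * Error callback used by cam_periph_runccb() and ndadone(): count the
 * error for the CAM_IO_STATS sysctls and defer to the generic
 * cam_periph_error() recovery code.
 */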
1051 static int
1052 ndaerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags)
1053 {
1054         struct nda_softc *softc;
1055         struct cam_periph *periph;
1056
1057         periph = xpt_path_periph(ccb->ccb_h.path);
1058         softc = (struct nda_softc *)periph->softc;
1059
1060         switch (ccb->ccb_h.status & CAM_STATUS_MASK) {
1061         case CAM_CMD_TIMEOUT:
1062 #ifdef CAM_IO_STATS
1063                 softc->timeouts++;
1064 #endif
1065                 break;
1066         case CAM_REQ_ABORTED:
1067         case CAM_REQ_CMP_ERR:
1068         case CAM_REQ_TERMIO:
1069         case CAM_UNREC_HBA_ERROR:
1070         case CAM_DATA_RUN_ERR:
1071         case CAM_ATA_STATUS_ERROR:
1072 #ifdef CAM_IO_STATS
1073                 softc->errors++;
1074 #endif
1075                 break;
1076         default:
1077                 break;
1078         }
1079
1080         return(cam_periph_error(ccb, cam_flags, sense_flags));
1081 }
1082
1083 /*
1084  * Step through all NDA peripheral drivers, and if the device is still open,
1085  * sync the disk cache to physical media.
1086  */
1087 static void
1088 ndaflush(void)
1089 {
1090         struct cam_periph *periph;
1091         struct nda_softc *softc;
1092         union ccb *ccb;
1093         int error;
1094
1095         CAM_PERIPH_FOREACH(periph, &ndadriver) {
1096                 softc = (struct nda_softc *)periph->softc;
1097
1098                 if (SCHEDULER_STOPPED()) {
1099                         /*
1100                          * If we panicked with the lock held or the periph is not
1101                          * open, do not recurse.  Otherwise, call ndadump since that
1102                          * avoids the sleep that cam_periph_getccb would otherwise do
1103                          * if no CCBs are available.
1104                          */
1105                         if (!cam_periph_owned(periph) &&
1106                             (softc->flags & NDA_FLAG_OPEN)) {
1107                                 ndadump(softc->disk, NULL, 0, 0, 0);
1108                         }
1109                         continue;
1110                 }
1111
1112                 /*
1113                  * We only sync the cache if the drive is still open
1114                  */
1115                 cam_periph_lock(periph);
1116                 if ((softc->flags & NDA_FLAG_OPEN) == 0) {
1117                         cam_periph_unlock(periph);
1118                         continue;
1119                 }
1120
1121                 ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL);
1122                 nda_nvme_flush(softc, &ccb->nvmeio);
1123                 error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0,
1124                     /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY,
1125                     softc->disk->d_devstat);
1126                 if (error != 0)
1127                         xpt_print(periph->path, "Synchronize cache failed\n");
1128                 xpt_release_ccb(ccb);
1129                 cam_periph_unlock(periph);
1130         }
1131 }
1132
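/*
 * Shutdown and suspend event handlers: both simply flush the write caches
 * of all open nda devices via ndaflush().
 */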
1133 static void
1134 ndashutdown(void *arg, int howto)
1135 {
1136
1137         ndaflush();
1138 }
1139
1140 static void
1141 ndasuspend(void *arg)
1142 {
1143
1144         ndaflush();
1145 }