/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include "nvme_private.h"

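/*
 * Generic completion callback for synchronous admin commands issued during
 *  controller initialization.  The submitting thread sleeps on its local
 *  nvme_completion structure; this callback copies the completion status
 *  into that structure and wakes the sleeper via the pool mutex hashed on
 *  the structure's address.
 */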
static void
nvme_ctrlr_cb(void *arg, const struct nvme_completion *status)
{
        struct nvme_completion  *cpl = arg;
        struct mtx              *mtx;

        /*
         * Copy status into the argument passed by the caller, so that
         *  the caller can check the status to determine if the request
         *  passed or failed.
         */
        memcpy(cpl, status, sizeof(*cpl));
        mtx = mtx_pool_find(mtxpool_sleep, cpl);
        mtx_lock(mtx);
        wakeup(cpl);
        mtx_unlock(mtx);
}

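/*
 * Map the controller's register BAR (and, if present, the separate MSI-X
 *  table BAR) so that the control and doorbell registers can be accessed
 *  through bus_space.
 */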
static int
nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
{

        /* Chatham puts the NVMe MMRs behind BAR 2/3, not BAR 0/1. */
        if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
                ctrlr->resource_id = PCIR_BAR(2);
        else
                ctrlr->resource_id = PCIR_BAR(0);

        ctrlr->resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY,
            &ctrlr->resource_id, 0, ~0, 1, RF_ACTIVE);

        if (ctrlr->resource == NULL) {
                device_printf(ctrlr->dev, "unable to allocate pci resource\n");
                return (ENOMEM);
        }

        ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
        ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
        ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;

        /*
         * The NVMe spec allows for the MSI-X table to be placed behind
         *  BAR 4/5, separate from the control/doorbell registers.  Always
         *  try to map this bar, because it must be mapped prior to calling
         *  pci_alloc_msix().  If the table isn't behind BAR 4/5,
         *  bus_alloc_resource() will just return NULL which is OK.
         */
        ctrlr->bar4_resource_id = PCIR_BAR(4);
        ctrlr->bar4_resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY,
            &ctrlr->bar4_resource_id, 0, ~0, 1, RF_ACTIVE);

        return (0);
}

#ifdef CHATHAM2
static int
nvme_ctrlr_allocate_chatham_bar(struct nvme_controller *ctrlr)
{

        ctrlr->chatham_resource_id = PCIR_BAR(CHATHAM_CONTROL_BAR);
        ctrlr->chatham_resource = bus_alloc_resource(ctrlr->dev,
            SYS_RES_MEMORY, &ctrlr->chatham_resource_id, 0, ~0, 1,
            RF_ACTIVE);

        if (ctrlr->chatham_resource == NULL) {
                device_printf(ctrlr->dev, "unable to alloc pci resource\n");
                return (ENOMEM);
        }

        ctrlr->chatham_bus_tag = rman_get_bustag(ctrlr->chatham_resource);
        ctrlr->chatham_bus_handle =
            rman_get_bushandle(ctrlr->chatham_resource);

        return (0);
}

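/*
 * One-time setup of the Chatham prototype board: read its version and
 *  capacity registers, then program the size and timing registers (flash
 *  timings if the hw.nvme.use_flash_timings tunable is set, DDR timings
 *  otherwise).
 */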
static void
nvme_ctrlr_setup_chatham(struct nvme_controller *ctrlr)
{
        uint64_t reg1, reg2, reg3;
        uint64_t temp1, temp2;
        uint32_t temp3;
        uint32_t use_flash_timings = 0;

        DELAY(10000);

        temp3 = chatham_read_4(ctrlr, 0x8080);

        device_printf(ctrlr->dev, "Chatham version: 0x%x\n", temp3);

        ctrlr->chatham_lbas = chatham_read_4(ctrlr, 0x8068) - 0x110;
        ctrlr->chatham_size = ctrlr->chatham_lbas * 512;

        device_printf(ctrlr->dev, "Chatham size: %lld\n",
            (long long)ctrlr->chatham_size);

        reg1 = reg2 = reg3 = ctrlr->chatham_size - 1;

        TUNABLE_INT_FETCH("hw.nvme.use_flash_timings", &use_flash_timings);
        if (use_flash_timings) {
                device_printf(ctrlr->dev, "Chatham: using flash timings\n");
                temp1 = 0x00001b58000007d0LL;
                temp2 = 0x000000cb00000131LL;
        } else {
                device_printf(ctrlr->dev, "Chatham: using DDR timings\n");
                temp1 = temp2 = 0x0LL;
        }

        chatham_write_8(ctrlr, 0x8000, reg1);
        chatham_write_8(ctrlr, 0x8008, reg2);
        chatham_write_8(ctrlr, 0x8010, reg3);

        chatham_write_8(ctrlr, 0x8020, temp1);
        temp3 = chatham_read_4(ctrlr, 0x8020);

        chatham_write_8(ctrlr, 0x8028, temp2);
        temp3 = chatham_read_4(ctrlr, 0x8028);

        chatham_write_8(ctrlr, 0x8030, temp1);
        chatham_write_8(ctrlr, 0x8038, temp2);
        chatham_write_8(ctrlr, 0x8040, temp1);
        chatham_write_8(ctrlr, 0x8048, temp2);
        chatham_write_8(ctrlr, 0x8050, temp1);
        chatham_write_8(ctrlr, 0x8058, temp2);

        DELAY(10000);
}

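/*
 * Chatham returns garbage for IDENTIFY CONTROLLER, so synthesize a
 *  plausible controller data structure (vendor ID, identification strings,
 *  queue entry sizes, etc.) by hand after the identify command completes.
 */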
static void
nvme_chatham_populate_cdata(struct nvme_controller *ctrlr)
{
        struct nvme_controller_data *cdata;

        cdata = &ctrlr->cdata;

        cdata->vid = 0x8086;
        cdata->ssvid = 0x2011;

        /*
         * Chatham2 puts garbage data in these fields when we
         *  invoke IDENTIFY_CONTROLLER, so we need to re-zero
         *  the fields before calling bcopy().
         */
        memset(cdata->sn, 0, sizeof(cdata->sn));
        memcpy(cdata->sn, "2012", strlen("2012"));
        memset(cdata->mn, 0, sizeof(cdata->mn));
        memcpy(cdata->mn, "CHATHAM2", strlen("CHATHAM2"));
        memset(cdata->fr, 0, sizeof(cdata->fr));
        memcpy(cdata->fr, "0", strlen("0"));
        cdata->rab = 8;
        cdata->aerl = 3;
        cdata->lpa.ns_smart = 1;
        cdata->sqes.min = 6;
        cdata->sqes.max = 6;
        cdata->cqes.min = 4;
        cdata->cqes.max = 4;
        cdata->nn = 1;

        /* Chatham2 doesn't support the DSM command. */
        cdata->oncs.dsm = 0;

        cdata->vwc.present = 1;
}
#endif /* CHATHAM2 */

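/*
 * Construct the admin queue pair.  The queue depth comes from the
 *  hw.nvme.admin_entries tunable (bounds-checked against the driver's
 *  min/max), and interrupt vector 0 is used for the admin queue.
 */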
static void
nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
{
        struct nvme_qpair       *qpair;
        uint32_t                num_entries;

        qpair = &ctrlr->adminq;

        num_entries = NVME_ADMIN_ENTRIES;
        TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries);
        /*
         * If admin_entries was overridden to an invalid value, revert it
         *  back to our default value.
         */
        if (num_entries < NVME_MIN_ADMIN_ENTRIES ||
            num_entries > NVME_MAX_ADMIN_ENTRIES) {
                printf("nvme: invalid hw.nvme.admin_entries=%d specified\n",
                    num_entries);
                num_entries = NVME_ADMIN_ENTRIES;
        }

        /*
         * The admin queue's max xfer size is treated differently than the
         *  max I/O xfer size.  16KB is sufficient here - maybe even less?
         */
        nvme_qpair_construct(qpair,
                             0, /* qpair ID */
                             0, /* vector */
                             num_entries,
                             NVME_ADMIN_TRACKERS,
                             16*1024, /* max xfer size */
                             ctrlr);
}

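/*
 * Construct the controller's I/O queue pairs (one per CPU when per-CPU
 *  queues are enabled, otherwise a single pair).  The queue depth is
 *  clamped to the controller's CAP.MQES limit, the tracker count to the
 *  driver's min/max, and each queue's interrupt is bound to its CPU when
 *  per-CPU I/O queues are in use.
 */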
static int
nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
{
        struct nvme_qpair       *qpair;
        union cap_lo_register   cap_lo;
        int                     i, num_entries, num_trackers;

        num_entries = NVME_IO_ENTRIES;
        TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);

        /*
         * NVMe spec sets a hard limit of 64K max entries, but
         *  devices may specify a smaller limit, so we need to check
         *  the MQES field in the capabilities register.
         */
        cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
        num_entries = min(num_entries, cap_lo.bits.mqes+1);

        num_trackers = NVME_IO_TRACKERS;
        TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers);

        num_trackers = max(num_trackers, NVME_MIN_IO_TRACKERS);
        num_trackers = min(num_trackers, NVME_MAX_IO_TRACKERS);
        /*
         * No need to have more trackers than entries in the submit queue.
         *  Note also that for a queue size of N, we can only have (N-1)
         *  commands outstanding, hence the "-1" here.
         */
        num_trackers = min(num_trackers, (num_entries-1));

        ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
        TUNABLE_INT_FETCH("hw.nvme.max_xfer_size", &ctrlr->max_xfer_size);
        /*
         * Check that the tunable doesn't specify a size greater than what
         *  our driver supports, and that it is an even multiple of
         *  PAGE_SIZE.
         */
        if (ctrlr->max_xfer_size > NVME_MAX_XFER_SIZE ||
            ctrlr->max_xfer_size % PAGE_SIZE)
                ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;

        ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair),
            M_NVME, M_ZERO | M_NOWAIT);

        if (ctrlr->ioq == NULL)
                return (ENOMEM);

        for (i = 0; i < ctrlr->num_io_queues; i++) {
                qpair = &ctrlr->ioq[i];

                /*
                 * Admin queue has ID=0. IO queues start at ID=1 -
                 *  hence the 'i+1' here.
                 *
                 * For I/O queues, use the controller-wide max_xfer_size
                 *  calculated in nvme_attach().
                 */
                nvme_qpair_construct(qpair,
                                     i+1, /* qpair ID */
                                     ctrlr->msix_enabled ? i+1 : 0, /* vector */
                                     num_entries,
                                     num_trackers,
                                     ctrlr->max_xfer_size,
                                     ctrlr);

                if (ctrlr->per_cpu_io_queues)
                        bus_bind_intr(ctrlr->dev, qpair->res, i);
        }

        return (0);
}

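/*
 * Poll CSTS.RDY until the controller reports ready, giving up after the
 *  CAP.TO-derived timeout.  The caller must have already set CC.EN.
 */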
static int
nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr)
{
        int ms_waited;
        union cc_register cc;
        union csts_register csts;

        cc.raw = nvme_mmio_read_4(ctrlr, cc);
        csts.raw = nvme_mmio_read_4(ctrlr, csts);

        if (!cc.bits.en) {
                device_printf(ctrlr->dev, "%s called with cc.en = 0\n",
                    __func__);
                return (ENXIO);
        }

        ms_waited = 0;

        while (!csts.bits.rdy) {
                DELAY(1000);
                if (ms_waited++ > ctrlr->ready_timeout_in_ms) {
                        device_printf(ctrlr->dev, "controller did not become "
                            "ready within %d ms\n", ctrlr->ready_timeout_in_ms);
                        return (ENXIO);
                }
                csts.raw = nvme_mmio_read_4(ctrlr, csts);
        }

        return (0);
}

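/*
 * Disable the controller by clearing CC.EN.  If an enable is still in
 *  progress (EN set but RDY not yet set), wait for the controller to
 *  become ready first so that EN is not toggled mid-transition.
 */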
static void
nvme_ctrlr_disable(struct nvme_controller *ctrlr)
{
        union cc_register cc;
        union csts_register csts;

        cc.raw = nvme_mmio_read_4(ctrlr, cc);
        csts.raw = nvme_mmio_read_4(ctrlr, csts);

        if (cc.bits.en == 1 && csts.bits.rdy == 0)
                nvme_ctrlr_wait_for_ready(ctrlr);

        cc.bits.en = 0;
        nvme_mmio_write_4(ctrlr, cc, cc.raw);
        DELAY(5000);
}

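/*
 * Enable the controller: program the admin queue base addresses and sizes
 *  (ASQ/ACQ/AQA), set the queue entry sizes and memory page size in CC,
 *  then set CC.EN and wait for CSTS.RDY.
 */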
static int
nvme_ctrlr_enable(struct nvme_controller *ctrlr)
{
        union cc_register       cc;
        union csts_register     csts;
        union aqa_register      aqa;

        cc.raw = nvme_mmio_read_4(ctrlr, cc);
        csts.raw = nvme_mmio_read_4(ctrlr, csts);

        if (cc.bits.en == 1) {
                if (csts.bits.rdy == 1)
                        return (0);
                else
                        return (nvme_ctrlr_wait_for_ready(ctrlr));
        }

        nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
        DELAY(5000);
        nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
        DELAY(5000);

        aqa.raw = 0;
        /* acqs and asqs are 0-based. */
        aqa.bits.acqs = ctrlr->adminq.num_entries-1;
        aqa.bits.asqs = ctrlr->adminq.num_entries-1;
        nvme_mmio_write_4(ctrlr, aqa, aqa.raw);
        DELAY(5000);

        cc.bits.en = 1;
        cc.bits.css = 0;
        cc.bits.ams = 0;
        cc.bits.shn = 0;
        cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
        cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */

        /*
         * CC.MPS encodes the host memory page size as 2^(12+MPS) bytes.
         *  For a 4KB PAGE_SIZE this evaluates to 0, as the spec requires.
         */
        cc.bits.mps = (PAGE_SIZE >> 13);

        nvme_mmio_write_4(ctrlr, cc, cc.raw);
        DELAY(5000);

        return (nvme_ctrlr_wait_for_ready(ctrlr));
}

int
nvme_ctrlr_reset(struct nvme_controller *ctrlr)
{

        nvme_ctrlr_disable(ctrlr);
        return (nvme_ctrlr_enable(ctrlr));
}

/*
 * Disable this code for now, since Chatham doesn't support
 *  AERs so I have no good way to test them.
 */
#if 0
static void
nvme_async_event_cb(void *arg, const struct nvme_completion *status)
{
        struct nvme_controller *ctrlr = arg;

        printf("Asynchronous event occurred.\n");

        /* TODO: decode async event type based on status */
        /* TODO: check status for any error bits */

        /*
         * Repost an asynchronous event request so that it can be
         *  used again by the controller.
         */
        nvme_ctrlr_cmd_asynchronous_event_request(ctrlr, nvme_async_event_cb,
            ctrlr);
}
#endif

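/*
 * Issue IDENTIFY CONTROLLER synchronously and cache the result in
 *  ctrlr->cdata.  On Chatham, overwrite the garbage identify data with
 *  hand-populated values afterwards.
 */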
static int
nvme_ctrlr_identify(struct nvme_controller *ctrlr)
{
        struct mtx              *mtx;
        struct nvme_completion  cpl;
        int                     status;

        mtx = mtx_pool_find(mtxpool_sleep, &cpl);

        mtx_lock(mtx);
        nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
            nvme_ctrlr_cb, &cpl);
        status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
        mtx_unlock(mtx);
        if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
                printf("nvme_identify_controller failed!\n");
                return (ENXIO);
        }

#ifdef CHATHAM2
        if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
                nvme_chatham_populate_cdata(ctrlr);
#endif

        return (0);
}

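/*
 * Ask the controller (via the Number of Queues feature) for as many I/O
 *  queue pairs as we intend to use.  If it grants fewer than requested,
 *  fall back to a single I/O queue pair.
 */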
static int
nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr)
{
        struct mtx              *mtx;
        struct nvme_completion  cpl;
        int                     cq_allocated, sq_allocated, status;

        mtx = mtx_pool_find(mtxpool_sleep, &cpl);

        mtx_lock(mtx);
        nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues,
            nvme_ctrlr_cb, &cpl);
        status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
        mtx_unlock(mtx);
        if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
                printf("nvme_set_num_queues failed!\n");
                return (ENXIO);
        }

        /*
         * Data in cdw0 is 0-based.
         * Lower 16-bits indicate number of submission queues allocated.
         * Upper 16-bits indicate number of completion queues allocated.
         */
        sq_allocated = (cpl.cdw0 & 0xFFFF) + 1;
        cq_allocated = (cpl.cdw0 >> 16) + 1;

        /*
         * Check that the controller was able to allocate the number of
         *  queues we requested.  If not, revert to one IO queue.
         */
        if (sq_allocated < ctrlr->num_io_queues ||
            cq_allocated < ctrlr->num_io_queues) {
                ctrlr->num_io_queues = 1;
                ctrlr->per_cpu_io_queues = 0;

                /*
                 * TODO: destroy extra queues that were created
                 *  previously but now found to be not needed.
                 */
        }

        return (0);
}

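/*
 * Create the I/O completion and submission queues on the controller, one
 *  pair at a time, waiting synchronously for each admin command.  Each
 *  completion queue is created before the submission queue that posts
 *  to it.
 */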
static int
nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr)
{
        struct mtx              *mtx;
        struct nvme_qpair       *qpair;
        struct nvme_completion  cpl;
        int                     i, status;

        mtx = mtx_pool_find(mtxpool_sleep, &cpl);

        for (i = 0; i < ctrlr->num_io_queues; i++) {
                qpair = &ctrlr->ioq[i];

                mtx_lock(mtx);
                nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector,
                    nvme_ctrlr_cb, &cpl);
                status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
                mtx_unlock(mtx);
                if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
                        printf("nvme_create_io_cq failed!\n");
                        return (ENXIO);
                }

                mtx_lock(mtx);
                nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair,
                    nvme_ctrlr_cb, &cpl);
                status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
                mtx_unlock(mtx);
                if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
                        printf("nvme_create_io_sq failed!\n");
                        return (ENXIO);
                }
        }

        return (0);
}

static int
nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
{
        struct nvme_namespace   *ns;
        int                     i, status;

        for (i = 0; i < ctrlr->cdata.nn; i++) {
                ns = &ctrlr->ns[i];
                status = nvme_ns_construct(ns, i+1, ctrlr);
                if (status != 0)
                        return (status);
        }

        return (0);
}

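/*
 * Enable reporting of all critical-warning asynchronous events via Set
 *  Features.  Posting of the asynchronous event requests themselves is
 *  still disabled (see the #if 0 block below) for lack of hardware that
 *  can generate AERs for testing.
 */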
static void
nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
{
        union nvme_critical_warning_state       state;
        uint8_t                                 num_async_events;

        state.raw = 0xFF;
        state.bits.reserved = 0;
        nvme_ctrlr_cmd_set_asynchronous_event_config(ctrlr, state, NULL, NULL);

        /* aerl is a zero-based value, so we need to add 1 here. */
        num_async_events = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1));

        /*
         * Disable this code for now, since Chatham doesn't support
         *  AERs so I have no good way to test them.
         */
#if 0
        for (int i = 0; i < num_async_events; i++)
                nvme_ctrlr_cmd_asynchronous_event_request(ctrlr,
                    nvme_async_event_cb, ctrlr);
#endif
}

static void
nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr)
{

        ctrlr->int_coal_time = 0;
        TUNABLE_INT_FETCH("hw.nvme.int_coal_time",
            &ctrlr->int_coal_time);

        ctrlr->int_coal_threshold = 0;
        TUNABLE_INT_FETCH("hw.nvme.int_coal_threshold",
            &ctrlr->int_coal_threshold);

        nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time,
            ctrlr->int_coal_threshold, NULL, NULL);
}

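/*
 * Config intrhook callback: bring the controller the rest of the way up
 *  once interrupts are available - identify, negotiate queue counts,
 *  create the I/O queues, attach namespaces, and configure AER and
 *  interrupt coalescing.  Sysctls are set up even on failure to aid
 *  debugging.
 */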
void
nvme_ctrlr_start(void *ctrlr_arg)
{
        struct nvme_controller *ctrlr = ctrlr_arg;

        if (nvme_ctrlr_identify(ctrlr) != 0)
                goto err;

        if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0)
                goto err;

        if (nvme_ctrlr_create_qpairs(ctrlr) != 0)
                goto err;

        if (nvme_ctrlr_construct_namespaces(ctrlr) != 0)
                goto err;

        nvme_ctrlr_configure_aer(ctrlr);
        nvme_ctrlr_configure_int_coalescing(ctrlr);

        ctrlr->is_started = TRUE;

err:

        /*
         * Initialize sysctls, even if controller failed to start, to
         *  assist with debugging admin queue pair.
         */
        nvme_sysctl_initialize_ctrlr(ctrlr);
        config_intrhook_disestablish(&ctrlr->config_hook);
}

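/*
 * Legacy INTx interrupt handler.  Mask further interrupts via INTMS,
 *  process completions on the admin queue and the single I/O queue, then
 *  unmask via INTMC.
 */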
static void
nvme_ctrlr_intx_handler(void *arg)
{
        struct nvme_controller *ctrlr = arg;

        nvme_mmio_write_4(ctrlr, intms, 1);

        nvme_qpair_process_completions(&ctrlr->adminq);

        if (ctrlr->ioq[0].cpl)
                nvme_qpair_process_completions(&ctrlr->ioq[0]);

        nvme_mmio_write_4(ctrlr, intmc, 1);
}

static int
nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
{

        ctrlr->num_io_queues = 1;
        ctrlr->per_cpu_io_queues = 0;
        ctrlr->rid = 0;
        ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
            &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);

        if (ctrlr->res == NULL) {
                device_printf(ctrlr->dev, "unable to allocate shared IRQ\n");
                return (ENOMEM);
        }

        bus_setup_intr(ctrlr->dev, ctrlr->res,
            INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
            ctrlr, &ctrlr->tag);

        if (ctrlr->tag == NULL) {
                device_printf(ctrlr->dev,
                    "unable to setup legacy interrupt handler\n");
                return (ENOMEM);
        }

        return (0);
}

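/*
 * ioctl handler for the controller's /dev/nvmeX node.  Only
 *  NVME_IDENTIFY_CONTROLLER is supported: refresh the cached identify
 *  data (except on Chatham, which returns garbage) and copy it out to
 *  the caller.
 */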
static int
nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
        struct nvme_controller  *ctrlr;
        struct nvme_completion  cpl;
        struct mtx              *mtx;

        ctrlr = cdev->si_drv1;

        switch (cmd) {
        case NVME_IDENTIFY_CONTROLLER:
#ifdef CHATHAM2
                /*
                 * Don't refresh data on Chatham, since Chatham returns
                 *  garbage on IDENTIFY anyways.
                 */
                if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) {
                        memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
                        break;
                }
#endif
                /* Refresh data before returning to user. */
                mtx = mtx_pool_find(mtxpool_sleep, &cpl);
                mtx_lock(mtx);
                nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
                    nvme_ctrlr_cb, &cpl);
                msleep(&cpl, mtx, PRIBIO, "nvme_ioctl", 0);
                mtx_unlock(mtx);
                if (cpl.sf_sc || cpl.sf_sct)
                        return (ENXIO);
                memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
                break;
        default:
                return (ENOTTY);
        }

        return (0);
}

static struct cdevsw nvme_ctrlr_cdevsw = {
        .d_version =    D_VERSION,
        .d_flags =      0,
        .d_ioctl =      nvme_ctrlr_ioctl
};

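/*
 * Construct the controller: map BARs, check the doorbell stride, derive
 *  the ready timeout from CAP.TO (reported in 500ms units), pick between
 *  per-CPU MSI-X and legacy INTx interrupts, build the admin and I/O
 *  queue pairs, and create the /dev/nvmeX character device.
 */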
int
nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
{
        union cap_lo_register   cap_lo;
        union cap_hi_register   cap_hi;
        int                     num_vectors, per_cpu_io_queues, status = 0;

        ctrlr->dev = dev;
        ctrlr->is_started = FALSE;

        status = nvme_ctrlr_allocate_bar(ctrlr);

        if (status != 0)
                return (status);

#ifdef CHATHAM2
        if (pci_get_devid(dev) == CHATHAM_PCI_ID) {
                status = nvme_ctrlr_allocate_chatham_bar(ctrlr);
                if (status != 0)
                        return (status);
                nvme_ctrlr_setup_chatham(ctrlr);
        }
#endif

        /*
         * Software emulators may set the doorbell stride to something
         *  other than zero, but this driver is not set up to handle that.
         */
        cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi);
        if (cap_hi.bits.dstrd != 0)
                return (ENXIO);

        /* Get ready timeout value from controller, in units of 500ms. */
        cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
        ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500;

        per_cpu_io_queues = 1;
        TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
        ctrlr->per_cpu_io_queues = per_cpu_io_queues ? TRUE : FALSE;

        if (ctrlr->per_cpu_io_queues)
                ctrlr->num_io_queues = mp_ncpus;
        else
                ctrlr->num_io_queues = 1;

        ctrlr->force_intx = 0;
        TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);

        ctrlr->msix_enabled = 1;

        if (ctrlr->force_intx) {
                ctrlr->msix_enabled = 0;
                goto intx;
        }

        /* One vector per IO queue, plus one vector for admin queue. */
        num_vectors = ctrlr->num_io_queues + 1;

        if (pci_msix_count(dev) < num_vectors) {
                ctrlr->msix_enabled = 0;
                goto intx;
        }

        if (pci_alloc_msix(dev, &num_vectors) != 0)
                ctrlr->msix_enabled = 0;

intx:

        if (!ctrlr->msix_enabled)
                nvme_ctrlr_configure_intx(ctrlr);

        nvme_ctrlr_construct_admin_qpair(ctrlr);

        status = nvme_ctrlr_construct_io_qpairs(ctrlr);

        if (status != 0)
                return (status);

        ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
            "nvme%d", device_get_unit(dev));

        if (ctrlr->cdev == NULL)
                return (ENXIO);

        ctrlr->cdev->si_drv1 = (void *)ctrlr;

        return (0);
}

void
nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
    struct nvme_request *req)
{

        nvme_qpair_submit_request(&ctrlr->adminq, req);
}

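/*
 * Submit an I/O request, routing it to the submitting CPU's queue pair
 *  when per-CPU I/O queues are in use, and to the single I/O queue pair
 *  otherwise.
 */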
void
nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
    struct nvme_request *req)
{
        struct nvme_qpair       *qpair;

        if (ctrlr->per_cpu_io_queues)
                qpair = &ctrlr->ioq[curcpu];
        else
                qpair = &ctrlr->ioq[0];

        nvme_qpair_submit_request(qpair, req);
}