/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include "nvme_private.h"

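/*
 * Completion callback shared by the synchronous admin commands in this
 * file: the submitting thread passes the address of a stack-allocated
 * nvme_completion, looks up a pool mutex keyed on that address, and
 * msleep()s on it.  This callback copies the controller's completion
 * status into that structure and issues the matching wakeup().
 */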
static void
nvme_ctrlr_cb(void *arg, const struct nvme_completion *status)
{
        struct nvme_completion  *cpl = arg;
        struct mtx              *mtx;

        /*
         * Copy status into the argument passed by the caller, so that
         *  the caller can check the status to determine if the
         *  request passed or failed.
         */
        memcpy(cpl, status, sizeof(*cpl));
        mtx = mtx_pool_find(mtxpool_sleep, cpl);
        mtx_lock(mtx);
        wakeup(cpl);
        mtx_unlock(mtx);
}

static int
nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
{

        /* Chatham puts the NVMe MMRs behind BAR 2/3, not BAR 0/1. */
        if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
                ctrlr->resource_id = PCIR_BAR(2);
        else
                ctrlr->resource_id = PCIR_BAR(0);

        ctrlr->resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY,
            &ctrlr->resource_id, 0, ~0, 1, RF_ACTIVE);

        if (ctrlr->resource == NULL) {
                device_printf(ctrlr->dev, "unable to allocate pci resource\n");
                return (ENOMEM);
        }

        ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
        ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
        ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;

        return (0);
}

#ifdef CHATHAM2
static int
nvme_ctrlr_allocate_chatham_bar(struct nvme_controller *ctrlr)
{

        ctrlr->chatham_resource_id = PCIR_BAR(CHATHAM_CONTROL_BAR);
        ctrlr->chatham_resource = bus_alloc_resource(ctrlr->dev,
            SYS_RES_MEMORY, &ctrlr->chatham_resource_id, 0, ~0, 1,
            RF_ACTIVE);

        if (ctrlr->chatham_resource == NULL) {
                device_printf(ctrlr->dev, "unable to alloc pci resource\n");
                return (ENOMEM);
        }

        ctrlr->chatham_bus_tag = rman_get_bustag(ctrlr->chatham_resource);
        ctrlr->chatham_bus_handle =
            rman_get_bushandle(ctrlr->chatham_resource);

        return (0);
}

static void
nvme_ctrlr_setup_chatham(struct nvme_controller *ctrlr)
{
        uint64_t reg1, reg2, reg3;
        uint64_t temp1, temp2;
        uint32_t temp3;
        uint32_t use_flash_timings = 0;

        DELAY(10000);

        temp3 = chatham_read_4(ctrlr, 0x8080);

        device_printf(ctrlr->dev, "Chatham version: 0x%x\n", temp3);

        ctrlr->chatham_lbas = chatham_read_4(ctrlr, 0x8068) - 0x110;
        ctrlr->chatham_size = ctrlr->chatham_lbas * 512;

        device_printf(ctrlr->dev, "Chatham size: %lld\n",
            (long long)ctrlr->chatham_size);

        reg1 = reg2 = reg3 = ctrlr->chatham_size - 1;

        TUNABLE_INT_FETCH("hw.nvme.use_flash_timings", &use_flash_timings);
        if (use_flash_timings) {
                device_printf(ctrlr->dev, "Chatham: using flash timings\n");
                temp1 = 0x00001b58000007d0LL;
                temp2 = 0x000000cb00000131LL;
        } else {
                device_printf(ctrlr->dev, "Chatham: using DDR timings\n");
                temp1 = temp2 = 0x0LL;
        }

        chatham_write_8(ctrlr, 0x8000, reg1);
        chatham_write_8(ctrlr, 0x8008, reg2);
        chatham_write_8(ctrlr, 0x8010, reg3);

        chatham_write_8(ctrlr, 0x8020, temp1);
        temp3 = chatham_read_4(ctrlr, 0x8020);

        chatham_write_8(ctrlr, 0x8028, temp2);
        temp3 = chatham_read_4(ctrlr, 0x8028);

        chatham_write_8(ctrlr, 0x8030, temp1);
        chatham_write_8(ctrlr, 0x8038, temp2);
        chatham_write_8(ctrlr, 0x8040, temp1);
        chatham_write_8(ctrlr, 0x8048, temp2);
        chatham_write_8(ctrlr, 0x8050, temp1);
        chatham_write_8(ctrlr, 0x8058, temp2);

        DELAY(10000);
}

static void
nvme_chatham_populate_cdata(struct nvme_controller *ctrlr)
{
        struct nvme_controller_data *cdata;

        cdata = &ctrlr->cdata;

        cdata->vid = 0x8086;
        cdata->ssvid = 0x2011;

        /*
         * Chatham2 puts garbage data in these fields when we
         *  invoke IDENTIFY_CONTROLLER, so we need to re-zero
         *  the fields before copying in the dummy values below.
         */
        memset(cdata->sn, 0, sizeof(cdata->sn));
        memcpy(cdata->sn, "2012", strlen("2012"));
        memset(cdata->mn, 0, sizeof(cdata->mn));
        memcpy(cdata->mn, "CHATHAM2", strlen("CHATHAM2"));
        memset(cdata->fr, 0, sizeof(cdata->fr));
        memcpy(cdata->fr, "0", strlen("0"));
        cdata->rab = 8;
        cdata->aerl = 3;
        cdata->lpa.ns_smart = 1;
        cdata->sqes.min = 6;
        cdata->sqes.max = 6;
        cdata->cqes.min = 4;
        cdata->cqes.max = 4;
        cdata->nn = 1;

        /* Chatham2 doesn't support the DSM command. */
        cdata->oncs.dsm = 0;

        cdata->vwc.present = 1;
}
#endif /* CHATHAM2 */

static void
nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
{
        struct nvme_qpair       *qpair;
        uint32_t                num_entries;

        qpair = &ctrlr->adminq;

        num_entries = NVME_ADMIN_ENTRIES;
        TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries);
        /*
         * If admin_entries was overridden to an invalid value, revert it
         *  to our default value.
         */
        if (num_entries < NVME_MIN_ADMIN_ENTRIES ||
            num_entries > NVME_MAX_ADMIN_ENTRIES) {
                printf("nvme: invalid hw.nvme.admin_entries=%d specified\n",
                    num_entries);
                num_entries = NVME_ADMIN_ENTRIES;
        }

        /*
         * The admin queue's max xfer size is treated differently from the
         *  max I/O xfer size.  16KB is sufficient here - maybe even less?
         */
        nvme_qpair_construct(qpair,
                             0, /* qpair ID */
                             0, /* vector */
                             num_entries,
                             NVME_ADMIN_TRACKERS,
                             16*1024, /* max xfer size */
                             ctrlr);
}

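/*
 * Size and construct the I/O queue pairs: clamp the requested queue depth
 * to what the controller advertises (CAP.MQES), bound the number of
 * trackers, validate the max transfer size tunable, then construct one
 * qpair per I/O queue, binding its interrupt to a CPU when per-CPU queues
 * are in use.
 */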
static int
nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
{
        struct nvme_qpair       *qpair;
        union cap_lo_register   cap_lo;
        int                     i, num_entries, num_trackers;

        num_entries = NVME_IO_ENTRIES;
        TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);

        /*
         * NVMe spec sets a hard limit of 64K max entries, but
         *  devices may specify a smaller limit, so we need to check
         *  the MQES field in the capabilities register.
         */
        cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
        num_entries = min(num_entries, cap_lo.bits.mqes+1);
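        /*
         * Note that MQES is zero-based; e.g. a raw MQES value of 255 means
         *  the device supports queues of up to 256 entries, hence the "+1"
         *  above.
         */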

        num_trackers = NVME_IO_TRACKERS;
        TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers);

        num_trackers = max(num_trackers, NVME_MIN_IO_TRACKERS);
        num_trackers = min(num_trackers, NVME_MAX_IO_TRACKERS);
        /*
         * No need to have more trackers than entries in the submit queue.
         *  Note also that for a queue size of N, we can only have (N-1)
         *  commands outstanding, hence the "-1" here.
         */
        num_trackers = min(num_trackers, (num_entries-1));

        ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
        TUNABLE_INT_FETCH("hw.nvme.max_xfer_size", &ctrlr->max_xfer_size);
        /*
         * Check that the tunable doesn't specify a size greater than what
         *  our driver supports, and that it is a multiple of PAGE_SIZE.
         */
        if (ctrlr->max_xfer_size > NVME_MAX_XFER_SIZE ||
            ctrlr->max_xfer_size % PAGE_SIZE)
                ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;

        ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair),
            M_NVME, M_ZERO | M_NOWAIT);

        if (ctrlr->ioq == NULL)
                return (ENOMEM);

        for (i = 0; i < ctrlr->num_io_queues; i++) {
                qpair = &ctrlr->ioq[i];

                /*
                 * The admin queue has ID=0 and I/O queues start at ID=1 -
                 *  hence the 'i+1' here.
                 *
                 * For I/O queues, use the controller-wide max_xfer_size
                 *  calculated above.
                 */
                nvme_qpair_construct(qpair,
                                     i+1, /* qpair ID */
                                     ctrlr->msix_enabled ? i+1 : 0, /* vector */
                                     num_entries,
                                     num_trackers,
                                     ctrlr->max_xfer_size,
                                     ctrlr);

                if (ctrlr->per_cpu_io_queues)
                        bus_bind_intr(ctrlr->dev, qpair->res, i);
        }

        return (0);
}

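/*
 * Poll CSTS.RDY roughly once per millisecond until the controller reports
 * ready, or fail with ENXIO once ready_timeout_in_ms (derived from CAP.TO
 * in nvme_ctrlr_construct()) has elapsed.
 */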
static int
nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr)
{
        int ms_waited;
        union cc_register cc;
        union csts_register csts;

        cc.raw = nvme_mmio_read_4(ctrlr, cc);
        csts.raw = nvme_mmio_read_4(ctrlr, csts);

        if (!cc.bits.en) {
                device_printf(ctrlr->dev, "%s called with cc.en = 0\n",
                    __func__);
                return (ENXIO);
        }

        ms_waited = 0;

        while (!csts.bits.rdy) {
                DELAY(1000);
                if (ms_waited++ > ctrlr->ready_timeout_in_ms) {
                        device_printf(ctrlr->dev, "controller did not become "
                            "ready within %d ms\n", ctrlr->ready_timeout_in_ms);
                        return (ENXIO);
                }
                csts.raw = nvme_mmio_read_4(ctrlr, csts);
        }

        return (0);
}

static void
nvme_ctrlr_disable(struct nvme_controller *ctrlr)
{
        union cc_register cc;
        union csts_register csts;

        cc.raw = nvme_mmio_read_4(ctrlr, cc);
        csts.raw = nvme_mmio_read_4(ctrlr, csts);

        if (cc.bits.en == 1 && csts.bits.rdy == 0)
                nvme_ctrlr_wait_for_ready(ctrlr);

        cc.bits.en = 0;
        nvme_mmio_write_4(ctrlr, cc, cc.raw);
        DELAY(5000);
}

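/*
 * Bring the controller out of the disabled state: program the admin
 * submission/completion queue base addresses (ASQ/ACQ) and their sizes
 * (AQA), then write CC with the queue entry sizes and memory page size
 * and set CC.EN, and finally wait for CSTS.RDY.
 */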
static int
nvme_ctrlr_enable(struct nvme_controller *ctrlr)
{
        union cc_register       cc;
        union csts_register     csts;
        union aqa_register      aqa;

        cc.raw = nvme_mmio_read_4(ctrlr, cc);
        csts.raw = nvme_mmio_read_4(ctrlr, csts);

        if (cc.bits.en == 1) {
                if (csts.bits.rdy == 1)
                        return (0);
                else
                        return (nvme_ctrlr_wait_for_ready(ctrlr));
        }

        nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
        DELAY(5000);
        nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
        DELAY(5000);

        aqa.raw = 0;
        /* acqs and asqs are 0-based. */
        aqa.bits.acqs = ctrlr->adminq.num_entries-1;
        aqa.bits.asqs = ctrlr->adminq.num_entries-1;
        nvme_mmio_write_4(ctrlr, aqa, aqa.raw);
        DELAY(5000);

        cc.bits.en = 1;
        cc.bits.css = 0;
        cc.bits.ams = 0;
        cc.bits.shn = 0;
        cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
        cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */

        /*
         * MPS selects a host memory page size of 2^(12 + MPS) bytes, so
         *  for 4KB pages this expression evaluates to 0, as the spec
         *  requires.
         */
        cc.bits.mps = (PAGE_SIZE >> 13);

        nvme_mmio_write_4(ctrlr, cc, cc.raw);
        DELAY(5000);

        return (nvme_ctrlr_wait_for_ready(ctrlr));
}

int
nvme_ctrlr_reset(struct nvme_controller *ctrlr)
{

        nvme_ctrlr_disable(ctrlr);
        return (nvme_ctrlr_enable(ctrlr));
}

/*
 * This code is disabled for now, since Chatham doesn't support AERs and
 *  there is therefore no good way to test them.
 */
#if 0
static void
nvme_async_event_cb(void *arg, const struct nvme_completion *status)
{
        struct nvme_controller *ctrlr = arg;

        printf("Asynchronous event occurred.\n");

        /* TODO: decode async event type based on status */
        /* TODO: check status for any error bits */

        /*
         * Repost an asynchronous event request so that it can be
         *  used again by the controller.
         */
        nvme_ctrlr_cmd_asynchronous_event_request(ctrlr, nvme_async_event_cb,
            ctrlr);
}
#endif

static int
nvme_ctrlr_identify(struct nvme_controller *ctrlr)
{
        struct mtx              *mtx;
        struct nvme_completion  cpl;
        int                     status;

        mtx = mtx_pool_find(mtxpool_sleep, &cpl);

        mtx_lock(mtx);
        nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
            nvme_ctrlr_cb, &cpl);
        status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
        mtx_unlock(mtx);
        if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
                printf("nvme_identify_controller failed!\n");
                return (ENXIO);
        }

#ifdef CHATHAM2
        if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
                nvme_chatham_populate_cdata(ctrlr);
#endif

        return (0);
}

static int
nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr)
{
        struct mtx              *mtx;
        struct nvme_completion  cpl;
        int                     cq_allocated, sq_allocated, status;

        mtx = mtx_pool_find(mtxpool_sleep, &cpl);

        mtx_lock(mtx);
        nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues,
            nvme_ctrlr_cb, &cpl);
        status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
        mtx_unlock(mtx);
        if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
                printf("nvme_set_num_queues failed!\n");
                return (ENXIO);
        }

        /*
         * Data in cdw0 is 0-based.
         * Lower 16 bits indicate the number of submission queues allocated.
         * Upper 16 bits indicate the number of completion queues allocated.
         */
        sq_allocated = (cpl.cdw0 & 0xFFFF) + 1;
        cq_allocated = (cpl.cdw0 >> 16) + 1;
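        /*
         * For example, a completion with cdw0 == 0x00070007 indicates that
         *  8 submission queues and 8 completion queues were allocated.
         */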

        /*
         * Check that the controller was able to allocate the number of
         *  queues we requested.  If not, revert to one IO queue.
         */
        if (sq_allocated < ctrlr->num_io_queues ||
            cq_allocated < ctrlr->num_io_queues) {
                ctrlr->num_io_queues = 1;
                ctrlr->per_cpu_io_queues = 0;

                /*
                 * TODO: destroy extra queues that were created
                 *  previously but are no longer needed.
                 */
        }

        return (0);
}

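/*
 * Issue the Create I/O Completion Queue and Create I/O Submission Queue
 * admin commands for each qpair, using the same synchronous
 * msleep()/nvme_ctrlr_cb() pattern as above.  The completion queue is
 * created first, since the submission queue must reference an existing
 * completion queue.
 */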
static int
nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr)
{
        struct mtx              *mtx;
        struct nvme_qpair       *qpair;
        struct nvme_completion  cpl;
        int                     i, status;

        mtx = mtx_pool_find(mtxpool_sleep, &cpl);

        for (i = 0; i < ctrlr->num_io_queues; i++) {
                qpair = &ctrlr->ioq[i];

                mtx_lock(mtx);
                nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector,
                    nvme_ctrlr_cb, &cpl);
                status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
                mtx_unlock(mtx);
                if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
                        printf("nvme_create_io_cq failed!\n");
                        return (ENXIO);
                }

                mtx_lock(mtx);
                nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair,
                    nvme_ctrlr_cb, &cpl);
                status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
                mtx_unlock(mtx);
                if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
                        printf("nvme_create_io_sq failed!\n");
                        return (ENXIO);
                }
        }

        return (0);
}

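/*
 * Construct one namespace object per namespace reported by the controller
 * (cdata.nn).  Namespace IDs are 1-based, hence the 'i+1' passed to
 * nvme_ns_construct().
 */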
static int
nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
{
        struct nvme_namespace   *ns;
        int                     i, status;

        for (i = 0; i < ctrlr->cdata.nn; i++) {
                ns = &ctrlr->ns[i];
                status = nvme_ns_construct(ns, i+1, ctrlr);
                if (status != 0)
                        return (status);
        }

        return (0);
}

static void
nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
{
        union nvme_critical_warning_state       state;
        uint8_t                                 num_async_events;

        state.raw = 0xFF;
        state.bits.reserved = 0;
        nvme_ctrlr_cmd_set_asynchronous_event_config(ctrlr, state, NULL, NULL);

        /* aerl is a zero-based value, so we need to add 1 here. */
        num_async_events = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1));

        /*
         * This code is disabled for now, since Chatham doesn't support AERs
         *  and there is therefore no good way to test them.
         */
#if 0
        for (int i = 0; i < num_async_events; i++)
                nvme_ctrlr_cmd_asynchronous_event_request(ctrlr,
                    nvme_async_event_cb, ctrlr);
#endif
}

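/*
 * Fetch the interrupt coalescing tunables and push them to the controller
 * via Set Features.  Per the NVMe spec, the aggregation time is expressed
 * in 100 microsecond increments and the threshold in completion queue
 * entries; both default to 0 here, which effectively leaves coalescing
 * off.
 */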
static void
nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr)
{

        ctrlr->int_coal_time = 0;
        TUNABLE_INT_FETCH("hw.nvme.int_coal_time",
            &ctrlr->int_coal_time);

        ctrlr->int_coal_threshold = 0;
        TUNABLE_INT_FETCH("hw.nvme.int_coal_threshold",
            &ctrlr->int_coal_threshold);

        nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time,
            ctrlr->int_coal_threshold, NULL, NULL);
}

void
nvme_ctrlr_start(void *ctrlr_arg)
{
        struct nvme_controller *ctrlr = ctrlr_arg;

        if (nvme_ctrlr_identify(ctrlr) != 0)
                goto err;

        if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0)
                goto err;

        if (nvme_ctrlr_create_qpairs(ctrlr) != 0)
                goto err;

        if (nvme_ctrlr_construct_namespaces(ctrlr) != 0)
                goto err;

        nvme_ctrlr_configure_aer(ctrlr);
        nvme_ctrlr_configure_int_coalescing(ctrlr);

        ctrlr->is_started = TRUE;

err:

        /*
         * Initialize sysctls, even if the controller failed to start, to
         *  assist with debugging the admin queue pair.
         */
        nvme_sysctl_initialize_ctrlr(ctrlr);
        config_intrhook_disestablish(&ctrlr->config_hook);
}

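/*
 * Legacy INTx handling: the interrupt handler masks controller interrupts
 * via the INTMS register and defers the real work to this fast taskqueue
 * task, which drains the admin queue (and the single I/O queue, once it
 * exists) and then unmasks interrupts via INTMC.
 */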
static void
nvme_ctrlr_intx_task(void *arg, int pending)
{
        struct nvme_controller *ctrlr = arg;

        nvme_qpair_process_completions(&ctrlr->adminq);

        if (ctrlr->ioq[0].cpl)
                nvme_qpair_process_completions(&ctrlr->ioq[0]);

        nvme_mmio_write_4(ctrlr, intmc, 1);
}

static void
nvme_ctrlr_intx_handler(void *arg)
{
        struct nvme_controller *ctrlr = arg;

        nvme_mmio_write_4(ctrlr, intms, 1);
        taskqueue_enqueue_fast(ctrlr->taskqueue, &ctrlr->task);
}

static int
nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
{

        ctrlr->num_io_queues = 1;
        ctrlr->per_cpu_io_queues = 0;
        ctrlr->rid = 0;
        ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
            &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);

        if (ctrlr->res == NULL) {
                device_printf(ctrlr->dev, "unable to allocate shared IRQ\n");
                return (ENOMEM);
        }

        bus_setup_intr(ctrlr->dev, ctrlr->res,
            INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
            ctrlr, &ctrlr->tag);

        if (ctrlr->tag == NULL) {
                device_printf(ctrlr->dev,
                    "unable to setup legacy interrupt handler\n");
                return (ENOMEM);
        }

        TASK_INIT(&ctrlr->task, 0, nvme_ctrlr_intx_task, ctrlr);
        ctrlr->taskqueue = taskqueue_create_fast("nvme_taskq", M_NOWAIT,
            taskqueue_thread_enqueue, &ctrlr->taskqueue);
        taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_NET,
            "%s intx taskq", device_get_nameunit(ctrlr->dev));

        return (0);
}

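/*
 * Character device ioctl handler.  A userland consumer would look roughly
 * like the sketch below (illustrative only; the request macro and cdata
 * layout come from the NVMe headers):
 *
 *      struct nvme_controller_data cdata;
 *      int fd = open("/dev/nvme0", O_RDWR);
 *
 *      if (fd >= 0 && ioctl(fd, NVME_IDENTIFY_CONTROLLER, &cdata) == 0)
 *              printf("model: %.40s\n", cdata.mn);
 */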
static int
nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
        struct nvme_controller  *ctrlr;
        struct nvme_completion  cpl;
        struct mtx              *mtx;

        ctrlr = cdev->si_drv1;

        switch (cmd) {
        case NVME_IDENTIFY_CONTROLLER:
#ifdef CHATHAM2
                /*
                 * Don't refresh data on Chatham, since Chatham returns
                 *  garbage on IDENTIFY anyway.
                 */
                if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) {
                        memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
                        break;
                }
#endif
                /* Refresh data before returning to user. */
                mtx = mtx_pool_find(mtxpool_sleep, &cpl);
                mtx_lock(mtx);
                nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
                    nvme_ctrlr_cb, &cpl);
                msleep(&cpl, mtx, PRIBIO, "nvme_ioctl", 0);
                mtx_unlock(mtx);
                if (cpl.sf_sc || cpl.sf_sct)
                        return (ENXIO);
                memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
                break;
        default:
                return (ENOTTY);
        }

        return (0);
}

static struct cdevsw nvme_ctrlr_cdevsw = {
        .d_version =    D_VERSION,
        .d_flags =      0,
        .d_ioctl =      nvme_ctrlr_ioctl
};

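/*
 * First stage of controller initialization: map the register BAR, sanity
 * check the doorbell stride, derive the ready timeout from CAP.TO, decide
 * between per-CPU MSI-X vectors and a shared INTx interrupt, construct the
 * admin and I/O qpairs, and create the /dev/nvmeX character device.  The
 * admin commands needed to finish bring-up are issued later from
 * nvme_ctrlr_start().
 */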
int
nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
{
        union cap_lo_register   cap_lo;
        union cap_hi_register   cap_hi;
        int                     num_vectors, per_cpu_io_queues, status = 0;

        ctrlr->dev = dev;
        ctrlr->is_started = FALSE;

        status = nvme_ctrlr_allocate_bar(ctrlr);

        if (status != 0)
                return (status);

#ifdef CHATHAM2
        if (pci_get_devid(dev) == CHATHAM_PCI_ID) {
                status = nvme_ctrlr_allocate_chatham_bar(ctrlr);
                if (status != 0)
                        return (status);
                nvme_ctrlr_setup_chatham(ctrlr);
        }
#endif

        /*
         * Software emulators may set the doorbell stride to something
         *  other than zero, but this driver is not set up to handle that.
         */
        cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi);
        if (cap_hi.bits.dstrd != 0)
                return (ENXIO);

        /* Get ready timeout value from controller, in units of 500ms. */
        cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
        ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500;

        per_cpu_io_queues = 1;
        TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
        ctrlr->per_cpu_io_queues = per_cpu_io_queues ? TRUE : FALSE;

        if (ctrlr->per_cpu_io_queues)
                ctrlr->num_io_queues = mp_ncpus;
        else
                ctrlr->num_io_queues = 1;

        ctrlr->force_intx = 0;
        TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);

        ctrlr->msix_enabled = 1;

        if (ctrlr->force_intx) {
                ctrlr->msix_enabled = 0;
                goto intx;
        }

        /* One vector per IO queue, plus one vector for admin queue. */
        num_vectors = ctrlr->num_io_queues + 1;

        if (pci_msix_count(dev) < num_vectors) {
                ctrlr->msix_enabled = 0;
                goto intx;
        }

        if (pci_alloc_msix(dev, &num_vectors) != 0)
                ctrlr->msix_enabled = 0;

intx:

        if (!ctrlr->msix_enabled)
                nvme_ctrlr_configure_intx(ctrlr);

        nvme_ctrlr_construct_admin_qpair(ctrlr);

        status = nvme_ctrlr_construct_io_qpairs(ctrlr);

        if (status != 0)
                return (status);

        ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
            "nvme%d", device_get_unit(dev));

        if (ctrlr->cdev == NULL)
                return (ENXIO);

        ctrlr->cdev->si_drv1 = (void *)ctrlr;

        return (0);
}

void
nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
    struct nvme_request *req)
{

        nvme_qpair_submit_request(&ctrlr->adminq, req);
}

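/*
 * With per-CPU I/O queues enabled, steer each request to the queue
 * associated with the submitting CPU (ctrlr->ioq[curcpu]); otherwise all
 * I/O funnels through the single ioq[0].
 */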
void
nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
    struct nvme_request *req)
{
        struct nvme_qpair       *qpair;

        if (ctrlr->per_cpu_io_queues)
                qpair = &ctrlr->ioq[curcpu];
        else
                qpair = &ctrlr->ioq[0];

        nvme_qpair_submit_request(qpair, req);
}