/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>

#include <dev/pci/pcivar.h>

#include "nvme_private.h"

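/*
 * Decide whether a failed command should be retried.  The DNR (Do Not
 * Retry) bit in the completion status is only consulted for
 * NAMESPACE_NOT_READY; all other error statuses are treated as final.
 */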
static boolean_t
nvme_completion_check_retry(const struct nvme_completion *cpl)
{
        /*
         * TODO: the spec is not clear on how commands that are aborted
         *  due to TLER will be marked, so for now NAMESPACE_NOT_READY
         *  appears to be the only case where we should look at the DNR
         *  bit.
         */
        switch (cpl->sf_sct) {
        case NVME_SCT_GENERIC:
                switch (cpl->sf_sc) {
                case NVME_SC_NAMESPACE_NOT_READY:
                        if (cpl->sf_dnr)
                                return (0);
                        else
                                return (1);
                case NVME_SC_INVALID_OPCODE:
                case NVME_SC_INVALID_FIELD:
                case NVME_SC_COMMAND_ID_CONFLICT:
                case NVME_SC_DATA_TRANSFER_ERROR:
                case NVME_SC_ABORTED_POWER_LOSS:
                case NVME_SC_INTERNAL_DEVICE_ERROR:
                case NVME_SC_ABORTED_BY_REQUEST:
                case NVME_SC_ABORTED_SQ_DELETION:
                case NVME_SC_ABORTED_FAILED_FUSED:
                case NVME_SC_ABORTED_MISSING_FUSED:
                case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
                case NVME_SC_COMMAND_SEQUENCE_ERROR:
                case NVME_SC_LBA_OUT_OF_RANGE:
                case NVME_SC_CAPACITY_EXCEEDED:
                default:
                        return (0);
                }
        case NVME_SCT_COMMAND_SPECIFIC:
        case NVME_SCT_MEDIA_ERROR:
        case NVME_SCT_VENDOR_SPECIFIC:
        default:
                return (0);
        }
}

static void
nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
    uint16_t cid)
{

        bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
        bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);

        /*
         * Map the tracker's PRP list up front, so its bus address is
         *  already known by the time commands are submitted.
         */
        bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
            sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);

        callout_init_mtx(&tr->timer, &qpair->lock, 0);
        tr->cid = cid;
        tr->qpair = qpair;
}

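/*
 * Completion processing follows the NVMe phase tag protocol: the
 * controller inverts the phase bit it writes each time it wraps around
 * the completion queue, so an entry is new only when its phase bit
 * matches qpair->phase.  After entries are consumed, the CQ head
 * doorbell write tells the controller which slots may be reused.
 */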
void
nvme_qpair_process_completions(struct nvme_qpair *qpair)
{
        struct nvme_tracker     *tr;
        struct nvme_request     *req;
        struct nvme_completion  *cpl;
        boolean_t               retry, error;

        qpair->num_intr_handler_calls++;

        while (1) {
                cpl = &qpair->cpl[qpair->cq_head];

                /* This entry is ours only if its phase bit matches. */
                if (cpl->p != qpair->phase)
                        break;

                tr = qpair->act_tr[cpl->cid];

                KASSERT(tr,
                    ("completion queue has entries but no active trackers\n"));

                req = tr->req;

                error = cpl->sf_sc || cpl->sf_sct;
                retry = error && nvme_completion_check_retry(cpl);

                if (error) {
                        nvme_dump_completion(cpl);
                        nvme_dump_command(&tr->req->cmd);
                }

                qpair->act_tr[cpl->cid] = NULL;

                KASSERT(cpl->cid == req->cmd.cid,
                    ("cpl cid does not match cmd cid\n"));

                if (req->cb_fn && !retry)
                        req->cb_fn(req->cb_arg, cpl);

                qpair->sq_head = cpl->sqhd;

                mtx_lock(&qpair->lock);
                callout_stop(&tr->timer);

                if (retry)
                        nvme_qpair_submit_cmd(qpair, tr);
                else {
                        if (req->payload_size > 0 || req->uio != NULL)
                                bus_dmamap_unload(qpair->dma_tag,
                                    tr->payload_dma_map);

                        nvme_free_request(req);

                        SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);

                        if (!STAILQ_EMPTY(&qpair->queued_req)) {
                                req = STAILQ_FIRST(&qpair->queued_req);
                                STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
                                nvme_qpair_submit_request(qpair, req);
                        }
                }

                mtx_unlock(&qpair->lock);

                if (++qpair->cq_head == qpair->num_entries) {
                        qpair->cq_head = 0;
                        qpair->phase = !qpair->phase;
                }

                nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
                    qpair->cq_head);
        }
}

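/*
 * Interrupt handler registered per qpair in nvme_qpair_construct() when
 * MSI-X is enabled, so each queue pair services its own vector.
 */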
static void
nvme_qpair_msix_handler(void *arg)
{
        struct nvme_qpair *qpair = arg;

        nvme_qpair_process_completions(qpair);
}

void
nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
    uint16_t vector, uint32_t num_entries, uint32_t num_trackers,
    uint32_t max_xfer_size, struct nvme_controller *ctrlr)
{
        struct nvme_tracker     *tr;
        uint32_t                i;

        qpair->id = id;
        qpair->vector = vector;
        qpair->num_entries = num_entries;
#ifdef CHATHAM2
        /*
         * The Chatham prototype board starts having issues at higher
         *  queue depths, so use a conservative limit here of no more
         *  than 64 outstanding I/Os per queue at any one point.
         */
        if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
                num_trackers = min(num_trackers, 64);
#endif
        qpair->num_trackers = num_trackers;
        qpair->max_xfer_size = max_xfer_size;
        qpair->ctrlr = ctrlr;

        /*
         * On the first pass through the completion queue, HW will set
         *  the phase bit on completions to 1.  So set qpair->phase to 1
         *  here, indicating we're looking for a 1 to know which entries
         *  have completed.  We'll toggle the bit each time the
         *  completion queue rolls over.
         */
        qpair->phase = 1;

        if (ctrlr->msix_enabled) {
                /*
                 * MSI-X vector resource IDs start at 1, so we add one to
                 *  the queue's vector to get the corresponding rid to use.
                 */
                qpair->rid = vector + 1;

                qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
                    &qpair->rid, RF_ACTIVE);

                bus_setup_intr(ctrlr->dev, qpair->res,
                    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
                    nvme_qpair_msix_handler, qpair, &qpair->tag);
        }

        mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);

        /*
         * A maximum-sized transfer may start at any offset within a
         *  page, so allow one segment more than max_xfer_size/PAGE_SIZE.
         */
        bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
            sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
            BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size,
            (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0,
            NULL, NULL, &qpair->dma_tag);

        qpair->num_cmds = 0;
        qpair->num_intr_handler_calls = 0;
        qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;

        /* TODO: error checking on contigmalloc, bus_dmamap_load calls */
        qpair->cmd = contigmalloc(qpair->num_entries *
            sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT,
            0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
        qpair->cpl = contigmalloc(qpair->num_entries *
            sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT,
            0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);

        bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
        bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);

        bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
            qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
            nvme_single_map, &qpair->cmd_bus_addr, 0);
        bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
            qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
            nvme_single_map, &qpair->cpl_bus_addr, 0);

        qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
        qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);

        SLIST_INIT(&qpair->free_tr);
        STAILQ_INIT(&qpair->queued_req);

        for (i = 0; i < qpair->num_trackers; i++) {
                tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_NOWAIT);

                if (tr == NULL) {
                        printf("warning: nvme tracker malloc failed\n");
                        break;
                }

                nvme_qpair_construct_tracker(qpair, tr, i);
                SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);
        }

        qpair->act_tr = malloc(sizeof(struct nvme_tracker *) *
            qpair->num_entries, M_NVME, M_ZERO | M_NOWAIT);
}

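/*
 * Sketch of a construction call, with hypothetical values (the actual
 * callers live elsewhere in the driver, during controller init):
 *
 *      nvme_qpair_construct(qpair, 1, 1, 128, 128, 128 * 1024, ctrlr);
 *          (id 1, MSI-X vector 1, 128 queue entries, 128 trackers,
 *           128KB max transfer size)
 */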
static void
nvme_qpair_destroy(struct nvme_qpair *qpair)
{
        struct nvme_tracker *tr;

        if (qpair->tag)
                bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);

        if (qpair->res)
                bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
                    rman_get_rid(qpair->res), qpair->res);

        if (qpair->act_tr)
                free(qpair->act_tr, M_NVME);

        /*
         * Destroy all tracker DMA maps before destroying the tag they
         *  were created from, since bus_dma_tag_destroy() fails with
         *  EBUSY while maps still exist.
         */
        while (!SLIST_EMPTY(&qpair->free_tr)) {
                tr = SLIST_FIRST(&qpair->free_tr);
                SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
                bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map);
                bus_dmamap_destroy(qpair->dma_tag, tr->prp_dma_map);
                free(tr, M_NVME);
        }

        if (qpair->dma_tag)
                bus_dma_tag_destroy(qpair->dma_tag);
}

void
nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
{

        /*
         * NVMe does not use delete queue commands for the admin queue,
         *  so we just need to unload and free the cmd and cpl memory.
         */
        bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
        bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
        contigfree(qpair->cmd,
            qpair->num_entries * sizeof(struct nvme_command), M_NVME);

        bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
        bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
        contigfree(qpair->cpl,
            qpair->num_entries * sizeof(struct nvme_completion), M_NVME);

        nvme_qpair_destroy(qpair);
}

static void
nvme_free_cmd_ring(void *arg, const struct nvme_completion *status)
{
        struct nvme_qpair *qpair;

        qpair = (struct nvme_qpair *)arg;
        bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
        bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
        contigfree(qpair->cmd,
            qpair->num_entries * sizeof(struct nvme_command), M_NVME);
        qpair->cmd = NULL;
}

static void
nvme_free_cpl_ring(void *arg, const struct nvme_completion *status)
{
        struct nvme_qpair *qpair;

        qpair = (struct nvme_qpair *)arg;
        bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
        bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
        contigfree(qpair->cpl,
            qpair->num_entries * sizeof(struct nvme_completion), M_NVME);
        qpair->cpl = NULL;
}

void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{
        struct nvme_controller *ctrlr = qpair->ctrlr;

        if (qpair->num_entries > 0) {
                nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_free_cmd_ring,
                    qpair);
                /* Spin until nvme_free_cmd_ring sets qpair->cmd to NULL. */
                while (qpair->cmd)
                        DELAY(5);

                nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_free_cpl_ring,
                    qpair);
                /* Spin until nvme_free_cpl_ring sets qpair->cpl to NULL. */
                while (qpair->cpl)
                        DELAY(5);

                nvme_qpair_destroy(qpair);
        }
}

static void
nvme_timeout(void *arg)
{
        /*
         * TODO: Add explicit abort operation here, once nvme(4) supports
         *  abort commands.
         */
}

void
nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{
        struct nvme_request *req;

        req = tr->req;
        req->cmd.cid = tr->cid;
        qpair->act_tr[tr->cid] = tr;

#if __FreeBSD_version >= 800030
        callout_reset_curcpu(&tr->timer, NVME_TIMEOUT_IN_SEC * hz,
            nvme_timeout, tr);
#else
        callout_reset(&tr->timer, NVME_TIMEOUT_IN_SEC * hz, nvme_timeout, tr);
#endif

        /* Copy the command from the tracker to the submission queue. */
        memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));

        if (++qpair->sq_tail == qpair->num_entries)
                qpair->sq_tail = 0;

        /* Ensure the command reaches memory before the doorbell write. */
        wmb();
        nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
            qpair->sq_tail);

        qpair->num_cmds++;
}

void
nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{
        struct nvme_tracker     *tr;
        int                     err;

        mtx_lock(&qpair->lock);

        tr = SLIST_FIRST(&qpair->free_tr);

        if (tr == NULL) {
                /*
                 * No tracker is available.  Put the request on the qpair's
                 *  request queue to be processed when a tracker frees up
                 *  via a command completion.
                 */
                STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
                goto ret;
        }

        SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
        tr->req = req;

        if (req->uio == NULL) {
                if (req->payload_size > 0) {
                        err = bus_dmamap_load(tr->qpair->dma_tag,
                                              tr->payload_dma_map, req->payload,
                                              req->payload_size,
                                              nvme_payload_map, tr, 0);
                        if (err != 0)
                                panic("bus_dmamap_load returned non-zero!\n");
                } else
                        /* No payload, so submit the command immediately. */
                        nvme_qpair_submit_cmd(tr->qpair, tr);
        } else {
                err = bus_dmamap_load_uio(tr->qpair->dma_tag,
                                          tr->payload_dma_map, req->uio,
                                          nvme_payload_map_uio, tr, 0);
                if (err != 0)
                        panic("bus_dmamap_load_uio returned non-zero!\n");
        }

ret:
        mtx_unlock(&qpair->lock);
}
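
/*
 * Request flow, as implemented above:
 *
 *   nvme_qpair_submit_request()  takes a tracker from free_tr (or queues
 *       the request if none is free), then maps the payload if any;
 *   nvme_qpair_submit_cmd()      copies the command into the submission
 *       ring and writes the SQ tail doorbell;
 *   nvme_qpair_process_completions()  reaps completions, invokes
 *       callbacks, retries where appropriate, and recycles trackers.
 */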