2 * Copyright (C) 2012 Intel Corporation
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/param.h>
33 #include "nvme_private.h"
36 nvme_completion_check_retry(const struct nvme_completion *cpl)
39 * TODO: spec is not clear how commands that are aborted due
40 * to TLER will be marked. So for now, it seems
41 * NAMESPACE_NOT_READY is the only case where we should
42 * look at the DNR bit.
44 switch (cpl->sf_sct) {
45 case NVME_SCT_GENERIC:
47 case NVME_SC_NAMESPACE_NOT_READY:
52 case NVME_SC_INVALID_OPCODE:
53 case NVME_SC_INVALID_FIELD:
54 case NVME_SC_COMMAND_ID_CONFLICT:
55 case NVME_SC_DATA_TRANSFER_ERROR:
56 case NVME_SC_ABORTED_POWER_LOSS:
57 case NVME_SC_INTERNAL_DEVICE_ERROR:
58 case NVME_SC_ABORTED_BY_REQUEST:
59 case NVME_SC_ABORTED_SQ_DELETION:
60 case NVME_SC_ABORTED_FAILED_FUSED:
61 case NVME_SC_ABORTED_MISSING_FUSED:
62 case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
63 case NVME_SC_COMMAND_SEQUENCE_ERROR:
64 case NVME_SC_LBA_OUT_OF_RANGE:
65 case NVME_SC_CAPACITY_EXCEEDED:
69 case NVME_SCT_COMMAND_SPECIFIC:
70 case NVME_SCT_MEDIA_ERROR:
71 case NVME_SCT_VENDOR_SPECIFIC:
78 nvme_qpair_allocate_tracker(struct nvme_qpair *qpair, boolean_t alloc_prp_list)
80 struct nvme_tracker *tr;
81 struct nvme_prp_list *prp_list;
83 mtx_lock(&qpair->lock);
85 tr = SLIST_FIRST(&qpair->free_tr);
87 /* TODO: fail if malloc returns NULL */
88 tr = malloc(sizeof(struct nvme_tracker), M_NVME,
91 bus_dmamap_create(qpair->dma_tag, 0, &tr->dma_map);
92 callout_init_mtx(&tr->timer, &qpair->lock, 0);
93 tr->cid = qpair->num_tr++;
95 SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
98 prp_list = SLIST_FIRST(&qpair->free_prp_list);
100 if (prp_list == NULL) {
101 prp_list = malloc(sizeof(struct nvme_prp_list),
102 M_NVME, M_ZERO | M_NOWAIT);
104 bus_dmamap_create(qpair->dma_tag, 0, &prp_list->dma_map);
106 bus_dmamap_load(qpair->dma_tag, prp_list->dma_map,
107 prp_list->prp, sizeof(struct nvme_prp_list),
108 nvme_single_map, &prp_list->bus_addr, 0);
110 qpair->num_prp_list++;
112 SLIST_REMOVE_HEAD(&qpair->free_prp_list, slist);
115 tr->prp_list = prp_list;
122 nvme_qpair_process_completions(struct nvme_qpair *qpair)
124 struct nvme_tracker *tr;
125 struct nvme_completion *cpl;
126 boolean_t retry, error;
129 cpl = &qpair->cpl[qpair->cq_head];
131 if (cpl->p != qpair->phase)
134 tr = qpair->act_tr[cpl->cid];
136 ("completion queue has entries but no active trackers\n"));
138 error = cpl->sf_sc || cpl->sf_sct;
139 retry = error && nvme_completion_check_retry(cpl);
142 nvme_dump_completion(cpl);
143 nvme_dump_command(&tr->cmd);
146 qpair->act_tr[cpl->cid] = NULL;
148 KASSERT(cpl->cid == tr->cmd.cid,
149 ("cpl cid does not match cmd cid\n"));
151 if (tr->cb_fn && !retry)
152 tr->cb_fn(tr->cb_arg, cpl);
154 qpair->sq_head = cpl->sqhd;
156 mtx_lock(&qpair->lock);
157 callout_stop(&tr->timer);
160 /* nvme_qpair_submit_cmd() will release the lock. */
161 nvme_qpair_submit_cmd(qpair, tr);
164 SLIST_INSERT_HEAD(&qpair->free_prp_list,
165 tr->prp_list, slist);
169 if (tr->payload_size > 0)
170 bus_dmamap_unload(qpair->dma_tag, tr->dma_map);
172 SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);
174 mtx_unlock(&qpair->lock);
177 if (++qpair->cq_head == qpair->num_entries) {
179 qpair->phase = !qpair->phase;
182 nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
/* MSI-X interrupt handler: one vector per queue pair. */
static void
nvme_qpair_msix_handler(void *arg)
{
	struct nvme_qpair *qpair = arg;

	nvme_qpair_process_completions(qpair);
}
196 nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
197 uint16_t vector, uint32_t num_entries, uint32_t max_xfer_size,
198 struct nvme_controller *ctrlr)
202 qpair->vector = vector;
203 qpair->num_entries = num_entries;
204 qpair->max_xfer_size = max_xfer_size;
205 qpair->ctrlr = ctrlr;
208 * First time through the completion queue, HW will set phase
209 * bit on completions to 1. So set this to 1 here, indicating
210 * we're looking for a 1 to know which entries have completed.
211 * we'll toggle the bit each time when the completion queue
216 if (ctrlr->msix_enabled) {
219 * MSI-X vector resource IDs start at 1, so we add one to
220 * the queue's vector to get the corresponding rid to use.
222 qpair->rid = vector + 1;
224 qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
225 &qpair->rid, RF_ACTIVE);
227 bus_setup_intr(ctrlr->dev, qpair->res,
228 INTR_TYPE_MISC | INTR_MPSAFE, NULL,
229 nvme_qpair_msix_handler, qpair, &qpair->tag);
232 mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);
234 bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
235 sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
236 BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size,
237 (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0,
238 NULL, NULL, &qpair->dma_tag);
242 qpair->num_prp_list = 0;
243 qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;
245 /* TODO: error checking on contigmalloc, bus_dmamap_load calls */
246 qpair->cmd = contigmalloc(qpair->num_entries *
247 sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT,
248 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
249 qpair->cpl = contigmalloc(qpair->num_entries *
250 sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT,
251 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
253 bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
254 bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);
256 bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
257 qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
258 nvme_single_map, &qpair->cmd_bus_addr, 0);
259 bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
260 qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
261 nvme_single_map, &qpair->cpl_bus_addr, 0);
263 qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
264 qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);
266 SLIST_INIT(&qpair->free_tr);
267 SLIST_INIT(&qpair->free_prp_list);
269 qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
270 M_NVME, M_ZERO | M_NOWAIT);
274 nvme_qpair_destroy(struct nvme_qpair *qpair)
276 struct nvme_tracker *tr;
277 struct nvme_prp_list *prp_list;
280 bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);
283 bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
284 rman_get_rid(qpair->res), qpair->res);
287 bus_dma_tag_destroy(qpair->dma_tag);
290 free(qpair->act_tr, M_NVME);
292 while (!SLIST_EMPTY(&qpair->free_tr)) {
293 tr = SLIST_FIRST(&qpair->free_tr);
294 SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
295 bus_dmamap_destroy(qpair->dma_tag, tr->dma_map);
299 while (!SLIST_EMPTY(&qpair->free_prp_list)) {
300 prp_list = SLIST_FIRST(&qpair->free_prp_list);
301 SLIST_REMOVE_HEAD(&qpair->free_prp_list, slist);
302 bus_dmamap_destroy(qpair->dma_tag, prp_list->dma_map);
303 free(prp_list, M_NVME);
308 nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
312 * For NVMe, you don't send delete queue commands for the admin
313 * queue, so we just need to unload and free the cmd and cpl memory.
315 bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
316 bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
318 contigfree(qpair->cmd,
319 qpair->num_entries * sizeof(struct nvme_command), M_NVME);
321 bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
322 bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
323 contigfree(qpair->cpl,
324 qpair->num_entries * sizeof(struct nvme_completion), M_NVME);
326 nvme_qpair_destroy(qpair);
330 nvme_free_cmd_ring(void *arg, const struct nvme_completion *status)
332 struct nvme_qpair *qpair;
334 qpair = (struct nvme_qpair *)arg;
335 bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
336 bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
337 contigfree(qpair->cmd,
338 qpair->num_entries * sizeof(struct nvme_command), M_NVME);
343 nvme_free_cpl_ring(void *arg, const struct nvme_completion *status)
345 struct nvme_qpair *qpair;
347 qpair = (struct nvme_qpair *)arg;
348 bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
349 bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
350 contigfree(qpair->cpl,
351 qpair->num_entries * sizeof(struct nvme_completion), M_NVME);
356 nvme_io_qpair_destroy(struct nvme_qpair *qpair)
358 struct nvme_controller *ctrlr = qpair->ctrlr;
360 if (qpair->num_entries > 0) {
362 nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_free_cmd_ring,
364 /* Spin until free_cmd_ring sets qpair->cmd to NULL. */
368 nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_free_cpl_ring,
370 /* Spin until free_cpl_ring sets qpair->cmd to NULL. */
374 nvme_qpair_destroy(qpair);
/*
 * Per-command timeout callout handler (armed in nvme_qpair_submit_cmd).
 *  Currently a no-op.
 */
static void
nvme_timeout(void *arg)
{
	/*
	 * TODO: Add explicit abort operation here, once nvme(4) supports
	 *  abort commands.
	 */
}
388 nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
391 tr->cmd.cid = tr->cid;
392 qpair->act_tr[tr->cid] = tr;
395 * TODO: rather than spin until entries free up, put this tracker
396 * on a queue, and submit from the interrupt handler when
399 if ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head) {
401 mtx_unlock(&qpair->lock);
403 mtx_lock(&qpair->lock);
404 } while ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head);
407 callout_reset(&tr->timer, NVME_TIMEOUT_IN_SEC * hz, nvme_timeout, tr);
409 /* Copy the command from the tracker to the submission queue. */
410 memcpy(&qpair->cmd[qpair->sq_tail], &tr->cmd, sizeof(tr->cmd));
412 if (++qpair->sq_tail == qpair->num_entries)
416 nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
421 mtx_unlock(&qpair->lock);