/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>

#include "nvme_private.h"

/*
 * Decide whether a failed command is worth retrying, based on the status
 *  code type and status code in its completion entry.
 */
static boolean_t
nvme_completion_check_retry(const struct nvme_completion *cpl)
{
	/*
	 * TODO: spec is not clear how commands that are aborted due
	 *  to TLER will be marked.  So for now, it seems
	 *  NAMESPACE_NOT_READY is the only case where we should
	 *  look at the DNR bit.
	 */
	switch (cpl->sf_sct) {
	case NVME_SCT_GENERIC:
		switch (cpl->sf_sc) {
		case NVME_SC_NAMESPACE_NOT_READY:
			if (cpl->sf_dnr)
				return (0);
			else
				return (1);
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_BY_REQUEST:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return (0);
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return (0);
	}
}

struct nvme_tracker *
nvme_qpair_allocate_tracker(struct nvme_qpair *qpair, boolean_t alloc_prp_list)
{
	struct nvme_tracker	*tr;
	struct nvme_prp_list	*prp_list;

	mtx_lock(&qpair->lock);

	tr = SLIST_FIRST(&qpair->free_tr);
	if (tr == NULL) {
		/* TODO: fail if malloc returns NULL */
		tr = malloc(sizeof(struct nvme_tracker), M_NVME,
		    M_ZERO | M_NOWAIT);

		bus_dmamap_create(qpair->dma_tag, 0, &tr->dma_map);
		callout_init_mtx(&tr->timer, &qpair->lock, 0);

		tr->cid = qpair->num_tr++;
	} else
		SLIST_REMOVE_HEAD(&qpair->free_tr, slist);

	if (alloc_prp_list) {
		prp_list = SLIST_FIRST(&qpair->free_prp_list);

		if (prp_list == NULL) {
			prp_list = malloc(sizeof(struct nvme_prp_list),
			    M_NVME, M_ZERO | M_NOWAIT);

			bus_dmamap_create(qpair->dma_tag, 0,
			    &prp_list->dma_map);

			bus_dmamap_load(qpair->dma_tag, prp_list->dma_map,
			    prp_list->prp, sizeof(struct nvme_prp_list),
			    nvme_single_map, &prp_list->bus_addr, 0);

			qpair->num_prp_list++;
		} else {
			SLIST_REMOVE_HEAD(&qpair->free_prp_list, slist);
		}

		tr->prp_list = prp_list;
	}

	return (tr);
}
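
/*
 * Drain the completion queue: consume entries whose phase bit matches the
 *  queue's current phase, invoke each tracker's callback unless the command
 *  is being retried, recycle the tracker (and any PRP list), and update the
 *  completion queue head doorbell.
 */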
void
nvme_qpair_process_completions(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;
	struct nvme_completion	*cpl;
	boolean_t		retry, error;

	while (1) {
		cpl = &qpair->cpl[qpair->cq_head];

		if (cpl->p != qpair->phase)
			break;

		tr = qpair->act_tr[cpl->cid];
		KASSERT(tr,
		    ("completion queue has entries but no active trackers\n"));

		error = cpl->sf_sc || cpl->sf_sct;
		retry = error && nvme_completion_check_retry(cpl);

		if (error) {
			nvme_dump_completion(cpl);
			nvme_dump_command(&tr->cmd);
		}

		qpair->act_tr[cpl->cid] = NULL;

		KASSERT(cpl->cid == tr->cmd.cid,
		    ("cpl cid does not match cmd cid\n"));

		if (tr->cb_fn && !retry)
			tr->cb_fn(tr->cb_arg, cpl);

		qpair->sq_head = cpl->sqhd;

		mtx_lock(&qpair->lock);
		callout_stop(&tr->timer);

		if (retry)
			/* nvme_qpair_submit_cmd() will release the lock. */
			nvme_qpair_submit_cmd(qpair, tr);
		else {
			if (tr->prp_list) {
				SLIST_INSERT_HEAD(&qpair->free_prp_list,
				    tr->prp_list, slist);
				tr->prp_list = NULL;
			}

			if (tr->payload_size > 0)
				bus_dmamap_unload(qpair->dma_tag,
				    tr->dma_map);

			SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);

			mtx_unlock(&qpair->lock);
		}

		if (++qpair->cq_head == qpair->num_entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}

		nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
		    qpair->cq_head);
	}
}

static void
nvme_qpair_msix_handler(void *arg)
{
	struct nvme_qpair *qpair = arg;

	nvme_qpair_process_completions(qpair);
}
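
/*
 * One-time setup of a queue pair: hook up the MSI-X interrupt handler when
 *  MSI-X is enabled, initialize the qpair lock and DMA tag, and allocate
 *  and map the command and completion rings.
 */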
void
nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
    uint16_t vector, uint32_t num_entries, uint32_t max_xfer_size,
    struct nvme_controller *ctrlr)
{

	qpair->id = id;
	qpair->vector = vector;
	qpair->num_entries = num_entries;
	qpair->max_xfer_size = max_xfer_size;
	qpair->ctrlr = ctrlr;

	/*
	 * First time through the completion queue, HW will set the phase
	 *  bit on completions to 1.  So set this to 1 here, indicating
	 *  we're looking for a 1 to know which entries have completed.
	 *  We'll toggle the bit each time the completion queue rolls over.
	 */
	qpair->phase = 1;

	if (ctrlr->msix_enabled) {

		/*
		 * MSI-X vector resource IDs start at 1, so we add one to
		 *  the queue's vector to get the corresponding rid to use.
		 */
		qpair->rid = vector + 1;

		qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
		    &qpair->rid, RF_ACTIVE);

		bus_setup_intr(ctrlr->dev, qpair->res,
		    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
		    nvme_qpair_msix_handler, qpair, &qpair->tag);
	}

	mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);

	bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
	    sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size,
	    (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0,
	    NULL, NULL, &qpair->dma_tag);

	qpair->num_cmds = 0;
	qpair->num_tr = 0;
	qpair->num_prp_list = 0;
	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;

	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
	qpair->cmd = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	qpair->cpl = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);

	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);

	bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
	    qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
	    nvme_single_map, &qpair->cmd_bus_addr, 0);
	bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
	    qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
	    nvme_single_map, &qpair->cpl_bus_addr, 0);

	qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);

	SLIST_INIT(&qpair->free_tr);
	SLIST_INIT(&qpair->free_prp_list);

	qpair->act_tr = malloc(sizeof(struct nvme_tracker *) *
	    qpair->num_entries, M_NVME, M_ZERO | M_NOWAIT);
}
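
/*
 * Teardown common to both admin and I/O queue pairs: release the interrupt
 *  and its resource, destroy the DMA tag, free the active tracker array,
 *  and free any trackers and PRP lists remaining on the free lists.
 */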
static void
nvme_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;
	struct nvme_prp_list	*prp_list;

	if (qpair->tag)
		bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);

	if (qpair->res)
		bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
		    rman_get_rid(qpair->res), qpair->res);

	if (qpair->dma_tag)
		bus_dma_tag_destroy(qpair->dma_tag);

	if (qpair->act_tr)
		free(qpair->act_tr, M_NVME);

	while (!SLIST_EMPTY(&qpair->free_tr)) {
		tr = SLIST_FIRST(&qpair->free_tr);
		SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
		bus_dmamap_destroy(qpair->dma_tag, tr->dma_map);
		free(tr, M_NVME);
	}

	while (!SLIST_EMPTY(&qpair->free_prp_list)) {
		prp_list = SLIST_FIRST(&qpair->free_prp_list);
		SLIST_REMOVE_HEAD(&qpair->free_prp_list, slist);
		bus_dmamap_destroy(qpair->dma_tag, prp_list->dma_map);
		free(prp_list, M_NVME);
	}
}

void
nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
{

	/*
	 * For NVMe, you don't send delete queue commands for the admin
	 *  queue, so we just need to unload and free the cmd and cpl memory.
	 */
	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);

	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);

	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);

	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);

	nvme_qpair_destroy(qpair);
}

static void
nvme_free_cmd_ring(void *arg, const struct nvme_completion *status)
{
	struct nvme_qpair *qpair;

	qpair = (struct nvme_qpair *)arg;

	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);
	qpair->cmd = NULL;
}

static void
nvme_free_cpl_ring(void *arg, const struct nvme_completion *status)
{
	struct nvme_qpair *qpair;

	qpair = (struct nvme_qpair *)arg;

	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);
	qpair->cpl = NULL;
}

void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_controller *ctrlr = qpair->ctrlr;

	if (qpair->num_entries > 0) {

		nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_free_cmd_ring,
		    qpair);
		/* Spin until free_cmd_ring sets qpair->cmd to NULL. */
		while (qpair->cmd)
			DELAY(5);

		nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_free_cpl_ring,
		    qpair);
		/* Spin until free_cpl_ring sets qpair->cpl to NULL. */
		while (qpair->cpl)
			DELAY(5);

		nvme_qpair_destroy(qpair);
	}
}

static void
nvme_timeout(void *arg)
{
	/*
	 * TODO: Add explicit abort operation here, once nvme(4) supports
	 *  abort commands.
	 */
}

/*
 * Post the tracker's command to the submission queue and ring the tail
 *  doorbell.  Called with the qpair lock held; the lock is dropped before
 *  returning (and temporarily while spinning for a free queue entry).
 */
void
nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{

	tr->cmd.cid = tr->cid;
	qpair->act_tr[tr->cid] = tr;

	/*
	 * TODO: rather than spin until entries free up, put this tracker
	 *  on a queue, and submit from the interrupt handler when
	 *  entries free up.
	 */
	if ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head) {
		do {
			mtx_unlock(&qpair->lock);
			DELAY(5);
			mtx_lock(&qpair->lock);
		} while ((qpair->sq_tail+1) % qpair->num_entries ==
		    qpair->sq_head);
	}

	callout_reset(&tr->timer, NVME_TIMEOUT_IN_SEC * hz, nvme_timeout, tr);

	/* Copy the command from the tracker to the submission queue. */
	memcpy(&qpair->cmd[qpair->sq_tail], &tr->cmd, sizeof(tr->cmd));

	if (++qpair->sq_tail == qpair->num_entries)
		qpair->sq_tail = 0;

	wmb();
	nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
	    qpair->sq_tail);

	qpair->num_cmds++;

	mtx_unlock(&qpair->lock);
}