From 21b6da584b35ff0ec85cb31c5e27affa573506dc Mon Sep 17 00:00:00 2001
From: Jim Harris
Date: Thu, 18 Oct 2012 00:44:39 +0000
Subject: [PATCH] Preallocate a limited number of nvme_tracker objects per
 qpair, rather than dynamically creating them at runtime.

Sponsored by: Intel
---
 sys/dev/nvme/nvme_ctrlr.c   | 25 +++++++++--
 sys/dev/nvme/nvme_private.h | 23 +++++++---
 sys/dev/nvme/nvme_qpair.c   | 89 +++++++++++++++++--------------------
 sys/dev/nvme/nvme_sysctl.c  |  6 +--
 4 files changed, 80 insertions(+), 63 deletions(-)

diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index adc34e9be75..94987e272b7 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -217,7 +217,13 @@ nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
 	 * The admin queue's max xfer size is treated differently than the
 	 * max I/O xfer size.  16KB is sufficient here - maybe even less?
 	 */
-	nvme_qpair_construct(qpair, 0, 0, num_entries, 16*1024, ctrlr);
+	nvme_qpair_construct(qpair,
+	    0, /* qpair ID */
+	    0, /* vector */
+	    num_entries,
+	    NVME_ADMIN_TRACKERS,
+	    16*1024, /* max xfer size */
+	    ctrlr);
 }
 
 static int
@@ -225,13 +231,11 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
 {
 	struct nvme_qpair	*qpair;
 	union cap_lo_register	cap_lo;
-	int			i, num_entries;
+	int			i, num_entries, num_trackers;
 
 	num_entries = NVME_IO_ENTRIES;
 	TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);
 
-	num_entries = max(num_entries, NVME_MIN_IO_ENTRIES);
-
 	/*
 	 * NVMe spec sets a hard limit of 64K max entries, but
 	 * devices may specify a smaller limit, so we need to check
@@ -240,6 +244,18 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
 	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
 	num_entries = min(num_entries, cap_lo.bits.mqes+1);
 
+	num_trackers = NVME_IO_TRACKERS;
+	TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers);
+
+	num_trackers = max(num_trackers, NVME_MIN_IO_TRACKERS);
+	num_trackers = min(num_trackers, NVME_MAX_IO_TRACKERS);
+	/*
+	 * No need to have more trackers than entries in the submit queue.
+	 * Note also that for a queue size of N, we can only have (N-1)
+	 * commands outstanding, hence the "-1" here.
+	 */
+	num_trackers = min(num_trackers, (num_entries-1));
+
 	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
 	TUNABLE_INT_FETCH("hw.nvme.max_xfer_size", &ctrlr->max_xfer_size);
 	/*
@@ -270,6 +286,7 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
 		    i+1, /* qpair ID */
 		    ctrlr->msix_enabled ? i+1 : 0, /* vector */
 		    num_entries,
+		    num_trackers,
 		    ctrlr->max_xfer_size,
 		    ctrlr);
 
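The tracker accounting in nvme_ctrlr_construct_io_qpairs() is worth tracing: the hw.nvme.io_trackers tunable is first clamped into [NVME_MIN_IO_TRACKERS, NVME_MAX_IO_TRACKERS], then capped at one less than the queue depth. Below is a minimal userspace sketch of that arithmetic, for reference only: the constants are copied from this patch, imax()/imin() stand in for the kernel's max()/min(), and the starting value of 4096 is a hypothetical tunable setting.

#include <stdio.h>

#define NVME_IO_TRACKERS	128
#define NVME_MIN_IO_TRACKERS	16
#define NVME_MAX_IO_TRACKERS	1024

static int imax(int a, int b) { return (a > b ? a : b); }
static int imin(int a, int b) { return (a < b ? a : b); }

int
main(void)
{
	int num_entries = 256;		/* NVME_IO_ENTRIES */
	int num_trackers = 4096;	/* pretend hw.nvme.io_trackers=4096 */

	num_trackers = imax(num_trackers, NVME_MIN_IO_TRACKERS);
	num_trackers = imin(num_trackers, NVME_MAX_IO_TRACKERS);
	/* A queue of N entries holds at most N-1 outstanding commands. */
	num_trackers = imin(num_trackers, num_entries - 1);

	printf("effective trackers: %d\n", num_trackers);
	return (0);
}

With a 256-entry queue, the oversized request collapses first to 1024 at the upper clamp and then to 255 at the queue-depth cap.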
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 9554725ace8..0797b53d74c 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -68,14 +68,25 @@ MALLOC_DECLARE(M_NVME);
  */
 #define NVME_MAX_XFER_SIZE	NVME_MAX_PRP_LIST_ENTRIES * PAGE_SIZE
 
+#define NVME_ADMIN_TRACKERS	(16)
 #define NVME_ADMIN_ENTRIES	(128)
 /* min and max are defined in admin queue attributes section of spec */
 #define NVME_MIN_ADMIN_ENTRIES	(2)
 #define NVME_MAX_ADMIN_ENTRIES	(4096)
 
-#define NVME_IO_ENTRIES		(1024)
-/* min is a reasonable value picked for the nvme(4) driver */
-#define NVME_MIN_IO_ENTRIES	(128)
+/*
+ * NVME_IO_ENTRIES defines the size of an I/O qpair's submission and completion
+ * queues, while NVME_IO_TRACKERS defines the maximum number of I/O requests
+ * that we will allow outstanding on an I/O qpair at any time.  The only
+ * advantage in having IO_ENTRIES > IO_TRACKERS is for debugging purposes -
+ * when dumping the contents of the submission and completion queues, it will
+ * show a longer history of data.
+ */
+#define NVME_IO_ENTRIES		(256)
+#define NVME_IO_TRACKERS	(128)
+#define NVME_MIN_IO_TRACKERS	(16)
+#define NVME_MAX_IO_TRACKERS	(1024)
+
 /*
  * NVME_MAX_IO_ENTRIES is not defined, since it is specified in CC.MQES
  * for each controller.
@@ -134,6 +145,7 @@ struct nvme_qpair {
 
 	uint32_t		max_xfer_size;
 	uint32_t		num_entries;
+	uint32_t		num_trackers;
 	uint32_t		sq_tdbl_off;
 	uint32_t		cq_hdbl_off;
 
@@ -155,8 +167,6 @@ struct nvme_qpair {
 	bus_dmamap_t		cpl_dma_map;
 	uint64_t		cpl_bus_addr;
 
-	uint32_t		num_tr;
-
 	SLIST_HEAD(, nvme_tracker)	free_tr;
 
 	struct nvme_tracker	**act_tr;
@@ -348,12 +358,11 @@ void	nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
 
 void	nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 			     uint16_t vector, uint32_t num_entries,
-			     uint32_t max_xfer_size,
+			     uint32_t num_trackers, uint32_t max_xfer_size,
 			     struct nvme_controller *ctrlr);
 void	nvme_qpair_submit_cmd(struct nvme_qpair *qpair,
 			      struct nvme_tracker *tr);
 void	nvme_qpair_process_completions(struct nvme_qpair *qpair);
-struct nvme_tracker *	nvme_qpair_allocate_tracker(struct nvme_qpair *qpair);
 
 void	nvme_qpair_submit_request(struct nvme_qpair *qpair,
 				  struct nvme_request *req);
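The "(N-1)" rule cited in both comment blocks follows from the usual ring-buffer convention: head == tail is reserved to mean "empty", so the queue must be declared full one slot early, i.e. when (tail + 1) % N == head. A small hypothetical demo of that condition, not driver code:

#include <stdio.h>

#define N	8	/* an 8-slot ring */

int
main(void)
{
	unsigned head = 0, tail = 0, submitted = 0;

	/* Fill the ring until the full condition trips. */
	while ((tail + 1) % N != head) {
		tail = (tail + 1) % N;
		submitted++;
	}
	printf("%d slots => %u commands outstanding\n", N, submitted);
	return (0);
}

This prints 7: one slot is always sacrificed to keep "full" distinguishable from "empty", which is exactly why num_trackers is capped at num_entries - 1.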
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
index 97a5c28a22a..0abac7cbdb6 100644
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -74,42 +74,20 @@ nvme_completion_check_retry(const struct nvme_completion *cpl)
 	}
 }
 
-struct nvme_tracker *
-nvme_qpair_allocate_tracker(struct nvme_qpair *qpair)
+static void
+nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
+    uint16_t cid)
 {
-	struct nvme_tracker	*tr;
-
-	tr = SLIST_FIRST(&qpair->free_tr);
-	if (tr == NULL) {
-		/*
-		 * We can't support more trackers than we have entries in
-		 * our queue, because it would generate invalid indices
-		 * into the qpair's active tracker array.
-		 */
-		if (qpair->num_tr == qpair->num_entries) {
-			return (NULL);
-		}
-
-		tr = malloc(sizeof(struct nvme_tracker), M_NVME,
-		    M_ZERO | M_NOWAIT);
-
-		if (tr == NULL) {
-			return (NULL);
-		}
-		bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
-		bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);
+	bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
+	bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);
 
-		bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
-		    sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);
-
-		callout_init_mtx(&tr->timer, &qpair->lock, 0);
-		tr->cid = qpair->num_tr++;
-		tr->qpair = qpair;
-	} else
-		SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
+	bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
+	    sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);
 
-	return (tr);
+	callout_init_mtx(&tr->timer, &qpair->lock, 0);
+	tr->cid = cid;
+	tr->qpair = qpair;
 }
 
 void
@@ -163,6 +141,10 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair)
 				    tr->payload_dma_map);
 
 			nvme_free_request(req);
+
+			if (SLIST_EMPTY(&qpair->free_tr))
+				wakeup(qpair);
+
 			SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);
 		}
 
@@ -188,9 +170,11 @@ nvme_qpair_msix_handler(void *arg)
 
 void
 nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
-    uint16_t vector, uint32_t num_entries, uint32_t max_xfer_size,
-    struct nvme_controller *ctrlr)
+    uint16_t vector, uint32_t num_entries, uint32_t num_trackers,
+    uint32_t max_xfer_size, struct nvme_controller *ctrlr)
 {
+	struct nvme_tracker	*tr;
+	uint32_t		i;
 
 	qpair->id = id;
 	qpair->vector = vector;
@@ -233,7 +217,7 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 
 	qpair->num_cmds = 0;
 	qpair->num_intr_handler_calls = 0;
-	qpair->num_tr = 0;
+	qpair->num_trackers = num_trackers;
 	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;
 
 	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
@@ -259,6 +243,18 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 
 	SLIST_INIT(&qpair->free_tr);
 
+	for (i = 0; i < num_trackers; i++) {
+		tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_NOWAIT);
+
+		if (tr == NULL) {
+			printf("warning: nvme tracker malloc failed\n");
+			break;
+		}
+
+		nvme_qpair_construct_tracker(qpair, tr, i);
+		SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);
+	}
+
 	qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
 	    M_NVME, M_ZERO | M_NOWAIT);
 }
@@ -379,19 +375,6 @@ nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
 	req->cmd.cid = tr->cid;
 	qpair->act_tr[tr->cid] = tr;
 
-	/*
-	 * TODO: rather than spin until entries free up, put this tracker
-	 *  on a queue, and submit from the interrupt handler when
-	 *  entries free up.
-	 */
-	if ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head) {
-		do {
-			mtx_unlock(&qpair->lock);
-			DELAY(5);
-			mtx_lock(&qpair->lock);
-		} while ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head);
-	}
-
 	callout_reset(&tr->timer, NVME_TIMEOUT_IN_SEC * hz, nvme_timeout, tr);
 
 	/* Copy the command from the tracker to the submission queue. */
@@ -415,7 +398,15 @@ nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 
 	mtx_lock(&qpair->lock);
 
-	tr = nvme_qpair_allocate_tracker(qpair);
+	tr = SLIST_FIRST(&qpair->free_tr);
+
+	while (tr == NULL) {
+		/* All trackers in use; sleep until a completion frees one. */
+		msleep(qpair, &qpair->lock, PRIBIO, "qpair_tr", 0);
+		tr = SLIST_FIRST(&qpair->free_tr);
+	}
+
+	SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
 
 	tr->req = req;
 	if (req->uio == NULL) {
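Taken together, the nvme_qpair.c changes turn tracker management into a fixed pool with a blocking get: trackers are allocated once in nvme_qpair_construct() and pushed onto free_tr, the submit path pops one (sleeping in msleep() while the list is empty), and the completion path wakes sleepers when it returns a tracker to an empty list. The sketch below mirrors that protocol in userspace, with a pthread mutex and condition variable standing in for the qpair lock and msleep()/wakeup(); the tracker_get()/tracker_put() names are invented for the illustration, and none of this is the driver's actual code.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct tracker {
	unsigned		cid;
	struct tracker		*next;
};

static pthread_mutex_t	lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	free_cv = PTHREAD_COND_INITIALIZER;
static struct tracker	*free_tr;

/* Preallocate n trackers, cf. the loop added to nvme_qpair_construct(). */
static void
tracker_pool_init(unsigned n)
{
	unsigned i;

	for (i = 0; i < n; i++) {
		struct tracker *tr = calloc(1, sizeof(*tr));

		if (tr == NULL)
			break;		/* best effort, as in the patch */
		tr->cid = i;		/* cid is fixed at construction */
		tr->next = free_tr;
		free_tr = tr;
	}
}

/* Pop a tracker, blocking while the pool is empty, cf. msleep(). */
static struct tracker *
tracker_get(void)
{
	struct tracker *tr;

	pthread_mutex_lock(&lock);
	while ((tr = free_tr) == NULL)
		pthread_cond_wait(&free_cv, &lock);
	free_tr = tr->next;
	pthread_mutex_unlock(&lock);
	return (tr);
}

/* Return a tracker; wake sleepers if the pool was empty, cf. wakeup(). */
static void
tracker_put(struct tracker *tr)
{
	pthread_mutex_lock(&lock);
	if (free_tr == NULL)
		pthread_cond_broadcast(&free_cv);
	tr->next = free_tr;
	free_tr = tr;
	pthread_mutex_unlock(&lock);
}

int
main(void)
{
	struct tracker *tr;

	tracker_pool_init(4);
	tr = tracker_get();
	printf("got tracker cid %u\n", tr->cid);
	tracker_put(tr);
	return (0);
}

One design point carries over directly: because each tracker receives a permanent cid at construction time, act_tr[tr->cid] indexing stays valid for the qpair's whole lifetime, which the old demand-allocation code could only guarantee by refusing to grow past num_entries.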
diff --git a/sys/dev/nvme/nvme_sysctl.c b/sys/dev/nvme/nvme_sysctl.c
index 9cfdcddfe3f..85187afd826 100644
--- a/sys/dev/nvme/nvme_sysctl.c
+++ b/sys/dev/nvme/nvme_sysctl.c
@@ -192,9 +192,9 @@ nvme_sysctl_initialize_queue(struct nvme_qpair *qpair,
 	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_entries",
 	    CTLFLAG_RD, &qpair->num_entries, 0,
 	    "Number of entries in hardware queue");
-	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_tr",
-	    CTLFLAG_RD, &qpair->num_tr, 0,
-	    "Number of trackers allocated");
+	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_trackers",
+	    CTLFLAG_RD, &qpair->num_trackers, 0,
+	    "Number of trackers pre-allocated for this queue pair");
 	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_head",
 	    CTLFLAG_RD, &qpair->sq_head, 0,
 	    "Current head of submission queue (as observed by driver)");
-- 
2.45.2
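As a closing aside, the renamed read-only sysctl can be checked from userspace with sysctlbyname(3). The OID below assumes the per-queue-pair node for the first I/O queue of the first controller is dev.nvme.0.ioq0, which may differ by system; both the path and this little program are a hypothetical sanity check rather than part of the change.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	u_int num_trackers;
	size_t len = sizeof(num_trackers);

	/* Hypothetical OID; list the tree with `sysctl dev.nvme` first. */
	if (sysctlbyname("dev.nvme.0.ioq0.num_trackers", &num_trackers,
	    &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("ioq0 num_trackers: %u\n", num_trackers);
	return (0);
}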