 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 * Copyright (c) 2017 Shunsuke Mie
 * Copyright (c) 2018 Leon Dang
 * Function crc16 Copyright (c) 2017, Fedor Uporov
 * Obtained from function ext2_crc16() in sys/fs/ext2fs/ext2_csum.c
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * bhyve PCIe-NVMe device emulation.
 * -s <n>,nvme,devpath,maxq=#,qsz=#,ioslots=#,sectsz=#,ser=A-Z,eui64=#
 * maxq    = max number of queues
 * qsz     = max elements in each queue
 * ioslots = max number of concurrent io requests
 * sectsz  = sector size (defaults to blockif sector size)
 * ser     = serial number (20-chars max)
 * eui64   = IEEE Extended Unique Identifier (8 byte value)
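 *
 * For example, a guest might be launched with something like the following
 * (hypothetical slot number and backing path):
 *   -s 4,nvme,/path/to/disk.img,maxq=4,qsz=512,ioslots=16,sectsz=512,ser=NVME0001
 *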
 *  - create async event for smart and log
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <net/ieee_oui.h>
#include <semaphore.h>
#include <machine/atomic.h>
#include <machine/vmm.h>
#include <dev/nvme/nvme.h>
static int nvme_debug = 0;
#define	DPRINTF(params) if (nvme_debug) PRINTLN params
#define	WPRINTF(params) PRINTLN params
/* defaults; can be overridden */
#define	NVME_MSIX_BAR		4
#define	NVME_IOSLOTS		8
/* The NVMe spec defines bits 13:4 in BAR0 as reserved */
#define	NVME_MMIO_SPACE_MIN	(1 << 14)
#define	NVME_QUEUES		16
#define	NVME_MAX_QENTRIES	2048
#define	NVME_PRP2_ITEMS		(PAGE_SIZE/sizeof(uint64_t))
#define	NVME_MAX_BLOCKIOVS	512
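
/*
 * A rough sketch of the PRP layout handled below (assuming 4 KiB pages):
 * PRP1 addresses the first, possibly unaligned, page of a transfer; for
 * transfers spanning more than two pages, PRP2 addresses a page holding up
 * to NVME_PRP2_ITEMS (512) 8-byte page addresses, whose last entry may
 * chain to a further PRP list page.
 */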
/* This is a synthetic status code to indicate there is no status */
#define	NVME_NO_STATUS		0xffff
#define	NVME_COMPLETION_VALID(c)	((c).status != NVME_NO_STATUS)
/* Convert a zero-based value into a one-based value */
#define	ONE_BASED(zero)		((zero) + 1)
/* Convert a one-based value into a zero-based value */
#define	ZERO_BASED(one)		((one) - 1)
/* Encode number of SQ's and CQ's for Set/Get Features */
#define	NVME_FEATURE_NUM_QUEUES(sc) \
	((ZERO_BASED((sc)->num_squeues) & 0xffff) | \
	((ZERO_BASED((sc)->num_cqueues) & 0xffff) << 16))
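/*
 * e.g. with 16 submission and 16 completion queues configured, the macro
 * evaluates to 0x000f000f: Number of Queues is reported 0-based in each
 * 16-bit half of the completion dword.
 */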
#define	NVME_DOORBELL_OFFSET	offsetof(struct nvme_registers, doorbell)
enum nvme_controller_register_offsets {
	NVME_CR_CAP_LOW = 0x00,
	NVME_CR_CAP_HI  = 0x04,
	NVME_CR_INTMS   = 0x0c,
	NVME_CR_INTMC   = 0x10,
	NVME_CR_ASQ_LOW = 0x28,
	NVME_CR_ASQ_HI  = 0x2c,
	NVME_CR_ACQ_LOW = 0x30,
	NVME_CR_ACQ_HI  = 0x34,
enum nvme_cmd_cdw11 {
	NVME_CMD_CDW11_PC  = 0x0001,
	NVME_CMD_CDW11_IEN = 0x0002,
	NVME_CMD_CDW11_IV  = 0xFFFF0000,
#define	NVME_CQ_INTEN	0x01
#define	NVME_CQ_INTCOAL	0x02
struct nvme_completion_queue {
	struct nvme_completion *qbase;
	uint16_t	tail;	/* nvme progress */
	uint16_t	head;	/* guest progress */
struct nvme_submission_queue {
	struct nvme_command *qbase;
	uint16_t	head;	/* nvme progress */
	uint16_t	tail;	/* guest progress */
	uint16_t	cqid;	/* completion queue id */
	int		busy;	/* queue is being processed */
enum nvme_storage_type {
	NVME_STOR_BLOCKIF = 0,
struct pci_nvme_blockstore {
	enum nvme_storage_type type;
	uint32_t	sectsz_bits;
	uint32_t	deallocate:1;
struct pci_nvme_ioreq {
	struct pci_nvme_softc *sc;
	STAILQ_ENTRY(pci_nvme_ioreq) link;
	struct nvme_submission_queue *nvme_sq;
	/* command information */
	uint64_t	prev_gpaddr;
	 * lock if all iovs consumed (big IO);
	 * complete transaction before continuing
	struct blockif_req io_req;
	/* pad to fit up to 512 page descriptors from guest IO request */
	struct iovec	iovpadding[NVME_MAX_BLOCKIOVS-BLOCKIF_IOV_MAX];
	/* Dataset Management bit in ONCS reflects backing storage capability */
	NVME_DATASET_MANAGEMENT_AUTO,
	/* Unconditionally set Dataset Management bit in ONCS */
	NVME_DATASET_MANAGEMENT_ENABLE,
	/* Unconditionally clear Dataset Management bit in ONCS */
	NVME_DATASET_MANAGEMENT_DISABLE,
struct pci_nvme_softc {
	struct pci_devinst *nsc_pi;
	struct nvme_registers regs;
	struct nvme_namespace_data nsdata;
	struct nvme_controller_data ctrldata;
	struct nvme_error_information_entry err_log;
	struct nvme_health_information_page health_log;
	struct nvme_firmware_page fw_log;
	struct pci_nvme_blockstore nvstore;
	uint16_t	max_qentries;	/* max entries per queue */
	uint32_t	max_queues;	/* max number of IO SQ's or CQ's */
	uint32_t	num_cqueues;
	uint32_t	num_squeues;
	struct pci_nvme_ioreq *ioreqs;
	STAILQ_HEAD(, pci_nvme_ioreq) ioreqs_free; /* free list of ioreqs */
	uint32_t	pending_ios;
	 * Memory mapped Submission and Completion queues
	 * Each array includes both Admin and IO queues
	struct nvme_completion_queue *compl_queues;
	struct nvme_submission_queue *submit_queues;
	/* controller features */
	uint32_t	intr_coales_aggr_time;   /* 0x08: uS to delay intr */
	uint32_t	intr_coales_aggr_thresh; /* 0x08: compl-Q entries */
	uint32_t	async_ev_config;         /* 0x0B: async event config */
	enum nvme_dsm_type dataset_management;
static void pci_nvme_io_partial(struct blockif_req *br, int err);
/* Controller Configuration utils */
#define	NVME_CC_GET_EN(cc) \
	((cc) >> NVME_CC_REG_EN_SHIFT & NVME_CC_REG_EN_MASK)
#define	NVME_CC_GET_CSS(cc) \
	((cc) >> NVME_CC_REG_CSS_SHIFT & NVME_CC_REG_CSS_MASK)
#define	NVME_CC_GET_SHN(cc) \
	((cc) >> NVME_CC_REG_SHN_SHIFT & NVME_CC_REG_SHN_MASK)
#define	NVME_CC_GET_IOSQES(cc) \
	((cc) >> NVME_CC_REG_IOSQES_SHIFT & NVME_CC_REG_IOSQES_MASK)
#define	NVME_CC_GET_IOCQES(cc) \
	((cc) >> NVME_CC_REG_IOCQES_SHIFT & NVME_CC_REG_IOCQES_MASK)
#define	NVME_CC_WRITE_MASK \
	((NVME_CC_REG_EN_MASK << NVME_CC_REG_EN_SHIFT) | \
	 (NVME_CC_REG_IOSQES_MASK << NVME_CC_REG_IOSQES_SHIFT) | \
	 (NVME_CC_REG_IOCQES_MASK << NVME_CC_REG_IOCQES_SHIFT))
#define	NVME_CC_NEN_WRITE_MASK \
	((NVME_CC_REG_CSS_MASK << NVME_CC_REG_CSS_SHIFT) | \
	 (NVME_CC_REG_MPS_MASK << NVME_CC_REG_MPS_SHIFT) | \
	 (NVME_CC_REG_AMS_MASK << NVME_CC_REG_AMS_SHIFT))
/* Controller Status utils */
#define	NVME_CSTS_GET_RDY(sts) \
	((sts) >> NVME_CSTS_REG_RDY_SHIFT & NVME_CSTS_REG_RDY_MASK)
#define	NVME_CSTS_RDY	(1 << NVME_CSTS_REG_RDY_SHIFT)
/* Completion Queue status word utils */
#define	NVME_STATUS_P	(1 << NVME_STATUS_P_SHIFT)
#define	NVME_STATUS_MASK \
	((NVME_STATUS_SCT_MASK << NVME_STATUS_SCT_SHIFT) |\
	 (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT))
#define	NVME_ONCS_DSM	(NVME_CTRLR_DATA_ONCS_DSM_MASK << \
	NVME_CTRLR_DATA_ONCS_DSM_SHIFT)
cpywithpad(char *dst, size_t dst_size, const char *src, char pad)
	len = strnlen(src, dst_size);
	memset(dst, pad, dst_size);
	memcpy(dst, src, len);
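	/*
	 * e.g. cpywithpad(sn, 20, "NVME-4-0", ' ') (hypothetical values)
	 * stores "NVME-4-0" followed by twelve spaces, matching the spec's
	 * space-padded ASCII string format.
	 */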
pci_nvme_status_tc(uint16_t *status, uint16_t type, uint16_t code)
	*status &= ~NVME_STATUS_MASK;
	*status |= (type & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT |
	    (code & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT;
pci_nvme_status_genc(uint16_t *status, uint16_t code)
	pci_nvme_status_tc(status, NVME_SCT_GENERIC, code);
pci_nvme_toggle_phase(uint16_t *status, int prev)
		*status &= ~NVME_STATUS_P;
		*status |= NVME_STATUS_P;
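
/*
 * The Phase Tag bit is how the guest spots new completion entries: the
 * controller inverts the bit each time the completion queue wraps, so an
 * entry whose phase differs from the host's current expectation is fresh.
 */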
pci_nvme_init_ctrldata(struct pci_nvme_softc *sc)
	struct nvme_controller_data *cd = &sc->ctrldata;
	cpywithpad((char *)cd->mn, sizeof(cd->mn), "bhyve-NVMe", ' ');
	cpywithpad((char *)cd->fr, sizeof(cd->fr), "1.0", ' ');
	/* Num of submission commands that we can handle at a time (2^rab) */
	cd->mdts = 9;	/* max data transfer size (2^mdts * CAP.MPSMIN) */
	cd->ver = 0x00010300;
	cd->oacs = 1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT;
	cd->lpa = 0;	/* TODO: support some simple things like SMART */
	cd->elpe = 0;	/* max error log page entries */
	cd->npss = 1;	/* number of power states supported */
	/* Warning Composite Temperature Threshold */
	cd->sqes = (6 << NVME_CTRLR_DATA_SQES_MAX_SHIFT) |
	    (6 << NVME_CTRLR_DATA_SQES_MIN_SHIFT);
	cd->cqes = (4 << NVME_CTRLR_DATA_CQES_MAX_SHIFT) |
	    (4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT);
	cd->nn = 1;	/* number of namespaces */
	switch (sc->dataset_management) {
	case NVME_DATASET_MANAGEMENT_AUTO:
		if (sc->nvstore.deallocate)
			cd->oncs |= NVME_ONCS_DSM;
	case NVME_DATASET_MANAGEMENT_ENABLE:
		cd->oncs |= NVME_ONCS_DSM;
	cd->power_state[0].mp = 10;
 * Calculate the CRC-16 of the given buffer
 * See copyright attribution at top of file
crc16(uint16_t crc, const void *buffer, unsigned int len)
	const unsigned char *cp = buffer;
	/* CRC table for the CRC-16. The poly is 0x8005 (x16 + x15 + x2 + 1). */
	static uint16_t const crc16_table[256] = {
		0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
		0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
		0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
		0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
		0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
		0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
		0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
		0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040,
		0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
		0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441,
		0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41,
		0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840,
		0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41,
		0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40,
		0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640,
		0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041,
		0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240,
		0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
		0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41,
		0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840,
		0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41,
		0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40,
		0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640,
		0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041,
		0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241,
		0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440,
		0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
		0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841,
		0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40,
		0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41,
		0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641,
		0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040
	crc = (((crc >> 8) & 0xffU) ^
	    crc16_table[(crc ^ *cp++) & 0xffU]) & 0x0000ffffU;
pci_nvme_init_nsdata(struct pci_nvme_softc *sc,
    struct nvme_namespace_data *nd, uint32_t nsid,
    struct pci_nvme_blockstore *nvstore)
	/* Get capacity and block size information from backing store */
	nd->nsze = nvstore->size / nvstore->sectsz;
	if (nvstore->type == NVME_STOR_BLOCKIF)
		nvstore->deallocate = blockif_candelete(nvstore->ctx);
	nd->nlbaf = 0; /* NLBAF is a 0's based value (i.e. 1 LBA Format) */
	/* Create an EUI-64 if user did not provide one */
	if (nvstore->eui64 == 0) {
		uint64_t eui64 = nvstore->eui64;
		asprintf(&data, "%s%u%u%u", vmname, sc->nsc_pi->pi_bus,
		    sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
			eui64 = OUI_FREEBSD_NVME_LOW | crc16(0, data, strlen(data));
		nvstore->eui64 = (eui64 << 16) | (nsid & 0xffff);
	be64enc(nd->eui64, nvstore->eui64);
	/* LBA data-sz = 2^lbads */
	nd->lbaf[0] = nvstore->sectsz_bits << NVME_NS_DATA_LBAF_LBADS_SHIFT;
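	/*
	 * e.g. a 512-byte backing sector size has sectsz_bits == 9, so LBA
	 * Format 0 advertises an LBA data size of 2^9 bytes.
	 */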
pci_nvme_init_logpages(struct pci_nvme_softc *sc)
	memset(&sc->err_log, 0, sizeof(sc->err_log));
	memset(&sc->health_log, 0, sizeof(sc->health_log));
	memset(&sc->fw_log, 0, sizeof(sc->fw_log));
pci_nvme_reset_locked(struct pci_nvme_softc *sc)
	DPRINTF(("%s", __func__));
	sc->regs.cap_lo = (ZERO_BASED(sc->max_qentries) & NVME_CAP_LO_REG_MQES_MASK) |
	    (1 << NVME_CAP_LO_REG_CQR_SHIFT) |
	    (60 << NVME_CAP_LO_REG_TO_SHIFT);
	sc->regs.cap_hi = 1 << NVME_CAP_HI_REG_CSS_NVM_SHIFT;
	sc->regs.vs = 0x00010300;	/* NVMe v1.3 */
	sc->num_cqueues = sc->num_squeues = sc->max_queues;
	if (sc->submit_queues != NULL) {
		for (int i = 0; i < sc->num_squeues + 1; i++) {
			 * The Admin Submission Queue is at index 0.
			 * It must not be changed at reset otherwise the
			 * emulation will be out of sync with the guest.
			sc->submit_queues[i].qbase = NULL;
			sc->submit_queues[i].size = 0;
			sc->submit_queues[i].cqid = 0;
			sc->submit_queues[i].tail = 0;
			sc->submit_queues[i].head = 0;
			sc->submit_queues[i].busy = 0;
		sc->submit_queues = calloc(sc->num_squeues + 1,
		    sizeof(struct nvme_submission_queue));
	if (sc->compl_queues != NULL) {
		for (int i = 0; i < sc->num_cqueues + 1; i++) {
			/* See Admin Submission Queue note above */
			sc->compl_queues[i].qbase = NULL;
			sc->compl_queues[i].size = 0;
			sc->compl_queues[i].tail = 0;
			sc->compl_queues[i].head = 0;
		sc->compl_queues = calloc(sc->num_cqueues + 1,
		    sizeof(struct nvme_completion_queue));
		for (int i = 0; i < sc->num_cqueues + 1; i++)
			pthread_mutex_init(&sc->compl_queues[i].mtx, NULL);
pci_nvme_reset(struct pci_nvme_softc *sc)
	pthread_mutex_lock(&sc->mtx);
	pci_nvme_reset_locked(sc);
	pthread_mutex_unlock(&sc->mtx);
pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc)
	DPRINTF(("%s", __func__));
	asqs = (sc->regs.aqa & NVME_AQA_REG_ASQS_MASK) + 1;
	sc->submit_queues[0].size = asqs;
	sc->submit_queues[0].qbase = vm_map_gpa(ctx, sc->regs.asq,
	    sizeof(struct nvme_command) * asqs);
	DPRINTF(("%s mapping Admin-SQ guest 0x%lx, host: %p",
	    __func__, sc->regs.asq, sc->submit_queues[0].qbase));
	acqs = ((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) &
	    NVME_AQA_REG_ACQS_MASK) + 1;
	sc->compl_queues[0].size = acqs;
	sc->compl_queues[0].qbase = vm_map_gpa(ctx, sc->regs.acq,
	    sizeof(struct nvme_completion) * acqs);
	DPRINTF(("%s mapping Admin-CQ guest 0x%lx, host: %p",
	    __func__, sc->regs.acq, sc->compl_queues[0].qbase));
nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *b,
    size_t len, enum nvme_copy_dir dir)
	if (len > (8 * 1024)) {
	/* Copy from the start of prp1 to the end of the physical page */
	bytes = PAGE_SIZE - (prp1 & PAGE_MASK);
	bytes = MIN(bytes, len);
	p = vm_map_gpa(ctx, prp1, bytes);
	if (dir == NVME_COPY_TO_PRP)
	len = MIN(len, PAGE_SIZE);
	p = vm_map_gpa(ctx, prp2, len);
	if (dir == NVME_COPY_TO_PRP)
nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command,
    struct nvme_completion* compl)
	uint16_t qid = command->cdw10 & 0xffff;
	DPRINTF(("%s DELETE_IO_SQ %u", __func__, qid));
	if (qid == 0 || qid > sc->num_squeues) {
		WPRINTF(("%s NOT PERMITTED queue id %u / num_squeues %u",
		    __func__, qid, sc->num_squeues));
		pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
		    NVME_SC_INVALID_QUEUE_IDENTIFIER);
	sc->submit_queues[qid].qbase = NULL;
	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command,
    struct nvme_completion* compl)
	if (command->cdw11 & NVME_CMD_CDW11_PC) {
		uint16_t qid = command->cdw10 & 0xffff;
		struct nvme_submission_queue *nsq;
		if ((qid == 0) || (qid > sc->num_squeues)) {
			WPRINTF(("%s queue index %u > num_squeues %u",
			    __func__, qid, sc->num_squeues));
			pci_nvme_status_tc(&compl->status,
			    NVME_SCT_COMMAND_SPECIFIC,
			    NVME_SC_INVALID_QUEUE_IDENTIFIER);
		nsq = &sc->submit_queues[qid];
		nsq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff);
		nsq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
		    sizeof(struct nvme_command) * (size_t)nsq->size);
		nsq->cqid = (command->cdw11 >> 16) & 0xffff;
		nsq->qpriority = (command->cdw11 >> 1) & 0x03;
		DPRINTF(("%s sq %u size %u gaddr %p cqid %u", __func__,
		    qid, nsq->size, nsq->qbase, nsq->cqid));
		pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
		DPRINTF(("%s completed creating IOSQ qid %u",
		 * Guest sent a non-contiguous submission queue request.
		 * This setting is unsupported by this emulation.
		WPRINTF(("%s unsupported non-contig (list-based) "
		    "create i/o submission queue", __func__));
		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
nvme_opc_delete_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command,
    struct nvme_completion* compl)
	uint16_t qid = command->cdw10 & 0xffff;
	DPRINTF(("%s DELETE_IO_CQ %u", __func__, qid));
	if (qid == 0 || qid > sc->num_cqueues) {
		WPRINTF(("%s queue index %u / num_cqueues %u",
		    __func__, qid, sc->num_cqueues));
		pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
		    NVME_SC_INVALID_QUEUE_IDENTIFIER);
	sc->compl_queues[qid].qbase = NULL;
	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command,
    struct nvme_completion* compl)
	if (command->cdw11 & NVME_CMD_CDW11_PC) {
		uint16_t qid = command->cdw10 & 0xffff;
		struct nvme_completion_queue *ncq;
		if ((qid == 0) || (qid > sc->num_cqueues)) {
			WPRINTF(("%s queue index %u > num_cqueues %u",
			    __func__, qid, sc->num_cqueues));
			pci_nvme_status_tc(&compl->status,
			    NVME_SCT_COMMAND_SPECIFIC,
			    NVME_SC_INVALID_QUEUE_IDENTIFIER);
		ncq = &sc->compl_queues[qid];
		ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1;
		ncq->intr_vec = (command->cdw11 >> 16) & 0xffff;
		ncq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff);
		ncq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx,
		    sizeof(struct nvme_completion) * (size_t)ncq->size);
		pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
		 * Non-contiguous completion queue unsupported.
		WPRINTF(("%s unsupported non-contig (list-based) "
		    "create i/o completion queue",
		/* 0x12 = Invalid Use of Controller Memory Buffer */
		pci_nvme_status_genc(&compl->status, 0x12);
nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command,
    struct nvme_completion* compl)
	uint32_t logsize = (1 + ((command->cdw10 >> 16) & 0xFFF)) * 2;
	uint8_t logpage = command->cdw10 & 0xFF;
	DPRINTF(("%s log page %u len %u", __func__, logpage, logsize));
	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
		    command->prp2, (uint8_t *)&sc->err_log, logsize,
	case NVME_LOG_HEALTH_INFORMATION:
		/* TODO: present some smart info */
		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
		    command->prp2, (uint8_t *)&sc->health_log, logsize,
	case NVME_LOG_FIRMWARE_SLOT:
		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
		    command->prp2, (uint8_t *)&sc->fw_log, logsize,
		WPRINTF(("%s get log page %x command not supported",
		pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
		    NVME_SC_INVALID_LOG_PAGE);
nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command,
    struct nvme_completion* compl)
	DPRINTF(("%s identify 0x%x nsid 0x%x", __func__,
	    command->cdw10 & 0xFF, command->nsid));
	switch (command->cdw10 & 0xFF) {
	case 0x00: /* return Identify Namespace data structure */
		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
		    command->prp2, (uint8_t *)&sc->nsdata, sizeof(sc->nsdata),
	case 0x01: /* return Identify Controller data structure */
		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
		    command->prp2, (uint8_t *)&sc->ctrldata,
		    sizeof(sc->ctrldata),
	case 0x02: /* list of 1024 active NSIDs > CDW1.NSID */
		dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
		    sizeof(uint32_t) * 1024);
		((uint32_t *)dest)[0] = 1;
		((uint32_t *)dest)[1] = 0;
			pci_nvme_status_genc(&compl->status,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
	case 0x03: /* list of NSID structures in CDW1.NSID, 4096 bytes */
		DPRINTF(("%s unsupported identify command requested 0x%x",
		    __func__, command->cdw10 & 0xFF));
		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
nvme_set_feature_queues(struct pci_nvme_softc* sc, struct nvme_command* command,
    struct nvme_completion* compl)
	uint16_t nqr;	/* Number of Queues Requested */
	nqr = command->cdw11 & 0xFFFF;
		WPRINTF(("%s: Illegal NSQR value %#x", __func__, nqr));
		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
	sc->num_squeues = ONE_BASED(nqr);
	if (sc->num_squeues > sc->max_queues) {
		DPRINTF(("NSQR=%u is greater than max %u", sc->num_squeues,
		sc->num_squeues = sc->max_queues;
	nqr = (command->cdw11 >> 16) & 0xFFFF;
		WPRINTF(("%s: Illegal NCQR value %#x", __func__, nqr));
		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
	sc->num_cqueues = ONE_BASED(nqr);
	if (sc->num_cqueues > sc->max_queues) {
		DPRINTF(("NCQR=%u is greater than max %u", sc->num_cqueues,
		sc->num_cqueues = sc->max_queues;
	compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc);
nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command,
    struct nvme_completion* compl)
	int feature = command->cdw10 & 0xFF;
	DPRINTF(("%s feature 0x%x", __func__, feature));
	case NVME_FEAT_ARBITRATION:
		DPRINTF((" arbitration 0x%x", command->cdw11));
	case NVME_FEAT_POWER_MANAGEMENT:
		DPRINTF((" power management 0x%x", command->cdw11));
	case NVME_FEAT_LBA_RANGE_TYPE:
		DPRINTF((" lba range 0x%x", command->cdw11));
	case NVME_FEAT_TEMPERATURE_THRESHOLD:
		DPRINTF((" temperature threshold 0x%x", command->cdw11));
	case NVME_FEAT_ERROR_RECOVERY:
		DPRINTF((" error recovery 0x%x", command->cdw11));
	case NVME_FEAT_VOLATILE_WRITE_CACHE:
		DPRINTF((" volatile write cache 0x%x", command->cdw11));
	case NVME_FEAT_NUMBER_OF_QUEUES:
		nvme_set_feature_queues(sc, command, compl);
	case NVME_FEAT_INTERRUPT_COALESCING:
		DPRINTF((" interrupt coalescing 0x%x", command->cdw11));
		sc->intr_coales_aggr_time = ((command->cdw11 >> 8) & 0xFF)*100;
		sc->intr_coales_aggr_thresh = command->cdw11 & 0xFF;
	case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION:
		iv = command->cdw11 & 0xFFFF;
		DPRINTF((" interrupt vector configuration 0x%x",
		for (uint32_t i = 0; i < sc->num_cqueues + 1; i++) {
			if (sc->compl_queues[i].intr_vec == iv) {
				if (command->cdw11 & (1 << 16))
					sc->compl_queues[i].intr_en |=
					sc->compl_queues[i].intr_en &=
	case NVME_FEAT_WRITE_ATOMICITY:
		DPRINTF((" write atomicity 0x%x", command->cdw11));
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
		DPRINTF((" async event configuration 0x%x",
		sc->async_ev_config = command->cdw11;
	case NVME_FEAT_SOFTWARE_PROGRESS_MARKER:
		DPRINTF((" software progress marker 0x%x",
		DPRINTF((" autonomous power state transition 0x%x",
		WPRINTF(("%s invalid feature", __func__));
		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command,
    struct nvme_completion* compl)
	int feature = command->cdw10 & 0xFF;
	DPRINTF(("%s feature 0x%x", __func__, feature));
	case NVME_FEAT_ARBITRATION:
		DPRINTF((" arbitration"));
	case NVME_FEAT_POWER_MANAGEMENT:
		DPRINTF((" power management"));
	case NVME_FEAT_LBA_RANGE_TYPE:
		DPRINTF((" lba range"));
	case NVME_FEAT_TEMPERATURE_THRESHOLD:
		DPRINTF((" temperature threshold"));
		switch ((command->cdw11 >> 20) & 0x3) {
			/* Over temp threshold */
			compl->cdw0 = 0xFFFF;
			/* Under temp threshold */
			WPRINTF((" invalid threshold type select"));
			pci_nvme_status_genc(&compl->status,
			    NVME_SC_INVALID_FIELD);
	case NVME_FEAT_ERROR_RECOVERY:
		DPRINTF((" error recovery"));
	case NVME_FEAT_VOLATILE_WRITE_CACHE:
		DPRINTF((" volatile write cache"));
	case NVME_FEAT_NUMBER_OF_QUEUES:
		compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc);
		DPRINTF((" number of queues (submit %u, completion %u)",
		    compl->cdw0 & 0xFFFF,
		    (compl->cdw0 >> 16) & 0xFFFF));
	case NVME_FEAT_INTERRUPT_COALESCING:
		DPRINTF((" interrupt coalescing"));
	case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION:
		DPRINTF((" interrupt vector configuration"));
	case NVME_FEAT_WRITE_ATOMICITY:
		DPRINTF((" write atomicity"));
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
		DPRINTF((" async event configuration"));
		sc->async_ev_config = command->cdw11;
	case NVME_FEAT_SOFTWARE_PROGRESS_MARKER:
		DPRINTF((" software progress marker"));
		DPRINTF((" autonomous power state transition"));
		WPRINTF(("%s invalid feature 0x%x", __func__, feature));
		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
nvme_opc_abort(struct pci_nvme_softc* sc, struct nvme_command* command,
    struct nvme_completion* compl)
	DPRINTF(("%s submission queue %u, command ID 0x%x", __func__,
	    command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF));
	/* TODO: search for the command ID and abort it */
	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
nvme_opc_async_event_req(struct pci_nvme_softc* sc,
    struct nvme_command* command, struct nvme_completion* compl)
	DPRINTF(("%s async event request 0x%x", __func__, command->cdw11));
	 * TODO: raise events when they happen based on the Set Features cmd.
	 * These events happen async, so only set completion successful if
	 * there is an event reflective of the request to get event.
	pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
	    NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value)
	struct nvme_completion compl;
	struct nvme_command *cmd;
	struct nvme_submission_queue *sq;
	struct nvme_completion_queue *cq;
	DPRINTF(("%s index %u", __func__, (uint32_t)value));
	sq = &sc->submit_queues[0];
	cq = &sc->compl_queues[0];
	sqhead = atomic_load_acq_short(&sq->head);
	if (atomic_testandset_int(&sq->busy, 1)) {
		DPRINTF(("%s SQ busy, head %u, tail %u",
		    __func__, sqhead, sq->tail));
	DPRINTF(("sqhead %u, tail %u", sqhead, sq->tail));
	while (sqhead != atomic_load_acq_short(&sq->tail)) {
		cmd = &(sq->qbase)[sqhead];
		case NVME_OPC_DELETE_IO_SQ:
			DPRINTF(("%s command DELETE_IO_SQ", __func__));
			nvme_opc_delete_io_sq(sc, cmd, &compl);
		case NVME_OPC_CREATE_IO_SQ:
			DPRINTF(("%s command CREATE_IO_SQ", __func__));
			nvme_opc_create_io_sq(sc, cmd, &compl);
		case NVME_OPC_DELETE_IO_CQ:
			DPRINTF(("%s command DELETE_IO_CQ", __func__));
			nvme_opc_delete_io_cq(sc, cmd, &compl);
		case NVME_OPC_CREATE_IO_CQ:
			DPRINTF(("%s command CREATE_IO_CQ", __func__));
			nvme_opc_create_io_cq(sc, cmd, &compl);
		case NVME_OPC_GET_LOG_PAGE:
			DPRINTF(("%s command GET_LOG_PAGE", __func__));
			nvme_opc_get_log_page(sc, cmd, &compl);
		case NVME_OPC_IDENTIFY:
			DPRINTF(("%s command IDENTIFY", __func__));
			nvme_opc_identify(sc, cmd, &compl);
		case NVME_OPC_ABORT:
			DPRINTF(("%s command ABORT", __func__));
			nvme_opc_abort(sc, cmd, &compl);
		case NVME_OPC_SET_FEATURES:
			DPRINTF(("%s command SET_FEATURES", __func__));
			nvme_opc_set_features(sc, cmd, &compl);
		case NVME_OPC_GET_FEATURES:
			DPRINTF(("%s command GET_FEATURES", __func__));
			nvme_opc_get_features(sc, cmd, &compl);
		case NVME_OPC_ASYNC_EVENT_REQUEST:
			DPRINTF(("%s command ASYNC_EVENT_REQ", __func__));
			/* XXX don't care, unhandled for now
			   nvme_opc_async_event_req(sc, cmd, &compl);
			compl.status = NVME_NO_STATUS;
			WPRINTF(("0x%x command is not implemented",
			pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE);
		sqhead = (sqhead + 1) % sq->size;
		if (NVME_COMPLETION_VALID(compl)) {
			struct nvme_completion *cp;
			cp = &(cq->qbase)[cq->tail];
			cp->cdw0 = compl.cdw0;
			phase = NVME_STATUS_GET_P(cp->status);
			cp->status = compl.status;
			pci_nvme_toggle_phase(&cp->status, phase);
			cq->tail = (cq->tail + 1) % cq->size;
	DPRINTF(("setting sqhead %u", sqhead));
	atomic_store_short(&sq->head, sqhead);
	atomic_store_int(&sq->busy, 0);
	if (cq->head != cq->tail)
		pci_generate_msix(sc->nsc_pi, 0);
pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req,
	uint64_t gpaddr, size_t size, int do_write, uint64_t lba)
	/* concatenate contig block-iovs to minimize number of iovs */
	if ((req->prev_gpaddr + req->prev_size) == gpaddr) {
		iovidx = req->io_req.br_iovcnt - 1;
		req->io_req.br_iov[iovidx].iov_base =
		    paddr_guest2host(req->sc->nsc_pi->pi_vmctx,
		    req->prev_gpaddr, size);
		req->prev_size += size;
		req->io_req.br_resid += size;
		req->io_req.br_iov[iovidx].iov_len = req->prev_size;
		pthread_mutex_lock(&req->mtx);
		iovidx = req->io_req.br_iovcnt;
		if (iovidx == NVME_MAX_BLOCKIOVS) {
			DPRINTF(("large I/O, doing partial req"));
			req->io_req.br_iovcnt = 0;
			req->io_req.br_callback = pci_nvme_io_partial;
				err = blockif_read(sc->nvstore.ctx,
				err = blockif_write(sc->nvstore.ctx,
			/* wait until req completes before continuing */
				pthread_cond_wait(&req->cv, &req->mtx);
		req->io_req.br_offset = lba;
		req->io_req.br_resid = 0;
		req->io_req.br_param = req;
		req->io_req.br_iov[iovidx].iov_base =
		    paddr_guest2host(req->sc->nsc_pi->pi_vmctx,
		req->io_req.br_iov[iovidx].iov_len = size;
		req->prev_gpaddr = gpaddr;
		req->prev_size = size;
		req->io_req.br_resid += size;
		req->io_req.br_iovcnt++;
	pthread_mutex_unlock(&req->mtx);
	/* RAM buffer: read/write directly */
	void *p = sc->nvstore.ctx;
	if ((lba + size) > sc->nvstore.size) {
		WPRINTF(("%s write would overflow RAM", __func__));
	p = (void *)((uintptr_t)p + (uintptr_t)lba);
	gptr = paddr_guest2host(sc->nsc_pi->pi_vmctx, gpaddr, size);
		memcpy(p, gptr, size);
		memcpy(gptr, p, size);
pci_nvme_set_completion(struct pci_nvme_softc *sc,
	struct nvme_submission_queue *sq, int sqid, uint16_t cid,
	uint32_t cdw0, uint16_t status, int ignore_busy)
	struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid];
	struct nvme_completion *compl;
	DPRINTF(("%s sqid %d cqid %u cid %u status: 0x%x 0x%x",
	    __func__, sqid, sq->cqid, cid, NVME_STATUS_GET_SCT(status),
	    NVME_STATUS_GET_SC(status)));
	pthread_mutex_lock(&cq->mtx);
	assert(cq->qbase != NULL);
	compl = &cq->qbase[cq->tail];
	compl->sqhd = atomic_load_acq_short(&sq->head);
	phase = NVME_STATUS_GET_P(compl->status);
	compl->status = status;
	pci_nvme_toggle_phase(&compl->status, phase);
	cq->tail = (cq->tail + 1) % cq->size;
	pthread_mutex_unlock(&cq->mtx);
	if (cq->head != cq->tail) {
		if (cq->intr_en & NVME_CQ_INTEN) {
			pci_generate_msix(sc->nsc_pi, cq->intr_vec);
			DPRINTF(("%s: CQ%u interrupt disabled\n",
			    __func__, sq->cqid));
pci_nvme_release_ioreq(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req)
	req->nvme_sq = NULL;
	pthread_mutex_lock(&sc->mtx);
	STAILQ_INSERT_TAIL(&sc->ioreqs_free, req, link);
	/* when no more IO pending, can set to ready if device reset/enabled */
	if (sc->pending_ios == 0 &&
	    NVME_CC_GET_EN(sc->regs.cc) && !(NVME_CSTS_GET_RDY(sc->regs.csts)))
		sc->regs.csts |= NVME_CSTS_RDY;
	pthread_mutex_unlock(&sc->mtx);
	sem_post(&sc->iosemlock);
static struct pci_nvme_ioreq *
pci_nvme_get_ioreq(struct pci_nvme_softc *sc)
	struct pci_nvme_ioreq *req = NULL;
	sem_wait(&sc->iosemlock);
	pthread_mutex_lock(&sc->mtx);
	req = STAILQ_FIRST(&sc->ioreqs_free);
	assert(req != NULL);
	STAILQ_REMOVE_HEAD(&sc->ioreqs_free, link);
	pthread_mutex_unlock(&sc->mtx);
	req->io_req.br_iovcnt = 0;
	req->io_req.br_offset = 0;
	req->io_req.br_resid = 0;
	req->io_req.br_param = req;
	req->prev_gpaddr = 0;
pci_nvme_io_done(struct blockif_req *br, int err)
	struct pci_nvme_ioreq *req = br->br_param;
	struct nvme_submission_queue *sq = req->nvme_sq;
	uint16_t code, status;
	DPRINTF(("%s error %d %s", __func__, err, strerror(err)));
	/* TODO return correct error */
	code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS;
	pci_nvme_status_genc(&status, code);
	pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, 0, status, 0);
	pci_nvme_release_ioreq(req->sc, req);
pci_nvme_io_partial(struct blockif_req *br, int err)
	struct pci_nvme_ioreq *req = br->br_param;
	DPRINTF(("%s error %d %s", __func__, err, strerror(err)));
	pthread_cond_signal(&req->cv);
pci_nvme_dealloc_sm(struct blockif_req *br, int err)
	struct pci_nvme_ioreq *req = br->br_param;
	struct pci_nvme_softc *sc = req->sc;
		pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR);
	} else if ((req->prev_gpaddr + 1) == (req->prev_size)) {
		pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
		struct iovec *iov = req->io_req.br_iov;
		iov += req->prev_gpaddr;
		/* The iov_* values already include the sector size */
		req->io_req.br_offset = (off_t)iov->iov_base;
		req->io_req.br_resid = iov->iov_len;
		if (blockif_delete(sc->nvstore.ctx, &req->io_req)) {
			pci_nvme_status_genc(&status,
			    NVME_SC_INTERNAL_DEVICE_ERROR);
		pci_nvme_set_completion(sc, req->nvme_sq, req->sqid,
		    req->cid, 0, status, 0);
		pci_nvme_release_ioreq(sc, req);
nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
    struct nvme_command *cmd,
    struct pci_nvme_blockstore *nvstore,
    struct pci_nvme_ioreq *req,
	if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) {
		pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE);
	if (cmd->cdw11 & NVME_DSM_ATTR_DEALLOCATE) {
		struct nvme_dsm_range *range;
		int sectsz = sc->nvstore.sectsz;
		 * DSM calls are advisory only, and compliant controllers
		 * may choose to take no actions (i.e. return Success).
		if (!nvstore->deallocate) {
			pci_nvme_status_genc(status, NVME_SC_SUCCESS);
			pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
		/* copy locally because a range entry could straddle PRPs */
		range = calloc(1, NVME_MAX_DSM_TRIM);
		if (range == NULL) {
			pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
		    (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP);
		req->opc = cmd->opc;
		req->cid = cmd->cid;
		req->nsid = cmd->nsid;
		 * If the request is for more than a single range, store
		 * the ranges in the br_iov. Optimize for the common case
		 * of a single range.
		 * Note that NVMe Number of Ranges is a zero based value
		nr = cmd->cdw10 & 0xff;
		req->io_req.br_iovcnt = 0;
		req->io_req.br_offset = range[0].starting_lba * sectsz;
		req->io_req.br_resid = range[0].length * sectsz;
			req->io_req.br_callback = pci_nvme_io_done;
			struct iovec *iov = req->io_req.br_iov;
			for (r = 0; r <= nr; r++) {
				iov[r].iov_base = (void *)(range[r].starting_lba * sectsz);
				iov[r].iov_len = range[r].length * sectsz;
			req->io_req.br_callback = pci_nvme_dealloc_sm;
			 * Use prev_gpaddr to track the current entry and
			 * prev_size to track the number of entries
			req->prev_gpaddr = 0;
		err = blockif_delete(nvstore->ctx, &req->io_req);
			pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx)
	struct nvme_submission_queue *sq;
	/* handle all submissions up to sq->tail index */
	sq = &sc->submit_queues[idx];
	if (atomic_testandset_int(&sq->busy, 1)) {
		DPRINTF(("%s sqid %u busy", __func__, idx));
	sqhead = atomic_load_acq_short(&sq->head);
	DPRINTF(("nvme_handle_io qid %u head %u tail %u cmdlist %p",
	    idx, sqhead, sq->tail, sq->qbase));
	while (sqhead != atomic_load_acq_short(&sq->tail)) {
		struct nvme_command *cmd;
		struct pci_nvme_ioreq *req = NULL;
		uint64_t nblocks, bytes, size, cpsz;
		/* TODO: support scatter gather list handling */
		cmd = &sq->qbase[sqhead];
		sqhead = (sqhead + 1) % sq->size;
		lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
		if (cmd->opc == NVME_OPC_FLUSH) {
			pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
		} else if (cmd->opc == 0x08) {
			/* TODO: write zeroes */
			WPRINTF(("%s write zeroes lba 0x%lx blocks %u",
			    __func__, lba, cmd->cdw12 & 0xFFFF));
			pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
		if (sc->nvstore.type == NVME_STOR_BLOCKIF) {
			req = pci_nvme_get_ioreq(sc);
		if (cmd->opc == NVME_OPC_DATASET_MANAGEMENT) {
			if (nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, req,
				pci_nvme_set_completion(sc, sq, idx, cmd->cid,
				pci_nvme_release_ioreq(sc, req);
		nblocks = (cmd->cdw12 & 0xFFFF) + 1;
		bytes = nblocks * sc->nvstore.sectsz;
		 * If data starts mid-page and flows into the next page, then
		 * increase page count
		DPRINTF(("[h%u:t%u:n%u] %s starting LBA 0x%lx blocks %lu "
		    sqhead==0 ? sq->size-1 : sqhead-1, sq->tail, sq->size,
		    cmd->opc == NVME_OPC_WRITE ?
		    lba, nblocks, bytes));
		cmd->prp1 &= ~(0x03UL);
		cmd->prp2 &= ~(0x03UL);
		DPRINTF((" prp1 0x%lx prp2 0x%lx", cmd->prp1, cmd->prp2));
		lba *= sc->nvstore.sectsz;
		cpsz = PAGE_SIZE - (cmd->prp1 % PAGE_SIZE);
			req->io_req.br_offset = ((uint64_t)cmd->cdw11 << 32) |
			req->opc = cmd->opc;
			req->cid = cmd->cid;
			req->nsid = cmd->nsid;
		err = pci_nvme_append_iov_req(sc, req, cmd->prp1, cpsz,
		    cmd->opc == NVME_OPC_WRITE, lba);
		if (size <= PAGE_SIZE) {
			/* prp2 is second (and final) page in transfer */
			err = pci_nvme_append_iov_req(sc, req, cmd->prp2,
			    cmd->opc == NVME_OPC_WRITE,
			/* prp2 is pointer to a physical region page list */
			prp_list = paddr_guest2host(sc->nsc_pi->pi_vmctx,
			    cmd->prp2, PAGE_SIZE);
				cpsz = MIN(size, PAGE_SIZE);
				 * Move to linked physical region page list
				if (i == (NVME_PRP2_ITEMS-1) &&
					assert((prp_list[i] & (PAGE_SIZE-1)) == 0);
					prp_list = paddr_guest2host(
					    sc->nsc_pi->pi_vmctx,
					    prp_list[i], PAGE_SIZE);
				if (prp_list[i] == 0) {
					WPRINTF(("PRP2[%d] = 0 !!!", i));
				err = pci_nvme_append_iov_req(sc, req,
				    cmd->opc == NVME_OPC_WRITE, lba);
		if (sc->nvstore.type == NVME_STOR_RAM) {
			uint16_t code, status;
			code = err ? NVME_SC_LBA_OUT_OF_RANGE :
			pci_nvme_status_genc(&status, code);
			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
		req->io_req.br_callback = pci_nvme_io_done;
			err = blockif_read(sc->nvstore.ctx, &req->io_req);
		case NVME_OPC_WRITE:
			err = blockif_write(sc->nvstore.ctx, &req->io_req);
			WPRINTF(("%s unhandled io command 0x%x",
			    __func__, cmd->opc));
			pci_nvme_status_genc(&status,
			    NVME_SC_DATA_TRANSFER_ERROR);
			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
			pci_nvme_release_ioreq(sc, req);
	atomic_store_short(&sq->head, sqhead);
	atomic_store_int(&sq->busy, 0);
pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc* sc,
	uint64_t idx, int is_sq, uint64_t value)
	DPRINTF(("nvme doorbell %lu, %s, val 0x%lx",
	    idx, is_sq ? "SQ" : "CQ", value & 0xFFFF));
		atomic_store_short(&sc->submit_queues[idx].tail,
			pci_nvme_handle_admin_cmd(sc, value);
			/* submission queue; handle new entries in SQ */
			if (idx > sc->num_squeues) {
				WPRINTF(("%s SQ index %lu overflow from "
				    __func__, idx, sc->num_squeues));
			pci_nvme_handle_io_cmd(sc, (uint16_t)idx);
		if (idx > sc->num_cqueues) {
			WPRINTF(("%s queue index %lu overflow from "
			    __func__, idx, sc->num_cqueues));
		sc->compl_queues[idx].head = (uint16_t)value;
pci_nvme_bar0_reg_dumps(const char *func, uint64_t offset, int iswrite)
	const char *s = iswrite ? "WRITE" : "READ";
	case NVME_CR_CAP_LOW:
		DPRINTF(("%s %s NVME_CR_CAP_LOW", func, s));
	case NVME_CR_CAP_HI:
		DPRINTF(("%s %s NVME_CR_CAP_HI", func, s));
		DPRINTF(("%s %s NVME_CR_VS", func, s));
		DPRINTF(("%s %s NVME_CR_INTMS", func, s));
		DPRINTF(("%s %s NVME_CR_INTMC", func, s));
		DPRINTF(("%s %s NVME_CR_CC", func, s));
		DPRINTF(("%s %s NVME_CR_CSTS", func, s));
		DPRINTF(("%s %s NVME_CR_NSSR", func, s));
		DPRINTF(("%s %s NVME_CR_AQA", func, s));
	case NVME_CR_ASQ_LOW:
		DPRINTF(("%s %s NVME_CR_ASQ_LOW", func, s));
	case NVME_CR_ASQ_HI:
		DPRINTF(("%s %s NVME_CR_ASQ_HI", func, s));
	case NVME_CR_ACQ_LOW:
		DPRINTF(("%s %s NVME_CR_ACQ_LOW", func, s));
	case NVME_CR_ACQ_HI:
		DPRINTF(("%s %s NVME_CR_ACQ_HI", func, s));
		DPRINTF(("unknown nvme bar-0 offset 0x%lx", offset));
pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc,
	uint64_t offset, int size, uint64_t value)
	if (offset >= NVME_DOORBELL_OFFSET) {
		uint64_t belloffset = offset - NVME_DOORBELL_OFFSET;
		uint64_t idx = belloffset / 8; /* door bell size = 2*int */
		int is_sq = (belloffset % 8) < 4;
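
		/*
		 * e.g. with the doorbell region at offset 0x1000, a write to
		 * 0x1008 has belloffset 0x8, so idx is 1 and is_sq is true
		 * (SQ1 tail doorbell), while 0x100c is the CQ1 head doorbell.
		 */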
		if (belloffset > ((sc->max_queues+1) * 8 - 4)) {
			WPRINTF(("guest attempted an overflow write offset "
			    "0x%lx, val 0x%lx in %s",
			    offset, value, __func__));
		pci_nvme_handle_doorbell(ctx, sc, idx, is_sq, value);
	DPRINTF(("nvme-write offset 0x%lx, size %d, value 0x%lx",
	    offset, size, value));
		WPRINTF(("guest wrote invalid size %d (offset 0x%lx, "
		    "val 0x%lx) to bar0 in %s",
		    size, offset, value, __func__));
		/* TODO: shutdown device */
	pci_nvme_bar0_reg_dumps(__func__, offset, 1);
	pthread_mutex_lock(&sc->mtx);
	case NVME_CR_CAP_LOW:
	case NVME_CR_CAP_HI:
		/* MSI-X, so ignore */
		/* MSI-X, so ignore */
		ccreg = (uint32_t)value;
		DPRINTF(("%s NVME_CR_CC en %x css %x shn %x iosqes %u "
		    NVME_CC_GET_EN(ccreg), NVME_CC_GET_CSS(ccreg),
		    NVME_CC_GET_SHN(ccreg), NVME_CC_GET_IOSQES(ccreg),
		    NVME_CC_GET_IOCQES(ccreg)));
		if (NVME_CC_GET_SHN(ccreg)) {
			/* perform shutdown - flush out data to backend */
			sc->regs.csts &= ~(NVME_CSTS_REG_SHST_MASK <<
			    NVME_CSTS_REG_SHST_SHIFT);
			sc->regs.csts |= NVME_SHST_COMPLETE <<
			    NVME_CSTS_REG_SHST_SHIFT;
		if (NVME_CC_GET_EN(ccreg) != NVME_CC_GET_EN(sc->regs.cc)) {
			if (NVME_CC_GET_EN(ccreg) == 0)
				/* transition 1->0 causes controller reset */
				pci_nvme_reset_locked(sc);
				pci_nvme_init_controller(ctx, sc);
		/* Insert the iocqes, iosqes and en bits from the write */
		sc->regs.cc &= ~NVME_CC_WRITE_MASK;
		sc->regs.cc |= ccreg & NVME_CC_WRITE_MASK;
		if (NVME_CC_GET_EN(ccreg) == 0) {
			/* Insert the ams, mps and css bit fields */
			sc->regs.cc &= ~NVME_CC_NEN_WRITE_MASK;
			sc->regs.cc |= ccreg & NVME_CC_NEN_WRITE_MASK;
			sc->regs.csts &= ~NVME_CSTS_RDY;
		} else if (sc->pending_ios == 0) {
			sc->regs.csts |= NVME_CSTS_RDY;
		/* ignore writes; don't support subsystem reset */
		sc->regs.aqa = (uint32_t)value;
	case NVME_CR_ASQ_LOW:
		sc->regs.asq = (sc->regs.asq & (0xFFFFFFFF00000000)) |
		    (0xFFFFF000 & value);
	case NVME_CR_ASQ_HI:
		sc->regs.asq = (sc->regs.asq & (0x00000000FFFFFFFF)) |
	case NVME_CR_ACQ_LOW:
		sc->regs.acq = (sc->regs.acq & (0xFFFFFFFF00000000)) |
		    (0xFFFFF000 & value);
	case NVME_CR_ACQ_HI:
		sc->regs.acq = (sc->regs.acq & (0x00000000FFFFFFFF)) |
		DPRINTF(("%s unknown offset 0x%lx, value 0x%lx size %d",
		    __func__, offset, value, size));
	pthread_mutex_unlock(&sc->mtx);
pci_nvme_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
    int baridx, uint64_t offset, int size, uint64_t value)
	struct pci_nvme_softc* sc = pi->pi_arg;
	if (baridx == pci_msix_table_bar(pi) ||
	    baridx == pci_msix_pba_bar(pi)) {
		DPRINTF(("nvme-write baridx %d, msix: off 0x%lx, size %d, "
		    " value 0x%lx", baridx, offset, size, value));
		pci_emul_msix_twrite(pi, offset, size, value);
		pci_nvme_write_bar_0(ctx, sc, offset, size, value);
		DPRINTF(("%s unknown baridx %d, val 0x%lx",
		    __func__, baridx, value));
static uint64_t pci_nvme_read_bar_0(struct pci_nvme_softc* sc,
	uint64_t offset, int size)
	pci_nvme_bar0_reg_dumps(__func__, offset, 0);
	if (offset < NVME_DOORBELL_OFFSET) {
		void *p = &(sc->regs);
		pthread_mutex_lock(&sc->mtx);
		memcpy(&value, (void *)((uintptr_t)p + offset), size);
		pthread_mutex_unlock(&sc->mtx);
		WPRINTF(("pci_nvme: read invalid offset %ld", offset));
		value &= 0xFFFFFFFF;
	DPRINTF((" nvme-read offset 0x%lx, size %d -> value 0x%x",
	    offset, size, (uint32_t)value));
pci_nvme_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
    uint64_t offset, int size)
	struct pci_nvme_softc* sc = pi->pi_arg;
	if (baridx == pci_msix_table_bar(pi) ||
	    baridx == pci_msix_pba_bar(pi)) {
		DPRINTF(("nvme-read bar: %d, msix: regoff 0x%lx, size %d",
		    baridx, offset, size));
		return pci_emul_msix_tread(pi, offset, size);
		return pci_nvme_read_bar_0(sc, offset, size);
		DPRINTF(("unknown bar %d, 0x%lx", baridx, offset));
pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *opts)
	char bident[sizeof("XX:X:X")];
	char *uopt, *xopts, *config;
	sc->max_queues = NVME_QUEUES;
	sc->max_qentries = NVME_MAX_QENTRIES;
	sc->ioslots = NVME_IOSLOTS;
	sc->num_squeues = sc->max_queues;
	sc->num_cqueues = sc->max_queues;
	sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
	uopt = strdup(opts);
	snprintf(sc->ctrldata.sn, sizeof(sc->ctrldata.sn),
	    "NVME-%d-%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
	for (xopts = strtok(uopt, ",");
	     xopts = strtok(NULL, ",")) {
		if ((config = strchr(xopts, '=')) != NULL)
		if (!strcmp("maxq", xopts)) {
			sc->max_queues = atoi(config);
		} else if (!strcmp("qsz", xopts)) {
			sc->max_qentries = atoi(config);
		} else if (!strcmp("ioslots", xopts)) {
			sc->ioslots = atoi(config);
		} else if (!strcmp("sectsz", xopts)) {
			sectsz = atoi(config);
		} else if (!strcmp("ser", xopts)) {
			 * This field indicates the Product Serial Number in
			 * 7-bit ASCII; unused bytes should be space characters.
			cpywithpad((char *)sc->ctrldata.sn,
			    sizeof(sc->ctrldata.sn), config, ' ');
		} else if (!strcmp("ram", xopts)) {
			uint64_t sz = strtoull(&xopts[4], NULL, 10);
			sc->nvstore.type = NVME_STOR_RAM;
			sc->nvstore.size = sz * 1024 * 1024;
			sc->nvstore.ctx = calloc(1, sc->nvstore.size);
			sc->nvstore.sectsz = 4096;
			sc->nvstore.sectsz_bits = 12;
			if (sc->nvstore.ctx == NULL) {
				perror("Unable to allocate RAM");
		} else if (!strcmp("eui64", xopts)) {
			sc->nvstore.eui64 = htobe64(strtoull(config, NULL, 0));
		} else if (!strcmp("dsm", xopts)) {
			if (!strcmp("auto", config))
				sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
			else if (!strcmp("enable", config))
				sc->dataset_management = NVME_DATASET_MANAGEMENT_ENABLE;
			else if (!strcmp("disable", config))
				sc->dataset_management = NVME_DATASET_MANAGEMENT_DISABLE;
		} else if (optidx == 0) {
			snprintf(bident, sizeof(bident), "%d:%d",
			    sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
			sc->nvstore.ctx = blockif_open(xopts, bident);
			if (sc->nvstore.ctx == NULL) {
				perror("Could not open backing file");
			sc->nvstore.type = NVME_STOR_BLOCKIF;
			sc->nvstore.size = blockif_size(sc->nvstore.ctx);
			EPRINTLN("Invalid option %s", xopts);
	if (sc->nvstore.ctx == NULL || sc->nvstore.size == 0) {
		EPRINTLN("backing store not specified");
	if (sectsz == 512 || sectsz == 4096 || sectsz == 8192)
		sc->nvstore.sectsz = sectsz;
	else if (sc->nvstore.type != NVME_STOR_RAM)
		sc->nvstore.sectsz = blockif_sectsz(sc->nvstore.ctx);
	for (sc->nvstore.sectsz_bits = 9;
	     (1 << sc->nvstore.sectsz_bits) < sc->nvstore.sectsz;
	     sc->nvstore.sectsz_bits++);
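	/*
	 * e.g. sectsz 512 leaves the initial sectsz_bits of 9, while
	 * sectsz 4096 advances it to 12 (1 << 12 == 4096).
	 */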
	if (sc->max_queues <= 0 || sc->max_queues > NVME_QUEUES)
		sc->max_queues = NVME_QUEUES;
	if (sc->max_qentries <= 0) {
		EPRINTLN("Invalid qsz option");
	if (sc->ioslots <= 0) {
		EPRINTLN("Invalid ioslots option");
pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
	struct pci_nvme_softc *sc;
	uint32_t pci_membar_sz;
	sc = calloc(1, sizeof(struct pci_nvme_softc));
	error = pci_nvme_parse_opts(sc, opts);
	STAILQ_INIT(&sc->ioreqs_free);
	sc->ioreqs = calloc(sc->ioslots, sizeof(struct pci_nvme_ioreq));
	for (int i = 0; i < sc->ioslots; i++) {
		STAILQ_INSERT_TAIL(&sc->ioreqs_free, &sc->ioreqs[i], link);
		pthread_mutex_init(&sc->ioreqs[i].mtx, NULL);
		pthread_cond_init(&sc->ioreqs[i].cv, NULL);
	sc->intr_coales_aggr_thresh = 1;
	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0A0A);
	pci_set_cfgdata16(pi, PCIR_VENDOR, 0xFB5D);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_NVM);
	pci_set_cfgdata8(pi, PCIR_PROGIF,
	    PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0);
	 * Allocate size of NVMe registers + doorbell space for all queues.
	 * The specification requires a minimum memory I/O window size of 16K.
	 * The Windows driver will refuse to start a device with a smaller
	pci_membar_sz = sizeof(struct nvme_registers) +
	    2 * sizeof(uint32_t) * (sc->max_queues + 1);
	pci_membar_sz = MAX(pci_membar_sz, NVME_MMIO_SPACE_MIN);
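	/*
	 * With the default maxq of 16, the computed value (the register
	 * file plus two doorbell dwords for each of the 17 queues) comes
	 * to well under 16 KiB, so NVME_MMIO_SPACE_MIN sets the BAR size.
	 */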
	DPRINTF(("nvme membar size: %u", pci_membar_sz));
	error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM64, pci_membar_sz);
		WPRINTF(("%s pci alloc mem bar failed", __func__));
	error = pci_emul_add_msixcap(pi, sc->max_queues + 1, NVME_MSIX_BAR);
		WPRINTF(("%s pci add msixcap failed", __func__));
	error = pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_INT_EP);
		WPRINTF(("%s pci add Express capability failed", __func__));
	pthread_mutex_init(&sc->mtx, NULL);
	sem_init(&sc->iosemlock, 0, sc->ioslots);
	 * Controller data depends on Namespace data so initialize Namespace
	pci_nvme_init_nsdata(sc, &sc->nsdata, 1, &sc->nvstore);
	pci_nvme_init_ctrldata(sc);
	pci_nvme_init_logpages(sc);
	pci_lintr_request(pi);
struct pci_devemu pci_de_nvme = {
	.pe_init =	pci_nvme_init,
	.pe_barwrite =	pci_nvme_write,
	.pe_barread =	pci_nvme_read
PCI_EMUL_SET(pci_de_nvme);