2 * Copyright (C) 2012 Intel Corporation
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include <sys/types.h>
/*
 * ioctl commands exposed by the NVMe driver's character devices.
 * All use ioctl group 'n'; note that code 3 is unused (the numbering
 * jumps from 2 to 4).
 */
/* Fetch the controller's IDENTIFY CONTROLLER data structure. */
36 #define NVME_IDENTIFY_CONTROLLER _IOR('n', 0, struct nvme_controller_data)
/* Fetch a namespace's IDENTIFY NAMESPACE data structure. */
37 #define NVME_IDENTIFY_NAMESPACE _IOR('n', 1, struct nvme_namespace_data)
/* Run a driver-level I/O test; parameters/results in struct nvme_io_test. */
38 #define NVME_IO_TEST _IOWR('n', 2, struct nvme_io_test)
/* Same test driven through the bio path (see NVME_TEST_FLAG_REFTHREAD). */
39 #define NVME_BIO_TEST _IOWR('n', 4, struct nvme_io_test)
/* Reset the controller; carries no argument payload. */
40 #define NVME_RESET_CONTROLLER _IO('n', 5)
/* Submit an arbitrary passthrough command; see struct nvme_pt_command. */
41 #define NVME_PASSTHROUGH_CMD _IOWR('n', 6, struct nvme_pt_command)
44 * Use to mark a command to apply to all namespaces, or to retrieve global
47 #define NVME_GLOBAL_NAMESPACE_TAG ((uint32_t)0xFFFFFFFF)
49 union cap_lo_register {
52 /** maximum queue entries supported */
55 /** contiguous queues required */
58 /** arbitration mechanism supported */
61 uint32_t reserved1 : 5;
68 union cap_hi_register {
71 /** doorbell stride */
74 uint32_t reserved3 : 1;
76 /** command sets supported */
79 uint32_t css_reserved : 3;
80 uint32_t reserved2 : 7;
82 /** memory page size minimum */
85 /** memory page size maximum */
88 uint32_t reserved1 : 8;
98 uint32_t reserved1 : 3;
100 /** i/o command set selected */
103 /** memory page size */
106 /** arbitration mechanism selected */
109 /** shutdown notification */
112 /** i/o submission queue entry size */
115 /** i/o completion queue entry size */
118 uint32_t reserved2 : 8;
123 NVME_SHN_NORMAL = 0x1,
124 NVME_SHN_ABRUPT = 0x2,
127 union csts_register {
133 /** controller fatal status */
136 /** shutdown status */
139 uint32_t reserved1 : 28;
144 NVME_SHST_NORMAL = 0x0,
145 NVME_SHST_OCCURRING = 0x1,
146 NVME_SHST_COMPLETE = 0x2,
152 /** admin submission queue size */
155 uint32_t reserved1 : 4;
157 /** admin completion queue size */
160 uint32_t reserved2 : 4;
164 struct nvme_registers
166 /** controller capabilities */
167 union cap_lo_register cap_lo;
168 union cap_hi_register cap_hi;
170 uint32_t vs; /* version */
171 uint32_t intms; /* interrupt mask set */
172 uint32_t intmc; /* interrupt mask clear */
174 /** controller configuration */
175 union cc_register cc;
178 uint32_t csts; /* controller status */
181 /** admin queue attributes */
182 union aqa_register aqa;
184 uint64_t asq; /* admin submission queue base addr */
185 uint64_t acq; /* admin completion queue base addr */
186 uint32_t reserved3[0x3f2];
189 uint32_t sq_tdbl; /* submission queue tail doorbell */
190 uint32_t cq_hdbl; /* completion queue head doorbell */
191 } doorbell[1] __packed;
197 uint16_t opc : 8; /* opcode */
198 uint16_t fuse : 2; /* fused operation */
200 uint16_t cid; /* command identifier */
203 uint32_t nsid; /* namespace identifier */
210 uint64_t mptr; /* metadata pointer */
213 uint64_t prp1; /* prp entry 1 */
216 uint64_t prp2; /* prp entry 2 */
219 uint32_t cdw10; /* command-specific */
220 uint32_t cdw11; /* command-specific */
221 uint32_t cdw12; /* command-specific */
222 uint32_t cdw13; /* command-specific */
223 uint32_t cdw14; /* command-specific */
224 uint32_t cdw15; /* command-specific */
229 uint16_t p : 1; /* phase tag */
230 uint16_t sc : 8; /* status code */
231 uint16_t sct : 3; /* status code type */
233 uint16_t m : 1; /* more */
234 uint16_t dnr : 1; /* do not retry */
237 struct nvme_completion {
240 uint32_t cdw0; /* command-specific */
246 uint16_t sqhd; /* submission queue head pointer */
247 uint16_t sqid; /* submission queue identifier */
250 uint16_t cid; /* command identifier */
251 struct nvme_status status;
254 struct nvme_dsm_range {
258 uint64_t starting_lba;
261 /* status code types */
262 enum nvme_status_code_type {
263 NVME_SCT_GENERIC = 0x0,
264 NVME_SCT_COMMAND_SPECIFIC = 0x1,
265 NVME_SCT_MEDIA_ERROR = 0x2,
266 /* 0x3-0x6 - reserved */
267 NVME_SCT_VENDOR_SPECIFIC = 0x7,
270 /* generic command status codes */
271 enum nvme_generic_command_status_code {
272 NVME_SC_SUCCESS = 0x00,
273 NVME_SC_INVALID_OPCODE = 0x01,
274 NVME_SC_INVALID_FIELD = 0x02,
275 NVME_SC_COMMAND_ID_CONFLICT = 0x03,
276 NVME_SC_DATA_TRANSFER_ERROR = 0x04,
277 NVME_SC_ABORTED_POWER_LOSS = 0x05,
278 NVME_SC_INTERNAL_DEVICE_ERROR = 0x06,
279 NVME_SC_ABORTED_BY_REQUEST = 0x07,
280 NVME_SC_ABORTED_SQ_DELETION = 0x08,
281 NVME_SC_ABORTED_FAILED_FUSED = 0x09,
282 NVME_SC_ABORTED_MISSING_FUSED = 0x0a,
283 NVME_SC_INVALID_NAMESPACE_OR_FORMAT = 0x0b,
284 NVME_SC_COMMAND_SEQUENCE_ERROR = 0x0c,
286 NVME_SC_LBA_OUT_OF_RANGE = 0x80,
287 NVME_SC_CAPACITY_EXCEEDED = 0x81,
288 NVME_SC_NAMESPACE_NOT_READY = 0x82,
291 /* command specific status codes */
292 enum nvme_command_specific_status_code {
293 NVME_SC_COMPLETION_QUEUE_INVALID = 0x00,
294 NVME_SC_INVALID_QUEUE_IDENTIFIER = 0x01,
295 NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED = 0x02,
296 NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED = 0x03,
297 /* 0x04 - reserved */
298 NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED = 0x05,
299 NVME_SC_INVALID_FIRMWARE_SLOT = 0x06,
300 NVME_SC_INVALID_FIRMWARE_IMAGE = 0x07,
301 NVME_SC_INVALID_INTERRUPT_VECTOR = 0x08,
302 NVME_SC_INVALID_LOG_PAGE = 0x09,
303 NVME_SC_INVALID_FORMAT = 0x0a,
304 NVME_SC_FIRMWARE_REQUIRES_RESET = 0x0b,
306 NVME_SC_CONFLICTING_ATTRIBUTES = 0x80,
307 NVME_SC_INVALID_PROTECTION_INFO = 0x81,
308 NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE = 0x82,
311 /* media error status codes */
312 enum nvme_media_error_status_code {
313 NVME_SC_WRITE_FAULTS = 0x80,
314 NVME_SC_UNRECOVERED_READ_ERROR = 0x81,
315 NVME_SC_GUARD_CHECK_ERROR = 0x82,
316 NVME_SC_APPLICATION_TAG_CHECK_ERROR = 0x83,
317 NVME_SC_REFERENCE_TAG_CHECK_ERROR = 0x84,
318 NVME_SC_COMPARE_FAILURE = 0x85,
319 NVME_SC_ACCESS_DENIED = 0x86,
323 enum nvme_admin_opcode {
324 NVME_OPC_DELETE_IO_SQ = 0x00,
325 NVME_OPC_CREATE_IO_SQ = 0x01,
326 NVME_OPC_GET_LOG_PAGE = 0x02,
327 /* 0x03 - reserved */
328 NVME_OPC_DELETE_IO_CQ = 0x04,
329 NVME_OPC_CREATE_IO_CQ = 0x05,
330 NVME_OPC_IDENTIFY = 0x06,
331 /* 0x07 - reserved */
332 NVME_OPC_ABORT = 0x08,
333 NVME_OPC_SET_FEATURES = 0x09,
334 NVME_OPC_GET_FEATURES = 0x0a,
335 /* 0x0b - reserved */
336 NVME_OPC_ASYNC_EVENT_REQUEST = 0x0c,
337 /* 0x0d-0x0f - reserved */
338 NVME_OPC_FIRMWARE_ACTIVATE = 0x10,
339 NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD = 0x11,
341 NVME_OPC_FORMAT_NVM = 0x80,
342 NVME_OPC_SECURITY_SEND = 0x81,
343 NVME_OPC_SECURITY_RECEIVE = 0x82,
346 /* nvme nvm opcodes */
347 enum nvme_nvm_opcode {
348 NVME_OPC_FLUSH = 0x00,
349 NVME_OPC_WRITE = 0x01,
350 NVME_OPC_READ = 0x02,
351 /* 0x03 - reserved */
352 NVME_OPC_WRITE_UNCORRECTABLE = 0x04,
353 NVME_OPC_COMPARE = 0x05,
354 /* 0x06-0x07 - reserved */
355 NVME_OPC_DATASET_MANAGEMENT = 0x09,
359 /* 0x00 - reserved */
360 NVME_FEAT_ARBITRATION = 0x01,
361 NVME_FEAT_POWER_MANAGEMENT = 0x02,
362 NVME_FEAT_LBA_RANGE_TYPE = 0x03,
363 NVME_FEAT_TEMPERATURE_THRESHOLD = 0x04,
364 NVME_FEAT_ERROR_RECOVERY = 0x05,
365 NVME_FEAT_VOLATILE_WRITE_CACHE = 0x06,
366 NVME_FEAT_NUMBER_OF_QUEUES = 0x07,
367 NVME_FEAT_INTERRUPT_COALESCING = 0x08,
368 NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION = 0x09,
369 NVME_FEAT_WRITE_ATOMICITY = 0x0A,
370 NVME_FEAT_ASYNC_EVENT_CONFIGURATION = 0x0B,
371 /* 0x0C-0x7F - reserved */
372 NVME_FEAT_SOFTWARE_PROGRESS_MARKER = 0x80,
373 /* 0x81-0xBF - command set specific (reserved) */
374 /* 0xC0-0xFF - vendor specific */
377 enum nvme_dsm_attribute {
378 NVME_DSM_ATTR_INTEGRAL_READ = 0x1,
379 NVME_DSM_ATTR_INTEGRAL_WRITE = 0x2,
380 NVME_DSM_ATTR_DEALLOCATE = 0x4,
383 struct nvme_controller_data {
385 /* bytes 0-255: controller capabilities and features */
390 /** pci subsystem vendor id */
399 /** firmware revision */
402 /** recommended arbitration burst */
405 /** ieee oui identifier */
408 /** multi-interface capabilities */
411 /** maximum data transfer size */
414 uint8_t reserved1[178];
416 /* bytes 256-511: admin command set attributes */
418 /** optional admin command support */
420 /* supports security send/receive commands */
421 uint16_t security : 1;
423 /* supports format nvm command */
426 /* supports firmware activate/download commands */
427 uint16_t firmware : 1;
429 uint16_t oacs_rsvd : 13;
432 /** abort command limit */
435 /** asynchronous event request limit */
438 /** firmware updates */
440 /* first slot is read-only */
441 uint8_t slot1_ro : 1;
443 /* number of firmware slots */
444 uint8_t num_slots : 3;
446 uint8_t frmw_rsvd : 4;
449 /** log page attributes */
451 /* per namespace smart/health log page */
452 uint8_t ns_smart : 1;
454 uint8_t lpa_rsvd : 7;
457 /** error log page entries */
460 /** number of power states supported */
463 /** admin vendor specific command configuration */
465 /* admin vendor specific commands use spec format */
466 uint8_t spec_format : 1;
468 uint8_t avscc_rsvd : 7;
471 uint8_t reserved2[247];
473 /* bytes 512-703: nvm command set attributes */
475 /** submission queue entry size */
481 /** completion queue entry size */
487 uint8_t reserved3[2];
489 /** number of namespaces */
492 /** optional nvm command support */
494 uint16_t compare : 1;
495 uint16_t write_unc : 1;
497 uint16_t reserved: 13;
500 /** fused operation support */
503 /** format nvm attributes */
506 /** volatile write cache */
509 uint8_t reserved : 7;
512 /* TODO: flesh out remaining nvm command set attributes */
513 uint8_t reserved4[178];
515 /* bytes 704-2047: i/o command set attributes */
516 uint8_t reserved5[1344];
518 /* bytes 2048-3071: power state descriptors */
519 uint8_t reserved6[1024];
521 /* bytes 3072-4095: vendor specific */
522 uint8_t reserved7[1024];
523 } __packed __aligned(4);
525 struct nvme_namespace_data {
527 /** namespace size */
530 /** namespace capacity */
533 /** namespace utilization */
536 /** namespace features */
538 /** thin provisioning */
539 uint8_t thin_prov : 1;
540 uint8_t reserved1 : 7;
543 /** number of lba formats */
546 /** formatted lba size */
549 uint8_t extended : 1;
550 uint8_t reserved2 : 3;
553 /** metadata capabilities */
555 /* metadata can be transferred as part of data prp list */
556 uint8_t extended : 1;
558 /* metadata can be transferred with separate metadata pointer */
561 uint8_t reserved3 : 6;
564 /** end-to-end data protection capabilities */
566 /* protection information type 1 */
569 /* protection information type 2 */
572 /* protection information type 3 */
575 /* first eight bytes of metadata */
576 uint8_t md_start : 1;
578 /* last eight bytes of metadata */
582 /** end-to-end data protection type settings */
584 /* protection information type */
587 /* 1 == protection info transferred at start of metadata */
588 /* 0 == protection info transferred at end of metadata */
589 uint8_t md_start : 1;
591 uint8_t reserved4 : 4;
594 uint8_t reserved5[98];
596 /** lba format support */
604 /** relative performance */
607 uint32_t reserved6 : 6;
610 uint8_t reserved6[192];
612 uint8_t vendor_specific[3712];
613 } __packed __aligned(4);
617 /* 0x00 - reserved */
618 NVME_LOG_ERROR = 0x01,
619 NVME_LOG_HEALTH_INFORMATION = 0x02,
620 NVME_LOG_FIRMWARE_SLOT = 0x03,
621 /* 0x04-0x7F - reserved */
622 /* 0x80-0xBF - I/O command set specific */
623 /* 0xC0-0xFF - vendor specific */
626 struct nvme_error_information_entry {
628 uint64_t error_count;
631 struct nvme_status status;
632 uint16_t error_location;
635 uint8_t vendor_specific;
636 uint8_t reserved[35];
637 } __packed __aligned(4);
639 union nvme_critical_warning_state {
644 uint8_t available_spare : 1;
645 uint8_t temperature : 1;
646 uint8_t device_reliability : 1;
647 uint8_t read_only : 1;
648 uint8_t volatile_memory_backup : 1;
649 uint8_t reserved : 3;
653 struct nvme_health_information_page {
655 union nvme_critical_warning_state critical_warning;
657 uint16_t temperature;
658 uint8_t available_spare;
659 uint8_t available_spare_threshold;
660 uint8_t percentage_used;
662 uint8_t reserved[26];
665 * Note that the following are 128-bit values, but are
666 * defined as an array of 2 64-bit values.
668 /* Data Units Read is always in 512-byte units. */
669 uint64_t data_units_read[2];
670 /* Data Units Written is always in 512-byte units. */
671 uint64_t data_units_written[2];
672 /* For NVM command set, this includes Compare commands. */
673 uint64_t host_read_commands[2];
674 uint64_t host_write_commands[2];
675 /* Controller Busy Time is reported in minutes. */
676 uint64_t controller_busy_time[2];
677 uint64_t power_cycles[2];
678 uint64_t power_on_hours[2];
679 uint64_t unsafe_shutdowns[2];
680 uint64_t media_errors[2];
681 uint64_t num_error_info_log_entries[2];
683 uint8_t reserved2[320];
684 } __packed __aligned(4);
686 struct nvme_firmware_page {
689 uint8_t slot : 3; /* slot for current FW */
690 uint8_t reserved : 5;
694 uint64_t revision[7]; /* revisions for 7 slots */
695 uint8_t reserved2[448];
696 } __packed __aligned(4);
698 #define NVME_TEST_MAX_THREADS 128
700 struct nvme_io_test {
702 enum nvme_nvm_opcode opc;
704 uint32_t time; /* in seconds */
705 uint32_t num_threads;
707 uint32_t io_completed[NVME_TEST_MAX_THREADS];
710 enum nvme_io_test_flags {
713 * Specifies whether dev_refthread/dev_relthread should be
714 * called during NVME_BIO_TEST. Ignored for other test
717 NVME_TEST_FLAG_REFTHREAD = 0x1,
720 struct nvme_pt_command {
723 * cmd is used to specify a passthrough command to a controller or
726 * The following fields from cmd may be specified by the caller:
728 * * nsid (namespace id) - for admin commands only
731 * Remaining fields must be set to 0 by the caller.
733 struct nvme_command cmd;
736 * cpl returns completion status for the passthrough command
739 * The following fields will be filled out by the driver, for
740 * consumption by the caller:
742 * * status (except for phase)
744 * Remaining fields will be set to 0 by the driver.
746 struct nvme_completion cpl;
748 /* buf is the data buffer associated with this passthrough command. */
752 * len is the length of the data buffer associated with this
753 * passthrough command.
758 * is_read = 1 if the passthrough command will read data into the
761 * is_read = 0 if the passthrough command will write data into the
767 * driver_lock is used by the driver only. It must be set to 0
770 struct mtx * driver_lock;
/*
 * A completion is an error unless both the status code (sc) and status
 * code type (sct) are zero (NVME_SC_SUCCESS within NVME_SCT_GENERIC).
 * Takes a pointer to a struct nvme_completion.
 */
773 #define nvme_completion_is_error(cpl) \
774 ((cpl)->status.sc != 0 || (cpl)->status.sct != 0)
/* Opaque driver-internal types; consumers hold pointers only. */
780 struct nvme_namespace;
781 struct nvme_controller;
782 struct nvme_consumer;
/* Per-command completion callback: (cb_arg, completion). */
784 typedef void (*nvme_cb_fn_t)(void *, const struct nvme_completion *);
/*
 * Consumer callbacks registered via nvme_register_consumer().  The ns and
 * ctrlr callbacks return a consumer-private cookie (presumably passed back
 * on later events — TODO confirm against the driver implementation).
 */
786 typedef void *(*nvme_cons_ns_fn_t)(struct nvme_namespace *, void *);
787 typedef void *(*nvme_cons_ctrlr_fn_t)(struct nvme_controller *);
/* Asynchronous-event notification callback. */
788 typedef void (*nvme_cons_async_fn_t)(void *, const struct nvme_completion *,
789 uint32_t, void *, uint32_t);
/* NOTE(review): name suggests this is invoked on controller failure — confirm. */
790 typedef void (*nvme_cons_fail_fn_t)(void *);
792 enum nvme_namespace_flags {
793 NVME_NS_DEALLOCATE_SUPPORTED = 0x1,
794 NVME_NS_FLUSH_SUPPORTED = 0x2,
797 int nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
798 struct nvme_pt_command *pt,
799 uint32_t nsid, int is_user_buffer,
802 /* Admin functions */
/*
 * Issue an asynchronous SET FEATURES admin command (NVME_OPC_SET_FEATURES)
 * for the given feature identifier; cb_fn(cb_arg, cpl) runs on completion.
 */
803 void nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr,
804 uint8_t feature, uint32_t cdw11,
805 void *payload, uint32_t payload_size,
806 nvme_cb_fn_t cb_fn, void *cb_arg);
/* Issue an asynchronous GET FEATURES admin command (NVME_OPC_GET_FEATURES). */
807 void nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr,
808 uint8_t feature, uint32_t cdw11,
809 void *payload, uint32_t payload_size,
810 nvme_cb_fn_t cb_fn, void *cb_arg);
/*
 * Issue an asynchronous GET LOG PAGE admin command (NVME_OPC_GET_LOG_PAGE);
 * nsid may be NVME_GLOBAL_NAMESPACE_TAG for controller-global log pages.
 */
811 void nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr,
812 uint8_t log_page, uint32_t nsid,
813 void *payload, uint32_t payload_size,
814 nvme_cb_fn_t cb_fn, void *cb_arg);
816 /* NVM I/O functions */
/*
 * NOTE(review): several trailing parameter lines (the closing
 * "void *cb_arg);" of these prototypes) appear to have been dropped from
 * this copy of the file — compare against the upstream header.
 */
/* Submit an asynchronous WRITE of lba_count blocks starting at lba. */
817 int nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload,
818 uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
/* bio-based variant of the write path. */
820 int nvme_ns_cmd_write_bio(struct nvme_namespace *ns, struct bio *bp,
821 nvme_cb_fn_t cb_fn, void *cb_arg);
/* Submit an asynchronous READ of lba_count blocks starting at lba. */
822 int nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload,
823 uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
/* bio-based variant of the read path. */
825 int nvme_ns_cmd_read_bio(struct nvme_namespace *ns, struct bio *bp,
826 nvme_cb_fn_t cb_fn, void *cb_arg);
/*
 * Submit a DATASET MANAGEMENT deallocate (NVME_OPC_DATASET_MANAGEMENT);
 * payload holds num_ranges struct nvme_dsm_range entries.
 */
827 int nvme_ns_cmd_deallocate(struct nvme_namespace *ns, void *payload,
828 uint8_t num_ranges, nvme_cb_fn_t cb_fn,
/* Submit a FLUSH command (NVME_OPC_FLUSH). */
830 int nvme_ns_cmd_flush(struct nvme_namespace *ns, nvme_cb_fn_t cb_fn,
833 /* Registration functions */
/*
 * Register a consumer of NVMe devices.  The four callbacks cover namespace
 * events, controller events, asynchronous events, and failure notification;
 * returns an opaque handle for nvme_unregister_consumer().
 */
834 struct nvme_consumer * nvme_register_consumer(nvme_cons_ns_fn_t ns_fn,
835 nvme_cons_ctrlr_fn_t ctrlr_fn,
836 nvme_cons_async_fn_t async_fn,
837 nvme_cons_fail_fn_t fail_fn);
/* Deregister a consumer previously returned by nvme_register_consumer(). */
838 void nvme_unregister_consumer(struct nvme_consumer *consumer);
840 /* Controller helper functions */
/* Return the device_t backing this controller. */
841 device_t nvme_ctrlr_get_device(struct nvme_controller *ctrlr);
/* Return a read-only pointer to the controller's IDENTIFY CONTROLLER data. */
842 const struct nvme_controller_data *
843 nvme_ctrlr_get_data(struct nvme_controller *ctrlr);
845 /* Namespace helper functions */
/* Maximum data transfer size, in bytes, for a single I/O on this namespace. */
846 uint32_t nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns);
/* Logical block (sector) size in bytes. */
847 uint32_t nvme_ns_get_sector_size(struct nvme_namespace *ns);
/* Namespace capacity in sectors. */
848 uint64_t nvme_ns_get_num_sectors(struct nvme_namespace *ns);
/* Namespace capacity in bytes (presumably sectors * sector size — confirm). */
849 uint64_t nvme_ns_get_size(struct nvme_namespace *ns);
/* Bitmask of enum nvme_namespace_flags (NVME_NS_*_SUPPORTED). */
850 uint32_t nvme_ns_get_flags(struct nvme_namespace *ns);
/*
 * Serial/model number strings; presumably taken from the controller's
 * identify data and NUL-terminated — TODO confirm.
 */
851 const char * nvme_ns_get_serial_number(struct nvme_namespace *ns);
852 const char * nvme_ns_get_model_number(struct nvme_namespace *ns);
/* Return a read-only pointer to this namespace's IDENTIFY NAMESPACE data. */
853 const struct nvme_namespace_data *
854 nvme_ns_get_data(struct nvme_namespace *ns);
856 int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
861 #endif /* __NVME_H__ */