2 * Copyright (c) 2017, 2018 The FreeBSD Foundation
4 * Copyright (c) 2018, 2019 Intel Corporation
6 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7 * under sponsorship from the FreeBSD Foundation.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
37 #include <sys/param.h>
38 #include <sys/systm.h>
42 #include <sys/devicestat.h>
45 #include <sys/kernel.h>
46 #include <sys/kthread.h>
47 #include <sys/limits.h>
49 #include <sys/malloc.h>
50 #include <sys/module.h>
51 #include <sys/rwlock.h>
52 #include <sys/sglist.h>
55 #include <geom/geom.h>
56 #include <geom/geom_int.h>
57 #include <machine/vmparam.h>
59 #include <vm/vm_object.h>
60 #include <vm/vm_page.h>
61 #include <vm/vm_pager.h>
62 #include <contrib/dev/acpica/include/acpi.h>
63 #include <contrib/dev/acpica/include/accommon.h>
64 #include <contrib/dev/acpica/include/acuuid.h>
65 #include <dev/acpica/acpivar.h>
66 #include <dev/nvdimm/nvdimm_var.h>
/*
 * struct uuid initializers, one per SPA range type.  Each macro is used as
 * the .u_id value of the matching entry in nvdimm_SPA_uuid_list, which
 * nvdimm_spa_type_from_uuid() compares against the GUID taken from an NFIT
 * system-address sub-table.
 */
68 #define UUID_INITIALIZER_VOLATILE_MEMORY \
69 {0x7305944f,0xfdda,0x44e3,0xb1,0x6c,{0x3f,0x22,0xd2,0x52,0xe5,0xd0}}
70 #define UUID_INITIALIZER_PERSISTENT_MEMORY \
71 {0x66f0d379,0xb4f3,0x4074,0xac,0x43,{0x0d,0x33,0x18,0xb7,0x8c,0xdb}}
72 #define UUID_INITIALIZER_CONTROL_REGION \
73 {0x92f701f6,0x13b4,0x405d,0x91,0x0b,{0x29,0x93,0x67,0xe8,0x23,0x4c}}
74 #define UUID_INITIALIZER_DATA_REGION \
75 {0x91af0530,0x5d86,0x470e,0xa6,0xb0,{0x0a,0x2d,0xb9,0x40,0x82,0x49}}
76 #define UUID_INITIALIZER_VOLATILE_VIRTUAL_DISK \
77 {0x77ab535a,0x45fc,0x624b,0x55,0x60,{0xf7,0xb2,0x81,0xd1,0xf9,0x6e}}
78 #define UUID_INITIALIZER_VOLATILE_VIRTUAL_CD \
79 {0x3d5abd30,0x4175,0x87ce,0x6d,0x64,{0xd2,0xad,0xe5,0x23,0xc4,0xbb}}
80 #define UUID_INITIALIZER_PERSISTENT_VIRTUAL_DISK \
81 {0x5cea02c9,0x4d07,0x69d3,0x26,0x9f,{0x44,0x96,0xfb,0xe0,0x96,0xf9}}
82 #define UUID_INITIALIZER_PERSISTENT_VIRTUAL_CD \
83 {0x08018188,0x42cd,0xbb48,0x10,0x0f,{0x53,0x87,0xd5,0x3d,0xed,0x3d}}
/*
 * Array of SPA mapping descriptors.  Allocated zero-filled with
 * spa_mappings_cnt elements in nvdimm_spa_init1(), filled one entry per
 * callback by nvdimm_spa_parse(), and freed in nvdimm_spa_g_fini().
 */
85 struct SPA_mapping *spa_mappings;
/*
 * NFIT iteration callback; nvdimm_spa_init1() passes it to
 * nvdimm_iterate_nfit() with &spa_mappings_cnt as arg.  The sub-table
 * pointer is unused.  The body is not visible in this excerpt; presumably it
 * increments the counter behind arg -- TODO confirm.
 */
89 nvdimm_spa_count(void *nfitsubtbl __unused, void *arg)
/*
 * Table of known SPA range types, indexed by enum SPA_mapping_type through
 * designated initializers.
 *
 *   u_name    - short label printed in the nvdimm_spa_init_one() banner.
 *   u_id      - range UUID matched by nvdimm_spa_type_from_uuid().
 *   u_usr_acc - tested in nvdimm_spa_init_one() right after the banner; the
 *               action taken when it is false is not visible in this
 *               excerpt (presumably the mapping is not exposed further --
 *               TODO confirm).
 *
 * NOTE(review): several member lines (including the .u_name of the two
 * PERSISTENT_VIRTUAL_* entries and every .u_usr_acc initializer) are missing
 * from this excerpt.
 */
98 static struct nvdimm_SPA_uuid_list_elm {
101 const bool u_usr_acc;
102 } nvdimm_SPA_uuid_list[] = {
103 [SPA_TYPE_VOLATILE_MEMORY] = {
104 .u_name = "VOLA MEM ",
105 .u_id = UUID_INITIALIZER_VOLATILE_MEMORY,
108 [SPA_TYPE_PERSISTENT_MEMORY] = {
109 .u_name = "PERS MEM",
110 .u_id = UUID_INITIALIZER_PERSISTENT_MEMORY,
113 [SPA_TYPE_CONTROL_REGION] = {
114 .u_name = "CTRL RG ",
115 .u_id = UUID_INITIALIZER_CONTROL_REGION,
118 [SPA_TYPE_DATA_REGION] = {
119 .u_name = "DATA RG ",
120 .u_id = UUID_INITIALIZER_DATA_REGION,
123 [SPA_TYPE_VOLATILE_VIRTUAL_DISK] = {
124 .u_name = "VIRT DSK",
125 .u_id = UUID_INITIALIZER_VOLATILE_VIRTUAL_DISK,
128 [SPA_TYPE_VOLATILE_VIRTUAL_CD] = {
129 .u_name = "VIRT CD ",
130 .u_id = UUID_INITIALIZER_VOLATILE_VIRTUAL_CD,
133 [SPA_TYPE_PERSISTENT_VIRTUAL_DISK] = {
135 .u_id = UUID_INITIALIZER_PERSISTENT_VIRTUAL_DISK,
138 [SPA_TYPE_PERSISTENT_VIRTUAL_CD] = {
140 .u_id = UUID_INITIALIZER_PERSISTENT_VIRTUAL_CD,
/*
 * Map a range UUID to its SPA mapping type by scanning
 * nvdimm_SPA_uuid_list.  Returns SPA_TYPE_UNKNOWN when the loop finishes
 * without a match.
 */
145 enum SPA_mapping_type
146 nvdimm_spa_type_from_uuid(struct uuid *uuid)
150 for (j = 0; j < nitems(nvdimm_SPA_uuid_list); j++) {
/*
 * Non-zero uuidcmp() means "different"; the statements that follow (skip
 * this entry, return the index on a match) are not visible in this excerpt
 * -- TODO confirm.
 */
151 if (uuidcmp(uuid, &nvdimm_SPA_uuid_list[j].u_id) != 0)
155 return (SPA_TYPE_UNKNOWN);
/*
 * Pick the VM memory attribute for an SPA range from its EFI memory flags
 * (spa->spa_efi_mem_flags).
 *
 * The flags are tested in a fixed priority order -- WB, WT, WC, WP, UC --
 * and the first one set wins.  If none of them is set, a diagnostic is
 * printed and VM_MEMATTR_UNCACHEABLE is used as the fallback.
 */
159 nvdimm_spa_memattr(struct SPA_mapping *spa)
163 if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WB) != 0)
164 mode = VM_MEMATTR_WRITE_BACK;
165 else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WT) != 0)
166 mode = VM_MEMATTR_WRITE_THROUGH;
167 else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WC) != 0)
168 mode = VM_MEMATTR_WRITE_COMBINING;
169 else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WP) != 0)
170 mode = VM_MEMATTR_WRITE_PROTECTED;
171 else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_UC) != 0)
172 mode = VM_MEMATTR_UNCACHEABLE;
/* No recognised caching flag: report it and fall back to uncacheable. */
175 printf("SPA%d mapping attr unsupported\n",
177 mode = VM_MEMATTR_UNCACHEABLE;
/*
 * Transfer data between a uio and the SPA range, using uio->uio_offset as
 * the byte offset into the range.
 *
 * Two strategies are visible:
 *  - spa->spa_kva == NULL: no kernel mapping exists, so a fake vm_page is
 *    pointed at the physical page holding the current offset and the data
 *    is moved with uiomove_fromphys().
 *  - otherwise (the else line is not visible in this excerpt): the data is
 *    moved with uiomove() directly at spa_kva + uio_offset.
 *
 * Both loops run while uio_resid > 0 and test uio_offset against spa_len;
 * the statement taken at end-of-range is not visible (presumably break --
 * TODO confirm).
 */
183 nvdimm_spa_uio(struct SPA_mapping *spa, struct uio *uio)
185 struct vm_page m, *ma;
191 if (spa->spa_kva == NULL) {
192 mattr = nvdimm_spa_memattr(spa);
193 vm_page_initfake(&m, 0, mattr);
195 while (uio->uio_resid > 0) {
196 if (uio->uio_offset >= spa->spa_len)
198 off = spa->spa_phys_base + uio->uio_offset;
/* Retarget the fake page at the physical page containing off. */
199 vm_page_updatefake(&m, trunc_page(off), mattr);
/*
 * NOTE(review): the visible lines of this loop clamp n to uio_resid but
 * show no clamp against spa_len or against the end of the current page,
 * unlike the KVA loop below -- confirm against the full source.
 */
201 if (n > uio->uio_resid)
203 error = uiomove_fromphys(&ma, off & PAGE_MASK, n, uio);
208 while (uio->uio_resid > 0) {
209 if (uio->uio_offset >= spa->spa_len)
212 if (n > uio->uio_resid)
/* Do not let the transfer run past the end of the SPA range. */
214 if (uio->uio_offset + n > spa->spa_len)
215 n = spa->spa_len - uio->uio_offset;
216 error = uiomove((char *)spa->spa_kva + uio->uio_offset,
/*
 * cdev read/write entry point; spa_cdevsw installs it as both d_read and
 * d_write.  dev->si_drv1 is the SPA mapping stored by
 * nvdimm_spa_init_one(); the transfer itself is done by nvdimm_spa_uio().
 * ioflag is not used in the visible lines.
 */
226 nvdimm_spa_rw(struct cdev *dev, struct uio *uio, int ioflag)
229 return (nvdimm_spa_uio(dev->si_drv1, uio));
/*
 * cdev ioctl entry point.  The visible cases answer disk-geometry queries:
 * DIOCGSECTORSIZE stores DEV_BSIZE through data, and a second case (its
 * label is not visible in this excerpt; presumably DIOCGMEDIASIZE -- TODO
 * confirm) stores the range length spa->spa_len as an off_t.
 */
233 nvdimm_spa_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
236 struct SPA_mapping *spa;
242 case DIOCGSECTORSIZE:
243 *(u_int *)data = DEV_BSIZE;
246 *(off_t *)data = spa->spa_len;
/*
 * cdev d_mmap_single entry point: hand the caller the VM object that backs
 * the whole SPA range.
 *
 * The request is checked against spa->spa_obj being NULL and against the
 * range bounds; the error values returned by those checks are not visible
 * in this excerpt.  On success a new reference on spa_obj is taken and the
 * object is returned through *objp.
 */
256 nvdimm_spa_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size,
257 vm_object_t *objp, int nprot)
259 struct SPA_mapping *spa;
262 if (spa->spa_obj == NULL)
/*
 * Bounds check: the start must lie inside the range, offset + size must not
 * wrap around, and the end must not exceed the range length.
 */
264 if (*offset >= spa->spa_len || *offset + size < *offset ||
265 *offset + size > spa->spa_len)
/* The reference taken here is handed to the caller together with *objp. */
267 vm_object_reference(spa->spa_obj);
268 *objp = spa->spa_obj;
/*
 * Character-device switch for the "nvdimm_spa" nodes created in
 * nvdimm_spa_init_one().  Reads and writes share nvdimm_spa_rw().
 */
272 static struct cdevsw spa_cdevsw = {
273 .d_version = D_VERSION,
275 .d_name = "nvdimm_spa",
276 .d_read = nvdimm_spa_rw,
277 .d_write = nvdimm_spa_rw,
278 .d_ioctl = nvdimm_spa_ioctl,
279 .d_mmap_single = nvdimm_spa_mmap_single,
/*
 * Service an unmapped bio when the SPA range has no kernel mapping either:
 * build one fake vm_page per bio page, pointing at the matching physical
 * pages of the SPA range, and copy page-to-page with pmap_copy_pages().
 *
 * Two copies are visible, one in each direction between the fake SPA pages
 * and bp->bio_ma; the condition selecting between them is not visible in
 * this excerpt (presumably the read/write command argument -- TODO
 * confirm).
 *
 * NOTE(review): maa[] and ma[] are variable-length arrays sized by
 * bp->bio_ma_n and live on the stack of the calling thread; confirm that
 * bio_ma_n is bounded tightly enough for that.
 */
283 nvdimm_spa_g_all_unmapped(struct SPA_mapping *spa, struct bio *bp,
286 struct vm_page maa[bp->bio_ma_n];
287 vm_page_t ma[bp->bio_ma_n];
291 mattr = nvdimm_spa_memattr(spa);
292 for (i = 0; i < nitems(ma); i++) {
/* Fake page i covers the i-th physical page of the SPA range for this bio. */
294 vm_page_initfake(&maa[i], spa->spa_phys_base +
295 trunc_page(bp->bio_offset) + PAGE_SIZE * i, mattr);
/* Copy bio pages into the SPA pages. */
299 pmap_copy_pages(ma, bp->bio_offset & PAGE_MASK, bp->bio_ma,
300 bp->bio_ma_offset, bp->bio_length);
/* Copy SPA pages into the bio pages. */
302 pmap_copy_pages(bp->bio_ma, bp->bio_ma_offset, ma,
303 bp->bio_offset & PAGE_MASK, bp->bio_length);
/*
 * Per-SPA GEOM worker, started with kproc_create() in
 * nvdimm_spa_init_one() with the SPA mapping as arg.
 *
 * It takes bios off spa_g_queue under spa_g_mtx (nvdimm_spa_g_start() puts
 * them there) and sleeps on the queue address when there is nothing to do.
 * When spa_g_proc_run is cleared it sets spa_g_proc_exiting and wakes the
 * sleeper in nvdimm_spa_fini_one().
 *
 * Each bio is handled by command:
 *  - anything other than READ/WRITE/FLUSH takes a separate path whose body
 *    is not visible in this excerpt;
 *  - BIO_FLUSH writes back either the KVA mapping or the physical range;
 *  - READ/WRITE are copied by one of three methods, depending on whether
 *    the bio is unmapped and whether the SPA range has a KVA mapping.
 */
307 nvdimm_spa_g_thread(void *arg)
309 struct SPA_mapping *spa;
317 mtx_lock(&spa->spa_g_mtx);
319 bp = bioq_takefirst(&spa->spa_g_queue);
322 msleep(&spa->spa_g_queue, &spa->spa_g_mtx, PRIBIO,
/* Shutdown handshake with nvdimm_spa_fini_one(). */
324 if (!spa->spa_g_proc_run) {
325 spa->spa_g_proc_exiting = true;
326 wakeup(&spa->spa_g_queue);
327 mtx_unlock(&spa->spa_g_mtx);
332 mtx_unlock(&spa->spa_g_mtx);
333 if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
334 bp->bio_cmd != BIO_FLUSH) {
340 if (bp->bio_cmd == BIO_FLUSH) {
/* With a KVA mapping flush through it; otherwise flush by physical range. */
341 if (spa->spa_kva != NULL) {
342 pmap_large_map_wb(spa->spa_kva, spa->spa_len);
344 pmap_flush_cache_phys_range(
345 (vm_paddr_t)spa->spa_phys_base,
346 (vm_paddr_t)spa->spa_phys_base +
347 spa->spa_len, nvdimm_spa_memattr(spa));
355 if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
356 if (spa->spa_kva != NULL) {
/*
 * Unmapped bio, mapped SPA: the uio describes the SPA side (spa_kva based)
 * and the bio pages are handed to uiomove_fromphys().  uio_rw is the
 * opposite of the mapped case further down (BIO_READ -> UIO_WRITE) --
 * presumably because the roles of the uio and the data source are swapped
 * here; TODO confirm.
 */
357 aiovec.iov_base = (char *)spa->spa_kva +
359 aiovec.iov_len = bp->bio_length;
360 auio.uio_iov = &aiovec;
362 auio.uio_resid = bp->bio_length;
363 auio.uio_offset = bp->bio_offset;
364 auio.uio_segflg = UIO_SYSSPACE;
365 auio.uio_rw = bp->bio_cmd == BIO_READ ?
366 UIO_WRITE : UIO_READ;
367 auio.uio_td = curthread;
368 error = uiomove_fromphys(bp->bio_ma,
369 bp->bio_ma_offset, bp->bio_length, &auio);
370 bp->bio_resid = auio.uio_resid;
/*
 * Unmapped bio, unmapped SPA: page-to-page copy.
 * NOTE(review): bio_resid is set to the full bio_length here, whereas the
 * other paths store the uio's remaining count; confirm this is intended.
 */
372 nvdimm_spa_g_all_unmapped(spa, bp, bp->bio_cmd);
373 bp->bio_resid = bp->bio_length;
/* Mapped bio: wrap bio_data in a kernel-space uio and use nvdimm_spa_uio(). */
377 aiovec.iov_base = bp->bio_data;
378 aiovec.iov_len = bp->bio_length;
379 auio.uio_iov = &aiovec;
381 auio.uio_resid = bp->bio_length;
382 auio.uio_offset = bp->bio_offset;
383 auio.uio_segflg = UIO_SYSSPACE;
384 auio.uio_rw = bp->bio_cmd == BIO_READ ? UIO_READ :
386 auio.uio_td = curthread;
387 error = nvdimm_spa_uio(spa, &auio);
388 bp->bio_resid = auio.uio_resid;
/* Close the devstat transaction opened in nvdimm_spa_g_start(). */
390 bp->bio_bcount = bp->bio_length;
391 devstat_end_transaction_bio(spa->spa_g_devstat, bp);
393 bp->bio_completed = bp->bio_length;
394 g_io_deliver(bp, error);
/*
 * GEOM start method: queue the bio for the per-SPA worker thread.
 *
 * The SPA mapping is the softc of the geom that owns the target provider.
 * For READ and WRITE a devstat transaction is opened first, under
 * spa_g_stat_mtx; nvdimm_spa_g_thread() closes it.  The bio is then
 * inserted into spa_g_queue under spa_g_mtx and the worker is woken.
 */
399 nvdimm_spa_g_start(struct bio *bp)
401 struct SPA_mapping *spa;
403 spa = bp->bio_to->geom->softc;
404 if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
405 mtx_lock(&spa->spa_g_stat_mtx);
406 devstat_start_transaction_bio(spa->spa_g_devstat, bp);
407 mtx_unlock(&spa->spa_g_stat_mtx);
409 mtx_lock(&spa->spa_g_mtx);
410 bioq_disksort(&spa->spa_g_queue, bp);
411 wakeup(&spa->spa_g_queue);
412 mtx_unlock(&spa->spa_g_mtx);
/*
 * GEOM access method, installed as .access in nvdimm_spa_g_class.  It
 * receives the provider and the r/w/e access-count arguments; the body is
 * not visible in this excerpt.
 */
416 nvdimm_spa_g_access(struct g_provider *pp, int r, int w, int e)
/*
 * GEOM class for SPA providers.  The init and fini methods are declared
 * ahead of the class because they are defined further down in the file.
 * DECLARE_GEOM_CLASS registers the class under the module name g_spa.
 */
422 static g_init_t nvdimm_spa_g_init;
423 static g_fini_t nvdimm_spa_g_fini;
425 struct g_class nvdimm_spa_g_class = {
427 .version = G_VERSION,
428 .start = nvdimm_spa_g_start,
429 .access = nvdimm_spa_g_access,
430 .init = nvdimm_spa_g_init,
431 .fini = nvdimm_spa_g_fini,
433 DECLARE_GEOM_CLASS(nvdimm_spa_g_class, g_spa);
/*
 * Initialise one SPA mapping from an NFIT system-address sub-table.
 *
 * Steps visible in this excerpt, in order:
 *  1. copy type, proximity domain, range index, base, length and EFI
 *     memory flags out of the NFIT entry and print a banner line;
 *  2. test the type's u_usr_acc flag (the action taken is not visible);
 *  3. try to map the range into KVA with pmap_large_map(); a failure is
 *     reported, and the spa_kva == NULL checks elsewhere in the file show
 *     the driver can run without the mapping;
 *  4. build a one-segment sglist over the physical range and wrap it in an
 *     OBJT_SG VM object (served by nvdimm_spa_mmap_single());
 *  5. create the /dev node "nvdimm_spa%d" with si_drv1 pointing at spa;
 *  6. initialise the bio queue and both mutexes and start the worker
 *     kproc "g_spa%d";
 *  7. create the geom and provider "spa%d" and a devstat entry.
 *
 * The error-handling control flow between these steps is not visible in
 * this excerpt.
 */
436 nvdimm_spa_init_one(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr,
439 struct make_dev_args mda;
440 struct sglist *spa_sg;
443 spa->spa_type = spa_type;
/* -1 when the NFIT entry does not carry a valid proximity domain. */
444 spa->spa_domain = ((nfitaddr->Flags & ACPI_NFIT_PROXIMITY_VALID) != 0) ?
445 nfitaddr->ProximityDomain : -1;
446 spa->spa_nfit_idx = nfitaddr->RangeIndex;
447 spa->spa_phys_base = nfitaddr->Address;
448 spa->spa_len = nfitaddr->Length;
449 spa->spa_efi_mem_flags = nfitaddr->MemoryMapping;
451 printf("NVDIMM SPA%d base %#016jx len %#016jx %s fl %#jx\n",
453 (uintmax_t)spa->spa_phys_base, (uintmax_t)spa->spa_len,
454 nvdimm_SPA_uuid_list[spa_type].u_name,
455 spa->spa_efi_mem_flags);
457 if (!nvdimm_SPA_uuid_list[spa_type].u_usr_acc)
/* Kernel mapping of the whole range, using the EFI-derived attribute. */
460 error1 = pmap_large_map(spa->spa_phys_base, spa->spa_len,
461 &spa->spa_kva, nvdimm_spa_memattr(spa));
463 printf("NVDIMM SPA%d cannot map into KVA, error %d\n",
464 spa->spa_nfit_idx, error1);
/* Scatter/gather list describing the range; backs the VM object for mmap. */
468 spa_sg = sglist_alloc(1, M_WAITOK);
469 error = sglist_append_phys(spa_sg, spa->spa_phys_base,
472 spa->spa_obj = vm_pager_allocate(OBJT_SG, spa_sg, spa->spa_len,
473 VM_PROT_ALL, 0, NULL);
474 if (spa->spa_obj == NULL) {
/*
 * NOTE(review): this message and the sglist one below have no trailing
 * "\n", unlike the other diagnostics in this function.
 */
475 printf("NVDIMM SPA%d failed to alloc vm object",
480 printf("NVDIMM SPA%d failed to init sglist, error %d",
481 spa->spa_nfit_idx, error);
/* Character device node; si_drv1 is read back in nvdimm_spa_rw(). */
485 make_dev_args_init(&mda);
486 mda.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME;
487 mda.mda_devsw = &spa_cdevsw;
489 mda.mda_uid = UID_ROOT;
490 mda.mda_gid = GID_OPERATOR;
492 mda.mda_si_drv1 = spa;
493 error = make_dev_s(&mda, &spa->spa_dev, "nvdimm_spa%d",
496 printf("NVDIMM SPA%d cannot create devfs node, error %d\n",
497 spa->spa_nfit_idx, error);
/* State for the GEOM worker thread (nvdimm_spa_g_thread). */
502 bioq_init(&spa->spa_g_queue);
503 mtx_init(&spa->spa_g_mtx, "spag", NULL, MTX_DEF);
504 mtx_init(&spa->spa_g_stat_mtx, "spagst", NULL, MTX_DEF);
505 spa->spa_g_proc_run = true;
506 spa->spa_g_proc_exiting = false;
507 error = kproc_create(nvdimm_spa_g_thread, spa, &spa->spa_g_proc, 0, 0,
508 "g_spa%d", spa->spa_nfit_idx);
510 printf("NVDIMM SPA%d cannot create geom worker, error %d\n",
511 spa->spa_nfit_idx, error);
/* GEOM geom and provider; the softc lets nvdimm_spa_g_start() find spa. */
516 spa->spa_g = g_new_geomf(&nvdimm_spa_g_class, "spa%d",
518 spa->spa_g->softc = spa;
519 spa->spa_p = g_new_providerf(spa->spa_g, "spa%d",
521 spa->spa_p->mediasize = spa->spa_len;
522 spa->spa_p->sectorsize = DEV_BSIZE;
523 spa->spa_p->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE |
524 G_PF_ACCEPT_UNMAPPED;
525 g_error_provider(spa->spa_p, 0);
526 spa->spa_g_devstat = devstat_new_entry("spa", spa->spa_nfit_idx,
527 DEV_BSIZE, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT,
528 DEVSTAT_PRIORITY_MAX);
/*
 * Tear down one SPA mapping.
 *
 * It first stops the worker: spa_g_proc_run is cleared, the worker is woken
 * and the caller sleeps until the worker sets spa_g_proc_exiting.  Then the
 * geom, the devstat entry, the cdev, the VM object and the KVA mapping are
 * released, and finally both mutexes are destroyed.
 *
 * NOTE(review): in the visible lines spa_g_mtx is locked unconditionally.
 * nvdimm_spa_parse() calls this function after a failed
 * nvdimm_spa_init_one(), and nvdimm_spa_g_fini() calls it for every array
 * entry; confirm the mutexes (and the worker) always exist by then, since
 * mtx_init() happens late in nvdimm_spa_init_one().
 */
534 nvdimm_spa_fini_one(struct SPA_mapping *spa)
537 mtx_lock(&spa->spa_g_mtx);
538 spa->spa_g_proc_run = false;
539 wakeup(&spa->spa_g_queue);
/* Wait for nvdimm_spa_g_thread() to acknowledge the shutdown request. */
540 while (!spa->spa_g_proc_exiting)
541 msleep(&spa->spa_g_queue, &spa->spa_g_mtx, PRIBIO, "spa_e", 0);
542 mtx_unlock(&spa->spa_g_mtx);
543 if (spa->spa_g != NULL) {
545 g_wither_geom(spa->spa_g, ENXIO);
550 if (spa->spa_g_devstat != NULL) {
551 devstat_remove_entry(spa->spa_g_devstat);
552 spa->spa_g_devstat = NULL;
554 if (spa->spa_dev != NULL) {
555 destroy_dev(spa->spa_dev);
558 vm_object_deallocate(spa->spa_obj);
559 if (spa->spa_kva != NULL) {
560 pmap_large_unmap(spa->spa_kva, spa->spa_len);
563 mtx_destroy(&spa->spa_g_mtx);
564 mtx_destroy(&spa->spa_g_stat_mtx);
/*
 * NFIT iteration callback, passed to nvdimm_iterate_nfit() by
 * nvdimm_spa_init1(): set up the next slot of spa_mappings from one
 * system-address sub-table.
 *
 * arg carries the running slot index (nvdimm_spa_init1() passes &i); the
 * index is advanced for every sub-table seen.  An entry whose GUID matches
 * no known type is reported with its range index and UUID; what is returned
 * in that case is not visible in this excerpt.  Otherwise the slot is
 * initialised and, on the visible failure path, torn down again with
 * nvdimm_spa_fini_one().
 */
568 nvdimm_spa_parse(void *nfitsubtbl, void *arg)
570 ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr;
571 struct SPA_mapping *spa;
572 enum SPA_mapping_type spa_type;
/* Claim the next array slot and advance the caller's index. */
576 spa = &spa_mappings[(*i)++];
577 nfitaddr = nfitsubtbl;
578 spa_type = nvdimm_spa_type_from_uuid(
579 (struct uuid *)&nfitaddr->RangeGuid);
580 if (spa_type == SPA_TYPE_UNKNOWN) {
581 printf("Unknown SPA UUID %d ", nfitaddr->RangeIndex);
582 printf_uuid((struct uuid *)&nfitaddr->RangeGuid);
586 error = nvdimm_spa_init_one(spa, nfitaddr, spa_type);
588 nvdimm_spa_fini_one(spa);
/*
 * Build spa_mappings from the NFIT in two passes over its system-address
 * sub-tables: the first pass runs nvdimm_spa_count with &spa_mappings_cnt,
 * then a zeroed array of that many entries is allocated, and the second
 * pass runs nvdimm_spa_parse to fill it.  The array is freed again on a
 * path whose condition is not visible in this excerpt (presumably a failed
 * second pass -- TODO confirm).
 */
593 nvdimm_spa_init1(ACPI_TABLE_NFIT *nfitbl)
597 error = nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS,
598 nvdimm_spa_count, &spa_mappings_cnt);
/* M_WAITOK | M_ZERO: every slot starts out zero-filled. */
601 spa_mappings = malloc(sizeof(struct SPA_mapping) * spa_mappings_cnt,
602 M_NVDIMM, M_WAITOK | M_ZERO);
604 error = nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS,
605 nvdimm_spa_parse, &i);
607 free(spa_mappings, M_NVDIMM);
/*
 * GEOM class init method: locate the ACPI NFIT table and initialise all SPA
 * mappings from it.
 *
 * The mapping count is reset first.  acpi_disabled("nvdimm") is tested
 * before anything else is done (the action taken is not visible, presumably
 * an early return -- TODO confirm).  A missing NFIT and a failure of
 * nvdimm_spa_init1() are both reported with printf.  The table obtained
 * with AcpiGetTable() is released with AcpiPutTable().
 */
615 nvdimm_spa_g_init(struct g_class *mp __unused)
617 ACPI_TABLE_NFIT *nfitbl;
621 spa_mappings_cnt = 0;
623 if (acpi_disabled("nvdimm"))
625 status = AcpiGetTable(ACPI_SIG_NFIT, 1, (ACPI_TABLE_HEADER **)&nfitbl);
626 if (ACPI_FAILURE(status)) {
628 printf("nvdimm_spa_g_init: cannot find NFIT\n");
631 error = nvdimm_spa_init1(nfitbl);
633 printf("nvdimm_spa_g_init: error %d\n", error);
634 AcpiPutTable(&nfitbl->Header);
/*
 * GEOM class fini method: tear down every SPA mapping and release the
 * array.  The spa_mappings == NULL test guards the case where no array
 * exists (its action is not visible, presumably an early return -- TODO
 * confirm).  Otherwise each of the spa_mappings_cnt entries is finalised,
 * the array is freed and the count is reset to zero.
 */
638 nvdimm_spa_g_fini(struct g_class *mp __unused)
642 if (spa_mappings == NULL)
644 for (i = 0; i < spa_mappings_cnt; i++)
645 nvdimm_spa_fini_one(&spa_mappings[i]);
646 free(spa_mappings, M_NVDIMM);
648 spa_mappings_cnt = 0;