2 * Copyright (c) 2017, 2018 The FreeBSD Foundation
4 * Copyright (c) 2018, 2019 Intel Corporation
6 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7 * under sponsorship from the FreeBSD Foundation.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
37 #include <sys/param.h>
38 #include <sys/systm.h>
42 #include <sys/devicestat.h>
45 #include <sys/kernel.h>
46 #include <sys/kthread.h>
47 #include <sys/limits.h>
49 #include <sys/malloc.h>
50 #include <sys/module.h>
51 #include <sys/rwlock.h>
52 #include <sys/sglist.h>
55 #include <geom/geom.h>
56 #include <geom/geom_int.h>
57 #include <machine/vmparam.h>
59 #include <vm/vm_object.h>
60 #include <vm/vm_page.h>
61 #include <vm/vm_pager.h>
62 #include <contrib/dev/acpica/include/acpi.h>
63 #include <contrib/dev/acpica/include/accommon.h>
64 #include <contrib/dev/acpica/include/acuuid.h>
65 #include <dev/acpica/acpivar.h>
66 #include <dev/nvdimm/nvdimm_var.h>
/*
 * UUID initializers, one per SPA range type.  Each macro is used as the
 * .u_id value of the matching nvdimm_SPA_uuid_list[] entry.
 * NOTE(review): presumably these are the address-range-type GUIDs defined
 * for the ACPI NFIT SPA Range Structure -- confirm the values against the
 * specification.
 */
68 #define UUID_INITIALIZER_VOLATILE_MEMORY \
69 {0x7305944f,0xfdda,0x44e3,0xb1,0x6c,{0x3f,0x22,0xd2,0x52,0xe5,0xd0}}
70 #define UUID_INITIALIZER_PERSISTENT_MEMORY \
71 {0x66f0d379,0xb4f3,0x4074,0xac,0x43,{0x0d,0x33,0x18,0xb7,0x8c,0xdb}}
72 #define UUID_INITIALIZER_CONTROL_REGION \
73 {0x92f701f6,0x13b4,0x405d,0x91,0x0b,{0x29,0x93,0x67,0xe8,0x23,0x4c}}
74 #define UUID_INITIALIZER_DATA_REGION \
75 {0x91af0530,0x5d86,0x470e,0xa6,0xb0,{0x0a,0x2d,0xb9,0x40,0x82,0x49}}
76 #define UUID_INITIALIZER_VOLATILE_VIRTUAL_DISK \
77 {0x77ab535a,0x45fc,0x624b,0x55,0x60,{0xf7,0xb2,0x81,0xd1,0xf9,0x6e}}
78 #define UUID_INITIALIZER_VOLATILE_VIRTUAL_CD \
79 {0x3d5abd30,0x4175,0x87ce,0x6d,0x64,{0xd2,0xad,0xe5,0x23,0xc4,0xbb}}
80 #define UUID_INITIALIZER_PERSISTENT_VIRTUAL_DISK \
81 {0x5cea02c9,0x4d07,0x69d3,0x26,0x9f,{0x44,0x96,0xfb,0xe0,0x96,0xf9}}
82 #define UUID_INITIALIZER_PERSISTENT_VIRTUAL_CD \
83 {0x08018188,0x42cd,0xbb48,0x10,0x0f,{0x53,0x87,0xd5,0x3d,0xed,0x3d}}
/*
 * Table of known SPA range types.  Designated initializers place each entry
 * at the index given by its enum SPA_mapping_type value, so the table can be
 * indexed directly by a SPA type.
 *
 * Members read elsewhere in this file:
 *   u_name    - label compared by nvdimm_spa_type_from_name() and printed
 *               by nvdimm_spa_init();
 *   u_id      - UUID compared by nvdimm_spa_type_from_uuid();
 *   u_usr_acc - flag returned by nvdimm_spa_type_user_accessible() and
 *               tested by nvdimm_spa_init().
 */
85 static struct nvdimm_SPA_uuid_list_elm {
89 } nvdimm_SPA_uuid_list[] = {
90 [SPA_TYPE_VOLATILE_MEMORY] = {
91 .u_name = "VOLA MEM ",
92 .u_id = UUID_INITIALIZER_VOLATILE_MEMORY,
95 [SPA_TYPE_PERSISTENT_MEMORY] = {
97 .u_id = UUID_INITIALIZER_PERSISTENT_MEMORY,
100 [SPA_TYPE_CONTROL_REGION] = {
101 .u_name = "CTRL RG ",
102 .u_id = UUID_INITIALIZER_CONTROL_REGION,
105 [SPA_TYPE_DATA_REGION] = {
106 .u_name = "DATA RG ",
107 .u_id = UUID_INITIALIZER_DATA_REGION,
110 [SPA_TYPE_VOLATILE_VIRTUAL_DISK] = {
111 .u_name = "VIRT DSK",
112 .u_id = UUID_INITIALIZER_VOLATILE_VIRTUAL_DISK,
115 [SPA_TYPE_VOLATILE_VIRTUAL_CD] = {
116 .u_name = "VIRT CD ",
117 .u_id = UUID_INITIALIZER_VOLATILE_VIRTUAL_CD,
120 [SPA_TYPE_PERSISTENT_VIRTUAL_DISK] = {
122 .u_id = UUID_INITIALIZER_PERSISTENT_VIRTUAL_DISK,
125 [SPA_TYPE_PERSISTENT_VIRTUAL_CD] = {
127 .u_id = UUID_INITIALIZER_PERSISTENT_VIRTUAL_CD,
/*
 * Look up a SPA range type by label.  Walks every nvdimm_SPA_uuid_list[]
 * entry and compares name against the entry's u_name with strcmp(), so the
 * match is exact and case-sensitive.  SPA_TYPE_UNKNOWN is returned when the
 * loop ends without a match.
 */
132 enum SPA_mapping_type
133 nvdimm_spa_type_from_name(const char *name)
137 for (j = 0; j < nitems(nvdimm_SPA_uuid_list); j++) {
138 if (strcmp(name, nvdimm_SPA_uuid_list[j].u_name) != 0)
142 return (SPA_TYPE_UNKNOWN);
/*
 * Look up a SPA range type by UUID.  Walks every nvdimm_SPA_uuid_list[]
 * entry and compares uuid against the entry's u_id with uuidcmp().
 * SPA_TYPE_UNKNOWN is returned when the loop ends without a match.
 */
145 enum SPA_mapping_type
146 nvdimm_spa_type_from_uuid(struct uuid *uuid)
150 for (j = 0; j < nitems(nvdimm_SPA_uuid_list); j++) {
151 if (uuidcmp(uuid, &nvdimm_SPA_uuid_list[j].u_id) != 0)
155 return (SPA_TYPE_UNKNOWN);
/*
 * Return the u_usr_acc flag recorded for spa_type in nvdimm_SPA_uuid_list[].
 * The table lookup is guarded by a range check: values at or past the end
 * of the table, and values that are negative when viewed as int (hence the
 * cast), do not reach the array index.
 */
159 nvdimm_spa_type_user_accessible(enum SPA_mapping_type spa_type)
162 if ((int)spa_type < 0 || spa_type >= nitems(nvdimm_SPA_uuid_list))
164 return (nvdimm_SPA_uuid_list[spa_type].u_usr_acc);
/*
 * Translate EFI memory attribute flags into a VM_MEMATTR_* caching mode.
 *
 * The attribute bits are tested in a fixed priority order -- WB, WT, WC, WP,
 * UC -- and the first one that is set decides the mode.  The final branch
 * handles flags with none of these bits: it logs them as unsupported and
 * falls back to VM_MEMATTR_UNCACHEABLE.
 */
168 nvdimm_spa_memattr(uint64_t efi_mem_flags)
172 if ((efi_mem_flags & EFI_MD_ATTR_WB) != 0)
173 mode = VM_MEMATTR_WRITE_BACK;
174 else if ((efi_mem_flags & EFI_MD_ATTR_WT) != 0)
175 mode = VM_MEMATTR_WRITE_THROUGH;
176 else if ((efi_mem_flags & EFI_MD_ATTR_WC) != 0)
177 mode = VM_MEMATTR_WRITE_COMBINING;
178 else if ((efi_mem_flags & EFI_MD_ATTR_WP) != 0)
179 mode = VM_MEMATTR_WRITE_PROTECTED;
180 else if ((efi_mem_flags & EFI_MD_ATTR_UC) != 0)
181 mode = VM_MEMATTR_UNCACHEABLE;
184 printf("SPA mapping attr %#lx unsupported\n",
186 mode = VM_MEMATTR_UNCACHEABLE;
/*
 * Move data between a uio and the SPA range described by dev.
 *
 * Two paths exist, selected by whether the range has a kernel mapping:
 *  - dev->spa_kva == NULL: a fake vm_page (m) is initialised with the
 *    range's memory attribute and, for each chunk, retargeted at the
 *    physical page containing spa_phys_base + uio_offset; the chunk is then
 *    moved with uiomove_fromphys() starting at the in-page offset.
 *  - otherwise: the chunk is moved with uiomove() at spa_kva + uio_offset,
 *    after being clipped so it does not extend past spa_len.
 * In both loops the chunk size n is limited to uio_resid, and uio_offset is
 * tested against spa_len before every transfer.
 */
192 nvdimm_spa_uio(struct nvdimm_spa_dev *dev, struct uio *uio)
194 struct vm_page m, *ma;
200 if (dev->spa_kva == NULL) {
201 mattr = dev->spa_memattr;
202 bzero(&m, sizeof(m));
203 vm_page_initfake(&m, 0, mattr);
205 while (uio->uio_resid > 0) {
206 if (uio->uio_offset >= dev->spa_len)
/* Physical address of the next byte to transfer. */
208 off = dev->spa_phys_base + uio->uio_offset;
/* Point the fake page at the physical page that contains off. */
209 vm_page_updatefake(&m, trunc_page(off), mattr);
211 if (n > uio->uio_resid)
213 error = uiomove_fromphys(&ma, off & PAGE_MASK, n, uio);
218 while (uio->uio_resid > 0) {
219 if (uio->uio_offset >= dev->spa_len)
222 if (n > uio->uio_resid)
/* Clip the chunk at the end of the SPA range. */
224 if (uio->uio_offset + n > dev->spa_len)
225 n = dev->spa_len - uio->uio_offset;
226 error = uiomove((char *)dev->spa_kva + uio->uio_offset,
/*
 * Character-device read/write entry point; spa_cdevsw installs it as both
 * d_read and d_write.  It forwards the uio to nvdimm_spa_uio() together
 * with the nvdimm_spa_dev stored in the cdev's si_drv1, and returns that
 * call's result.  ioflag is not used in the visible code.
 */
236 nvdimm_spa_rw(struct cdev *dev, struct uio *uio, int ioflag)
239 return (nvdimm_spa_uio(dev->si_drv1, uio));
/*
 * Character-device ioctl handler.
 *
 * DIOCGSECTORSIZE stores DEV_BSIZE into data as a u_int.  A second request
 * stores dev->spa_len into data as an off_t.
 * NOTE(review): the second request is presumably DIOCGMEDIASIZE -- confirm
 * against the full switch statement.
 */
243 nvdimm_spa_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
246 struct nvdimm_spa_dev *dev;
252 case DIOCGSECTORSIZE:
253 *(u_int *)data = DEV_BSIZE;
256 *(off_t *)data = dev->spa_len;
/*
 * d_mmap_single handler: hand the device's VM object (dev->spa_obj) to the
 * caller for a mapping request.
 *
 * The request is validated first: spa_obj must exist, and the window
 * [*offset, *offset + size) must lie inside the range length spa_len.  The
 * middle test (*offset + size < *offset) catches wrap-around of the sum.
 * On success an extra reference is taken on spa_obj before it is stored
 * through objp.  nprot is not examined in the visible code.
 */
266 nvdimm_spa_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
267 vm_object_t *objp, int nprot)
269 struct nvdimm_spa_dev *dev;
272 if (dev->spa_obj == NULL)
274 if (*offset >= dev->spa_len || *offset + size < *offset ||
275 *offset + size > dev->spa_len)
277 vm_object_reference(dev->spa_obj);
278 *objp = dev->spa_obj;
/*
 * Character device switch used for the SPA device nodes created in
 * nvdimm_spa_dev_init().  Reads and writes share one handler
 * (nvdimm_spa_rw); ioctl and mmap_single have their own.
 */
282 static struct cdevsw spa_cdevsw = {
283 .d_version = D_VERSION,
285 .d_name = "nvdimm_spa",
286 .d_read = nvdimm_spa_rw,
287 .d_write = nvdimm_spa_rw,
288 .d_ioctl = nvdimm_spa_ioctl,
289 .d_mmap_single = nvdimm_spa_mmap_single,
/*
 * Copy the data of an unmapped bio directly between the bio's pages
 * (bp->bio_ma) and the SPA physical range, using fake vm_page structures
 * to describe the SPA side.
 *
 * maa[] holds one fake page per bio page; entry i is initialised to the
 * physical address spa_phys_base + trunc_page(bio_offset) + i * PAGE_SIZE
 * with the range's memory attribute.  The two pmap_copy_pages() calls copy
 * bio_length bytes in opposite directions (SPA pages -> bio pages, and bio
 * pages -> SPA pages).
 * NOTE(review): the direction is presumably selected by rw -- confirm
 * against the full function body.
 *
 * Both arrays are variable-length arrays sized by bp->bio_ma_n, so the
 * stack usage of this function grows with the number of pages in the bio.
 */
293 nvdimm_spa_g_all_unmapped(struct nvdimm_spa_dev *dev, struct bio *bp, int rw)
295 struct vm_page maa[bp->bio_ma_n];
296 vm_page_t ma[bp->bio_ma_n];
300 mattr = dev->spa_memattr;
301 for (i = 0; i < nitems(ma); i++) {
302 bzero(&maa[i], sizeof(maa[i]));
303 vm_page_initfake(&maa[i], dev->spa_phys_base +
304 trunc_page(bp->bio_offset) + PAGE_SIZE * i, mattr);
308 pmap_copy_pages(ma, bp->bio_offset & PAGE_MASK, bp->bio_ma,
309 bp->bio_ma_offset, bp->bio_length);
311 pmap_copy_pages(bp->bio_ma, bp->bio_ma_offset, ma,
312 bp->bio_offset & PAGE_MASK, bp->bio_length);
/*
 * Worker kernel process for one SPA GEOM; it is started by kproc_create()
 * in nvdimm_spa_g_create(), which passes the struct g_spa softc as arg.
 *
 * Under spa_g_mtx the worker takes bios from spa_g_queue and sleeps on the
 * queue address while waiting for work.  When spa_g_proc_run has been
 * cleared it sets spa_g_proc_exiting and wakes the sleeper on the queue
 * address (nvdimm_spa_g_destroy_geom() waits for that flag).
 *
 * Each bio is processed with the mutex released:
 *  - commands other than BIO_READ, BIO_WRITE and BIO_FLUSH take a separate
 *    branch;
 *  - BIO_FLUSH writes back either the kernel mapping (pmap_large_map_wb)
 *    or, without a mapping, the physical range
 *    (pmap_flush_cache_phys_range);
 *  - unmapped data bios are served either through uiomove_fromphys() on
 *    the bio's pages (when spa_kva exists) or through
 *    nvdimm_spa_g_all_unmapped();
 *  - mapped data bios build a kernel-space uio over bio_data and go
 *    through nvdimm_spa_uio().
 * The bio is finished with g_io_deliver(bp, error).
 */
316 nvdimm_spa_g_thread(void *arg)
326 mtx_lock(&sc->spa_g_mtx);
328 bp = bioq_takefirst(&sc->spa_g_queue);
331 msleep(&sc->spa_g_queue, &sc->spa_g_mtx, PRIBIO,
/* Shutdown requested: announce the exit to the waiter in destroy_geom. */
333 if (!sc->spa_g_proc_run) {
334 sc->spa_g_proc_exiting = true;
335 wakeup(&sc->spa_g_queue);
336 mtx_unlock(&sc->spa_g_mtx);
341 mtx_unlock(&sc->spa_g_mtx);
342 if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
343 bp->bio_cmd != BIO_FLUSH) {
349 if (bp->bio_cmd == BIO_FLUSH) {
350 if (sc->dev->spa_kva != NULL) {
351 pmap_large_map_wb(sc->dev->spa_kva,
354 pmap_flush_cache_phys_range(
355 (vm_paddr_t)sc->dev->spa_phys_base,
356 (vm_paddr_t)sc->dev->spa_phys_base +
357 sc->dev->spa_len, sc->dev->spa_memattr);
365 if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
366 if (sc->dev->spa_kva != NULL) {
/*
 * Here the uio describes the SPA side (iov_base lies inside spa_kva) and
 * the bio's pages are the physical side handed to uiomove_fromphys(), so
 * uio_rw is the reverse of the bio command: a BIO_READ has to move data
 * out of the uio's buffer into the bio pages.
 */
367 aiovec.iov_base = (char *)sc->dev->spa_kva +
369 aiovec.iov_len = bp->bio_length;
370 auio.uio_iov = &aiovec;
372 auio.uio_resid = bp->bio_length;
373 auio.uio_offset = bp->bio_offset;
374 auio.uio_segflg = UIO_SYSSPACE;
375 auio.uio_rw = bp->bio_cmd == BIO_READ ?
376 UIO_WRITE : UIO_READ;
377 auio.uio_td = curthread;
378 error = uiomove_fromphys(bp->bio_ma,
379 bp->bio_ma_offset, bp->bio_length, &auio);
380 bp->bio_resid = auio.uio_resid;
382 nvdimm_spa_g_all_unmapped(sc->dev, bp,
384 bp->bio_resid = bp->bio_length;
/* Mapped bio: the uio describes bio_data and the SPA side is handled by
 * nvdimm_spa_uio(), so uio_rw follows the bio command directly. */
388 aiovec.iov_base = bp->bio_data;
389 aiovec.iov_len = bp->bio_length;
390 auio.uio_iov = &aiovec;
392 auio.uio_resid = bp->bio_length;
393 auio.uio_offset = bp->bio_offset;
394 auio.uio_segflg = UIO_SYSSPACE;
395 auio.uio_rw = bp->bio_cmd == BIO_READ ? UIO_READ :
397 auio.uio_td = curthread;
398 error = nvdimm_spa_uio(sc->dev, &auio);
399 bp->bio_resid = auio.uio_resid;
/* Close the devstat transaction opened in nvdimm_spa_g_start(). */
401 bp->bio_bcount = bp->bio_length;
402 devstat_end_transaction_bio(sc->spa_g_devstat, bp);
404 bp->bio_completed = bp->bio_length;
405 g_io_deliver(bp, error);
/*
 * GEOM start method (installed as .start in nvdimm_spa_g_class).
 *
 * No I/O is performed here.  For BIO_READ and BIO_WRITE a devstat
 * transaction is opened; then the bio is inserted into the softc's queue
 * with bioq_disksort() under spa_g_mtx, and the worker sleeping on the
 * queue address is woken to process it.
 */
410 nvdimm_spa_g_start(struct bio *bp)
414 sc = bp->bio_to->geom->softc;
415 if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
416 devstat_start_transaction_bio(sc->spa_g_devstat, bp);
418 mtx_lock(&sc->spa_g_mtx);
419 bioq_disksort(&sc->spa_g_queue, bp);
420 wakeup(&sc->spa_g_queue);
421 mtx_unlock(&sc->spa_g_mtx);
/*
 * GEOM access method (installed as .access in nvdimm_spa_g_class).
 * NOTE(review): r, w and e are presumably the requested changes to the
 * provider's read, write and exclusive access counts -- confirm.
 */
425 nvdimm_spa_g_access(struct g_provider *pp, int r, int w, int e)
/*
 * Forward declarations for functions defined later in the file, followed by
 * the GEOM class descriptor that wires the start, access and destroy_geom
 * methods together.  DECLARE_GEOM_CLASS registers the class under the
 * module name g_spa.
 */
431 static struct g_geom * nvdimm_spa_g_create(struct nvdimm_spa_dev *dev,
433 static g_ctl_destroy_geom_t nvdimm_spa_g_destroy_geom;
435 struct g_class nvdimm_spa_g_class = {
437 .version = G_VERSION,
438 .start = nvdimm_spa_g_start,
439 .access = nvdimm_spa_g_access,
440 .destroy_geom = nvdimm_spa_g_destroy_geom,
442 DECLARE_GEOM_CLASS(nvdimm_spa_g_class, g_spa);
/*
 * Initialise a SPA_mapping from an NFIT System Physical Address entry.
 *
 * Copies the type, NFIT range index, base address, length and EFI memory
 * flags from nfitaddr into spa.  The proximity domain is taken from the
 * entry only when ACPI_NFIT_PROXIMITY_VALID is set; otherwise it is -1.
 * A one-line summary of the range is printed, and the caching mode is
 * derived from the EFI flags with nvdimm_spa_memattr().
 *
 * The type's u_usr_acc flag is tested before any device is created.  For
 * the remaining path a name "spa<index>" is formatted, passed to
 * nvdimm_spa_dev_init(), and freed again.
 *
 * spa_type is used to index nvdimm_SPA_uuid_list[] without a range check,
 * so callers must pass a value that is inside the table.
 */
445 nvdimm_spa_init(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr,
446 enum SPA_mapping_type spa_type)
451 spa->spa_type = spa_type;
452 spa->spa_nfit_idx = nfitaddr->RangeIndex;
453 spa->dev.spa_domain =
454 ((nfitaddr->Flags & ACPI_NFIT_PROXIMITY_VALID) != 0) ?
455 nfitaddr->ProximityDomain : -1;
456 spa->dev.spa_phys_base = nfitaddr->Address;
457 spa->dev.spa_len = nfitaddr->Length;
458 spa->dev.spa_efi_mem_flags = nfitaddr->MemoryMapping;
/* Every %jx argument is cast to uintmax_t so it matches the conversion. */
460 printf("NVDIMM SPA%d base %#016jx len %#016jx %s fl %#jx\n",
462 (uintmax_t)spa->dev.spa_phys_base,
463 (uintmax_t)spa->dev.spa_len,
464 nvdimm_SPA_uuid_list[spa_type].u_name,
465 (uintmax_t)spa->dev.spa_efi_mem_flags);
467 spa->dev.spa_memattr = nvdimm_spa_memattr(nfitaddr->MemoryMapping);
468 if (!nvdimm_SPA_uuid_list[spa_type].u_usr_acc)
471 asprintf(&name, M_NVDIMM, "spa%d", spa->spa_nfit_idx);
472 error = nvdimm_spa_dev_init(&spa->dev, name, spa->spa_nfit_idx);
473 free(name, M_NVDIMM);
/*
 * Create the kernel-side objects that expose one SPA range:
 *
 *  1. Try to map the physical range into KVA with pmap_large_map(); the
 *     result is kept in error1 and a failure is logged.
 *  2. Build a one-segment sglist over the physical range and allocate an
 *     OBJT_SG VM object (dev->spa_obj) from it; this is the object handed
 *     out by nvdimm_spa_mmap_single().
 *  3. Create the devfs node "nvdimm_<name>" (root:operator) with
 *     si_drv1 pointing at dev.
 *  4. Create the GEOM with nvdimm_spa_g_create().
 *
 * Every diagnostic message ends in a newline so that it does not run into
 * the next console message.
 */
478 nvdimm_spa_dev_init(struct nvdimm_spa_dev *dev, const char *name, int unit)
480 struct make_dev_args mda;
481 struct sglist *spa_sg;
485 error1 = pmap_large_map(dev->spa_phys_base, dev->spa_len,
486 &dev->spa_kva, dev->spa_memattr);
488 printf("NVDIMM %s cannot map into KVA, error %d\n", name,
493 spa_sg = sglist_alloc(1, M_WAITOK);
494 error = sglist_append_phys(spa_sg, dev->spa_phys_base,
497 dev->spa_obj = vm_pager_allocate(OBJT_SG, spa_sg, dev->spa_len,
498 VM_PROT_ALL, 0, NULL);
499 if (dev->spa_obj == NULL) {
500 printf("NVDIMM %s failed to alloc vm object\n", name);
504 printf("NVDIMM %s failed to init sglist, error %d\n", name,
509 make_dev_args_init(&mda);
510 mda.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME;
511 mda.mda_devsw = &spa_cdevsw;
513 mda.mda_uid = UID_ROOT;
514 mda.mda_gid = GID_OPERATOR;
516 mda.mda_si_drv1 = dev;
518 asprintf(&devname, M_NVDIMM, "nvdimm_%s", name);
519 error = make_dev_s(&mda, &dev->spa_dev, "%s", devname);
520 free(devname, M_NVDIMM);
522 printf("NVDIMM %s cannot create devfs node, error %d\n", name,
527 dev->spa_g = nvdimm_spa_g_create(dev, name);
/* A GEOM failure is only recorded when no earlier error is held in error1. */
528 if (dev->spa_g == NULL && error1 == 0)
/*
 * Create the GEOM, provider and worker process for one SPA device.
 *
 * A zeroed struct g_spa softc is allocated (M_WAITOK), its bio queue and
 * mutex are initialised, and the run/exiting flags are set to their
 * starting values before the worker nvdimm_spa_g_thread is launched with
 * kproc_create().  On the failure path the mutex is destroyed and an error
 * is logged.
 *
 * On success a geom and a provider named after name are created.  The
 * provider's media size is the SPA length, its sector size is DEV_BSIZE,
 * and it is flagged for direct send/receive and as accepting unmapped
 * bios.  A devstat entry "spa" is registered for I/O statistics.
 */
533 static struct g_geom *
534 nvdimm_spa_g_create(struct nvdimm_spa_dev *dev, const char *name)
541 sc = malloc(sizeof(struct g_spa), M_NVDIMM, M_WAITOK | M_ZERO);
543 bioq_init(&sc->spa_g_queue);
544 mtx_init(&sc->spa_g_mtx, "spag", NULL, MTX_DEF);
/* Flags polled by the worker and by nvdimm_spa_g_destroy_geom(). */
545 sc->spa_g_proc_run = true;
546 sc->spa_g_proc_exiting = false;
547 error = kproc_create(nvdimm_spa_g_thread, sc, &sc->spa_g_proc, 0, 0,
550 mtx_destroy(&sc->spa_g_mtx);
552 printf("NVDIMM %s cannot create geom worker, error %d\n", name,
556 gp = g_new_geomf(&nvdimm_spa_g_class, "%s", name);
558 sc->spa_p = g_new_providerf(gp, "%s", name);
559 sc->spa_p->mediasize = dev->spa_len;
560 sc->spa_p->sectorsize = DEV_BSIZE;
561 sc->spa_p->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE |
562 G_PF_ACCEPT_UNMAPPED;
563 g_error_provider(sc->spa_p, 0);
564 sc->spa_g_devstat = devstat_new_entry("spa", -1, DEV_BSIZE,
565 DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT,
566 DEVSTAT_PRIORITY_MAX);
/*
 * Tear down a SPA_mapping by releasing its embedded device state with
 * nvdimm_spa_dev_fini().
 */
573 nvdimm_spa_fini(struct SPA_mapping *spa)
576 nvdimm_spa_dev_fini(&spa->dev);
/*
 * Release what nvdimm_spa_dev_init() set up.  The visible steps run in this
 * order: destroy the GEOM (if one exists), destroy the devfs node (if one
 * exists), drop the reference on the VM object, and unmap the KVA mapping
 * (if one exists).
 *
 * vm_object_deallocate() is called without a NULL test on spa_obj.
 * NOTE(review): this relies on vm_object_deallocate() accepting NULL --
 * confirm.
 */
580 nvdimm_spa_dev_fini(struct nvdimm_spa_dev *dev)
583 if (dev->spa_g != NULL) {
585 nvdimm_spa_g_destroy_geom(NULL, dev->spa_g->class, dev->spa_g);
588 if (dev->spa_dev != NULL) {
589 destroy_dev(dev->spa_dev);
592 vm_object_deallocate(dev->spa_obj);
593 if (dev->spa_kva != NULL) {
594 pmap_large_unmap(dev->spa_kva, dev->spa_len);
/*
 * GEOM destroy_geom method; nvdimm_spa_dev_fini() also calls it directly
 * with a NULL request.
 *
 * Shutdown handshake with the worker: under spa_g_mtx the run flag is
 * cleared and the worker is woken; this function then sleeps on the queue
 * address until the worker has set spa_g_proc_exiting.  After that the
 * geom is withered with ENXIO, the devstat entry is removed (and its
 * pointer cleared so it is not removed twice), and the mutex is destroyed.
 */
600 nvdimm_spa_g_destroy_geom(struct gctl_req *req, struct g_class *cp,
606 mtx_lock(&sc->spa_g_mtx);
607 sc->spa_g_proc_run = false;
608 wakeup(&sc->spa_g_queue);
/* Wait for the worker to acknowledge the shutdown request. */
609 while (!sc->spa_g_proc_exiting)
610 msleep(&sc->spa_g_queue, &sc->spa_g_mtx, PRIBIO, "spa_e", 0);
611 mtx_unlock(&sc->spa_g_mtx);
613 g_wither_geom(gp, ENXIO);
615 if (sc->spa_g_devstat != NULL) {
616 devstat_remove_entry(sc->spa_g_devstat);
617 sc->spa_g_devstat = NULL;
619 mtx_destroy(&sc->spa_g_mtx);