2 * Copyright (c) 2017, 2018 The FreeBSD Foundation
5 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
6 * under sponsorship from the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
36 #include <sys/param.h>
37 #include <sys/systm.h>
41 #include <sys/devicestat.h>
44 #include <sys/kernel.h>
45 #include <sys/kthread.h>
46 #include <sys/limits.h>
48 #include <sys/malloc.h>
49 #include <sys/module.h>
50 #include <sys/rwlock.h>
51 #include <sys/sglist.h>
54 #include <geom/geom.h>
55 #include <geom/geom_int.h>
56 #include <machine/vmparam.h>
58 #include <vm/vm_object.h>
59 #include <vm/vm_page.h>
60 #include <vm/vm_pager.h>
61 #include <contrib/dev/acpica/include/acpi.h>
62 #include <contrib/dev/acpica/include/accommon.h>
63 #include <contrib/dev/acpica/include/acuuid.h>
64 #include <dev/acpica/acpivar.h>
65 #include <dev/nvdimm/nvdimm_var.h>
/*
 * Table of SPA mapping descriptors.  nvdimm_spa_init1() allocates it
 * zero-filled with spa_mappings_cnt entries, and nvdimm_spa_g_fini()
 * tears the entries down and frees it.
 */
67 struct SPA_mapping *spa_mappings;
/*
 * NFIT iteration callback.  nvdimm_spa_init1() passes it to
 * nvdimm_iterate_nfit() together with &spa_mappings_cnt as arg; the
 * sub-table pointer is marked unused.  Presumably it counts the visited
 * sub-tables into *arg -- confirm against the function body.
 */
71 nvdimm_spa_count(void *nfitsubtbl __unused, void *arg)
/*
 * Known SPA range types, indexed by SPA_TYPE_*.  For each type the table
 * holds a short name (u_name, printed by nvdimm_spa_init_one()) and the
 * textual UUID (u_id_str).  nvdimm_spa_init1() converts u_id_str into
 * u_id with parse_uuid(), and nvdimm_spa_parse() compares u_id against
 * the RangeGuid of each NFIT entry.  nvdimm_spa_init_one() additionally
 * consults the u_usr_acc member of the selected entry.
 */
80 static struct nvdimm_SPA_uuid_list_elm {
85 } nvdimm_SPA_uuid_list[] = {
86 [SPA_TYPE_VOLATILE_MEMORY] = {
87 .u_name = "VOLA MEM ",
88 .u_id_str = UUID_VOLATILE_MEMORY,
91 [SPA_TYPE_PERSISTENT_MEMORY] = {
93 .u_id_str = UUID_PERSISTENT_MEMORY,
96 [SPA_TYPE_CONTROL_REGION] = {
98 .u_id_str = UUID_CONTROL_REGION,
101 [SPA_TYPE_DATA_REGION] = {
102 .u_name = "DATA RG ",
103 .u_id_str = UUID_DATA_REGION,
106 [SPA_TYPE_VOLATILE_VIRTUAL_DISK] = {
107 .u_name = "VIRT DSK",
108 .u_id_str = UUID_VOLATILE_VIRTUAL_DISK,
111 [SPA_TYPE_VOLATILE_VIRTUAL_CD] = {
112 .u_name = "VIRT CD ",
113 .u_id_str = UUID_VOLATILE_VIRTUAL_CD,
116 [SPA_TYPE_PERSISTENT_VIRTUAL_DISK] = {
118 .u_id_str = UUID_PERSISTENT_VIRTUAL_DISK,
121 [SPA_TYPE_PERSISTENT_VIRTUAL_CD] = {
123 .u_id_str = UUID_PERSISTENT_VIRTUAL_CD,
/*
 * Choose the VM memory attribute for a SPA range from its EFI memory
 * attribute flags (spa_efi_mem_flags).  The flags are tested in the
 * order WB, WT, WC, WP, UC, and the first one that is set decides the
 * mode.  Callers use the result as the memattr argument of the fake
 * page, large-map and cache-flush routines.
 */
129 nvdimm_spa_memattr(struct SPA_mapping *spa)
133 if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WB) != 0)
134 mode = VM_MEMATTR_WRITE_BACK;
135 else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WT) != 0)
136 mode = VM_MEMATTR_WRITE_THROUGH;
137 else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WC) != 0)
138 mode = VM_MEMATTR_WRITE_COMBINING;
139 else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_WP) != 0)
140 mode = VM_MEMATTR_WRITE_PROTECTED;
141 else if ((spa->spa_efi_mem_flags & EFI_MD_ATTR_UC) != 0)
142 mode = VM_MEMATTR_UNCACHEABLE;
/*
 * Fallback path (presumably the final else of the chain above): report
 * the unsupported attribute and use the uncacheable mode.
 */
145 printf("SPA%d mapping attr unsupported\n",
147 mode = VM_MEMATTR_UNCACHEABLE;
/*
 * Transfer data between a uio and the SPA range, with uio_offset used as
 * the byte offset into the range.  Both loops run while uio_resid is
 * positive and test whether uio_offset has reached spa_len.
 */
153 nvdimm_spa_uio(struct SPA_mapping *spa, struct uio *uio)
155 struct vm_page m, *ma;
/*
 * The SPA has no KVA mapping: access the memory through a fake page
 * that is re-pointed at the physical page containing the current offset
 * on every iteration.
 */
161 if (spa->spa_kva == NULL) {
162 mattr = nvdimm_spa_memattr(spa);
163 vm_page_initfake(&m, 0, mattr);
165 while (uio->uio_resid > 0) {
166 if (uio->uio_offset >= spa->spa_len)
/* Physical address that corresponds to the current uio offset. */
168 off = spa->spa_phys_base + uio->uio_offset;
169 vm_page_updatefake(&m, trunc_page(off), mattr);
/* The chunk size n is compared against what is left in the request. */
171 if (n > uio->uio_resid)
/* The in-page part of off selects the starting byte inside the page. */
173 error = uiomove_fromphys(&ma, off & PAGE_MASK, n, uio);
/*
 * Second loop (presumably the branch taken when the SPA is mapped at
 * spa_kva): copy directly through the kernel mapping.
 */
178 while (uio->uio_resid > 0) {
179 if (uio->uio_offset >= spa->spa_len)
182 if (n > uio->uio_resid)
/* Clamp the chunk so that it does not run past the end of the SPA. */
184 if (uio->uio_offset + n > spa->spa_len)
185 n = spa->spa_len - uio->uio_offset;
186 error = uiomove((char *)spa->spa_kva + uio->uio_offset,
/*
 * Read/write handler of the character device (spa_cdevsw installs it as
 * both d_read and d_write).  The SPA descriptor is taken from si_drv1
 * and the request is handed to nvdimm_spa_uio(), whose result is
 * returned.
 */
196 nvdimm_spa_rw(struct cdev *dev, struct uio *uio, int ioflag)
199 return (nvdimm_spa_uio(dev->si_drv1, uio));
/*
 * ioctl handler of the character device.  The visible requests store
 * their answer through the data pointer.
 */
203 nvdimm_spa_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
206 struct SPA_mapping *spa;
/* Report the sector size, fixed at DEV_BSIZE. */
212 case DIOCGSECTORSIZE:
213 *(u_int *)data = DEV_BSIZE;
/*
 * Report the length of the SPA range as an off_t.  The case label of
 * this statement is not visible here; presumably it is the media size
 * request -- confirm.
 */
216 *(off_t *)data = spa->spa_len;
/*
 * d_mmap_single handler: hand out the VM object that describes the SPA
 * range (spa_obj) through *objp.
 */
226 nvdimm_spa_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size,
227 vm_object_t *objp, int nprot)
229 struct SPA_mapping *spa;
/* Without a VM object for this SPA there is nothing to hand out. */
232 if (spa->spa_obj == NULL)
/*
 * Reject a range that starts at or beyond the end of the SPA, whose end
 * wraps around, or that extends past the end of the SPA.
 */
234 if (*offset >= spa->spa_len || *offset + size < *offset ||
235 *offset + size > spa->spa_len)
/* Take an extra reference for the object handed back to the caller. */
237 vm_object_reference(spa->spa_obj);
238 *objp = spa->spa_obj;
/*
 * Character device switch used for the devfs nodes created by
 * nvdimm_spa_init_one().  Reads and writes share one handler.
 */
242 static struct cdevsw spa_cdevsw = {
243 .d_version = D_VERSION,
245 .d_name = "nvdimm_spa",
246 .d_read = nvdimm_spa_rw,
247 .d_write = nvdimm_spa_rw,
248 .d_ioctl = nvdimm_spa_ioctl,
249 .d_mmap_single = nvdimm_spa_mmap_single,
/*
 * Helper of nvdimm_spa_g_thread() for unmapped bios: describe the SPA
 * physical pages touched by the request with fake vm_page structures
 * and copy between them and the bio's page array with
 * pmap_copy_pages().
 *
 * NOTE(review): maa[] and ma[] are variable-length arrays sized by
 * bp->bio_ma_n, so the stack usage of this function scales with the
 * request size -- confirm that this is acceptably bounded.
 */
253 nvdimm_spa_g_all_unmapped(struct SPA_mapping *spa, struct bio *bp,
256 struct vm_page maa[bp->bio_ma_n];
257 vm_page_t ma[bp->bio_ma_n];
261 mattr = nvdimm_spa_memattr(spa);
262 for (i = 0; i < nitems(ma); i++) {
/*
 * Fake page i stands for the i-th physical page of the SPA, counted
 * from the page that contains bio_offset.
 */
264 vm_page_initfake(&maa[i], spa->spa_phys_base +
265 trunc_page(bp->bio_offset) + PAGE_SIZE * i, mattr);
/*
 * Two copy directions follow; the condition selecting between them is
 * not visible here.  In this one the SPA pages are passed first and the
 * bio pages second.
 */
269 pmap_copy_pages(ma, bp->bio_offset & PAGE_MASK, bp->bio_ma,
270 bp->bio_ma_offset, bp->bio_length);
/* Here the bio pages are passed first and the SPA pages second. */
272 pmap_copy_pages(bp->bio_ma, bp->bio_ma_offset, ma,
273 bp->bio_offset & PAGE_MASK, bp->bio_length);
/*
 * Body of the per-SPA GEOM worker created by nvdimm_spa_init_one() with
 * kproc_create().  It takes bios queued by nvdimm_spa_g_start() off
 * spa_g_queue, performs the transfer or flush against the SPA range and
 * completes each bio with g_io_deliver().
 */
277 nvdimm_spa_g_thread(void *arg)
279 struct SPA_mapping *spa;
/*
 * Dequeue the next request under spa_g_mtx.  spa_g_queue is also the
 * sleep/wakeup channel shared with nvdimm_spa_g_start() and
 * nvdimm_spa_fini_one().
 */
287 mtx_lock(&spa->spa_g_mtx);
289 bp = bioq_takefirst(&spa->spa_g_queue);
292 msleep(&spa->spa_g_queue, &spa->spa_g_mtx, PRIBIO,
/*
 * Shutdown requested by nvdimm_spa_fini_one(): acknowledge through
 * spa_g_proc_exiting and wake the waiter before dropping the lock.
 */
294 if (!spa->spa_g_proc_run) {
295 spa->spa_g_proc_exiting = true;
296 wakeup(&spa->spa_g_queue);
297 mtx_unlock(&spa->spa_g_mtx);
302 mtx_unlock(&spa->spa_g_mtx);
/*
 * Only BIO_READ, BIO_WRITE and BIO_FLUSH are serviced below; the
 * handling of any other command is not visible here.
 */
303 if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
304 bp->bio_cmd != BIO_FLUSH) {
/*
 * Flush: write back through the KVA mapping when there is one,
 * otherwise flush the cache for the physical range of the SPA.
 */
310 if (bp->bio_cmd == BIO_FLUSH) {
311 if (spa->spa_kva != NULL) {
312 pmap_large_map_wb(spa->spa_kva, spa->spa_len);
314 pmap_flush_cache_phys_range(
315 (vm_paddr_t)spa->spa_phys_base,
316 (vm_paddr_t)spa->spa_phys_base +
317 spa->spa_len, nvdimm_spa_memattr(spa));
/* Unmapped bio: the data is described by the page array bio_ma. */
325 if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
326 if (spa->spa_kva != NULL) {
/*
 * The SPA is mapped, so build a uio over the SPA's kernel addresses
 * and let uiomove_fromphys() copy against the bio pages.  Because the
 * uio describes the SPA side rather than the caller's buffer, uio_rw
 * is inverted relative to the bio command (BIO_READ -> UIO_WRITE).
 */
327 aiovec.iov_base = (char *)spa->spa_kva +
329 aiovec.iov_len = bp->bio_length;
330 auio.uio_iov = &aiovec;
332 auio.uio_resid = bp->bio_length;
333 auio.uio_offset = bp->bio_offset;
334 auio.uio_segflg = UIO_SYSSPACE;
335 auio.uio_rw = bp->bio_cmd == BIO_READ ?
336 UIO_WRITE : UIO_READ;
337 auio.uio_td = curthread;
338 error = uiomove_fromphys(bp->bio_ma,
339 bp->bio_ma_offset, bp->bio_length, &auio);
340 bp->bio_resid = auio.uio_resid;
/*
 * Page-to-page copy through fake pages.
 * NOTE(review): bio_resid is set to the full length here although the
 * copy has been carried out -- confirm that this is intended.
 */
342 nvdimm_spa_g_all_unmapped(spa, bp, bp->bio_cmd);
343 bp->bio_resid = bp->bio_length;
/*
 * Mapped bio: wrap bio_data in a kernel-space uio and reuse
 * nvdimm_spa_uio(), with uio_offset as the offset into the SPA.
 */
347 aiovec.iov_base = bp->bio_data;
348 aiovec.iov_len = bp->bio_length;
349 auio.uio_iov = &aiovec;
351 auio.uio_resid = bp->bio_length;
352 auio.uio_offset = bp->bio_offset;
353 auio.uio_segflg = UIO_SYSSPACE;
354 auio.uio_rw = bp->bio_cmd == BIO_READ ? UIO_READ :
356 auio.uio_td = curthread;
357 error = nvdimm_spa_uio(spa, &auio);
358 bp->bio_resid = auio.uio_resid;
/* Close the devstat transaction started in nvdimm_spa_g_start(). */
360 bp->bio_bcount = bp->bio_length;
361 devstat_end_transaction_bio(spa->spa_g_devstat, bp);
/* Complete the bio and hand the result back to GEOM. */
363 bp->bio_completed = bp->bio_length;
364 g_io_deliver(bp, error);
/*
 * GEOM start method: account the request and queue it for the per-SPA
 * worker (nvdimm_spa_g_thread()).
 */
369 nvdimm_spa_g_start(struct bio *bp)
371 struct SPA_mapping *spa;
/* The SPA descriptor was stored as the geom's softc at creation. */
373 spa = bp->bio_to->geom->softc;
/* Only reads and writes open a devstat transaction. */
374 if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
375 mtx_lock(&spa->spa_g_stat_mtx);
376 devstat_start_transaction_bio(spa->spa_g_devstat, bp);
377 mtx_unlock(&spa->spa_g_stat_mtx);
/* Insert the bio into the sorted queue and wake the worker. */
379 mtx_lock(&spa->spa_g_mtx);
380 bioq_disksort(&spa->spa_g_queue, bp);
381 wakeup(&spa->spa_g_queue);
382 mtx_unlock(&spa->spa_g_mtx);
/*
 * GEOM access method of nvdimm_spa_g_class.  r, w and e are the access
 * count arguments supplied by GEOM for provider pp; the body is not
 * visible here.
 */
386 nvdimm_spa_g_access(struct g_provider *pp, int r, int w, int e)
/*
 * GEOM class for SPA ranges.  The init and fini methods are declared
 * ahead of the class because their definitions appear further down.
 */
392 static g_init_t nvdimm_spa_g_init;
393 static g_fini_t nvdimm_spa_g_fini;
395 struct g_class nvdimm_spa_g_class = {
397 .version = G_VERSION,
398 .start = nvdimm_spa_g_start,
399 .access = nvdimm_spa_g_access,
400 .init = nvdimm_spa_g_init,
401 .fini = nvdimm_spa_g_fini,
/* Register the class with GEOM under the module name g_spa. */
403 DECLARE_GEOM_CLASS(nvdimm_spa_g_class, g_spa);
/*
 * Set up one SPA descriptor from an NFIT System Physical Address
 * sub-table: record the range, try to map it into KVA, create a VM
 * object and a devfs node for it, start the GEOM worker, and publish a
 * GEOM provider together with a devstat entry.
 */
406 nvdimm_spa_init_one(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr,
409 struct make_dev_args mda;
410 struct sglist *spa_sg;
/* Copy the range description out of the NFIT sub-table. */
413 spa->spa_type = spa_type;
/* The proximity domain is recorded only when flagged valid, else -1. */
414 spa->spa_domain = ((nfitaddr->Flags & ACPI_NFIT_PROXIMITY_VALID) != 0) ?
415 nfitaddr->ProximityDomain : -1;
416 spa->spa_nfit_idx = nfitaddr->RangeIndex;
417 spa->spa_phys_base = nfitaddr->Address;
418 spa->spa_len = nfitaddr->Length;
419 spa->spa_efi_mem_flags = nfitaddr->MemoryMapping;
/*
 * Announce the range.
 * NOTE(review): spa_efi_mem_flags is passed to %#jx without the
 * (uintmax_t) cast used for the other two values -- confirm that its
 * type matches the format.
 */
421 printf("NVDIMM SPA%d base %#016jx len %#016jx %s fl %#jx\n",
423 (uintmax_t)spa->spa_phys_base, (uintmax_t)spa->spa_len,
424 nvdimm_SPA_uuid_list[spa_type].u_name,
425 spa->spa_efi_mem_flags);
/*
 * Range types without u_usr_acc are treated specially; the statement
 * under this test is not visible here (presumably an early return).
 */
427 if (!nvdimm_SPA_uuid_list[spa_type].u_usr_acc)
/* Try to map the whole range into KVA; a failure is reported below. */
430 error1 = pmap_large_map(spa->spa_phys_base, spa->spa_len,
431 &spa->spa_kva, nvdimm_spa_memattr(spa));
433 printf("NVDIMM SPA%d cannot map into KVA, error %d\n",
434 spa->spa_nfit_idx, error1);
/*
 * Describe the physical range with a one-segment scatter/gather list
 * and create an OBJT_SG VM object from it for nvdimm_spa_mmap_single().
 */
438 spa_sg = sglist_alloc(1, M_WAITOK);
439 error = sglist_append_phys(spa_sg, spa->spa_phys_base,
442 spa->spa_obj = vm_pager_allocate(OBJT_SG, spa_sg, spa->spa_len,
443 VM_PROT_ALL, 0, NULL);
/*
 * NOTE(review): the next two diagnostics have no trailing newline,
 * unlike the other messages printed by this function.
 */
444 if (spa->spa_obj == NULL) {
445 printf("NVDIMM SPA%d failed to alloc vm object",
450 printf("NVDIMM SPA%d failed to init sglist, error %d",
451 spa->spa_nfit_idx, error);
/*
 * Create the devfs node, owned by root:operator, with the descriptor
 * stored in si_drv1 for the cdevsw handlers.
 */
455 make_dev_args_init(&mda);
456 mda.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME;
457 mda.mda_devsw = &spa_cdevsw;
459 mda.mda_uid = UID_ROOT;
460 mda.mda_gid = GID_OPERATOR;
462 mda.mda_si_drv1 = spa;
463 error = make_dev_s(&mda, &spa->spa_dev, "nvdimm_spa%d",
466 printf("NVDIMM SPA%d cannot create devfs node, error %d\n",
467 spa->spa_nfit_idx, error);
/*
 * Prepare the request queue, its locks and the run/exit handshake
 * flags, then start the worker process for this SPA.
 */
472 bioq_init(&spa->spa_g_queue);
473 mtx_init(&spa->spa_g_mtx, "spag", NULL, MTX_DEF);
474 mtx_init(&spa->spa_g_stat_mtx, "spagst", NULL, MTX_DEF);
475 spa->spa_g_proc_run = true;
476 spa->spa_g_proc_exiting = false;
477 error = kproc_create(nvdimm_spa_g_thread, spa, &spa->spa_g_proc, 0, 0,
478 "g_spa%d", spa->spa_nfit_idx);
480 printf("NVDIMM SPA%d cannot create geom worker, error %d\n",
481 spa->spa_nfit_idx, error);
/*
 * Create the geom and its provider.  The provider spans the whole SPA
 * with DEV_BSIZE sectors and accepts unmapped bios.
 */
486 spa->spa_g = g_new_geomf(&nvdimm_spa_g_class, "spa%d",
488 spa->spa_g->softc = spa;
489 spa->spa_p = g_new_providerf(spa->spa_g, "spa%d",
491 spa->spa_p->mediasize = spa->spa_len;
492 spa->spa_p->sectorsize = DEV_BSIZE;
493 spa->spa_p->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE |
494 G_PF_ACCEPT_UNMAPPED;
495 g_error_provider(spa->spa_p, 0);
/* Statistics entry used by the start method and the worker. */
496 spa->spa_g_devstat = devstat_new_entry("spa", spa->spa_nfit_idx,
497 DEV_BSIZE, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT,
498 DEVSTAT_PRIORITY_MAX);
/*
 * Tear down what nvdimm_spa_init_one() set up for one SPA descriptor:
 * stop the worker, then release the geom, the devstat entry, the devfs
 * node, the VM object, the KVA mapping and the mutexes.
 *
 * NOTE(review): the function locks spa_g_mtx and waits for the worker
 * unconditionally.  Verify that every descriptor reaching this point
 * had its mutexes initialized and its worker started, e.g. entries for
 * which nvdimm_spa_init_one() stopped at the u_usr_acc test or for
 * which kproc_create() failed.
 */
504 nvdimm_spa_fini_one(struct SPA_mapping *spa)
/*
 * Ask the worker to stop and sleep until it acknowledges by setting
 * spa_g_proc_exiting.
 */
507 mtx_lock(&spa->spa_g_mtx);
508 spa->spa_g_proc_run = false;
509 wakeup(&spa->spa_g_queue);
510 while (!spa->spa_g_proc_exiting)
511 msleep(&spa->spa_g_queue, &spa->spa_g_mtx, PRIBIO, "spa_e", 0);
512 mtx_unlock(&spa->spa_g_mtx);
/* Wither the geom so that pending and future requests get ENXIO. */
513 if (spa->spa_g != NULL) {
515 g_wither_geom(spa->spa_g, ENXIO);
520 if (spa->spa_g_devstat != NULL) {
521 devstat_remove_entry(spa->spa_g_devstat);
522 spa->spa_g_devstat = NULL;
524 if (spa->spa_dev != NULL) {
525 destroy_dev(spa->spa_dev);
/* Drop the reference on the VM object backing mmap. */
528 vm_object_deallocate(spa->spa_obj);
529 if (spa->spa_kva != NULL) {
530 pmap_large_unmap(spa->spa_kva, spa->spa_len);
533 mtx_destroy(&spa->spa_g_mtx);
534 mtx_destroy(&spa->spa_g_stat_mtx);
/*
 * NFIT iteration callback used by nvdimm_spa_init1(): match the
 * RangeGuid of a System Physical Address sub-table against the known
 * UUIDs and initialize the descriptor spa_mappings[*i] for it.
 */
538 nvdimm_spa_parse(void *nfitsubtbl, void *arg)
540 ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr;
541 struct SPA_mapping *spa;
545 spa = &spa_mappings[*i];
546 nfitaddr = nfitsubtbl;
/* Search the table of known range types for this GUID. */
548 for (j = 0; j < nitems(nvdimm_SPA_uuid_list); j++) {
549 /* XXXKIB: is ACPI UUID representation compatible ? */
550 if (uuidcmp((struct uuid *)&nfitaddr->RangeGuid,
551 &nvdimm_SPA_uuid_list[j].u_id) != 0)
/* Match: j doubles as the SPA type passed to the initializer. */
553 error = nvdimm_spa_init_one(spa, nfitaddr, j);
/*
 * Undo a partial setup.  The guarding condition is not visible here;
 * presumably this runs only when initialization failed.
 */
555 nvdimm_spa_fini_one(spa);
/* No table entry matched: report the unknown GUID when bootverbose. */
558 if (j == nitems(nvdimm_SPA_uuid_list) && bootverbose) {
559 printf("Unknown SPA UUID %d ", nfitaddr->RangeIndex);
560 printf_uuid((struct uuid *)&nfitaddr->RangeGuid);
/*
 * Build the spa_mappings table from the NFIT: convert the known UUID
 * strings, count the System Physical Address sub-tables, allocate the
 * table and initialize one descriptor per sub-table.
 */
568 nvdimm_spa_init1(ACPI_TABLE_NFIT *nfitbl)
570 struct nvdimm_SPA_uuid_list_elm *sle;
/* Convert each textual UUID in the table into its binary form. */
573 for (i = 0; i < nitems(nvdimm_SPA_uuid_list); i++) {
574 sle = &nvdimm_SPA_uuid_list[i];
575 error = parse_uuid(sle->u_id_str, &sle->u_id);
578 printf("nvdimm_identify: error %d parsing "
579 "known SPA UUID %d %s\n", error, i,
/* First pass over the NFIT: fill in spa_mappings_cnt. */
585 error = nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS,
586 nvdimm_spa_count, &spa_mappings_cnt);
/* Zero-filled table with one descriptor per counted sub-table. */
589 spa_mappings = malloc(sizeof(struct SPA_mapping) * spa_mappings_cnt,
590 M_NVDIMM, M_WAITOK | M_ZERO);
/* Second pass: nvdimm_spa_parse() uses i as the index into the table. */
592 error = nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS,
593 nvdimm_spa_parse, &i);
/*
 * Release the table again; the guarding condition is not visible here
 * (presumably the error path of the second pass).
 */
595 free(spa_mappings, M_NVDIMM);
/*
 * GEOM class init method: locate the ACPI NFIT table and build the SPA
 * mappings from it.
 */
603 nvdimm_spa_g_init(struct g_class *mp __unused)
605 ACPI_TABLE_NFIT *nfitbl;
609 spa_mappings_cnt = 0;
/* Honor the ACPI knob that disables the nvdimm driver. */
611 if (acpi_disabled("nvdimm"))
/* Look up the NFIT table, instance 1. */
613 status = AcpiGetTable(ACPI_SIG_NFIT, 1, (ACPI_TABLE_HEADER **)&nfitbl);
614 if (ACPI_FAILURE(status)) {
616 printf("nvdimm_spa_g_init: cannot find NFIT\n");
619 error = nvdimm_spa_init1(nfitbl);
621 printf("nvdimm_spa_g_init: error %d\n", error);
/* Done with the table: release it back to ACPICA. */
622 AcpiPutTable(&nfitbl->Header);
/*
 * GEOM class fini method: tear down every SPA descriptor and free the
 * spa_mappings table.
 */
626 nvdimm_spa_g_fini(struct g_class *mp __unused)
/* A NULL table is tested first; the statement under it is not visible. */
630 if (spa_mappings == NULL)
632 for (i = 0; i < spa_mappings_cnt; i++)
633 nvdimm_spa_fini_one(&spa_mappings[i]);
634 free(spa_mappings, M_NVDIMM);
636 spa_mappings_cnt = 0;