/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#if defined(__amd64__) || defined(__i386__)
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <x86/iommu/intel_reg.h>
#include <dev/iommu/busdma_iommu.h>
#include <dev/iommu/iommu.h>
#include <x86/iommu/intel_dmar.h>
#endif

/*
 * busdma_iommu.c, the implementation of the busdma(9) interface using
 * IOMMU units from Intel VT-d.
 */
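
/*
 * Illustrative sketch (example only, not part of this file): how a
 * typical driver consumes the busdma(9) KPI whose IOMMU-backed
 * implementation lives below.  If the device sits behind an IOMMU,
 * the tag created here dispatches into bus_dma_iommu_impl.  All
 * names in this sketch are hypothetical.
 */
#if 0	/* example only */
static void
example_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	/* segs[] now holds device-visible addresses (IOVAs). */
}

static int
example_dma_setup(device_t dev, void *buf, bus_size_t len)
{
	bus_dma_tag_t tag;
	bus_dmamap_t map;
	int error;

	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len,
	    0, NULL, NULL, &tag);
	if (error != 0)
		return (error);
	error = bus_dmamap_create(tag, 0, &map);
	if (error != 0)
		return (error);
	return (bus_dmamap_load(tag, map, buf, len, example_dma_cb, NULL,
	    BUS_DMA_NOWAIT));
}
#endif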

static bool
iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;
	int default_bounce;
	bool ret;
	static const char bounce_str[] = "bounce";
	static const char iommu_str[] = "iommu";
	static const char dmar_str[] = "dmar"; /* compatibility */

	default_bounce = 0;
	env = kern_getenv("hw.busdma.default");
	if (env != NULL) {
		if (strcmp(env, bounce_str) == 0)
			default_bounce = 1;
		else if (strcmp(env, iommu_str) == 0 ||
		    strcmp(env, dmar_str) == 0)
			default_bounce = 0;
		freeenv(env);
	}

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
	    domain, bus, slot, func);
	env = kern_getenv(str);
	if (env == NULL)
		return (default_bounce != 0);
	if (strcmp(env, bounce_str) == 0)
		ret = true;
	else if (strcmp(env, iommu_str) == 0 ||
	    strcmp(env, dmar_str) == 0)
		ret = false;
	else
		ret = default_bounce != 0;
	freeenv(env);
	return (ret);
}
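
/*
 * Illustrative configuration (hypothetical device coordinates): the
 * tunables parsed above come from the kernel environment, typically
 * set in loader.conf(5), e.g.
 *
 *	hw.busdma.default="iommu"
 *	hw.busdma.pci0.0.31.2="bounce"
 *
 * which selects translated DMA by default but forces bounce-buffer
 * busdma for domain 0, bus 0, slot 31, function 2.
 */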

/*
 * Given original device, find the requester ID that will be seen by
 * the IOMMU unit and used for page table lookup.  PCI bridges may take
 * ownership of transactions from downstream devices, so it may not be
 * the same as the BSF of the target device.  In those cases, all
 * devices downstream of the bridge must share a single mapping
 * domain, and must collectively be assigned to use either IOMMU or
 * bounce mapping.
 */
device_t
iommu_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	uint16_t pcie_flags;
	bool bridge_is_pcie;
	int cap_offset;

	pci_class = devclass_find("pci");
	l = requester = dev;

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the IOMMU
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("iommu_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * Device is not PCIe, it cannot be seen as a
			 * requester by IOMMU unit.  Check whether the
			 * bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability.  If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is.  This is a
				 * PCIe->PCI bridge.  Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * 0.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases.  We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
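				/*
				 * Worked example (illustrative): for
				 * a PCIe->PCI bridge whose secondary
				 * bus is 5, PCI_RID(5, 0, 0) gives
				 * requester ID 0x0500, i.e. bus 5,
				 * slot 0, function 0.
				 */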
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it are PCIe.  This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}

struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
	device_t requester;
	struct iommu_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = iommu_get_requester(dev, &rid);

	/*
	 * If the user requested the IOMMU disabled for the device, we
	 * cannot disable the IOMMU unit, due to possibility of other
	 * devices on the same IOMMU unit still requiring translation.
	 * Instead provide the identity mapping for the device
	 * context.
	 */
	disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on context, release the
		 * later refs.
		 */
		IOMMU_LOCK(unit);
		if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
			ctx->flags |= IOMMU_CTX_DISABLED;
			IOMMU_UNLOCK(unit);
		} else {
			iommu_free_ctx_locked(unit, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

bus_dma_tag_t
acpi_iommu_get_dma_tag(device_t dev, device_t child)
{
	struct iommu_unit *unit;
	struct iommu_ctx *ctx;
	bus_dma_tag_t res;

	unit = iommu_find(child, bootverbose);
	/* Not in scope of any IOMMU? */
	if (unit == NULL)
		return (NULL);
	if (!unit->dma_enabled)
		return (NULL);

#if defined(__amd64__) || defined(__i386__)
	dmar_quirks_pre_use(unit);
	dmar_instantiate_rmrr_ctxs(unit);
#endif

	ctx = iommu_instantiate_ctx(unit, child, false);
	res = ctx == NULL ? NULL : (bus_dma_tag_t)ctx->tag;
	return (res);
}

bool
bus_dma_iommu_set_buswide(device_t dev)
{
	struct iommu_unit *unit;
	device_t parent;
	u_int busno, slot, func;

	parent = device_get_parent(dev);
	if (device_get_devclass(parent) != devclass_find("pci"))
		return (false);
	unit = iommu_find(dev, bootverbose);
	if (unit == NULL)
		return (false);
	busno = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);
	if (slot != 0 || func != 0) {
		if (bootverbose) {
			device_printf(dev,
			    "iommu%d pci%d:%d:%d requested buswide busdma\n",
			    unit->unit, busno, slot, func);
		}
		return (false);
	}
	iommu_set_buswide_ctx(unit, busno);
	return (true);
}

void
iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	IOMMU_LOCK(unit);
	unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
	    1 << (busno % (NBBY * sizeof(uint32_t)));
	IOMMU_UNLOCK(unit);
}

bool
iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
	    (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
}
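
/*
 * Worked example (illustrative): with NBBY == 8 and 32-bit words,
 * busno 70 selects word 70 / 8 / 4 == 2 and bit 70 % 32 == 6, so
 * buswide_ctxs[2] & (1U << 6) records that PCI bus 70 is buswide.
 */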

static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");

static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
    struct bus_dmamap_iommu *map);

static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_iommu *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_iommu), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_iommu *)parent;
	newtag->common.impl = &bus_dma_iommu_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	return (0);
}

static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_iommu *dmat, *dmat_copy, *parent;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_iommu *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_iommu *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == dmat->ctx->tag)
					iommu_free_ctx(dmat->ctx);
				free_domain(dmat->segments, M_IOMMU_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

	return (false);
}

static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_IOMMU_DMAMAP,
		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
		if (tag->segments == NULL) {
			free_domain(map, M_IOMMU_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_domain *domain;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	if (map != NULL) {
		domain = tag->ctx->domain;
		IOMMU_DOMAIN_LOCK(domain);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			IOMMU_DOMAIN_UNLOCK(domain);
			return (EBUSY);
		}
		IOMMU_DOMAIN_UNLOCK(domain);
		free_domain(map, M_IOMMU_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	int error, mflags;
	vm_memattr_t attr;

	error = iommu_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
		    DOMAINSET_PREF(tag->common.domain), mflags);
		map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		*vaddr = (void *)kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
		map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		iommu_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
		free_domain(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
		    ("iommu_bus_dmamem_free for non alloced map %p", map));
		kmem_free((vm_offset_t)vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}

	iommu_bus_dmamap_destroy(dmat, map1);
}

static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct iommu_map_entries_tailq *unroll_list)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	bus_size_t size, buflen1;
	int error, idx, gas_flags, seg;

	KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;
		size = round_page(offset + buflen1);

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= IOMMU_MF_CANSPLIT;

		error = iommu_map(domain, &tag->common, size, offset,
		    IOMMU_MAP_ENTRY_READ |
		    ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0),
		    gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		if ((gas_flags & IOMMU_MF_CANSPLIT) != 0) {
			KASSERT(size >= entry->end - entry->start,
			    ("split increased entry size %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
			size = entry->end - entry->start;
			if (buflen1 > size)
				buflen1 = size;
		} else {
			KASSERT(entry->end - entry->start == size,
			    ("no split allowed %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
		}
		if (offset + buflen1 > size)
			buflen1 = size - offset;
		if (buflen1 > tag->common.maxsegsz)
			buflen1 = tag->common.maxsegsz;

		KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
		    == 0,
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(iommu_test_boundary(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= IOMMU_MAP_ENTRY_MAP;
		IOMMU_DOMAIN_UNLOCK(domain);
		TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(trunc_page(offset + buflen1));
		offset += buflen1;
		offset &= IOMMU_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry, *entry1;
	struct iommu_map_entries_tailq unroll_list;
	int error;

	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&unroll_list);
	error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &unroll_list);
	if (error != 0 && !TAILQ_EMPTY(&unroll_list)) {
		/*
		 * The busdma interface does not allow us to report
		 * partial buffer load, so unfortunately we have to
		 * revert all work done.
		 */
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
		    entry1) {
			/*
			 * No entries other than what we have created
			 * during the failed run might have been
			 * inserted there in between, since we own ctx
			 * pglock.
			 */
			TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
			TAILQ_REMOVE(&unroll_list, entry, unroll_link);
			TAILQ_INSERT_TAIL(&domain->unload_entries, entry,
			    dmamap_link);
		}
		IOMMU_DOMAIN_UNLOCK(domain);
		taskqueue_enqueue(domain->iommu->delayed_taskqueue,
		    &domain->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		iommu_bus_schedule_dmamap(domain->iommu, map);
	return (error);
}

static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++) {
		paddr = pstart + ptoa(i);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], pstart + ptoa(i),
			    VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(fma, M_DEVBUF);
	free(ma, M_DEVBUF);
	return (error);
}

static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
		if (pmap == kernel_pmap)
			paddr = pmap_kextract(pstart);
		else
			paddr = pmap_extract(pmap, pstart);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(fma, M_DEVBUF);
	free(ma, M_DEVBUF);
	return (error);
}

static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_iommu *map;

	map = (struct bus_dmamap_iommu *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_iommu *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context.  Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the iommu to perform the actual
 * unload, consisting of the unmapping of the map entries page tables,
 * from the delayed context on i386, since page table page mapping
 * might require a sleep to be successful.  The unfortunate
 * consequence is that the DMA requests can be served some time after
 * the bus_dmamap_unload() call has returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
#if defined(__amd64__)
	struct iommu_map_entries_tailq entries;
#endif

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->unloads, 1);

#if defined(__i386__)
	IOMMU_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&domain->unload_entries, &map->map_entries, dmamap_link);
	IOMMU_DOMAIN_UNLOCK(domain);
	taskqueue_enqueue(domain->iommu->delayed_taskqueue,
	    &domain->unload_task);
#else /* defined(__amd64__) */
	TAILQ_INIT(&entries);
	IOMMU_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	IOMMU_DOMAIN_UNLOCK(domain);
	THREAD_NO_SLEEPING();
	iommu_domain_unload(domain, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
#endif
}

static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dmasync_op_t op)
{
}

struct bus_dma_impl bus_dma_iommu_impl = {
	.tag_create = iommu_bus_dma_tag_create,
	.tag_destroy = iommu_bus_dma_tag_destroy,
	.tag_set_domain = iommu_bus_dma_tag_set_domain,
	.id_mapped = iommu_bus_dma_id_mapped,
	.map_create = iommu_bus_dmamap_create,
	.map_destroy = iommu_bus_dmamap_destroy,
	.mem_alloc = iommu_bus_dmamem_alloc,
	.mem_free = iommu_bus_dmamem_free,
	.load_phys = iommu_bus_dmamap_load_phys,
	.load_buffer = iommu_bus_dmamap_load_buffer,
	.load_ma = iommu_bus_dmamap_load_ma,
	.map_waitok = iommu_bus_dmamap_waitok,
	.map_complete = iommu_bus_dmamap_complete,
	.map_unload = iommu_bus_dmamap_unload,
	.map_sync = iommu_bus_dmamap_sync,
};
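
/*
 * Illustrative note (a sketch of the MD common layer, which is not
 * defined in this file): the busdma(9) front end dispatches through
 * this method table, roughly
 *
 *	tc = (struct bus_dma_tag_common *)dmat;
 *	return (tc->impl->map_create(dmat, flags, mapp));
 *
 * so installing bus_dma_iommu_impl in a tag routes every busdma(9)
 * call on that tag to the functions above.
 */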

static void
iommu_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_unit *unit;

	unit = arg;
	IOMMU_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		IOMMU_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		IOMMU_LOCK(unit);
	}
	IOMMU_UNLOCK(unit);
}

static void
iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
{

	map->locked = false;
	IOMMU_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	IOMMU_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

int
iommu_init_busdma(struct iommu_unit *unit)
{
	int error;

	unit->dma_enabled = 1;
	error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
	if (error == 0) /* compatibility */
		TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "iommu%d busdma taskq", unit->unit);
	return (0);
}

void
iommu_fini_busdma(struct iommu_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}

int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t start, vm_size_t length, int flags)
{
	struct bus_dma_tag_common *tc;
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	vm_page_t *ma;
	vm_size_t i;
	int error;
	bool waitok;

	MPASS((start & PAGE_MASK) == 0);
	MPASS((length & PAGE_MASK) == 0);
	MPASS(length > 0);
	MPASS(start + length >= start);
	MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);

	tc = (struct bus_dma_tag_common *)dmat;
	if (tc->impl != &bus_dma_iommu_impl)
		return (0);

	tag = (struct bus_dma_tag_iommu *)dmat;
	ctx = tag->ctx;
	domain = ctx->domain;
	map = (struct bus_dmamap_iommu *)map1;
	waitok = (flags & BUS_DMA_NOWAIT) != 0;

	entry = iommu_map_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK);
	if (entry == NULL)
		return (ENOMEM);
	entry->start = start;
	entry->end = start + length;
	ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL) {
		iommu_map_free_entry(domain, entry);
		return (ENOMEM);
	}
	for (i = 0; i < atop(length); i++) {
		ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
		    VM_MEMATTR_DEFAULT);
	}
	error = iommu_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE),
	    waitok ? IOMMU_MF_CANWAIT : 0, ma);
	if (error == 0) {
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= IOMMU_MAP_ENTRY_MAP;
		IOMMU_DOMAIN_UNLOCK(domain);
	} else {
		iommu_domain_unload_entry(entry, true);
	}
	for (i = 0; i < atop(length); i++)
		vm_page_putfake(ma[i]);
	free(ma, M_TEMP);
	return (error);
}
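
/*
 * Illustrative usage (hypothetical caller, example only): a driver
 * whose device must see a physical range 1:1, for instance a
 * firmware-dictated buffer at fw_paddr of fw_size bytes (both
 * page-aligned), could pre-install the identity mapping with
 *
 *	error = bus_dma_iommu_load_ident(tag, map, fw_paddr, fw_size,
 *	    BUS_DMA_NOWAIT | BUS_DMA_NOWRITE);
 *
 * on a map created from an IOMMU-backed tag.
 */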