/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#define	RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>
#include <dev/iommu/iommu_gas.h>
#include <dev/iommu/iommu_msi.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * Guest Address Space management.
 */

static uma_zone_t iommu_map_entry_zone;

#ifdef INVARIANTS
static int iommu_check_free;
#endif

static void
intel_gas_init(void)
{

	iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
	    sizeof(struct iommu_map_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
}
SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);

struct iommu_map_entry *
iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
{
	struct iommu_map_entry *res;

	KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
	    ("unsupported flags %x", flags));

	res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
	    0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
	if (res != NULL && domain != NULL) {
		res->domain = domain;
		atomic_add_int(&domain->entries_cnt, 1);
	}
	return (res);
}

void
iommu_gas_free_entry(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	if (domain != NULL)
		atomic_subtract_int(&domain->entries_cnt, 1);
	uma_zfree(iommu_map_entry_zone, entry);
}

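/*
 * Example (illustrative only, not part of the driver): the usual
 * lifecycle of a map entry, assuming the caller is allowed to sleep.
 *
 *	struct iommu_map_entry *e;
 *
 *	e = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
 *	... set e->start and e->end, hand e to iommu_gas_map_region() ...
 *	iommu_gas_free_entry(e);
 *
 * With IOMMU_PGF_WAITOK the allocation sleeps instead of failing;
 * without it the caller must handle a NULL return.
 */
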
static int
iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
{

	/* Last entry has zero size, so <= */
	KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end));
	KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
	    b, (uintmax_t)b->start, (uintmax_t)b->end));
	KASSERT(a->end <= b->start || b->end <= a->start ||
	    a->end == a->start || b->end == b->start,
	    ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end,
	    b, (uintmax_t)b->start, (uintmax_t)b->end));

	if (a->end < b->end)
		return (-1);
	else if (b->end < a->end)
		return (1);
	return (0);
}

/*
 * Update augmentation data based on data from children.
 * Return true if and only if the update changes the augmentation data.
 */
static bool
iommu_gas_augment_entry(struct iommu_map_entry *entry)
{
	struct iommu_map_entry *child;
	iommu_gaddr_t bound, delta, free_down;

	free_down = 0;
	bound = entry->start;
	if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
		free_down = MAX(child->free_down, bound - child->last);
		bound = child->first;
	}
	delta = bound - entry->first;
	entry->first = bound;
	bound = entry->end;
	if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
		free_down = MAX(free_down, child->free_down);
		free_down = MAX(free_down, child->first - bound);
		bound = child->last;
	}
	delta += entry->last - bound;
	if (delta == 0)
		delta = entry->free_down - free_down;
	entry->last = bound;
	entry->free_down = free_down;

	/*
	 * Return true either if the value of last-first changed,
	 * or if free_down changed.
	 */
	return (delta != 0);
}

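/*
 * Worked example (illustrative): let node E span [0x4000, 0x5000), let
 * its left subtree cover entries whose last == 0x2000 with free_down ==
 * 0x1000, and let its right subtree begin at first == 0x9000 with
 * free_down == 0.  Then the update above computes
 *
 *	free_down = MAX(0x1000,		(best gap inside the left subtree)
 *	    0x4000 - 0x2000,		(gap between left subtree and E)
 *	    0x9000 - 0x5000)		(gap between E and right subtree)
 *		  = 0x4000
 *
 * so a search can skip E's whole subtree whenever it needs a free gap
 * larger than E's free_down.
 */
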
RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
    iommu_gas_cmp_entries);

#ifdef INVARIANTS
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *l, *r;
	iommu_gaddr_t v;

	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		KASSERT(domain == entry->domain,
		    ("mismatched free domain %p entry %p entry->domain %p",
		    domain, entry, entry->domain));
		l = RB_LEFT(entry, rb_entry);
		r = RB_RIGHT(entry, rb_entry);
		v = 0;
		if (l != NULL) {
			v = MAX(v, l->free_down);
			v = MAX(v, entry->start - l->last);
		}
		if (r != NULL) {
			v = MAX(v, r->free_down);
			v = MAX(v, r->first - entry->end);
		}
		MPASS(entry->free_down == v);
	}
}
#endif

static void
iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
{
	struct iommu_map_entry *nbr;

	/* Removing entry may open a new free gap before domain->start_gap. */
	if (entry->end <= domain->start_gap->end) {
		if (RB_RIGHT(entry, rb_entry) != NULL)
			nbr = iommu_gas_entries_tree_RB_NEXT(entry);
		else if (RB_LEFT(entry, rb_entry) != NULL)
			nbr = RB_LEFT(entry, rb_entry);
		else
			nbr = RB_PARENT(entry, rb_entry);
		domain->start_gap = nbr;
	}
	RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
}

struct iommu_domain *
iommu_get_ctx_domain(struct iommu_ctx *ctx)
{

	return (ctx->domain);
}

void
iommu_gas_init_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *begin, *end;

	begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);

	IOMMU_DOMAIN_LOCK(domain);
	KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
	KASSERT(RB_EMPTY(&domain->rb_root),
	    ("non-empty entries %p", domain));

	/*
	 * The end entry must be inserted first because it has a zero-length
	 * gap between start and end.  Initially, all augmentation data for a
	 * new entry is zero.  Function iommu_gas_augment_entry will compute
	 * no change in the value of (start-end) and no change in the value
	 * of free_down, so it will return false to suggest that nothing
	 * changed in the entry.  Thus, inserting the end entry second would
	 * prevent the augmentation information from being propagated to the
	 * begin entry at the tree root, which is why the end entry is
	 * inserted first.
	 */
	end->start = domain->end;
	end->end = domain->end;
	end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, end);

	begin->start = 0;
	begin->end = IOMMU_PAGE_SIZE;
	begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	RB_INSERT_PREV(iommu_gas_entries_tree, &domain->rb_root, end, begin);

	domain->start_gap = begin;
	domain->first_place = begin;
	domain->last_place = end;
	domain->flags |= IOMMU_DOMAIN_GAS_INITED;
	IOMMU_DOMAIN_UNLOCK(domain);
}

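/*
 * After initialization the tree holds exactly two placeholder entries
 * (illustrative layout):
 *
 *	begin: [0, IOMMU_PAGE_SIZE)        PLACE | UNMAPPED
 *	end:   [domain->end, domain->end)  PLACE | UNMAPPED (zero-length)
 *
 * All later allocations come from the gap between them, which keeps
 * page zero and everything at or above domain->end unmappable.
 */
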
void
iommu_gas_fini_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(domain->entries_cnt == 2,
	    ("domain still in use %p", domain));

	entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == 0, ("start entry start %p", domain));
	KASSERT(entry->end == IOMMU_PAGE_SIZE, ("start entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("start entry flags %p", domain));
	iommu_gas_rb_remove(domain, entry);
	iommu_gas_free_entry(entry);

	entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == domain->end, ("end entry start %p", domain));
	KASSERT(entry->end == domain->end, ("end entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("end entry flags %p", domain));
	iommu_gas_rb_remove(domain, entry);
	iommu_gas_free_entry(entry);
}

struct iommu_gas_match_args {
	iommu_gaddr_t size;
	int offset;
	const struct bus_dma_tag_common *common;
	u_int gas_flags;
	struct iommu_map_entry *entry;
};

/*
 * The interval [beg, end) is a free interval between two iommu_map_entries.
 * Addresses can be allocated only in the range [lbound, ubound].  Try to
 * allocate space in the free interval, subject to the conditions expressed
 * by a, and return 'true' if and only if the allocation attempt succeeds.
 */
static bool
iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
    iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound)
{
	struct iommu_map_entry *entry;
	iommu_gaddr_t first, size, start;
	int offset;

	/*
	 * The prev->end is always aligned on the page size, which
	 * causes page alignment for the entry->start too.
	 *
	 * Create IOMMU_PAGE_SIZE gaps before, after new entry
	 * to ensure that out-of-bounds accesses fault.
	 */
	beg = MAX(beg + IOMMU_PAGE_SIZE, lbound);
	start = roundup2(beg, a->common->alignment);
	if (start < beg)
		return (false);
	end = MIN(end - IOMMU_PAGE_SIZE - 1, ubound);
	offset = a->offset;
	size = a->size;
	if (start + offset + size - 1 > end)
		return (false);

	/* Check for and try to skip past boundary crossing. */
	if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) {
		/*
		 * The start + offset to start + offset + size region crosses
		 * the boundary.  Check if there is enough space after the next
		 * boundary after the beg.
		 */
		first = start;
		beg = roundup2(start + offset + 1, a->common->boundary);
		start = roundup2(beg, a->common->alignment);

		if (start + offset + size - 1 > end ||
		    !vm_addr_bound_ok(start + offset, size,
		    a->common->boundary)) {
			/*
			 * Not enough space to align at the requested boundary,
			 * or boundary is smaller than the size, but allowed to
			 * split.  We already checked that start + size does
			 * not overlap ubound.
			 *
			 * XXXKIB. It is possible that beg is exactly at the
			 * start of the next entry, then we do not have gap.
			 * Ignore for now.
			 */
			if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0)
				return (false);
			size = beg - first - offset;
			start = first;
		}
	}
	entry = a->entry;
	entry->start = start;
	entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE);
	entry->flags = IOMMU_MAP_ENTRY_MAP;
	return (true);
}

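/*
 * Worked example (illustrative): with IOMMU_PAGE_SIZE == 4K, a free
 * interval beg == 0x10000, end == 0x20000, lbound == 0, a huge ubound,
 * alignment == 0x8000, offset == 0x100 and size == 0x1000:
 *
 *	beg   = MAX(0x10000 + 0x1000, 0) = 0x11000	(leading guard page)
 *	start = roundup2(0x11000, 0x8000) = 0x18000
 *	end   = 0x20000 - 0x1000 - 1 = 0x1efff		(trailing guard page)
 *
 * start + offset + size - 1 == 0x190ff <= 0x1efff, so the fit succeeds
 * (assuming no boundary constraint) and the new entry becomes
 * [0x18000, 0x18000 + roundup2(0x1100, 4K)) == [0x18000, 0x1a000).
 */
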
/* Find the next entry that might abut a big-enough range. */
static struct iommu_map_entry *
iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
{
	struct iommu_map_entry *next;

	if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
	    next->free_down >= min_free) {
		/* Find next entry in right subtree. */
		do
			curr = next;
		while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
		    next->free_down >= min_free);
	} else {
		/* Find next entry in a left-parent ancestor. */
		while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
		    curr == RB_RIGHT(next, rb_entry))
			curr = next;
		curr = next;
	}
	return (curr);
}

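/*
 * This is an ordinary in-order successor walk, pruned by free_down:
 * subtrees that cannot contain a gap of at least min_free bytes are
 * never entered, so the walk visits only entries that might abut a
 * usable free range.
 */
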
/*
 * Address-ordered first-fit search of 'domain' for free space satisfying the
 * conditions of 'a'.  The space allocated is at least one page big, and is
 * bounded by guard pages to the left and right.  The allocated space for
 * 'domain' is described by an rb-tree of map entries at domain->rb_root, and
 * domain->start_gap points to a map entry less than or adjacent to the first
 * free-space of size at least 3 pages.
 */
static int
iommu_gas_find_space(struct iommu_domain *domain,
    struct iommu_gas_match_args *a)
{
	struct iommu_map_entry *curr, *first;
	iommu_gaddr_t addr, min_free;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(a->entry->flags == 0,
	    ("dirty entry %p %p", domain, a->entry));

	/*
	 * start_gap may point to an entry adjacent to gaps too small for any
	 * new allocation.  In that case, advance start_gap to the first free
	 * space big enough for a minimum allocation plus two guard pages.
	 */
	min_free = 3 * IOMMU_PAGE_SIZE;
	first = domain->start_gap;
	while (first != NULL && first->free_down < min_free)
		first = RB_PARENT(first, rb_entry);
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    first->last + min_free <= curr->start)
			break;
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    curr->end + min_free <= first->first)
			break;
	}
	domain->start_gap = curr;

	/*
	 * If the subtree doesn't have free space for the requested allocation
	 * plus two guard pages, skip it.
	 */
	min_free = 2 * IOMMU_PAGE_SIZE +
	    roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);

	/* Climb to find a node in the subtree of big-enough ranges. */
	first = curr;
	while (first != NULL && first->free_down < min_free)
		first = RB_PARENT(first, rb_entry);

	/*
	 * Walk the big-enough ranges tree until one satisfies alignment
	 * requirements, or violates lowaddr address requirement.
	 */
	addr = a->common->lowaddr;
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    0, addr)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if (curr->end >= addr) {
			/* All remaining ranges > addr */
			break;
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    0, addr)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	/*
	 * To resume the search at the start of the upper region, first climb to
	 * the nearest ancestor that spans highaddr.  Then find the last entry
	 * before highaddr that could abut a big-enough range.
	 */
	addr = a->common->highaddr;
	while (curr != NULL && curr->last < addr)
		curr = RB_PARENT(curr, rb_entry);
	first = NULL;
	while (curr != NULL && curr->free_down >= min_free) {
		if (addr < curr->end)
			curr = RB_LEFT(curr, rb_entry);
		else {
			first = curr;
			curr = RB_RIGHT(curr, rb_entry);
		}
	}

	/*
	 * Walk the remaining big-enough ranges until one satisfies alignment
	 * requirements.
	 */
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    addr + 1, domain->end - 1)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    addr + 1, domain->end - 1)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	return (ENOMEM);
}

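/*
 * Illustrative scenario: for a tag with lowaddr == 0xffffffff and
 * highaddr == 0x100000000, the first loop above places the entry only
 * where it ends at or below lowaddr, and the second loop resumes the
 * search above highaddr.  The exclusion window between lowaddr and
 * highaddr is never allocated from.
 */
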
static int
iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int flags)
{
	struct iommu_map_entry *next, *prev;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
	    (entry->end & IOMMU_PAGE_MASK) != 0)
		return (EINVAL);
	if (entry->start >= entry->end)
		return (EINVAL);
	if (entry->end >= domain->end)
		return (EINVAL);

	next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
	    (uintmax_t)entry->start));
	prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
	/* prev could be NULL */

	/*
	 * Adapt to broken BIOSes which specify overlapping RMRR
	 * entries.
	 *
	 * XXXKIB: this does not handle a case when prev or next
	 * entries are completely covered by the current one, which
	 * extends both ways.
	 */
	if (prev != NULL && prev->end > entry->start &&
	    (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->start = prev->end;
	}
	if (next->start < entry->end &&
	    (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->end = next->start;
	}
	if (entry->end == entry->start)
		return (0);

	if (prev != NULL && prev->end > entry->start) {
		/* This assumes that prev is the placeholder entry. */
		iommu_gas_rb_remove(domain, prev);
		prev = NULL;
	}
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, next, entry);
	if (next->start < entry->end) {
		iommu_gas_rb_remove(domain, next);
		next = NULL;
	}

	if ((flags & IOMMU_MF_RMRR) != 0)
		entry->flags = IOMMU_MAP_ENTRY_RMRR;

#ifdef INVARIANTS
	struct iommu_map_entry *ip, *in;
	ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
	in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(prev == NULL || ip == prev,
	    ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
	    entry, entry->start, entry->end, prev,
	    prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
	    ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
	KASSERT(next == NULL || in == next,
	    ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
	    entry, entry->start, entry->end, next,
	    next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
	    in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
#endif

	return (0);
}

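/*
 * Example of the RMRR adaptation above (illustrative): if a BIOS
 * reports overlapping RMRR ranges [0x1000, 0x3000) and [0x2000,
 * 0x4000), reserving the second one finds prev->end == 0x3000 >
 * entry->start == 0x2000.  Because both the request and prev carry the
 * RMRR flag, the new entry is silently clipped to [0x3000, 0x4000)
 * instead of failing with EBUSY.
 */
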
void
iommu_gas_free_space(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
	    ("permanent entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	IOMMU_DOMAIN_UNLOCK(domain);
}

void
iommu_gas_free_region(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
	    ("non-RMRR entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	if (entry != domain->first_place &&
	    entry != domain->last_place)
		iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
	IOMMU_DOMAIN_UNLOCK(domain);
}

static struct iommu_map_entry *
iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **r)
{
	struct iommu_map_entry *entry, *res, fentry;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	MPASS(start <= end);
	MPASS(end <= domain->end);

	/*
	 * Find an entry which contains the supplied guest's address
	 * start, or the first entry after the start.  Since we
	 * asserted that start is below domain end, entry should
	 * exist.  Then clip it if needed.
	 */
	fentry.start = start + 1;
	fentry.end = start + 1;
	entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);

	if (entry->start >= start ||
	    (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (entry);

	res = *r;
	*r = NULL;
	*res = *entry;
	res->start = entry->end = start;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	RB_INSERT_NEXT(iommu_gas_entries_tree,
	    &domain->rb_root, entry, res);
	return (res);
}

static bool
iommu_gas_remove_clip_right(struct iommu_domain *domain,
    iommu_gaddr_t end, struct iommu_map_entry *entry,
    struct iommu_map_entry *r)
{
	if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (false);

	*r = *entry;
	r->end = entry->start = end;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, entry, r);
	return (true);
}

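/*
 * Clipping example (illustrative): handed an entry [0x3000, 0x6000)
 * with end == 0x5000, the function copies it into r, sets r->end =
 * entry->start = 0x5000, and inserts r before entry.  The result is
 * two entries: [0x3000, 0x5000) in r, which the caller may queue for
 * unmapping, and [0x5000, 0x6000) left in place.
 */
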
static void
iommu_gas_remove_unmap(struct iommu_domain *domain,
    struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
{
	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
	    IOMMU_MAP_ENTRY_REMOVING)) != 0)
		return;
	MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
	entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
	TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
}

/*
 * Remove specified range from the GAS of the domain.  Note that the
 * removal is not guaranteed to occur upon the function return, it
 * might be finalized some time after, when hardware reports that
 * (queued) IOTLB invalidation was performed.
 */
void
iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t size)
{
	struct iommu_map_entry *entry, *nentry, *r1, *r2;
	struct iommu_map_entries_tailq gc;
	iommu_gaddr_t end;

	end = start + size;
	r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	TAILQ_INIT(&gc);

	IOMMU_DOMAIN_LOCK(domain);

	nentry = iommu_gas_remove_clip_left(domain, start, end, &r1);
	RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
		if (entry->start >= end)
			break;
		KASSERT(start <= entry->start,
		    ("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
		    entry->start, entry->end, start));
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		iommu_gas_remove_unmap(domain, entry, &gc);
	}
	if (iommu_gas_remove_clip_right(domain, end, entry, r2)) {
		iommu_gas_remove_unmap(domain, r2, &gc);
		r2 = NULL;
	}

#ifdef INVARIANTS
	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		KASSERT(entry->end <= start || entry->start >= end,
		    ("iommu_gas_remove leftover entry (%#jx, %#jx) range "
		    "(%#jx, %#jx)",
		    entry->start, entry->end, start, end));
	}
#endif

	IOMMU_DOMAIN_UNLOCK(domain);
	if (r1 != NULL)
		iommu_gas_free_entry(r1);
	if (r2 != NULL)
		iommu_gas_free_entry(r2);
	iommu_domain_unload(domain, &gc, true);
}

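/*
 * Illustrative call (not from this file): tearing down all mappings in
 * a 1M window starting at 0x100000:
 *
 *	iommu_gas_remove(domain, 0x100000, 0x100000);
 *
 * On return the range is gone from the GAS tree, but the hardware-side
 * page-table teardown and IOTLB invalidation may complete later, per
 * the comment above.
 */
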
int
iommu_gas_map(struct iommu_domain *domain,
    const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
{
	struct iommu_gas_match_args a;
	struct iommu_map_entry *entry;
	int error;

	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
	    ("invalid flags 0x%x", flags));

	a.size = size;
	a.offset = offset;
	a.common = common;
	a.gas_flags = flags;
	entry = iommu_gas_alloc_entry(domain,
	    (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	a.entry = entry;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_find_space(domain, &a);
	if (error == ENOMEM) {
		IOMMU_DOMAIN_UNLOCK(domain);
		iommu_gas_free_entry(entry);
		return (error);
	}
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	KASSERT(error == 0,
	    ("unexpected error %d from iommu_gas_find_space", error));
	KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
	    (uintmax_t)entry->end, (uintmax_t)domain->end));
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma, eflags,
	    ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, true,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	*res = entry;
	return (0);
}

int
iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	iommu_gaddr_t start;
	int error;

	KASSERT(entry->domain == domain,
	    ("mismatched domain %p entry %p entry->domain %p", domain,
	    entry, entry->domain));
	KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
	    entry, entry->flags));
	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
	    ("invalid flags 0x%x", flags));

	start = entry->start;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_alloc_region(domain, entry, flags);
	if (error != 0) {
		IOMMU_DOMAIN_UNLOCK(domain);
		return (error);
	}
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);
	if (entry->end == entry->start)
		return (0);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
	    eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, false,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	return (0);
}

static int
iommu_gas_reserve_region_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
{
	int error;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	entry->start = start;
	entry->end = end;
	error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
	if (error == 0)
		entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
	return (error);
}

int
iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **entry0)
{
	struct iommu_map_entry *entry;
	int error;

	entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_reserve_region_locked(domain, start, end, entry);
	IOMMU_DOMAIN_UNLOCK(domain);
	if (error != 0)
		iommu_gas_free_entry(entry);
	else if (entry0 != NULL)
		*entry0 = entry;
	return (error);
}

/*
 * As in iommu_gas_reserve_region, reserve [start, end), but allow for existing
 * entries.
 */
int
iommu_gas_reserve_region_extend(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end)
{
	struct iommu_map_entry *entry, *next, *prev, key = {};
	iommu_gaddr_t entry_start, entry_end;
	int error;

	error = 0;
	entry = NULL;
	end = ummin(end, domain->end);
	while (start < end) {
		/* Preallocate an entry. */
		if (entry == NULL)
			entry = iommu_gas_alloc_entry(domain,
			    IOMMU_PGF_WAITOK);
		/* Calculate the free region from here to the next entry. */
		key.start = key.end = start;
		IOMMU_DOMAIN_LOCK(domain);
		next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key);
		KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
		    "after %#jx", domain, (uintmax_t)domain->end,
		    (uintmax_t)start));
		entry_end = ummin(end, next->start);
		prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
		if (prev != NULL)
			entry_start = ummax(start, prev->end);
		else
			entry_start = start;
		start = next->end;
		/* Reserve the region if non-empty. */
		if (entry_start != entry_end) {
			error = iommu_gas_reserve_region_locked(domain,
			    entry_start, entry_end, entry);
			if (error != 0) {
				IOMMU_DOMAIN_UNLOCK(domain);
				break;
			}
			entry = NULL;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}
	/* Release a preallocated entry if it was not used. */
	if (entry != NULL)
		iommu_gas_free_entry(entry);
	return (error);
}

void
iommu_unmap_msi(struct iommu_ctx *ctx)
{
	struct iommu_map_entry *entry;
	struct iommu_domain *domain;

	domain = ctx->domain;
	entry = domain->msi_entry;
	if (entry == NULL)
		return;

	domain->ops->unmap(domain, entry->start, entry->end -
	    entry->start, IOMMU_PGF_WAITOK);

	iommu_gas_free_space(entry);

	iommu_gas_free_entry(entry);

	domain->msi_entry = NULL;
	domain->msi_base = 0;
	domain->msi_phys = 0;
}

int
iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	int error;

	error = 0;
	domain = ctx->domain;

	/* Check if there is already an MSI page allocated */
	IOMMU_DOMAIN_LOCK(domain);
	entry = domain->msi_entry;
	IOMMU_DOMAIN_UNLOCK(domain);

	if (entry == NULL) {
		error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
		    eflags, flags, ma, &entry);
		IOMMU_DOMAIN_LOCK(domain);
		if (error == 0) {
			if (domain->msi_entry == NULL) {
				MPASS(domain->msi_base == 0);
				MPASS(domain->msi_phys == 0);

				domain->msi_entry = entry;
				domain->msi_base = entry->start;
				domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
			} else {
				/*
				 * We lost the race and already have an
				 * MSI page allocated. Free the unneeded entry.
				 */
				iommu_gas_free_entry(entry);
			}
		} else if (domain->msi_entry != NULL) {
			/*
			 * The allocation failed, but another succeeded.
			 * Return success as there is a valid MSI page.
			 */
			error = 0;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}

	return (error);
}

void
iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
{

	*addr = (*addr - domain->msi_phys) + domain->msi_base;

	KASSERT(*addr >= domain->msi_entry->start,
	    ("%s: Address is below the MSI entry start address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));

	KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
	    ("%s: Address is above the MSI entry end address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
}

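/*
 * Worked example (illustrative): if the MSI page was allocated at
 * msi_base == 0x0f000000 in the domain and the physical MSI doorbell is
 * at msi_phys == 0xfee00000, then a device-visible address of
 * 0xfee00040 is rewritten to (0xfee00040 - 0xfee00000) + 0x0f000000 ==
 * 0x0f000040, which falls inside msi_entry and so passes both
 * assertions above.
 */
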
SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");

#ifdef INVARIANTS
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
    &iommu_check_free, 0,
    "Check the GPA RBtree for free_down and free_after validity");
#endif