2 * Copyright (c) 2013 The FreeBSD Foundation
5 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
6 * under sponsorship from the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
37 #include <sys/interrupt.h>
38 #include <sys/kernel.h>
40 #include <sys/limits.h>
42 #include <sys/memdesc.h>
43 #include <sys/mutex.h>
45 #include <sys/rwlock.h>
47 #include <sys/sysctl.h>
48 #include <sys/taskqueue.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_page.h>
56 #include <vm/vm_pager.h>
57 #include <vm/vm_map.h>
58 #include <machine/atomic.h>
59 #include <machine/bus.h>
60 #include <machine/md_var.h>
61 #include <machine/specialreg.h>
62 #include <x86/include/busdma_impl.h>
63 #include <x86/iommu/intel_reg.h>
64 #include <x86/iommu/busdma_dmar.h>
65 #include <x86/iommu/intel_dmar.h>
66 #include <dev/pci/pcireg.h>
67 #include <dev/pci/pcivar.h>
/* Malloc type used in this file to allocate and free struct dmar_ctx. */
69 static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
/*
 * Forward declaration; the function is installed as the handler of each
 * context unload_task by dmar_get_ctx_alloc().
 */
71 static void dmar_ctx_unload_task(void *arg, int pending);
/*
 * Make sure that the context-table page for PCI bus number bus exists in
 * dmar->ctx_obj and is referenced from a root entry.
 *
 * Context pages are kept at page index 1 + bus of dmar->ctx_obj, while
 * page index 0 is the one mapped here to reach the root entries.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt, including the early exit taken when the page already exists
 * and whatever selects the root entry that belongs to this bus.
 */
74 dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
77 dmar_root_entry_t *re;
81 * Allocated context page must be linked.
/* First attempt uses DMAR_PGF_NOALLOC; presumably a lookup only. */
83 ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_NOALLOC);
88 * Page not present, allocate and link. Note that other
89 * thread might execute this sequence in parallel. This
90 * should be safe, because the context entries written by both
93 TD_PREP_PINNED_ASSERT;
/* Second attempt asks for a zeroed page (remaining flags not visible). */
94 ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_ZERO |
96 re = dmar_map_pgtbl(dmar->ctx_obj, 0, DMAR_PGF_NOALLOC, &sf);
/*
 * Publish the page: the root entry gets the present bit together with
 * the physical address of the context page, and is then flushed so
 * that the hardware can observe it.
 */
98 dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
99 VM_PAGE_TO_PHYS(ctxm)));
100 dmar_flush_root_to_ram(dmar, re);
101 dmar_unmap_pgtbl(sf);
/*
 * Map the context-table page that covers ctx->rid and compute the address
 * of the context entry for that requester id.
 *
 * The page is looked up at index 1 + PCI_RID2BUS(ctx->rid) of the unit
 * ctx_obj; the entry inside the page is selected by the low 8 bits of the
 * rid.  The mapping handle is stored through sfp; callers in this file
 * release it with dmar_unmap_pgtbl().
 *
 * NOTE(review): the return statement is not visible in this excerpt; the
 * declared return type and the callers indicate that ctxp is returned.
 */
105 static dmar_ctx_entry_t *
106 dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
108 dmar_ctx_entry_t *ctxp;
/* DMAR_PGF_WAITOK: callers treat this mapping as an operation that may sleep. */
110 ctxp = dmar_map_pgtbl(ctx->dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->rid),
111 DMAR_PGF_NOALLOC | DMAR_PGF_WAITOK, sfp);
112 ctxp += ctx->rid & 0xff;
/*
 * Initialize the busdma tag embedded in the context (ctx->ctx_tag) on
 * behalf of device dev.
 *
 * The address related limits (lowaddr, highaddr, maxsize, maxsegsz) are
 * all set to the smaller of ctx->end and BUS_SPACE_MAXADDR.  The tag is
 * bound to the DMAR busdma implementation and records both the owning
 * context and the owning device.
 */
117 ctx_tag_init(struct dmar_ctx *ctx, device_t dev)
121 maxaddr = MIN(ctx->end, BUS_SPACE_MAXADDR);
122 ctx->ctx_tag.common.ref_count = 1; /* Prevent free */
123 ctx->ctx_tag.common.impl = &bus_dma_dmar_impl;
124 ctx->ctx_tag.common.boundary = PCI_DMA_BOUNDARY;
125 ctx->ctx_tag.common.lowaddr = maxaddr;
126 ctx->ctx_tag.common.highaddr = maxaddr;
127 ctx->ctx_tag.common.maxsize = maxaddr;
/* No limit is imposed on the number of segments. */
128 ctx->ctx_tag.common.nsegments = BUS_SPACE_UNRESTRICTED;
129 ctx->ctx_tag.common.maxsegsz = maxaddr;
130 ctx->ctx_tag.ctx = ctx;
131 ctx->ctx_tag.owner = dev;
132 /* XXXKIB initialize tag further */
/*
 * Fill in the hardware context entry ctxp for context ctx.
 *
 * ctx2 receives the domain id and the address width level.  ctx1 is
 * written afterwards through dmar_pte_store() and takes one of two forms:
 *  - identity-mapped context on hardware advertising DMAR_ECAP_PT:
 *    pass-through type with the present bit, and no page table object
 *    may be attached to the context;
 *  - otherwise: untranslated-request type carrying the physical address
 *    of page 0 of ctx->pgtbl_obj.
 * The entry is finally flushed with dmar_flush_ctx_to_ram().
 *
 * NOTE(review): the assignment of unit, the tail of the first KASSERT, the
 * else keyword and the last operand of the second ctx1 store are not
 * visible in this excerpt.
 */
136 ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp)
138 struct dmar_unit *unit;
/* The entry is expected to be completely clear before initialization. */
142 KASSERT(ctxp->ctx1 == 0 && ctxp->ctx2 == 0,
143 ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
144 unit->unit, pci_get_bus(ctx->ctx_tag.owner),
145 pci_get_slot(ctx->ctx_tag.owner),
146 pci_get_function(ctx->ctx_tag.owner),
/* ctx2 is written first, before ctx1 is stored below. */
149 ctxp->ctx2 = DMAR_CTX2_DID(ctx->domain);
150 ctxp->ctx2 |= ctx->awlvl;
151 if ((ctx->flags & DMAR_CTX_IDMAP) != 0 &&
152 (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
153 KASSERT(ctx->pgtbl_obj == NULL,
154 ("ctx %p non-null pgtbl_obj", ctx));
155 dmar_pte_store(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
/* Page 0 of the page table object supplies the address stored in ctx1. */
157 ctx_root = dmar_pgalloc(ctx->pgtbl_obj, 0, DMAR_PGF_NOALLOC);
158 dmar_pte_store(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
159 (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
162 dmar_flush_ctx_to_ram(unit, ctxp);
/*
 * Map the RMRR regions reported for device dev into context ctx.
 *
 * dmar_ctx_parse_rmrr() collects the regions on a local list.  For each
 * entry the function:
 *  - truncates the start and rounds the end to page boundaries;
 *  - when the aligned region is empty, extends the end by 0x20 DMAR pages
 *    as a firmware workaround;
 *  - builds a temporary array of fake pages, one per page of the region;
 *  - maps the region readable and writable with dmar_gas_map_region();
 *  - on success with a non-empty region, sets DMAR_CTX_RMRR in ctx->flags
 *    under the DMAR lock; otherwise the entry is removed from the local
 *    list and released;
 *  - returns the fake pages.
 *
 * NOTE(review): this excerpt omits the load of end from the entry, the
 * conditions guarding the diagnostic printfs, the else keyword, the
 * release of the ma array and the return value handling.
 */
166 ctx_init_rmrr(struct dmar_ctx *ctx, device_t dev)
168 struct dmar_map_entries_tailq rmrr_entries;
169 struct dmar_map_entry *entry, *entry1;
171 dmar_gaddr_t start, end;
176 TAILQ_INIT(&rmrr_entries);
177 dmar_ctx_parse_rmrr(ctx, dev, &rmrr_entries);
/* The _SAFE variant is needed because failed entries are unlinked below. */
178 TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) {
180 * VT-d specification requires that the start of an
181 * RMRR entry is 4k-aligned. Buggy BIOSes put
182 * anything into the start and end fields. Truncate
183 * and round as neccesary.
185 * We also allow the overlapping RMRR entries, see
186 * dmar_gas_alloc_region().
/* start keeps the firmware value; it is used by the messages below. */
188 start = entry->start;
190 entry->start = trunc_page(start);
191 entry->end = round_page(end);
192 if (entry->start == entry->end) {
193 /* Workaround for some AMI (?) BIOSes */
195 device_printf(dev, "BIOS bug: dmar%d RMRR "
196 "region (%jx, %jx) corrected\n",
197 ctx->dmar->unit, start, end);
199 entry->end += DMAR_PAGE_SIZE * 0x20;
/* size is the page count of the aligned region. */
201 size = OFF_TO_IDX(entry->end - entry->start);
202 ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
203 for (i = 0; i < size; i++) {
204 ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
207 error1 = dmar_gas_map_region(ctx, entry, DMAR_MAP_ENTRY_READ |
208 DMAR_MAP_ENTRY_WRITE, DMAR_GM_CANWAIT, ma);
210 * Non-failed RMRR entries are owned by context rb
211 * tree. Get rid of the failed entry, but do not stop
212 * the loop. Rest of the parsed RMRR entries are
213 * loaded and removed on the context destruction.
215 if (error1 == 0 && entry->end != entry->start) {
216 DMAR_LOCK(ctx->dmar);
217 ctx->flags |= DMAR_CTX_RMRR;
218 DMAR_UNLOCK(ctx->dmar);
222 "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
223 ctx->dmar->unit, start, end, error1);
226 TAILQ_REMOVE(&rmrr_entries, entry, unroll_link);
227 dmar_gas_free_entry(ctx, entry);
/* The fake pages were only needed for the mapping call above. */
229 for (i = 0; i < size; i++)
230 vm_page_putfake(ma[i]);
/*
 * Allocate a zero-filled struct dmar_ctx and initialize its embedded
 * members: the red-black tree root, the list of entries awaiting unload,
 * the unload task (handled by dmar_ctx_unload_task() with the context as
 * argument) and the context mutex.
 *
 * The allocation uses M_WAITOK.
 *
 * NOTE(review): the use of the dmar and rid parameters and the return
 * statement are not visible in this excerpt.
 */
236 static struct dmar_ctx *
237 dmar_get_ctx_alloc(struct dmar_unit *dmar, uint16_t rid)
239 struct dmar_ctx *ctx;
241 ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
242 RB_INIT(&ctx->rb_root);
243 TAILQ_INIT(&ctx->unload_entries);
244 TASK_INIT(&ctx->unload_task, 0, dmar_ctx_unload_task, ctx);
245 mtx_init(&ctx->lock, "dmarctx", NULL, MTX_DEF);
/*
 * Destroy a context and free its memory.
 *
 * gas_inited and pgtbl_inited tell how far the construction of the context
 * progressed; callers in this file pass (false, false), (true, false) or
 * (true, true) accordingly.  The visible steps tear down the address space
 * allocator state, handle the page table object when one is attached,
 * destroy the context mutex and free the structure.
 *
 * NOTE(review): the conditionals testing the two flags, the lock
 * acquisition paired with DMAR_CTX_UNLOCK and the page table release
 * itself are not visible in this excerpt.
 */
252 dmar_ctx_dtr(struct dmar_ctx *ctx, bool gas_inited, bool pgtbl_inited)
257 dmar_gas_fini_ctx(ctx);
258 DMAR_CTX_UNLOCK(ctx);
261 if (ctx->pgtbl_obj != NULL)
262 DMAR_CTX_PGLOCK(ctx);
/* After this point the context must not be referenced by anybody. */
265 mtx_destroy(&ctx->lock);
266 free(ctx, M_DMAR_CTX);
/*
 * Find the context registered on DMAR unit dmar for requester id rid, or
 * construct and activate a new one for device dev.
 *
 * Visible outline:
 *  1. Look the rid up with dmar_find_ctx_locked().
 *  2. For a new context: make sure the context-table page for the bus
 *     exists, allocate the context (ctx1), choose its address limit and
 *     guest address width, initialize the address space allocator, set up
 *     the page table, reserve the local APIC range, map the RMRR regions
 *     and map the hardware context entry.
 *  3. Look the rid up again, since another thread may have registered the
 *     context meanwhile.  When ctx1 is used, a domain id is allocated, the
 *     busdma tag is initialized, the context is linked into
 *     dmar->contexts and the hardware entry is written.  On the other
 *     visible path ctx1 is destroyed.
 *  4. Invalidate the context and IOTLB caches when the unit reports
 *     Caching Mode or when enable is set, then enable translation when it
 *     is requested and not yet active.
 *
 * id_mapped selects between an identity-mapped and a remapped context.
 *
 * NOTE(review): the remainder of the parameter list (rmrr_init is used in
 * the body), the locking calls, most if/else lines, the assignments to
 * enable and to the reference count, and all return statements are not
 * visible in this excerpt.  The outline above should be confirmed against
 * the complete file.
 */
270 dmar_get_ctx(struct dmar_unit *dmar, device_t dev, uint16_t rid, bool id_mapped,
273 struct dmar_ctx *ctx, *ctx1;
274 dmar_ctx_entry_t *ctxp;
276 int bus, slot, func, error, mgaw;
/* bus, slot and func are used by the informational message below. */
279 bus = pci_get_bus(dev);
280 slot = pci_get_slot(dev);
281 func = pci_get_function(dev);
283 TD_PREP_PINNED_ASSERT;
285 ctx = dmar_find_ctx_locked(dmar, rid);
289 * Perform the allocations which require sleep or have
290 * higher chance to succeed if the sleep is allowed.
293 dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
294 ctx1 = dmar_get_ctx_alloc(dmar, rid);
298 * For now, use the maximal usable physical
299 * address of the installed memory to
300 * calculate the mgaw. It is useful for the
301 * identity mapping, and less so for the
302 * virtualized bus address space.
304 ctx1->end = ptoa(Maxmem);
305 mgaw = dmar_maxaddr2mgaw(dmar, ctx1->end, false);
306 error = ctx_set_agaw(ctx1, mgaw);
/* Nothing beyond the bare allocation exists yet, hence (false, false). */
308 dmar_ctx_dtr(ctx1, false, false);
313 ctx1->end = BUS_SPACE_MAXADDR;
314 mgaw = dmar_maxaddr2mgaw(dmar, ctx1->end, true);
315 error = ctx_set_agaw(ctx1, mgaw);
317 dmar_ctx_dtr(ctx1, false, false);
321 /* Use all supported address space for remapping. */
322 ctx1->end = 1ULL << (ctx1->agaw - 1);
326 dmar_gas_init_ctx(ctx1);
/*
 * Without hardware pass-through support an identity-map page table
 * is obtained from ctx_get_idmap_pgtbl().
 */
328 if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
329 ctx1->pgtbl_obj = ctx_get_idmap_pgtbl(ctx1,
332 ctx1->flags |= DMAR_CTX_IDMAP;
334 error = ctx_alloc_pgtbl(ctx1);
/* The allocator state exists but the page table does not. */
336 dmar_ctx_dtr(ctx1, true, false);
340 /* Disable local apic region access */
341 error = dmar_gas_reserve_region(ctx1, 0xfee00000,
344 dmar_ctx_dtr(ctx1, true, true);
348 error = ctx_init_rmrr(ctx1, dev);
350 dmar_ctx_dtr(ctx1, true, true);
/* Map the hardware entry now; the mapping is released further below. */
355 ctxp = dmar_map_ctx_entry(ctx1, &sf);
359 * Recheck the contexts, other thread might have
360 * already allocated needed one.
362 ctx = dmar_find_ctx_locked(dmar, rid);
365 ctx->ctx_tag.owner = dev;
/* A domain id of -1 is handled as an allocation failure. */
366 ctx->domain = alloc_unrl(dmar->domids);
367 if (ctx->domain == -1) {
369 dmar_unmap_pgtbl(sf);
370 dmar_ctx_dtr(ctx, true, true);
374 ctx_tag_init(ctx, dev);
377 * This is the first activated context for the
378 * DMAR unit. Enable the translation after
379 * everything is set up.
381 if (LIST_EMPTY(&dmar->contexts))
383 LIST_INSERT_HEAD(&dmar->contexts, ctx, link);
384 ctx_id_entry_init(ctx, ctxp);
386 "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
387 "agaw %d %s-mapped\n",
388 dmar->unit, dmar->segment, bus, slot,
389 func, rid, ctx->domain, ctx->mgaw, ctx->agaw,
390 id_mapped ? "id" : "re");
/* Presumably the path where the recheck found an existing context. */
392 dmar_ctx_dtr(ctx1, true, true);
394 dmar_unmap_pgtbl(sf);
/* Contexts that carry RMRR mappings get an additional reference. */
397 if ((ctx->flags & DMAR_CTX_RMRR) != 0)
398 ctx->refs++; /* XXXKIB */
401 * If dmar declares Caching Mode as Set, follow 11.5 "Caching
402 * Mode Consideration" and do the (global) invalidation of the
403 * negative TLB entries.
405 if ((dmar->hw_cap & DMAR_CAP_CM) != 0 || enable) {
406 if (dmar->qi_enabled) {
407 dmar_qi_invalidate_ctx_glob_locked(dmar);
408 if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0)
409 dmar_qi_invalidate_iotlb_glob_locked(dmar);
411 error = dmar_inv_ctx_glob(dmar);
413 (dmar->hw_ecap & DMAR_ECAP_DI) != 0)
414 error = dmar_inv_iotlb_glob(dmar);
416 dmar_free_ctx_locked(dmar, ctx);
424 * The dmar lock was potentially dropped between check for the
425 * empty context list and now. Recheck the state of GCMD_TE
426 * to avoid unneeded command.
428 if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
429 error = dmar_enable_translation(dmar);
431 dmar_free_ctx_locked(dmar, ctx);
/*
 * Drop one reference on ctx; the DMAR lock must be held on entry.
 *
 * When other references remain only the dereference is performed.  For
 * the last reference the hardware context entry is mapped, which may
 * sleep, so the reference count is examined a second time afterwards.  If
 * the context is still unreferenced, its hardware entry is cleared and
 * flushed, the context and IOTLB caches are invalidated, the context is
 * unlinked from the unit list, its pending unload task is drained, the
 * domain id is returned and the context is destroyed.
 *
 * A context that loses its last reference must have neither
 * DMAR_CTX_RMRR nor DMAR_CTX_DISABLED set; this is asserted.
 *
 * NOTE(review): the reference count updates, the unlock and relock calls,
 * the if statements around the early exits and the return statements are
 * not visible in this excerpt.
 */
442 dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
445 dmar_ctx_entry_t *ctxp;
447 DMAR_ASSERT_LOCKED(dmar);
448 KASSERT(ctx->refs >= 1,
449 ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
452 * If our reference is not last, only the dereference should
461 KASSERT((ctx->flags & DMAR_CTX_RMRR) == 0,
462 ("lost ref on RMRR ctx %p", ctx));
463 KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
464 ("lost ref on disabled ctx %p", ctx));
467 * Otherwise, the context entry must be cleared before the
468 * page table is destroyed. The mapping of the context
469 * entries page could require sleep, unlock the dmar.
472 TD_PREP_PINNED_ASSERT;
473 ctxp = dmar_map_ctx_entry(ctx, &sf);
/* The same checks are repeated once the mapping has been obtained. */
475 KASSERT(ctx->refs >= 1,
476 ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
479 * Other thread might have referenced the context, in which
480 * case again only the dereference should be performed.
485 dmar_unmap_pgtbl(sf);
490 KASSERT((ctx->flags & DMAR_CTX_RMRR) == 0,
491 ("lost ref on RMRR ctx %p", ctx));
492 KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
493 ("lost ref on disabled ctx %p", ctx));
496 * Clear the context pointer and flush the caches.
497 * XXXKIB: cannot do this if any RMRR entries are still present.
499 dmar_pte_clear(&ctxp->ctx1);
501 dmar_flush_ctx_to_ram(dmar, ctxp);
/* The context cache invalidation here always uses dmar_inv_ctx_glob(). */
502 dmar_inv_ctx_glob(dmar);
503 if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
504 if (dmar->qi_enabled)
505 dmar_qi_invalidate_iotlb_glob_locked(dmar);
507 dmar_inv_iotlb_glob(dmar);
509 LIST_REMOVE(ctx, link);
513 * The rest of the destruction is invisible for other users of
/* Wait for a queued or running unload task before tearing down. */
516 taskqueue_drain(dmar->delayed_taskqueue, &ctx->unload_task);
517 KASSERT(TAILQ_EMPTY(&ctx->unload_entries),
518 ("unfinished unloads %p", ctx));
519 dmar_unmap_pgtbl(sf);
520 free_unr(dmar->domids, ctx->domain);
521 dmar_ctx_dtr(ctx, true, true);
/*
 * Release a reference on ctx by delegating to dmar_free_ctx_locked().
 *
 * NOTE(review): the assignment of dmar and any locking performed before
 * the call are not visible in this excerpt; dmar_free_ctx_locked() asserts
 * that the DMAR lock is held.
 */
526 dmar_free_ctx(struct dmar_ctx *ctx)
528 struct dmar_unit *dmar;
532 dmar_free_ctx_locked(dmar, ctx);
/*
 * Search the list of contexts of a DMAR unit for requester id rid.  The
 * DMAR lock must be held by the caller, which is asserted.
 *
 * NOTE(review): the loop body and the return statements are not visible
 * in this excerpt; callers assign the result to a struct dmar_ctx pointer
 * and look it up again after steps that may sleep.
 */
536 dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
538 struct dmar_ctx *ctx;
540 DMAR_ASSERT_LOCKED(dmar);
542 LIST_FOREACH(ctx, &dmar->contexts, link) {
/*
 * Give the address range described by entry back to its context and
 * dispose of the entry.
 *
 * Entries flagged DMAR_MAP_ENTRY_RMRR are released with
 * dmar_gas_free_region(); dmar_gas_free_space() is the other visible
 * release call.  The context lock is dropped afterwards and
 * dmar_gas_free_entry() is called on the entry.
 *
 * NOTE(review): the assignment of ctx, the lock acquisition, the else
 * keyword and the test of the free parameter are not visible in this
 * excerpt; presumably free decides whether the entry structure itself is
 * released.
 */
550 dmar_ctx_free_entry(struct dmar_map_entry *entry, bool free)
552 struct dmar_ctx *ctx;
556 if ((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0)
557 dmar_gas_free_region(ctx, entry);
559 dmar_gas_free_space(ctx, entry);
560 DMAR_CTX_UNLOCK(ctx);
562 dmar_gas_free_entry(ctx, entry);
/*
 * Invalidate the IOTLB for a single map entry and arrange for the entry
 * to be released.
 *
 * With queued invalidation enabled on the unit, an invalidation request
 * covering the entry range is queued, its sequence number is recorded in
 * entry->gseq, and the entry is appended to unit->tlb_flush_entries.
 * DMAR_MAP_ENTRY_QI_NF is also set on the entry in this function.
 * Without queued invalidation the IOTLB is flushed synchronously and the
 * entry is released at once through dmar_ctx_free_entry().
 *
 * NOTE(review): the locking around the queued path, the condition under
 * which DMAR_MAP_ENTRY_QI_NF is set and the else keyword are not visible
 * in this excerpt.
 */
568 dmar_ctx_unload_entry(struct dmar_map_entry *entry, bool free)
570 struct dmar_unit *unit;
572 unit = entry->ctx->dmar;
573 if (unit->qi_enabled) {
575 dmar_qi_invalidate_locked(entry->ctx, entry->start,
576 entry->end - entry->start, &entry->gseq);
578 entry->flags |= DMAR_MAP_ENTRY_QI_NF;
579 TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
582 ctx_flush_iotlb_sync(entry->ctx, entry->start, entry->end -
584 dmar_ctx_free_entry(entry, free);
/*
 * Unmap every entry on the list entries from context ctx and invalidate
 * the corresponding IOTLB ranges.
 *
 * First pass: each entry, which must carry DMAR_MAP_ENTRY_MAP, is removed
 * from the page tables with ctx_unmap_buf(); the call may wait when
 * cansleep is true.  If queued invalidation is not enabled, the range is
 * flushed synchronously and the entry is unlinked and released on the
 * spot.
 *
 * Entries remaining after the first pass imply that queued invalidation
 * is enabled, which is asserted.  An invalidation is then queued for each
 * of them; the last entry on the list is treated specially in the
 * sequence number argument.  Finally all entries are moved to
 * unit->tlb_flush_entries.
 *
 * NOTE(review): the rest of the parameter list (cansleep is used in the
 * body), the assignment of unit, the early return for an empty list, the
 * locking and the per-entry sequence number assignment are not visible in
 * this excerpt.
 */
589 dmar_ctx_unload(struct dmar_ctx *ctx, struct dmar_map_entries_tailq *entries,
592 struct dmar_unit *unit;
593 struct dmar_map_entry *entry, *entry1;
594 struct dmar_qi_genseq gseq;
/* The _SAFE variant is required: entries may be unlinked inside the loop. */
599 TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
600 KASSERT((entry->flags & DMAR_MAP_ENTRY_MAP) != 0,
601 ("not mapped entry %p %p", ctx, entry));
602 error = ctx_unmap_buf(ctx, entry->start, entry->end -
603 entry->start, cansleep ? DMAR_PGF_WAITOK : 0);
604 KASSERT(error == 0, ("unmap %p error %d", ctx, error));
605 if (!unit->qi_enabled) {
606 ctx_flush_iotlb_sync(ctx, entry->start,
607 entry->end - entry->start);
608 TAILQ_REMOVE(entries, entry, dmamap_link);
609 dmar_ctx_free_entry(entry, true);
612 if (TAILQ_EMPTY(entries))
615 KASSERT(unit->qi_enabled, ("loaded entry left"));
617 TAILQ_FOREACH(entry, entries, dmamap_link) {
620 dmar_qi_invalidate_locked(ctx, entry->start, entry->end -
621 entry->start, TAILQ_NEXT(entry, dmamap_link) == NULL ?
/* Hand the entries over to the per-unit list of pending TLB flushes. */
624 TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
626 TAILQ_REMOVE(entries, entry, dmamap_link);
627 TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
/*
 * Task handler registered by dmar_get_ctx_alloc() for a context unload
 * task.
 *
 * The entries queued on ctx->unload_entries are swapped onto a local list
 * and, when that list is not empty, unloaded with dmar_ctx_unload() with
 * sleeping allowed.
 *
 * NOTE(review): the assignment of ctx from arg, the lock acquisition
 * paired with DMAR_CTX_UNLOCK, any enclosing loop and the action taken
 * for an empty list are not visible in this excerpt.  The pending
 * argument is not used by the visible lines.
 */
633 dmar_ctx_unload_task(void *arg, int pending)
635 struct dmar_ctx *ctx;
636 struct dmar_map_entries_tailq entries;
639 TAILQ_INIT(&entries);
/* Detach the queued entries so that they can be processed unlocked. */
643 TAILQ_SWAP(&ctx->unload_entries, &entries, dmar_map_entry,
645 DMAR_CTX_UNLOCK(ctx);
646 if (TAILQ_EMPTY(&entries))
648 dmar_ctx_unload(ctx, &entries, true);