2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2016, Anish Gupta (anish@freebsd.org)
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
37 #include <sys/malloc.h>
41 #include <sys/sysctl.h>
46 #include <dev/pci/pcivar.h>
47 #include <dev/pci/pcireg.h>
49 #include <machine/resource.h>
50 #include <machine/vmm.h>
51 #include <machine/pmap.h>
52 #include <machine/vmparam.h>
53 #include <machine/pci_cfgreg.h>
58 #include "amdvi_priv.h"
61 SYSCTL_NODE(_hw_vmm, OID_AUTO, amdvi, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
/*
 * Ring-buffer offset helpers.
 *   a: current byte offset into the ring
 *   s: size of one ring entry, in bytes
 *   m: number of entries in the ring
 * Both results wrap at (m * s).  Arguments are evaluated more than once,
 * so pass side-effect-free expressions.
 *
 * MOD_DEC steps back one entry by adding (m - 1) entries instead of
 * subtracting one.  A plain (a) - (s) at a == 0 is negative for signed
 * operands and, for unsigned operands, wraps to a value whose remainder
 * is only correct when (m * s) is a power of two.
 */
#define MOD_INC(a, s, m) (((a) + (s)) % ((m) * (s)))
#define MOD_DEC(a, s, m) (((a) + ((m) - 1) * (s)) % ((m) * (s)))
67 /* Print RID or device ID in PCI string format. */
68 #define RID2PCI_STR(d) PCI_RID2BUS(d), PCI_RID2SLOT(d), PCI_RID2FUNC(d)
70 static void amdvi_dump_cmds(struct amdvi_softc *softc, int count);
71 static void amdvi_print_dev_cap(struct amdvi_softc *softc);
73 MALLOC_DEFINE(M_AMDVI, "amdvi", "amdvi");
75 extern device_t *ivhd_devs;
77 extern int ivhd_count;
78 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, count, CTLFLAG_RDTUN, &ivhd_count,
81 static int amdvi_enable_user = 0;
82 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, enable, CTLFLAG_RDTUN,
83 &amdvi_enable_user, 0, NULL);
84 TUNABLE_INT("hw.vmm.amdvi_enable", &amdvi_enable_user);
86 #ifdef AMDVI_ATS_ENABLE
87 /* XXX: ATS is not tested. */
88 static int amdvi_enable_iotlb = 1;
89 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, iotlb_enabled, CTLFLAG_RDTUN,
90 &amdvi_enable_iotlb, 0, NULL);
91 TUNABLE_INT("hw.vmm.enable_iotlb", &amdvi_enable_iotlb);
94 static int amdvi_host_ptp = 1; /* Use page tables for host. */
95 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, host_ptp, CTLFLAG_RDTUN,
96 &amdvi_host_ptp, 0, NULL);
97 TUNABLE_INT("hw.vmm.amdvi.host_ptp", &amdvi_host_ptp);
99 /* Page table level used <= supported by h/w[v1=7]. */
100 int amdvi_ptp_level = 4;
101 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, ptp_level, CTLFLAG_RDTUN,
102 &amdvi_ptp_level, 0, NULL);
103 TUNABLE_INT("hw.vmm.amdvi.ptp_level", &amdvi_ptp_level);
105 /* Disable fault event reporting. */
106 static int amdvi_disable_io_fault = 0;
107 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, disable_io_fault, CTLFLAG_RDTUN,
108 &amdvi_disable_io_fault, 0, NULL);
109 TUNABLE_INT("hw.vmm.amdvi.disable_io_fault", &amdvi_disable_io_fault);
111 static uint32_t amdvi_dom_id = 0; /* 0 is reserved for host. */
112 SYSCTL_UINT(_hw_vmm_amdvi, OID_AUTO, domain_id, CTLFLAG_RD,
113 &amdvi_dom_id, 0, NULL);
115 * Device table entry.
116 * Bus(256) x Dev(32) x Fun(8) x DTE(256 bits or 32 bytes).
117 * = 256 * 2 * PAGE_SIZE.
119 static struct amdvi_dte amdvi_dte[PCI_NUM_DEV_MAX] __aligned(PAGE_SIZE);
120 CTASSERT(PCI_NUM_DEV_MAX == 0x10000);
121 CTASSERT(sizeof(amdvi_dte) == 0x200000);
123 static SLIST_HEAD (, amdvi_domain) dom_head;
125 static inline uint32_t
126 amdvi_pci_read(struct amdvi_softc *softc, int off)
129 return (pci_cfgregread(PCI_RID2BUS(softc->pci_rid),
130 PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid),
134 #ifdef AMDVI_ATS_ENABLE
135 /* XXX: Should be in pci.c */
 * Check if device has ATS capability and it is enabled.
138 * If ATS is absent or disabled, return (-1), otherwise ATS
142 amdvi_find_ats_qlen(uint16_t devid)
148 dev = pci_find_bsf(PCI_RID2BUS(devid), PCI_RID2SLOT(devid),
149 PCI_RID2FUNC(devid));
154 #define PCIM_ATS_EN BIT(31)
156 if (pci_find_extcap(dev, PCIZ_ATS, &off) == 0) {
157 cap = pci_read_config(dev, off + 4, 4);
159 qlen = qlen ? qlen : 32;
160 printf("AMD-Vi: PCI device %d.%d.%d ATS %s qlen=%d\n",
162 (cap & PCIM_ATS_EN) ? "enabled" : "Disabled",
164 qlen = (cap & PCIM_ATS_EN) ? qlen : -1;
 * Check if an endpoint device supports device IOTLB or ATS.
174 amdvi_dev_support_iotlb(struct amdvi_softc *softc, uint16_t devid)
176 struct ivhd_dev_cfg *cfg;
178 bool pci_ats, ivhd_ats;
180 qlen = amdvi_find_ats_qlen(devid);
184 KASSERT(softc, ("softc is NULL"));
185 cfg = softc->dev_cfg;
188 for (i = 0; i < softc->dev_cfg_cnt; i++) {
189 if ((cfg->start_id <= devid) && (cfg->end_id >= devid)) {
190 ivhd_ats = cfg->enable_ats;
196 pci_ats = (qlen < 0) ? false : true;
197 if (pci_ats != ivhd_ats)
198 device_printf(softc->dev,
199 "BIOS bug: mismatch in ATS setting for %d.%d.%d,"
200 "ATS inv qlen = %d\n", RID2PCI_STR(devid), qlen);
202 /* Ignore IVRS setting and respect PCI setting. */
/* Enable IOTLB support for IOMMU if it is supported. */
209 amdvi_hw_enable_iotlb(struct amdvi_softc *softc)
211 #ifndef AMDVI_ATS_ENABLE
212 softc->iotlb = false;
216 supported = (softc->ivhd_flag & IVHD_FLAG_IOTLB) ? true : false;
218 if (softc->pci_cap & AMDVI_PCI_CAP_IOTLB) {
220 device_printf(softc->dev, "IOTLB disabled by BIOS.\n");
222 if (supported && !amdvi_enable_iotlb) {
223 device_printf(softc->dev, "IOTLB disabled by user.\n");
229 softc->iotlb = supported;
235 amdvi_init_cmd(struct amdvi_softc *softc)
237 struct amdvi_ctrl *ctrl = softc->ctrl;
239 ctrl->cmd.len = 8; /* Use 256 command buffer entries. */
240 softc->cmd_max = 1 << ctrl->cmd.len;
242 softc->cmd = malloc(sizeof(struct amdvi_cmd) *
243 softc->cmd_max, M_AMDVI, M_WAITOK | M_ZERO);
245 if ((uintptr_t)softc->cmd & PAGE_MASK)
246 panic("AMDVi: Command buffer not aligned on page boundary.");
248 ctrl->cmd.base = vtophys(softc->cmd) / PAGE_SIZE;
250 * XXX: Reset the h/w pointers in case IOMMU is restarting,
251 * h/w doesn't clear these pointers based on empirical data.
 * Note: Update tail pointer after we have written the command since tail
 * pointer update causes h/w to execute new commands, see section 3.3
 * of AMD IOMMU spec ver 2.0.
264 /* Get the command tail pointer w/o updating it. */
265 static struct amdvi_cmd *
266 amdvi_get_cmd_tail(struct amdvi_softc *softc)
268 struct amdvi_ctrl *ctrl;
269 struct amdvi_cmd *tail;
271 KASSERT(softc, ("softc is NULL"));
272 KASSERT(softc->cmd != NULL, ("cmd is NULL"));
275 KASSERT(ctrl != NULL, ("ctrl is NULL"));
277 tail = (struct amdvi_cmd *)((uint8_t *)softc->cmd +
284 * Update the command tail pointer which will start command execution.
287 amdvi_update_cmd_tail(struct amdvi_softc *softc)
289 struct amdvi_ctrl *ctrl;
292 size = sizeof(struct amdvi_cmd);
293 KASSERT(softc->cmd != NULL, ("cmd is NULL"));
296 KASSERT(ctrl != NULL, ("ctrl is NULL"));
298 ctrl->cmd_tail = MOD_INC(ctrl->cmd_tail, size, softc->cmd_max);
301 #ifdef AMDVI_DEBUG_CMD
302 device_printf(softc->dev, "cmd_tail: %s Tail:0x%x, Head:0x%x.\n",
310 * Various commands supported by IOMMU.
313 /* Completion wait command. */
315 amdvi_cmd_cmp(struct amdvi_softc *softc, const uint64_t data)
317 struct amdvi_cmd *cmd;
320 cmd = amdvi_get_cmd_tail(softc);
321 KASSERT(cmd != NULL, ("Cmd is NULL"));
323 pa = vtophys(&softc->cmp_data);
324 cmd->opcode = AMDVI_CMP_WAIT_OPCODE;
325 cmd->word0 = (pa & 0xFFFFFFF8) | AMDVI_CMP_WAIT_STORE;
326 cmd->word1 = (pa >> 32) & 0xFFFFF;
329 amdvi_update_cmd_tail(softc);
332 /* Invalidate device table entry. */
334 amdvi_cmd_inv_dte(struct amdvi_softc *softc, uint16_t devid)
336 struct amdvi_cmd *cmd;
338 cmd = amdvi_get_cmd_tail(softc);
339 KASSERT(cmd != NULL, ("Cmd is NULL"));
340 cmd->opcode = AMDVI_INVD_DTE_OPCODE;
342 amdvi_update_cmd_tail(softc);
343 #ifdef AMDVI_DEBUG_CMD
344 device_printf(softc->dev, "Invalidated DTE:0x%x\n", devid);
348 /* Invalidate IOMMU page, use for invalidation of domain. */
350 amdvi_cmd_inv_iommu_pages(struct amdvi_softc *softc, uint16_t domain_id,
351 uint64_t addr, bool guest_nested,
354 struct amdvi_cmd *cmd;
356 cmd = amdvi_get_cmd_tail(softc);
357 KASSERT(cmd != NULL, ("Cmd is NULL"));
360 cmd->opcode = AMDVI_INVD_PAGE_OPCODE;
361 cmd->word1 = domain_id;
363 * Invalidate all addresses for this domain.
366 cmd->addr |= pde ? AMDVI_INVD_PAGE_PDE : 0;
367 cmd->addr |= page ? AMDVI_INVD_PAGE_S : 0;
369 amdvi_update_cmd_tail(softc);
372 #ifdef AMDVI_ATS_ENABLE
373 /* Invalidate device IOTLB. */
375 amdvi_cmd_inv_iotlb(struct amdvi_softc *softc, uint16_t devid)
377 struct amdvi_cmd *cmd;
383 qlen = amdvi_find_ats_qlen(devid);
385 panic("AMDVI: Invalid ATS qlen(%d) for device %d.%d.%d\n",
386 qlen, RID2PCI_STR(devid));
388 cmd = amdvi_get_cmd_tail(softc);
389 KASSERT(cmd != NULL, ("Cmd is NULL"));
391 #ifdef AMDVI_DEBUG_CMD
392 device_printf(softc->dev, "Invalidate IOTLB devID 0x%x"
393 " Qlen:%d\n", devid, qlen);
395 cmd->opcode = AMDVI_INVD_IOTLB_OPCODE;
398 cmd->addr = AMDVI_INVD_IOTLB_ALL_ADDR |
400 amdvi_update_cmd_tail(softc);
404 #ifdef notyet /* For Interrupt Remap. */
406 amdvi_cmd_inv_intr_map(struct amdvi_softc *softc,
409 struct amdvi_cmd *cmd;
411 cmd = amdvi_get_cmd_tail(softc);
412 KASSERT(cmd != NULL, ("Cmd is NULL"));
413 cmd->opcode = AMDVI_INVD_INTR_OPCODE;
415 amdvi_update_cmd_tail(softc);
416 #ifdef AMDVI_DEBUG_CMD
417 device_printf(softc->dev, "Invalidate INTR map of devID 0x%x\n", devid);
422 /* Invalidate domain using INVALIDATE_IOMMU_PAGES command. */
424 amdvi_inv_domain(struct amdvi_softc *softc, uint16_t domain_id)
426 struct amdvi_cmd *cmd;
428 cmd = amdvi_get_cmd_tail(softc);
429 KASSERT(cmd != NULL, ("Cmd is NULL"));
432 * See section 3.3.3 of IOMMU spec rev 2.0, software note
433 * for invalidating domain.
435 amdvi_cmd_inv_iommu_pages(softc, domain_id, AMDVI_INVD_PAGE_ALL_ADDR,
438 #ifdef AMDVI_DEBUG_CMD
439 device_printf(softc->dev, "Invalidate domain:0x%x\n", domain_id);
445 amdvi_cmp_wait(struct amdvi_softc *softc)
447 struct amdvi_ctrl *ctrl;
448 const uint64_t VERIFY = 0xA5A5;
449 volatile uint64_t *read;
454 read = &softc->cmp_data;
456 amdvi_cmd_cmp(softc, VERIFY);
457 /* Wait for h/w to update completion data. */
458 for (i = 0; i < 100 && (*read != VERIFY); i++) {
459 DELAY(1000); /* 1 ms */
461 status = (VERIFY == softc->cmp_data) ? true : false;
463 #ifdef AMDVI_DEBUG_CMD
465 device_printf(softc->dev, "CMD completion DONE Tail:0x%x, "
466 "Head:0x%x, loop:%d.\n", ctrl->cmd_tail,
467 ctrl->cmd_head, loop);
473 amdvi_wait(struct amdvi_softc *softc)
475 struct amdvi_ctrl *ctrl;
478 KASSERT(softc, ("softc is NULL"));
481 KASSERT(ctrl != NULL, ("ctrl is NULL"));
482 /* Don't wait if h/w is not enabled. */
483 if ((ctrl->control & AMDVI_CTRL_EN) == 0)
486 for (i = 0; i < 10; i++) {
487 if (amdvi_cmp_wait(softc))
491 device_printf(softc->dev, "Error: completion failed"
492 " tail:0x%x, head:0x%x.\n",
493 ctrl->cmd_tail, ctrl->cmd_head);
494 /* Dump the last command. */
495 amdvi_dump_cmds(softc, 1);
499 amdvi_dump_cmds(struct amdvi_softc *softc, int count)
501 struct amdvi_ctrl *ctrl;
502 struct amdvi_cmd *cmd;
506 device_printf(softc->dev, "Dump last %d command(s):\n", count);
508 * If h/w is stuck in completion, it is the previous command,
509 * start dumping from previous command onward.
511 off = MOD_DEC(ctrl->cmd_head, sizeof(struct amdvi_cmd),
513 for (i = 0; off != ctrl->cmd_tail && i < count; i++) {
514 cmd = (struct amdvi_cmd *)((uint8_t *)softc->cmd + off);
515 printf(" [CMD%d, off:0x%x] opcode= 0x%x 0x%x"
516 " 0x%x 0x%lx\n", i, off, cmd->opcode,
517 cmd->word0, cmd->word1, cmd->addr);
518 off = (off + sizeof(struct amdvi_cmd)) %
519 (softc->cmd_max * sizeof(struct amdvi_cmd));
524 amdvi_init_event(struct amdvi_softc *softc)
526 struct amdvi_ctrl *ctrl;
530 softc->event_max = 1 << ctrl->event.len;
531 softc->event = malloc(sizeof(struct amdvi_event) *
532 softc->event_max, M_AMDVI, M_WAITOK | M_ZERO);
533 if ((uintptr_t)softc->event & PAGE_MASK) {
534 device_printf(softc->dev, "Event buffer not aligned on page.");
537 ctrl->event.base = vtophys(softc->event) / PAGE_SIZE;
539 /* Reset the pointers. */
547 amdvi_decode_evt_flag(uint16_t flag)
550 flag &= AMDVI_EVENT_FLAG_MASK;
551 printf(" 0x%b]\n", flag,
565 /* See section 2.5.4 of AMD IOMMU spec ver 2.62.*/
567 amdvi_decode_evt_flag_type(uint8_t type)
570 switch (AMDVI_EVENT_FLAG_TYPE(type)) {
575 printf("Master Abort\n");
578 printf("Target Abort\n");
581 printf("Data Err\n");
589 amdvi_decode_inv_dte_evt(uint16_t devid, uint16_t domid, uint64_t addr,
593 printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x"
596 amdvi_decode_evt_flag(flag);
600 amdvi_decode_pf_evt(uint16_t devid, uint16_t domid, uint64_t addr,
604 printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x"
607 amdvi_decode_evt_flag(flag);
611 amdvi_decode_dte_hwerr_evt(uint16_t devid, uint16_t domid,
612 uint64_t addr, uint16_t flag)
615 printf("\t[DEV_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x"
616 " Addr:0x%lx", devid, domid, addr);
617 amdvi_decode_evt_flag(flag);
618 amdvi_decode_evt_flag_type(flag);
622 amdvi_decode_page_hwerr_evt(uint16_t devid, uint16_t domid, uint64_t addr,
626 printf("\t[PAGE_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x"
627 " Addr:0x%lx", devid, domid, addr);
628 amdvi_decode_evt_flag(flag);
629 amdvi_decode_evt_flag_type(AMDVI_EVENT_FLAG_TYPE(flag));
633 amdvi_decode_evt(struct amdvi_event *evt)
635 struct amdvi_cmd *cmd;
637 switch (evt->opcode) {
638 case AMDVI_EVENT_INVALID_DTE:
639 amdvi_decode_inv_dte_evt(evt->devid, evt->pasid_domid,
640 evt->addr, evt->flag);
643 case AMDVI_EVENT_PFAULT:
644 amdvi_decode_pf_evt(evt->devid, evt->pasid_domid,
645 evt->addr, evt->flag);
648 case AMDVI_EVENT_DTE_HW_ERROR:
649 amdvi_decode_dte_hwerr_evt(evt->devid, evt->pasid_domid,
650 evt->addr, evt->flag);
653 case AMDVI_EVENT_PAGE_HW_ERROR:
654 amdvi_decode_page_hwerr_evt(evt->devid, evt->pasid_domid,
655 evt->addr, evt->flag);
658 case AMDVI_EVENT_ILLEGAL_CMD:
660 case AMDVI_EVENT_CMD_HW_ERROR:
661 printf("\t[%s EVT]\n", (evt->opcode == AMDVI_EVENT_ILLEGAL_CMD) ?
662 "ILLEGAL CMD" : "CMD HW ERR");
663 cmd = (struct amdvi_cmd *)PHYS_TO_DMAP(evt->addr);
664 printf("\tCMD opcode= 0x%x 0x%x 0x%x 0x%lx\n",
665 cmd->opcode, cmd->word0, cmd->word1, cmd->addr);
668 case AMDVI_EVENT_IOTLB_TIMEOUT:
669 printf("\t[IOTLB_INV_TIMEOUT devid:0x%x addr:0x%lx]\n",
670 evt->devid, evt->addr);
673 case AMDVI_EVENT_INVALID_DTE_REQ:
674 printf("\t[INV_DTE devid:0x%x addr:0x%lx type:0x%x tr:%d]\n",
675 evt->devid, evt->addr, evt->flag >> 9,
676 (evt->flag >> 8) & 1);
679 case AMDVI_EVENT_INVALID_PPR_REQ:
680 case AMDVI_EVENT_COUNTER_ZERO:
681 printf("AMD-Vi: v2 events.\n");
685 printf("Unsupported AMD-Vi event:%d\n", evt->opcode);
690 amdvi_print_events(struct amdvi_softc *softc)
692 struct amdvi_ctrl *ctrl;
693 struct amdvi_event *event;
697 size = sizeof(struct amdvi_event);
698 for (i = 0; i < softc->event_max; i++) {
699 event = &softc->event[ctrl->evt_head / size];
702 device_printf(softc->dev, "\t[Event%d: Head:0x%x Tail:0x%x]\n",
703 i, ctrl->evt_head, ctrl->evt_tail);
704 amdvi_decode_evt(event);
705 ctrl->evt_head = MOD_INC(ctrl->evt_head, size,
711 amdvi_init_dte(struct amdvi_softc *softc)
713 struct amdvi_ctrl *ctrl;
716 ctrl->dte.base = vtophys(amdvi_dte) / PAGE_SIZE;
717 ctrl->dte.size = 0x1FF; /* 2MB device table. */
723 * Not all capabilities of IOMMU are available in ACPI IVHD flag
724 * or EFR entry, read directly from device.
727 amdvi_print_pci_cap(device_t dev)
729 struct amdvi_softc *softc;
733 softc = device_get_softc(dev);
734 off = softc->cap_off;
 * Section 3.7.1 of IOMMU spec rev 2.0.
738 * Read capability from device.
740 cap = amdvi_pci_read(softc, off);
742 /* Make sure capability type[18:16] is 3. */
743 KASSERT((((cap >> 16) & 0x7) == 0x3),
744 ("Not a IOMMU capability 0x%x@0x%x", cap, off));
746 softc->pci_cap = cap >> 24;
747 device_printf(softc->dev, "PCI cap 0x%x@0x%x feature:%b\n",
748 cap, off, softc->pci_cap,
749 "\20\1IOTLB\2HT\3NPCache\4EFR\5CapExt");
755 amdvi_event_intr(void *arg)
757 struct amdvi_softc *softc;
758 struct amdvi_ctrl *ctrl;
760 softc = (struct amdvi_softc *)arg;
762 device_printf(softc->dev, "EVT INTR %ld Status:0x%x"
763 " EVT Head:0x%x Tail:0x%x]\n", softc->event_intr_cnt++,
764 ctrl->status, ctrl->evt_head, ctrl->evt_tail);
765 printf(" [CMD Total 0x%lx] Tail:0x%x, Head:0x%x.\n",
766 softc->total_cmd, ctrl->cmd_tail, ctrl->cmd_head);
768 amdvi_print_events(softc);
769 ctrl->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR;
773 amdvi_free_evt_intr_res(device_t dev)
776 struct amdvi_softc *softc;
778 softc = device_get_softc(dev);
779 if (softc->event_tag != NULL) {
780 bus_teardown_intr(dev, softc->event_res, softc->event_tag);
782 if (softc->event_res != NULL) {
783 bus_release_resource(dev, SYS_RES_IRQ, softc->event_rid,
786 bus_delete_resource(dev, SYS_RES_IRQ, softc->event_rid);
787 PCIB_RELEASE_MSI(device_get_parent(device_get_parent(dev)),
788 dev, 1, &softc->event_irq);
792 amdvi_alloc_intr_resources(struct amdvi_softc *softc)
794 struct amdvi_ctrl *ctrl;
802 pcib = device_get_parent(device_get_parent(dev));
803 mmio_dev = pci_find_bsf(PCI_RID2BUS(softc->pci_rid),
804 PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid));
805 if (device_is_attached(mmio_dev)) {
807 "warning: IOMMU device is claimed by another driver %s\n",
808 device_get_driver(mmio_dev)->name);
811 softc->event_irq = -1;
812 softc->event_rid = 0;
815 * Section 3.7.1 of IOMMU rev 2.0. With MSI, there is only one
816 * interrupt. XXX: Enable MSI/X support.
818 err = PCIB_ALLOC_MSI(pcib, dev, 1, 1, &softc->event_irq);
821 "Couldn't find event MSI IRQ resource.\n");
825 err = bus_set_resource(dev, SYS_RES_IRQ, softc->event_rid,
826 softc->event_irq, 1);
828 device_printf(dev, "Couldn't set event MSI resource.\n");
832 softc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
833 &softc->event_rid, RF_ACTIVE);
834 if (!softc->event_res) {
836 "Unable to allocate event INTR resource.\n");
840 if (bus_setup_intr(dev, softc->event_res,
841 INTR_TYPE_MISC | INTR_MPSAFE, NULL, amdvi_event_intr,
842 softc, &softc->event_tag)) {
843 device_printf(dev, "Fail to setup event intr\n");
844 bus_release_resource(softc->dev, SYS_RES_IRQ,
845 softc->event_rid, softc->event_res);
846 softc->event_res = NULL;
850 bus_describe_intr(dev, softc->event_res, softc->event_tag,
853 err = PCIB_MAP_MSI(pcib, dev, softc->event_irq, &msi_addr,
857 "Event interrupt config failed, err=%d.\n",
859 amdvi_free_evt_intr_res(softc->dev);
863 /* Clear interrupt status bits. */
865 ctrl->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR;
867 /* Now enable MSI interrupt. */
868 pci_enable_msi(mmio_dev, msi_addr, msi_data);
874 amdvi_print_dev_cap(struct amdvi_softc *softc)
876 struct ivhd_dev_cfg *cfg;
879 cfg = softc->dev_cfg;
880 for (i = 0; i < softc->dev_cfg_cnt; i++) {
881 device_printf(softc->dev, "device [0x%x - 0x%x]"
882 "config:%b%s\n", cfg->start_id, cfg->end_id,
884 "\020\001INIT\002ExtInt\003NMI"
885 "\007LINT0\008LINT1",
886 cfg->enable_ats ? "ATS enabled" : "");
892 amdvi_handle_sysctl(SYSCTL_HANDLER_ARGS)
894 struct amdvi_softc *softc;
895 int result, type, error = 0;
897 softc = (struct amdvi_softc *)arg1;
902 result = softc->ctrl->cmd_head;
903 error = sysctl_handle_int(oidp, &result, 0,
907 result = softc->ctrl->cmd_tail;
908 error = sysctl_handle_int(oidp, &result, 0,
912 result = softc->ctrl->evt_head;
913 error = sysctl_handle_int(oidp, &result, 0,
917 result = softc->ctrl->evt_tail;
918 error = sysctl_handle_int(oidp, &result, 0,
923 device_printf(softc->dev, "Unknown sysctl:%d\n", type);
930 amdvi_add_sysctl(struct amdvi_softc *softc)
932 struct sysctl_oid_list *child;
933 struct sysctl_ctx_list *ctx;
937 ctx = device_get_sysctl_ctx(dev);
938 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
940 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "event_intr_count", CTLFLAG_RD,
941 &softc->event_intr_cnt, "Event interrupt count");
942 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "command_count", CTLFLAG_RD,
943 &softc->total_cmd, "Command submitted count");
944 SYSCTL_ADD_U16(ctx, child, OID_AUTO, "pci_rid", CTLFLAG_RD,
945 &softc->pci_rid, 0, "IOMMU RID");
946 SYSCTL_ADD_U16(ctx, child, OID_AUTO, "start_dev_rid", CTLFLAG_RD,
947 &softc->start_dev_rid, 0, "Start of device under this IOMMU");
948 SYSCTL_ADD_U16(ctx, child, OID_AUTO, "end_dev_rid", CTLFLAG_RD,
949 &softc->end_dev_rid, 0, "End of device under this IOMMU");
950 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_head",
951 CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 0,
952 amdvi_handle_sysctl, "IU", "Command head");
953 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_tail",
954 CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 1,
955 amdvi_handle_sysctl, "IU", "Command tail");
956 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_head",
957 CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 2,
958 amdvi_handle_sysctl, "IU", "Command head");
959 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_tail",
960 CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 3,
961 amdvi_handle_sysctl, "IU", "Command tail");
965 amdvi_setup_hw(struct amdvi_softc *softc)
972 amdvi_hw_enable_iotlb(softc);
974 amdvi_print_dev_cap(softc);
976 if ((status = amdvi_print_pci_cap(dev)) != 0) {
977 device_printf(dev, "PCI capability.\n");
980 if ((status = amdvi_init_cmd(softc)) != 0) {
981 device_printf(dev, "Couldn't configure command buffer.\n");
984 if ((status = amdvi_init_event(softc)) != 0) {
985 device_printf(dev, "Couldn't configure event buffer.\n");
988 if ((status = amdvi_init_dte(softc)) != 0) {
989 device_printf(dev, "Couldn't configure device table.\n");
992 if ((status = amdvi_alloc_intr_resources(softc)) != 0) {
995 amdvi_add_sysctl(softc);
1000 amdvi_teardown_hw(struct amdvi_softc *softc)
1007 * Called after disable, h/w is stopped by now, free all the resources.
1009 amdvi_free_evt_intr_res(dev);
1012 free(softc->cmd, M_AMDVI);
1015 free(softc->event, M_AMDVI);
1020 /*********** bhyve interfaces *********************/
1027 if (!amdvi_enable_user && ivhd_count) {
1028 printf("bhyve: Found %d AMD-Vi/IOMMU device(s), "
1029 "use hw.vmm.amdvi.enable=1 to enable pass-through.\n",
1043 amdvi_domainId(void)
1047 * If we hit maximum domain limit, rollover leaving host
1049 * XXX: make sure that this domain is not used.
1051 if (amdvi_dom_id == AMDVI_MAX_DOMAIN)
1054 return ((uint16_t)amdvi_dom_id++);
1058 amdvi_do_inv_domain(uint16_t domain_id, bool create)
1060 struct amdvi_softc *softc;
1063 for (i = 0; i < ivhd_count; i++) {
1064 softc = device_get_softc(ivhd_devs[i]);
1065 KASSERT(softc, ("softc is NULL"));
1067 * If not present pages are cached, invalidate page after
1071 if (create && ((softc->pci_cap & AMDVI_PCI_CAP_NPCACHE) == 0))
1074 amdvi_inv_domain(softc, domain_id);
1080 amdvi_create_domain(vm_paddr_t maxaddr)
1082 struct amdvi_domain *dom;
1084 dom = malloc(sizeof(struct amdvi_domain), M_AMDVI, M_ZERO | M_WAITOK);
1085 dom->id = amdvi_domainId();
1086 //dom->maxaddr = maxaddr;
1087 #ifdef AMDVI_DEBUG_CMD
1088 printf("Created domain #%d\n", dom->id);
1091 * Host domain(#0) don't create translation table.
1093 if (dom->id || amdvi_host_ptp)
1094 dom->ptp = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO);
1096 dom->ptp_level = amdvi_ptp_level;
1098 amdvi_do_inv_domain(dom->id, true);
1099 SLIST_INSERT_HEAD(&dom_head, dom, next);
1105 amdvi_free_ptp(uint64_t *ptp, int level)
1112 for (i = 0; i < NPTEPG ; i++) {
1113 if ((ptp[i] & AMDVI_PT_PRESENT) == 0)
1115 /* XXX: Add super-page or PTE mapping > 4KB. */
1117 /* Super-page mapping. */
1118 if (AMDVI_PD_SUPER(ptp[i]))
1122 amdvi_free_ptp((uint64_t *)PHYS_TO_DMAP(ptp[i]
1123 & AMDVI_PT_MASK), level - 1);
1131 amdvi_destroy_domain(void *arg)
1133 struct amdvi_domain *domain;
1135 domain = (struct amdvi_domain *)arg;
1136 KASSERT(domain, ("domain is NULL"));
1137 #ifdef AMDVI_DEBUG_CMD
1138 printf("Destroying domain %d\n", domain->id);
1141 amdvi_free_ptp(domain->ptp, domain->ptp_level);
1143 amdvi_do_inv_domain(domain->id, false);
1144 SLIST_REMOVE(&dom_head, domain, amdvi_domain, next);
1145 free(domain, M_AMDVI);
1149 amdvi_set_pt(uint64_t *pt, int level, vm_paddr_t gpa,
1150 vm_paddr_t hpa, uint64_t pg_size, bool create)
1154 const int PT_SHIFT = 9;
1155 const int PT_INDEX_MASK = (1 << PT_SHIFT) - 1; /* Based on PT_SHIFT */
1160 if (hpa & (pg_size - 1)) {
1161 printf("HPA is not size aligned.\n");
1164 if (gpa & (pg_size - 1)) {
1165 printf("HPA is not size aligned.\n");
1169 while ((shift > PAGE_SHIFT) && (pg_size < (1UL << shift))) {
1170 index = (gpa >> shift) & PT_INDEX_MASK;
1172 if ((pt[index] == 0) && create) {
1173 page = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO);
1175 pt[index] = pa | AMDVI_PT_PRESENT | AMDVI_PT_RW |
1176 ((level - 1) << AMDVI_PD_LEVEL_SHIFT);
1178 #ifdef AMDVI_DEBUG_PTE
1179 if ((gpa % 0x1000000) == 0)
1180 printf("[level%d, shift = %d]PTE:0x%lx\n",
1181 level, shift, pt[index]);
1183 #define PTE2PA(x) ((uint64_t)(x) & AMDVI_PT_MASK)
1184 pa = PTE2PA(pt[index]);
1185 pt = (uint64_t *)PHYS_TO_DMAP(pa);
1191 index = (gpa >> shift) & PT_INDEX_MASK;
1194 pt[index] = hpa | AMDVI_PT_RW | AMDVI_PT_PRESENT;
1198 #ifdef AMDVI_DEBUG_PTE
1199 if ((gpa % 0x1000000) == 0)
1200 printf("[Last level%d, shift = %d]PTE:0x%lx\n",
1201 level, shift, pt[index]);
1203 return (1ULL << shift);
1207 amdvi_update_mapping(struct amdvi_domain *domain, vm_paddr_t gpa,
1208 vm_paddr_t hpa, uint64_t size, bool create)
1210 uint64_t mapped, *ptp, len;
1213 KASSERT(domain, ("domain is NULL"));
1214 level = domain->ptp_level;
1215 KASSERT(level, ("Page table level is 0"));
1218 KASSERT(ptp, ("PTP is NULL"));
1220 while (mapped < size) {
1221 len = amdvi_set_pt(ptp, level, gpa + mapped, hpa + mapped,
1224 printf("Error: Couldn't map HPA:0x%lx GPA:0x%lx\n",
1235 amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa,
1238 struct amdvi_domain *domain;
1240 domain = (struct amdvi_domain *)arg;
1242 if (domain->id && !domain->ptp) {
1243 printf("ptp is NULL");
1248 * If host domain is created w/o page table, skip IOMMU page
1252 return (amdvi_update_mapping(domain, gpa, hpa, len, true));
1258 amdvi_destroy_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
1260 struct amdvi_domain *domain;
1262 domain = (struct amdvi_domain *)arg;
1264 * If host domain is created w/o page table, skip IOMMU page
1268 return (amdvi_update_mapping(domain, gpa, 0, len, false));
1273 static struct amdvi_softc *
1274 amdvi_find_iommu(uint16_t devid)
1276 struct amdvi_softc *softc;
1279 for (i = 0; i < ivhd_count; i++) {
1280 softc = device_get_softc(ivhd_devs[i]);
1281 if ((devid >= softc->start_dev_rid) &&
1282 (devid <= softc->end_dev_rid))
 * XXX: BIOS bug, device not in IVRS table, assume it's from first IOMMU.
1289 printf("BIOS bug device(%d.%d.%d) doesn't have IVHD entry.\n",
1290 RID2PCI_STR(devid));
1292 return (device_get_softc(ivhd_devs[0]));
1296 * Set-up device table entry.
1297 * IOMMU spec Rev 2.0, section 3.2.2.2, some of the fields must
1298 * be set concurrently, e.g. read and write bits.
1301 amdvi_set_dte(struct amdvi_domain *domain, uint16_t devid, bool enable)
1303 struct amdvi_softc *softc;
1304 struct amdvi_dte* temp;
1306 KASSERT(domain, ("domain is NULL for pci_rid:0x%x\n", devid));
1308 softc = amdvi_find_iommu(devid);
1309 KASSERT(softc, ("softc is NULL for pci_rid:0x%x\n", devid));
1311 temp = &amdvi_dte[devid];
1313 #ifdef AMDVI_ATS_ENABLE
1314 /* If IOMMU and device support IOTLB, enable it. */
1315 if (amdvi_dev_support_iotlb(softc, devid) && softc->iotlb)
1316 temp->iotlb_enable = 1;
1319 /* Avoid duplicate I/O faults. */
1320 temp->sup_second_io_fault = 1;
1321 temp->sup_all_io_fault = amdvi_disable_io_fault;
1324 temp->domain_id = domain->id;
1328 temp->pt_base = vtophys(domain->ptp) >> 12;
1329 temp->pt_level = amdvi_ptp_level;
1332 * XXX: Page table valid[TV] bit must be set even if host domain
1333 * page tables are not enabled.
1336 temp->read_allow = 1;
1337 temp->write_allow = 1;
1342 amdvi_inv_device(uint16_t devid)
1344 struct amdvi_softc *softc;
1346 softc = amdvi_find_iommu(devid);
1347 KASSERT(softc, ("softc is NULL"));
1349 amdvi_cmd_inv_dte(softc, devid);
1350 #ifdef AMDVI_ATS_ENABLE
1351 if (amdvi_dev_support_iotlb(softc, devid))
1352 amdvi_cmd_inv_iotlb(softc, devid);
1358 amdvi_add_device(void *arg, uint16_t devid)
1360 struct amdvi_domain *domain;
1362 domain = (struct amdvi_domain *)arg;
1363 KASSERT(domain != NULL, ("domain is NULL"));
1364 #ifdef AMDVI_DEBUG_CMD
1365 printf("Assigning device(%d.%d.%d) to domain:%d\n",
1366 RID2PCI_STR(devid), domain->id);
1368 amdvi_set_dte(domain, devid, true);
1369 amdvi_inv_device(devid);
1373 amdvi_remove_device(void *arg, uint16_t devid)
1375 struct amdvi_domain *domain;
1377 domain = (struct amdvi_domain *)arg;
1378 #ifdef AMDVI_DEBUG_CMD
1379 printf("Remove device(0x%x) from domain:%d\n",
1382 amdvi_set_dte(domain, devid, false);
1383 amdvi_inv_device(devid);
1389 struct amdvi_ctrl *ctrl;
1390 struct amdvi_softc *softc;
1394 for (i = 0; i < ivhd_count; i++) {
1395 softc = device_get_softc(ivhd_devs[i]);
1396 KASSERT(softc, ("softc is NULL\n"));
1398 KASSERT(ctrl, ("ctrl is NULL\n"));
1400 val = ( AMDVI_CTRL_EN |
1403 AMDVI_CTRL_ELOGINT |
1404 AMDVI_CTRL_INV_TO_1S);
1406 if (softc->ivhd_flag & IVHD_FLAG_COH)
1407 val |= AMDVI_CTRL_COH;
1408 if (softc->ivhd_flag & IVHD_FLAG_HTT)
1409 val |= AMDVI_CTRL_HTT;
1410 if (softc->ivhd_flag & IVHD_FLAG_RPPW)
1411 val |= AMDVI_CTRL_RPPW;
1412 if (softc->ivhd_flag & IVHD_FLAG_PPW)
1413 val |= AMDVI_CTRL_PPW;
1414 if (softc->ivhd_flag & IVHD_FLAG_ISOC)
1415 val |= AMDVI_CTRL_ISOC;
1417 ctrl->control = val;
1424 struct amdvi_ctrl *ctrl;
1425 struct amdvi_softc *softc;
1428 for (i = 0; i < ivhd_count; i++) {
1429 softc = device_get_softc(ivhd_devs[i]);
1430 KASSERT(softc, ("softc is NULL\n"));
1432 KASSERT(ctrl, ("ctrl is NULL\n"));
1439 amdvi_inv_tlb(void *arg)
1441 struct amdvi_domain *domain;
1443 domain = (struct amdvi_domain *)arg;
1444 KASSERT(domain, ("domain is NULL"));
1445 amdvi_do_inv_domain(domain->id, false);
1448 struct iommu_ops iommu_ops_amd = {
1453 amdvi_create_domain,
1454 amdvi_destroy_domain,
1455 amdvi_create_mapping,
1456 amdvi_destroy_mapping,
1458 amdvi_remove_device,