2 * Copyright (c) 2016, Anish Gupta (anish@freebsd.org)
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice unmodified, this list of conditions, and the following
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/param.h>
31 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/module.h>
35 #include <sys/malloc.h>
39 #include <sys/sysctl.h>
44 #include <dev/pci/pcivar.h>
45 #include <dev/pci/pcireg.h>
47 #include <machine/resource.h>
48 #include <machine/vmm.h>
49 #include <machine/pmap.h>
50 #include <machine/vmparam.h>
51 #include <machine/pci_cfgreg.h>
56 #include "amdvi_priv.h"
59 SYSCTL_NODE(_hw_vmm, OID_AUTO, amdvi, CTLFLAG_RW, NULL, NULL);
61 #define MOD_INC(a, s, m) (((a) + (s)) % ((m) * (s)))
62 #define MOD_DEC(a, s, m) (((a) - (s)) % ((m) * (s)))
64 /* Print RID or device ID in PCI string format. */
65 #define RID2PCI_STR(d) PCI_RID2BUS(d), PCI_RID2SLOT(d), PCI_RID2FUNC(d)
67 static void amdvi_dump_cmds(struct amdvi_softc *softc);
68 static void amdvi_print_dev_cap(struct amdvi_softc *softc);
70 MALLOC_DEFINE(M_AMDVI, "amdvi", "amdvi");
72 extern device_t *ivhd_devs;
74 extern int ivhd_count;
75 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, count, CTLFLAG_RDTUN, &ivhd_count,
78 static int amdvi_enable_user = 0;
79 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, enable, CTLFLAG_RDTUN,
80 &amdvi_enable_user, 0, NULL);
81 TUNABLE_INT("hw.vmm.amdvi_enable", &amdvi_enable_user);
83 #ifdef AMDVI_ATS_ENABLE
84 /* XXX: ATS is not tested. */
85 static int amdvi_enable_iotlb = 1;
86 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, iotlb_enabled, CTLFLAG_RDTUN,
87 &amdvi_enable_iotlb, 0, NULL);
88 TUNABLE_INT("hw.vmm.enable_iotlb", &amdvi_enable_iotlb);
91 static int amdvi_host_ptp = 1; /* Use page tables for host. */
92 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, host_ptp, CTLFLAG_RDTUN,
93 &amdvi_host_ptp, 0, NULL);
94 TUNABLE_INT("hw.vmm.amdvi.host_ptp", &amdvi_host_ptp);
96 /* Page table level used <= supported by h/w[v1=7]. */
97 static int amdvi_ptp_level = 4;
98 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, ptp_level, CTLFLAG_RDTUN,
99 &amdvi_ptp_level, 0, NULL);
100 TUNABLE_INT("hw.vmm.amdvi.ptp_level", &amdvi_ptp_level);
102 /* Disable fault event reporting. */
103 static int amdvi_disable_io_fault = 0;
104 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, disable_io_fault, CTLFLAG_RDTUN,
105 &amdvi_disable_io_fault, 0, NULL);
106 TUNABLE_INT("hw.vmm.amdvi.disable_io_fault", &amdvi_disable_io_fault);
108 static uint32_t amdvi_dom_id = 0; /* 0 is reserved for host. */
109 SYSCTL_UINT(_hw_vmm_amdvi, OID_AUTO, domain_id, CTLFLAG_RD,
110 &amdvi_dom_id, 0, NULL);
112 * Device table entry.
113 * Bus(256) x Dev(32) x Fun(8) x DTE(256 bits or 32 bytes).
114 * = 256 * 2 * PAGE_SIZE.
116 static struct amdvi_dte amdvi_dte[PCI_NUM_DEV_MAX] __aligned(PAGE_SIZE);
117 CTASSERT(PCI_NUM_DEV_MAX == 0x10000);
118 CTASSERT(sizeof(amdvi_dte) == 0x200000);
120 static SLIST_HEAD (, amdvi_domain) dom_head;
123 amdvi_pci_write(struct amdvi_softc *softc, int off, uint32_t data)
126 pci_cfgregwrite(PCI_RID2BUS(softc->pci_rid),
127 PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid),
131 static inline uint32_t
132 amdvi_pci_read(struct amdvi_softc *softc, int off)
135 return (pci_cfgregread(PCI_RID2BUS(softc->pci_rid),
136 PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid),
141 amdvi_find_pci_cap(struct amdvi_softc *softc, uint8_t capability, int *off)
146 read = amdvi_pci_read(softc, PCIR_COMMAND);
147 if (((read >> 16) & PCIM_STATUS_CAPPRESENT) == 0)
150 /* Read the pointer to the start of the capability list. */
151 read = amdvi_pci_read(softc, PCIR_CAP_PTR);
155 read = amdvi_pci_read(softc, ptr);
156 if ((read & 0xFF) == capability) {
160 ptr = (read >> 8) & 0xFF;
166 #ifdef AMDVI_ATS_ENABLE
167 /* XXX: Should be in pci.c */
169 * Check if the device has the ATS capability and it is enabled.
170 * If ATS is absent or disabled, return (-1), otherwise ATS
174 amdvi_find_ats_qlen(uint16_t devid)
180 dev = pci_find_bsf(PCI_RID2BUS(devid), PCI_RID2SLOT(devid),
181 PCI_RID2FUNC(devid));
186 #define PCIM_ATS_EN BIT(31)
188 if (pci_find_extcap(dev, PCIZ_ATS, &off) == 0) {
189 cap = pci_read_config(dev, off + 4, 4);
191 qlen = qlen ? qlen : 32;
192 printf("AMD-Vi: PCI device %d.%d.%d ATS %s qlen=%d\n",
194 (cap & PCIM_ATS_EN) ? "enabled" : "Disabled",
196 qlen = (cap & PCIM_ATS_EN) ? qlen : -1;
203 * Check if an endpoint device supports device IOTLB or ATS.
206 amdvi_dev_support_iotlb(struct amdvi_softc *softc, uint16_t devid)
208 struct ivhd_dev_cfg *cfg;
210 bool pci_ats, ivhd_ats;
212 qlen = amdvi_find_ats_qlen(devid);
216 KASSERT(softc, ("softc is NULL"));
217 cfg = softc->dev_cfg;
220 for (i = 0; i < softc->dev_cfg_cnt; i++) {
221 if ((cfg->start_id <= devid) && (cfg->end_id >= devid)) {
222 ivhd_ats = cfg->enable_ats;
228 pci_ats = (qlen < 0) ? false : true;
229 if (pci_ats != ivhd_ats)
230 device_printf(softc->dev,
231 "BIOS bug: mismatch in ATS setting for %d.%d.%d,"
232 "ATS inv qlen = %d\n", RID2PCI_STR(devid), qlen);
234 /* Ignore IVRS setting and respect PCI setting. */
239 /* Enable IOTLB support for IOMMU if it is supported. */
241 amdvi_hw_enable_iotlb(struct amdvi_softc *softc)
243 #ifndef AMDVI_ATS_ENABLE
244 softc->iotlb = false;
248 supported = (softc->ivhd_flag & IVHD_FLAG_IOTLB) ? true : false;
250 if (softc->pci_cap & AMDVI_PCI_CAP_IOTLB) {
252 device_printf(softc->dev, "IOTLB disabled by BIOS.\n");
254 if (supported && !amdvi_enable_iotlb) {
255 device_printf(softc->dev, "IOTLB disabled by user.\n");
261 softc->iotlb = supported;
/*
 * Allocate the command ring and program its location into the control
 * block.  The ring holds 1 << 8 entries of struct amdvi_cmd, is
 * zero-filled, and its base is written as a page frame number.
 */
267 amdvi_init_cmd(struct amdvi_softc *softc)
269 struct amdvi_ctrl *ctrl = softc->ctrl;
271 ctrl->cmd.len = 8; /* Use 256 command buffer entries. */
272 softc->cmd_max = 1 << ctrl->cmd.len;
/* M_WAITOK: the allocation sleeps rather than returning NULL. */
274 softc->cmd = malloc(sizeof(struct amdvi_cmd) *
275 softc->cmd_max, M_AMDVI, M_WAITOK | M_ZERO);
/* The base register only has page granularity, so the ring must be aligned. */
277 if ((uintptr_t)softc->cmd & PAGE_MASK)
278 panic("AMDVi: Command buffer not aligned on page boundary.");
/*
 * NOTE(review): only the physical address of the first byte is
 * programmed, which assumes the whole ring is physically contiguous
 * (e.g. that 256 * sizeof(struct amdvi_cmd) fits in one page) --
 * TODO confirm against the struct definition.
 */
280 ctrl->cmd.base = vtophys(softc->cmd) / PAGE_SIZE;
282 * XXX: Reset the h/w pointers in case IOMMU is restarting,
283 * h/w doesn't clear these pointers based on empirical data.
292 * Note: Update tail pointer after we have written the command since tail
293 * pointer update causes h/w to execute new commands, see section 3.3
294 * of AMD IOMMU spec ver 2.0.
296 /* Get the command tail pointer w/o updating it. */
297 static struct amdvi_cmd *
298 amdvi_get_cmd_tail(struct amdvi_softc *softc)
300 struct amdvi_ctrl *ctrl;
301 struct amdvi_cmd *tail;
303 KASSERT(softc, ("softc is NULL"));
304 KASSERT(softc->cmd != NULL, ("cmd is NULL"));
307 KASSERT(ctrl != NULL, ("ctrl is NULL"));
309 tail = (struct amdvi_cmd *)((uint8_t *)softc->cmd +
316 * Update the command tail pointer which will start command execution.
319 amdvi_update_cmd_tail(struct amdvi_softc *softc)
321 struct amdvi_ctrl *ctrl;
324 size = sizeof(struct amdvi_cmd);
325 KASSERT(softc->cmd != NULL, ("cmd is NULL"));
328 KASSERT(ctrl != NULL, ("ctrl is NULL"));
330 ctrl->cmd_tail = MOD_INC(ctrl->cmd_tail, size, softc->cmd_max);
333 #ifdef AMDVI_DEBUG_CMD
334 device_printf(softc->dev, "cmd_tail: %s Tail:0x%x, Head:0x%x.\n",
342 * Various commands supported by IOMMU.
345 /* Completion wait command. */
/*
 * Queue a COMPLETION_WAIT command at the current ring tail and then
 * advance the tail.  The command targets the physical address of
 * softc->cmp_data with the "store" flag set.
 *
 * NOTE(review): no visible line uses the `data' argument; presumably it
 * is written into the command as the value to store -- confirm.
 */
347 amdvi_cmd_cmp(struct amdvi_softc *softc, const uint64_t data)
349 struct amdvi_cmd *cmd;
352 cmd = amdvi_get_cmd_tail(softc);
353 KASSERT(cmd != NULL, ("Cmd is NULL"));
355 pa = vtophys(&softc->cmp_data);
356 cmd->opcode = AMDVI_CMP_WAIT_OPCODE;
/* Low 32 address bits with bits [2:0] cleared, OR'ed with the store flag. */
357 cmd->word0 = (pa & 0xFFFFFFF8) |
358 (AMDVI_CMP_WAIT_STORE);
/* Disabled alternative that would also set AMDVI_CMP_WAIT_FLUSH: */
359 //(AMDVI_CMP_WAIT_FLUSH | AMDVI_CMP_WAIT_STORE);
/* Address bits [51:32]. */
360 cmd->word1 = (pa >> 32) & 0xFFFFF;
/* Publish the command only after all of its fields are written. */
363 amdvi_update_cmd_tail(softc);
366 /* Invalidate device table entry. */
368 amdvi_cmd_inv_dte(struct amdvi_softc *softc, uint16_t devid)
370 struct amdvi_cmd *cmd;
372 cmd = amdvi_get_cmd_tail(softc);
373 KASSERT(cmd != NULL, ("Cmd is NULL"));
374 cmd->opcode = AMDVI_INVD_DTE_OPCODE;
376 amdvi_update_cmd_tail(softc);
377 #ifdef AMDVI_DEBUG_CMD
378 device_printf(softc->dev, "Invalidated DTE:0x%x\n", devid);
382 /* Invalidate IOMMU page, use for invalidation of domain. */
384 amdvi_cmd_inv_iommu_pages(struct amdvi_softc *softc, uint16_t domain_id,
385 uint64_t addr, bool guest_nested,
388 struct amdvi_cmd *cmd;
390 cmd = amdvi_get_cmd_tail(softc);
391 KASSERT(cmd != NULL, ("Cmd is NULL"));
394 cmd->opcode = AMDVI_INVD_PAGE_OPCODE;
395 cmd->word1 = domain_id;
397 * Invalidate all addresses for this domain.
400 cmd->addr |= pde ? AMDVI_INVD_PAGE_PDE : 0;
401 cmd->addr |= page ? AMDVI_INVD_PAGE_S : 0;
403 amdvi_update_cmd_tail(softc);
406 #ifdef AMDVI_ATS_ENABLE
407 /* Invalidate device IOTLB. */
409 amdvi_cmd_inv_iotlb(struct amdvi_softc *softc, uint16_t devid)
411 struct amdvi_cmd *cmd;
417 qlen = amdvi_find_ats_qlen(devid);
419 panic("AMDVI: Invalid ATS qlen(%d) for device %d.%d.%d\n",
420 qlen, RID2PCI_STR(devid));
422 cmd = amdvi_get_cmd_tail(softc);
423 KASSERT(cmd != NULL, ("Cmd is NULL"));
425 #ifdef AMDVI_DEBUG_CMD
426 device_printf(softc->dev, "Invalidate IOTLB devID 0x%x"
427 " Qlen:%d\n", devid, qlen);
429 cmd->opcode = AMDVI_INVD_IOTLB_OPCODE;
432 cmd->addr = AMDVI_INVD_IOTLB_ALL_ADDR |
434 amdvi_update_cmd_tail(softc);
438 #ifdef notyet /* For Interrupt Remap. */
440 amdvi_cmd_inv_intr_map(struct amdvi_softc *softc,
443 struct amdvi_cmd *cmd;
445 cmd = amdvi_get_cmd_tail(softc);
446 KASSERT(cmd != NULL, ("Cmd is NULL"));
447 cmd->opcode = AMDVI_INVD_INTR_OPCODE;
449 amdvi_update_cmd_tail(softc);
450 #ifdef AMDVI_DEBUG_CMD
451 device_printf(softc->dev, "Invalidate INTR map of devID 0x%x\n", devid);
456 /* Invalidate domain using INVALIDATE_IOMMU_PAGES command. */
458 amdvi_inv_domain(struct amdvi_softc *softc, uint16_t domain_id)
460 struct amdvi_cmd *cmd;
462 cmd = amdvi_get_cmd_tail(softc);
463 KASSERT(cmd != NULL, ("Cmd is NULL"));
466 * See section 3.3.3 of IOMMU spec rev 2.0, software note
467 * for invalidating domain.
469 amdvi_cmd_inv_iommu_pages(softc, domain_id, AMDVI_INVD_PAGE_ALL_ADDR,
472 #ifdef AMDVI_DEBUG_CMD
473 device_printf(softc->dev, "Invalidate domain:0x%x\n", domain_id);
/*
 * Issue one completion-wait command carrying the marker VERIFY and poll
 * softc->cmp_data until it equals the marker.  Polls at most 100 times
 * with a 1 ms delay between polls.  `status' is true when the marker was
 * observed; amdvi_wait() tests this function's result as a boolean.
 */
479 amdvi_cmp_wait(struct amdvi_softc *softc)
481 struct amdvi_ctrl *ctrl;
482 const uint64_t VERIFY = 0xA5A5;
/* volatile so that every loop iteration re-reads the memory location. */
483 volatile uint64_t *read;
/*
 * NOTE(review): no visible line clears cmp_data before the command is
 * queued; if it still held VERIFY from an earlier call the poll below
 * would succeed immediately -- confirm.
 */
488 read = &softc->cmp_data;
490 amdvi_cmd_cmp(softc, VERIFY);
491 /* Wait for h/w to update completion data. */
492 for (i = 0; i < 100 && (*read != VERIFY); i++) {
493 DELAY(1000); /* 1 ms */
/* NOTE(review): this final check reads the field directly, not through `read'. */
495 status = (VERIFY == softc->cmp_data) ? true : false;
497 #ifdef AMDVI_DEBUG_CMD
/*
 * NOTE(review): the message prints `loop', but the counter used by the
 * polling loop above is `i' and no declaration of `loop' is visible;
 * this may not compile when AMDVI_DEBUG_CMD is defined -- confirm.
 */
499 device_printf(softc->dev, "CMD completion DONE Tail:0x%x, "
500 "Head:0x%x, loop:%d.\n", ctrl->cmd_tail,
501 ctrl->cmd_head, loop);
/*
 * Wait for the IOMMU to finish outstanding commands.  Does nothing
 * useful when the hardware is not enabled; otherwise makes up to 10
 * attempts with amdvi_cmp_wait().  When the failure path is reached it
 * logs the ring head/tail and dumps the pending commands.
 */
507 amdvi_wait(struct amdvi_softc *softc)
509 struct amdvi_ctrl *ctrl;
512 KASSERT(softc, ("softc is NULL"));
515 KASSERT(ctrl != NULL, ("ctrl is NULL"));
516 /* Don't wait if h/w is not enabled. */
517 if ((ctrl->control & AMDVI_CTRL_EN) == 0)
/*
 * Each amdvi_cmp_wait() call polls up to 100 times at 1 ms, so ten
 * attempts bound the wait at roughly one second.
 */
520 for (i = 0; i < 10; i++) {
521 if (amdvi_cmp_wait(softc))
525 device_printf(softc->dev, "Error: completion failed"
526 " tail:0x%x, head:0x%x.\n",
527 ctrl->cmd_tail, ctrl->cmd_head);
528 amdvi_dump_cmds(softc);
/*
 * Print the commands in the ring starting one entry before the hardware
 * head and stopping at the tail (or after cmd_max entries, whichever
 * comes first).  `off' is a byte offset into the ring.
 */
532 amdvi_dump_cmds(struct amdvi_softc *softc)
534 struct amdvi_ctrl *ctrl;
535 struct amdvi_cmd *cmd;
539 device_printf(softc->dev, "Dump all the commands:\n");
541 * If h/w is stuck in completion, it is the previous command,
542 * start dumping from previous command onward.
/*
 * NOTE(review): when cmd_head is 0, MOD_DEC computes (0 - size) in
 * unsigned arithmetic before the modulo; that only yields the last ring
 * slot if the ring size in bytes is a power of two -- confirm.
 */
544 off = MOD_DEC(ctrl->cmd_head, sizeof(struct amdvi_cmd),
546 for (i = 0; off != ctrl->cmd_tail &&
547 i < softc->cmd_max; i++) {
548 cmd = (struct amdvi_cmd *)((uint8_t *)softc->cmd + off);
549 printf(" [CMD%d, off:0x%x] opcode= 0x%x 0x%x"
550 " 0x%x 0x%lx\n", i, off, cmd->opcode,
551 cmd->word0, cmd->word1, cmd->addr);
/* Advance one entry with wraparound; same arithmetic as MOD_INC. */
552 off = (off + sizeof(struct amdvi_cmd)) %
553 (softc->cmd_max * sizeof(struct amdvi_cmd));
558 amdvi_init_event(struct amdvi_softc *softc)
560 struct amdvi_ctrl *ctrl;
564 softc->event_max = 1 << ctrl->event.len;
565 softc->event = malloc(sizeof(struct amdvi_event) *
566 softc->event_max, M_AMDVI, M_WAITOK | M_ZERO);
567 if ((uintptr_t)softc->event & PAGE_MASK) {
568 device_printf(softc->dev, "Event buffer not aligned on page.");
571 ctrl->event.base = vtophys(softc->event) / PAGE_SIZE;
573 /* Reset the pointers. */
581 amdvi_decode_evt_flag(uint16_t flag)
584 flag &= AMDVI_EVENT_FLAG_MASK;
585 printf("0x%b]\n", flag,
599 /* See section 2.5.4 of AMD IOMMU spec ver 2.62.*/
/*
 * Print a short name for the error type encoded in an event flag.  The
 * type is extracted from the argument with AMDVI_EVENT_FLAG_TYPE().
 *
 * NOTE(review): the parameter is uint8_t while the event flag handled by
 * the callers is uint16_t, so the upper byte is dropped before the macro
 * runs.  Whether the type field survives depends on the macro's
 * definition, which is not visible here -- confirm.
 */
601 amdvi_decode_evt_flag_type(uint8_t type)
604 switch (AMDVI_EVENT_FLAG_TYPE(type)) {
609 printf("Master Abort\n");
612 printf("Target Abort\n");
615 printf("Data Err\n");
/*
 * Print the fields of an invalid-device-table-entry event, followed by
 * the decoded flag bits.  Called from amdvi_decode_evt() for
 * AMDVI_EVENT_INVALID_DTE.
 *
 * NOTE(review): the message is tagged "IO_PAGE_FAULT EVT", the same tag
 * amdvi_decode_pf_evt() prints, so the two event kinds cannot be told
 * apart in the log -- looks like a copy/paste slip, confirm.
 */
623 amdvi_decode_inv_dte_evt(uint16_t devid, uint16_t domid, uint64_t addr,
627 printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x"
630 amdvi_decode_evt_flag(flag);
634 amdvi_decode_pf_evt(uint16_t devid, uint16_t domid, uint64_t addr,
638 printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x"
641 amdvi_decode_evt_flag(flag);
645 amdvi_decode_dte_hwerr_evt(uint16_t devid, uint16_t domid,
646 uint64_t addr, uint16_t flag)
649 printf("\t[DEV_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x"
650 " Addr:0x%lx", devid, domid, addr);
651 amdvi_decode_evt_flag(flag);
652 amdvi_decode_evt_flag_type(flag);
/*
 * Print the fields of a page-table hardware error event, then the
 * decoded flag bits and the error type.
 */
656 amdvi_decode_page_hwerr_evt(uint16_t devid, uint16_t domid, uint64_t addr,
660 printf("\t[PAGE_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x"
661 " Addr:0x%lx", devid, domid, addr);
662 amdvi_decode_evt_flag(flag);
/*
 * NOTE(review): AMDVI_EVENT_FLAG_TYPE() is applied here and again inside
 * amdvi_decode_evt_flag_type(), whereas amdvi_decode_dte_hwerr_evt()
 * passes the raw flag.  One of the two call styles is likely wrong --
 * confirm against the macro definition.
 */
663 amdvi_decode_evt_flag_type(AMDVI_EVENT_FLAG_TYPE(flag));
667 amdvi_decode_evt(struct amdvi_event *evt)
669 struct amdvi_cmd *cmd;
671 switch (evt->opcode) {
672 case AMDVI_EVENT_INVALID_DTE:
673 amdvi_decode_inv_dte_evt(evt->devid, evt->pasid_domid,
674 evt->addr, evt->flag);
677 case AMDVI_EVENT_PFAULT:
678 amdvi_decode_pf_evt(evt->devid, evt->pasid_domid,
679 evt->addr, evt->flag);
682 case AMDVI_EVENT_DTE_HW_ERROR:
683 amdvi_decode_dte_hwerr_evt(evt->devid, evt->pasid_domid,
684 evt->addr, evt->flag);
687 case AMDVI_EVENT_PAGE_HW_ERROR:
688 amdvi_decode_page_hwerr_evt(evt->devid, evt->pasid_domid,
689 evt->addr, evt->flag);
692 case AMDVI_EVENT_ILLEGAL_CMD:
694 case AMDVI_EVENT_CMD_HW_ERROR:
695 printf("\t[%s EVT]", (evt->opcode == AMDVI_EVENT_ILLEGAL_CMD) ?
696 "ILLEGAL CMD" : "CMD HW ERR");
697 cmd = (struct amdvi_cmd *)PHYS_TO_DMAP(evt->addr);
698 printf("\tCMD opcode= 0x%x 0x%x 0x%x 0x%lx\n",
699 cmd->opcode, cmd->word0, cmd->word1, cmd->addr);
702 case AMDVI_EVENT_IOTLB_TIMEOUT:
703 printf("\t[IOTLB_INV_TIMEOUT devid:0x%x addr:0x%lx",
704 evt->devid, evt->addr);
707 case AMDVI_EVENT_INVALID_DTE_REQ:
708 printf("\t[INV_DTE devid:0x%x addr:0x%lx",
709 evt->devid, evt->addr);
712 case AMDVI_EVENT_INVALID_PPR_REQ:
713 case AMDVI_EVENT_COUNTER_ZERO:
714 printf("AMD-Vi: v2 events.\n");
718 printf("Unsupported AMD-Vi event:%d", evt->opcode);
723 amdvi_print_events(struct amdvi_softc *softc)
725 struct amdvi_ctrl *ctrl;
726 struct amdvi_event *event;
730 size = sizeof(struct amdvi_event);
731 for (i = 0; i < softc->event_max; i++) {
732 event = &softc->event[ctrl->evt_head / size];
735 device_printf(softc->dev, "\t[Event%d: Head:0x%x Tail:0x%x]\n",
736 i, ctrl->evt_head, ctrl->evt_tail);
737 amdvi_decode_evt(event);
738 ctrl->evt_head = MOD_INC(ctrl->evt_head, size,
744 amdvi_init_dte(struct amdvi_softc *softc)
746 struct amdvi_ctrl *ctrl;
749 ctrl->dte.base = vtophys(amdvi_dte) / PAGE_SIZE;
750 ctrl->dte.size = 0x1FF; /* 2MB device table. */
756 * Not all capabilities of IOMMU are available in ACPI IVHD flag
757 * or EFR entry, read directly from device.
/*
 * Read the IOMMU capability header from PCI config space at
 * softc->cap_off, cache its top byte in softc->pci_cap, copy the
 * extended feature register from the control block into softc->pci_efr,
 * and log both in decoded form.
 */
760 amdvi_print_pci_cap(device_t dev)
762 struct amdvi_softc *softc;
766 softc = device_get_softc(dev);
767 off = softc->cap_off;
770 * Section 3.7.1 of IOMMU sepc rev 2.0.
771 * Read capability from device.
773 cap = amdvi_pci_read(softc, off);
775 /* Make sure capability type[18:16] is 3. */
776 KASSERT((((cap >> 16) & 0x7) == 0x3),
777 ("Not a IOMMU capability 0x%x@0x%x", cap, off));
/* Keep only bits [31:24] of the header; these are the feature flags printed below. */
779 softc->pci_cap = cap >> 24;
780 device_printf(softc->dev, "PCI cap 0x%x@0x%x feature:%b\n",
781 cap, off, softc->pci_cap,
782 "\020\001IOTLB\002HT\003NPCache\004EFR");
784 /* IOMMU spec Rev 2.0, section 3.7.2.1 */
785 softc->pci_efr = softc->ctrl->ex_feature;
786 if (softc->pci_efr) {
/*
 * NOTE(review): in the %b bit-name string below, "\008" and "\009" are
 * not octal escapes (8 and 9 are not octal digits).  The compiler reads
 * them as "\00" followed by the character '8' or '9', which embeds a NUL
 * and ends the string after "GASup".  The intended bit numbers were
 * presumably "\010" and "\011" -- confirm and fix.
 */
787 device_printf(softc->dev, "PCI extended Feature:%b\n",
789 "\020\001PreFSup\002PPRSup\003XTSup\004NXSup\006IASup"
790 "\007GASup\008HESup\009PCSup");
/* Two-bit fields at EFR bits 10, 12 and 14; five-bit field at bit 32, printed plus one. */
791 device_printf(softc->dev,
792 "PCI HATS = %d GATS = %d GLXSup = %d, max PASID: 0x%x ",
793 (int)((softc->pci_efr >> 10) & 0x3),
794 (int)((softc->pci_efr >> 12) & 0x3),
795 (int)((softc->pci_efr >> 14) & 0x3),
796 (int)((softc->pci_efr >> 32) & 0x1F) + 1);
803 amdvi_event_intr(void *arg)
805 struct amdvi_softc *softc;
806 struct amdvi_ctrl *ctrl;
808 softc = (struct amdvi_softc *)arg;
810 device_printf(softc->dev, "EVT INTR %ld Status:0x%x"
811 " EVT Head:0x%x Tail:0x%x]\n", softc->event_intr_cnt++,
812 ctrl->status, ctrl->evt_head, ctrl->evt_tail);
813 printf(" [CMD Total 0x%lx] Tail:0x%x, Head:0x%x.\n",
814 softc->total_cmd, ctrl->cmd_tail, ctrl->cmd_head);
816 amdvi_print_events(softc);
820 amdvi_free_evt_intr_res(device_t dev)
823 struct amdvi_softc *softc;
825 softc = device_get_softc(dev);
826 if (softc->event_tag != NULL) {
827 bus_teardown_intr(dev, softc->event_res, softc->event_tag);
829 if (softc->event_res != NULL) {
830 bus_release_resource(dev, SYS_RES_IRQ, softc->event_rid,
833 bus_delete_resource(dev, SYS_RES_IRQ, softc->event_rid);
834 PCIB_RELEASE_MSI(device_get_parent(device_get_parent(dev)),
835 dev, 1, &softc->event_irq);
839 amdvi_alloc_intr_resources(struct amdvi_softc *softc)
843 uint32_t msi_data, temp;
847 pcib = device_get_parent(device_get_parent(dev));
848 softc->event_irq = -1;
849 softc->event_rid = 0;
851 * Section 3.7.1 of IOMMU rev 2.0. With MSI, there is only one
852 * interrupt. XXX: Enable MSI/X support.
855 err = PCIB_ALLOC_MSI(pcib, dev, 1, 1, &softc->event_irq);
858 "Couldn't find event MSI IRQ resource.\n");
861 err = bus_set_resource(dev, SYS_RES_IRQ, softc->event_rid,
862 softc->event_irq, 1);
864 device_printf(dev, "Couldn't set event MSI resource.\n");
867 softc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
868 &softc->event_rid, RF_ACTIVE);
869 if (!softc->event_res) {
871 "Unable to allocate event INTR resource.\n");
875 if (bus_setup_intr(dev, softc->event_res,
876 INTR_TYPE_MISC | INTR_MPSAFE, NULL, amdvi_event_intr,
877 softc, &softc->event_tag)) {
878 device_printf(dev, "Fail to setup event intr\n");
879 bus_release_resource(softc->dev, SYS_RES_IRQ,
880 softc->event_rid, softc->event_res);
881 softc->event_res = NULL;
885 bus_describe_intr(dev, softc->event_res, softc->event_tag,
888 err = amdvi_find_pci_cap(softc, PCIY_MSI, &msi_off);
890 device_printf(dev, "Couldn't find MSI capability, err = %d.\n",
895 err = PCIB_MAP_MSI(pcib, dev, softc->event_irq, &msi_addr,
899 "Event interrupt config failed, err=%d.\n",
901 amdvi_free_evt_intr_res(softc->dev);
906 amdvi_pci_write(softc, msi_off + PCIR_MSI_ADDR, msi_addr);
907 amdvi_pci_write(softc, msi_off + PCIR_MSI_ADDR_HIGH,
909 amdvi_pci_write(softc, msi_off + PCIR_MSI_DATA_64BIT, msi_data);
911 /* Now enable MSI interrupt. */
912 temp = amdvi_pci_read(softc, msi_off);
913 temp |= (PCIM_MSICTRL_MSI_ENABLE << 16); /* MSI enable. */
914 amdvi_pci_write(softc, msi_off, temp);
/*
 * Log every device configuration range attached to this IOMMU: the
 * start/end device IDs, the decoded config bits and whether ATS is
 * enabled for the range.
 */
921 amdvi_print_dev_cap(struct amdvi_softc *softc)
923 struct ivhd_dev_cfg *cfg;
926 cfg = softc->dev_cfg;
927 for (i = 0; i < softc->dev_cfg_cnt; i++) {
/*
 * NOTE(review): "\008" in the %b bit-name string is not an octal escape
 * (8 is not an octal digit); it is read as "\00" followed by '8', so the
 * string ends after "LINT0" and LINT1 is never decoded.  The intended
 * escape was presumably "\010" -- confirm and fix.
 */
928 device_printf(softc->dev, "device [0x%x - 0x%x]"
929 "config:%b%s\n", cfg->start_id, cfg->end_id,
931 "\020\001INIT\002ExtInt\003NMI"
932 "\007LINT0\008LINT1",
933 cfg->enable_ats ? "ATS enabled" : "");
939 amdvi_handle_sysctl(SYSCTL_HANDLER_ARGS)
941 struct amdvi_softc *softc;
942 int result, type, error = 0;
944 softc = (struct amdvi_softc *)arg1;
949 result = softc->ctrl->cmd_head;
950 error = sysctl_handle_int(oidp, &result, 0,
954 result = softc->ctrl->cmd_tail;
955 error = sysctl_handle_int(oidp, &result, 0,
959 result = softc->ctrl->evt_head;
960 error = sysctl_handle_int(oidp, &result, 0,
964 result = softc->ctrl->evt_tail;
965 error = sysctl_handle_int(oidp, &result, 0,
970 device_printf(softc->dev, "Unknown sysctl:%d\n", type);
/*
 * Register the per-device sysctl nodes under the device's sysctl tree:
 * two counters, three RID values read directly from the softc, and four
 * ring-pointer values served by amdvi_handle_sysctl().
 */
977 amdvi_add_sysctl(struct amdvi_softc *softc)
979 struct sysctl_oid_list *child;
980 struct sysctl_ctx_list *ctx;
984 ctx = device_get_sysctl_ctx(dev);
985 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
987 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "event_intr_count", CTLFLAG_RD,
988 &softc->event_intr_cnt, "Event interrupt count");
989 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "command_count", CTLFLAG_RD,
990 &softc->total_cmd, "Command submitted count");
/*
 * NOTE(review): the (int *) casts below assume the RID fields are
 * int-sized; a narrower field would make the sysctl read adjacent
 * memory -- confirm against struct amdvi_softc.
 */
991 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "pci_rid", CTLFLAG_RD,
992 (int *)&softc->pci_rid, 0,
994 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "start_dev_rid", CTLFLAG_RD,
995 (int *)&softc->start_dev_rid, 0,
996 "Start of device under this IOMMU");
997 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "end_dev_rid", CTLFLAG_RD,
998 (int *)&softc->end_dev_rid, 0,
999 "End of device under this IOMMU");
/*
 * The integer after `softc' (0..3) is handed to amdvi_handle_sysctl();
 * presumably it selects cmd_head, cmd_tail, evt_head and evt_tail in
 * that order -- confirm against the handler's switch.
 */
1000 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_head",
1001 CTLTYPE_UINT | CTLFLAG_RD, softc, 0,
1002 amdvi_handle_sysctl, "IU", "Command head");
1003 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_tail",
1004 CTLTYPE_UINT | CTLFLAG_RD, softc, 1,
1005 amdvi_handle_sysctl, "IU", "Command tail");
/*
 * NOTE(review): the two event nodes below reuse the descriptions
 * "Command head" / "Command tail"; they look copy/pasted and should
 * presumably read "Event head" / "Event tail".
 */
1006 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_head",
1007 CTLTYPE_UINT | CTLFLAG_RD, softc, 2,
1008 amdvi_handle_sysctl, "IU", "Command head");
1009 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_tail",
1010 CTLTYPE_UINT | CTLFLAG_RD, softc, 3,
1011 amdvi_handle_sysctl, "IU", "Command tail");
1015 amdvi_setup_hw(struct amdvi_softc *softc)
1022 amdvi_hw_enable_iotlb(softc);
1024 amdvi_print_dev_cap(softc);
1026 if ((status = amdvi_print_pci_cap(dev)) != 0) {
1027 device_printf(dev, "PCI capability.\n");
1030 if ((status = amdvi_init_cmd(softc)) != 0) {
1031 device_printf(dev, "Couldn't configure command buffer.\n");
1034 if ((status = amdvi_init_event(softc)) != 0) {
1035 device_printf(dev, "Couldn't configure event buffer.\n");
1038 if ((status = amdvi_init_dte(softc)) != 0) {
1039 device_printf(dev, "Couldn't configure device table.\n");
1042 if ((status = amdvi_alloc_intr_resources(softc)) != 0) {
1045 amdvi_add_sysctl(softc);
1050 amdvi_teardown_hw(struct amdvi_softc *softc)
1057 * Called after disable, h/w is stopped by now, free all the resources.
1059 amdvi_free_evt_intr_res(dev);
1062 free(softc->cmd, M_AMDVI);
1065 free(softc->event, M_AMDVI);
1070 /*********** bhyve interfaces *********************/
1077 if (!amdvi_enable_user && ivhd_count) {
1078 printf("bhyve: Found %d AMD-Vi/IOMMU device(s), "
1079 "use hw.vmm.amdvi_enable=1 to enable pass-through.\n",
/*
 * Hand out the next domain ID from the file-scope counter amdvi_dom_id.
 * Returns the counter's current value truncated to 16 bits and then
 * increments the counter.
 *
 * NOTE(review): the counter is updated without any visible locking;
 * presumably callers are serialized elsewhere -- confirm.
 */
1093 amdvi_domainId(void)
1097 * If we hit maximum domain limit, rollover leaving host
1099 * XXX: make sure that this domain is not used.
/* Rollover check; the action taken on a match is not visible in this view. */
1101 if (amdvi_dom_id == AMDVI_MAX_DOMAIN)
1104 return ((uint16_t)amdvi_dom_id++);
1108 amdvi_do_inv_domain(uint16_t domain_id, bool create)
1110 struct amdvi_softc *softc;
1113 for (i = 0; i < ivhd_count; i++) {
1114 softc = device_get_softc(ivhd_devs[i]);
1115 KASSERT(softc, ("softc is NULL"));
1117 * If not present pages are cached, invalidate page after
1121 if (create && ((softc->pci_cap & AMDVI_PCI_CAP_NPCACHE) == 0))
1124 amdvi_inv_domain(softc, domain_id);
1130 amdvi_create_domain(vm_paddr_t maxaddr)
1132 struct amdvi_domain *dom;
1134 dom = malloc(sizeof(struct amdvi_domain), M_AMDVI, M_ZERO | M_WAITOK);
1135 dom->id = amdvi_domainId();
1136 //dom->maxaddr = maxaddr;
1137 #ifdef AMDVI_DEBUG_CMD
1138 printf("Created domain #%d\n", dom->id);
1141 * The host domain (#0) gets no translation table unless host_ptp is set.
1143 if (dom->id || amdvi_host_ptp)
1144 dom->ptp = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO);
1146 dom->ptp_level = amdvi_ptp_level;
1148 amdvi_do_inv_domain(dom->id, true);
1149 SLIST_INSERT_HEAD(&dom_head, dom, next);
/*
 * Walk one page-table page (`ptp', NPTEPG entries) at the given level
 * and recurse into each present entry's next-level table with
 * level - 1.  Entries without AMDVI_PT_PRESENT and super-page entries
 * are tested for separately.  The lines that release memory are not
 * visible in this view.
 */
1155 amdvi_free_ptp(uint64_t *ptp, int level)
1162 for (i = 0; i < NPTEPG ; i++) {
1163 if ((ptp[i] & AMDVI_PT_PRESENT) == 0)
1165 /* XXX: Add super-page or PTE mapping > 4KB. */
1167 /* Super-page mapping. */
1168 if (AMDVI_PD_SUPER(ptp[i]))
/* The entry holds a physical address; convert it through the direct map to recurse. */
1172 amdvi_free_ptp((uint64_t *)PHYS_TO_DMAP(ptp[i]
1173 & AMDVI_PT_MASK), level - 1);
1181 amdvi_destroy_domain(void *arg)
1183 struct amdvi_domain *domain;
1185 domain = (struct amdvi_domain *)arg;
1186 KASSERT(domain, ("domain is NULL"));
1187 #ifdef AMDVI_DEBUG_CMD
1188 printf("Destroying domain %d\n", domain->id);
1191 amdvi_free_ptp(domain->ptp, domain->ptp_level);
1193 amdvi_do_inv_domain(domain->id, false);
1194 SLIST_REMOVE(&dom_head, domain, amdvi_domain, next);
1195 free(domain, M_AMDVI);
/*
 * Install (or clear) one translation for `gpa' in the page table rooted
 * at `pt'.  Walks down from the top level, 9 index bits per level,
 * until the level whose span matches `pg_size' (or the 4KB level) is
 * reached, allocating zeroed intermediate tables on the way when
 * `create' is true.  Returns the number of bytes covered by the entry
 * that was written, i.e. 1 << shift.
 *
 * Both `hpa' and `gpa' must be aligned to `pg_size'.
 */
1199 amdvi_set_pt(uint64_t *pt, int level, vm_paddr_t gpa,
1200 vm_paddr_t hpa, uint64_t pg_size, bool create)
1204 const int PT_SHIFT = 9;
1205 const int PT_INDEX_MASK = (1 << PT_SHIFT) - 1; /* Based on PT_SHIFT */
1210 if (hpa & (pg_size - 1)) {
1211 printf("HPA is not size aligned.\n");
/*
 * NOTE(review): this branch checks `gpa' but the message below says
 * "HPA"; presumably it should say "GPA" so the two failures can be told
 * apart in the log.
 */
1214 if (gpa & (pg_size - 1)) {
1215 printf("HPA is not size aligned.\n");
/* Descend while above the 4KB level and the requested size is smaller than this level's span. */
1219 while ((shift > PAGE_SHIFT) && (pg_size < (1UL << shift))) {
1220 index = (gpa >> shift) & PT_INDEX_MASK;
/* Missing intermediate table: allocate one and link it with the next level's number. */
1222 if ((pt[index] == 0) && create) {
1223 page = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO);
1225 pt[index] = pa | AMDVI_PT_PRESENT | AMDVI_PT_RW |
1226 ((level - 1) << AMDVI_PD_LEVEL_SHIFT);
1228 #ifdef AMDVI_DEBUG_PTE
1229 if ((gpa % 0x1000000) == 0)
1230 printf("[level%d, shift = %d]PTE:0x%lx\n",
1231 level, shift, pt[index]);
/*
 * NOTE(review): when `create' is false and pt[index] is 0, the visible
 * lines still follow the entry, which would resolve physical address 0;
 * confirm the elided lines guard against that.
 */
1233 #define PTE2PA(x) ((uint64_t)(x) & AMDVI_PT_MASK)
1234 pa = PTE2PA(pt[index]);
1235 pt = (uint64_t *)PHYS_TO_DMAP(pa);
/* Leaf level: write the final translation with read/write permission. */
1241 index = (gpa >> shift) & PT_INDEX_MASK;
1244 pt[index] = hpa | AMDVI_PT_RW | AMDVI_PT_PRESENT;
1248 #ifdef AMDVI_DEBUG_PTE
1249 if ((gpa % 0x1000000) == 0)
1250 printf("[Last level%d, shift = %d]PTE:0x%lx\n",
1251 level, shift, pt[index]);
1253 return (1ULL << shift);
1257 amdvi_update_mapping(struct amdvi_domain *domain, vm_paddr_t gpa,
1258 vm_paddr_t hpa, uint64_t size, bool create)
1260 uint64_t mapped, *ptp, len;
1263 KASSERT(domain, ("domain is NULL"));
1264 level = domain->ptp_level;
1265 KASSERT(level, ("Page table level is 0"));
1268 KASSERT(ptp, ("PTP is NULL"));
1270 while (mapped < size) {
1271 len = amdvi_set_pt(ptp, level, gpa + mapped, hpa + mapped,
1274 printf("Error: Couldn't map HPA:0x%lx GPA:0x%lx\n",
/*
 * iommu_ops entry point: map a guest-physical range to a host-physical
 * range in the domain passed as `arg'.  Delegates to
 * amdvi_update_mapping() with create == true and returns its result.
 */
1285 amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa,
1288 struct amdvi_domain *domain;
1290 domain = (struct amdvi_domain *)arg;
/*
 * A non-host domain (id != 0) must have a page table.
 * NOTE(review): the message below has no trailing newline.
 */
1292 if (domain->id && !domain->ptp) {
1293 printf("ptp is NULL");
1298 * If host domain is created w/o page table, skip IOMMU page
1302 return (amdvi_update_mapping(domain, gpa, hpa, len, true));
1308 amdvi_destroy_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
1310 struct amdvi_domain *domain;
1312 domain = (struct amdvi_domain *)arg;
1314 * If host domain is created w/o page table, skip IOMMU page
1318 return (amdvi_update_mapping(domain, gpa, 0, len, false));
1323 static struct amdvi_softc *
1324 amdvi_find_iommu(uint16_t devid)
1326 struct amdvi_softc *softc;
1329 for (i = 0; i < ivhd_count; i++) {
1330 softc = device_get_softc(ivhd_devs[i]);
1331 if ((devid >= softc->start_dev_rid) &&
1332 (devid <= softc->end_dev_rid))
1337 * XXX: BIOS bug, device not in IVRS table, assume it's from the first IOMMU.
1339 printf("BIOS bug device(%d.%d.%d) doesn't have IVHD entry.\n",
1340 RID2PCI_STR(devid));
1342 return (device_get_softc(ivhd_devs[0]));
1346 * Set-up device table entry.
1347 * IOMMU spec Rev 2.0, section 3.2.2.2, some of the fields must
1348 * be set concurrently, e.g. read and write bits.
/*
 * Build the device table entry for `devid' in a zeroed local copy and
 * then store the whole entry into amdvi_dte[devid] with one struct
 * assignment.
 *
 * NOTE(review): how `enable' gates the fields below is not visible in
 * this view -- confirm which assignments are conditional on it.
 */
1351 amdvi_set_dte(struct amdvi_domain *domain, uint16_t devid, bool enable)
1353 struct amdvi_softc *softc;
1354 struct amdvi_dte temp;
1356 softc = amdvi_find_iommu(devid);
1357 KASSERT(softc, ("softc is NULL for pci_rid:0x%x\n", devid));
/* Start from an all-zero entry so every field not set below is 0. */
1359 memset(&temp, 0, sizeof(struct amdvi_dte));
1361 #ifdef AMDVI_ATS_ENABLE
1362 /* If IOMMU and device support IOTLB, enable it. */
1363 if (amdvi_dev_support_iotlb(softc, devid) && softc->iotlb)
1364 temp.iotlb_enable = 1;
1367 /* Avoid duplicate I/O faults. */
1368 temp.sup_second_io_fault = 1;
/* Driven by the hw.vmm.amdvi.disable_io_fault tunable. */
1369 temp.sup_all_io_fault = amdvi_disable_io_fault;
1372 temp.domain_id = domain->id;
/* Page-table root as a 4KB frame number. */
1376 temp.pt_base = vtophys(domain->ptp) >> 12;
/*
 * NOTE(review): this uses the global amdvi_ptp_level rather than
 * domain->ptp_level, which amdvi_create_domain() records per domain;
 * the two should hold the same value, but the per-domain field would be
 * the more direct source -- confirm.
 */
1377 temp.pt_level = amdvi_ptp_level;
1380 * XXX: Page table valid[TV] bit must be set even if host domain
1381 * page tables are not enabled.
1384 temp.read_allow = 1;
1385 temp.write_allow = 1;
/*
 * NOTE(review): a struct assignment is not guaranteed to be a single
 * atomic store; the hardware may observe a partially written entry
 * until the DTE is invalidated -- confirm this is acceptable.
 */
1387 amdvi_dte[devid] = temp;
1391 amdvi_inv_device(uint16_t devid)
1393 struct amdvi_softc *softc;
1395 softc = amdvi_find_iommu(devid);
1396 KASSERT(softc, ("softc is NULL"));
1398 amdvi_cmd_inv_dte(softc, devid);
1399 #ifdef AMDVI_ATS_ENABLE
1400 if (amdvi_dev_support_iotlb(softc, devid))
1401 amdvi_cmd_inv_iotlb(softc, devid);
1407 amdvi_add_device(void *arg, uint16_t devid)
1409 struct amdvi_domain *domain;
1411 domain = (struct amdvi_domain *)arg;
1412 KASSERT(domain != NULL, ("domain is NULL"));
1413 #ifdef AMDVI_DEBUG_CMD
1414 printf("Assigning device(%d.%d.%d) to domain:%d\n",
1415 RID2PCI_STR(devid), domain->id);
1417 amdvi_set_dte(domain, devid, true);
1418 amdvi_inv_device(devid);
1422 amdvi_remove_device(void *arg, uint16_t devid)
1424 struct amdvi_domain *domain;
1426 domain = (struct amdvi_domain *)arg;
1427 #ifdef AMDVI_DEBUG_CMD
1428 printf("Remove device(0x%x) from domain:%d\n",
1431 amdvi_set_dte(domain, devid, false);
1432 amdvi_inv_device(devid);
1438 struct amdvi_ctrl *ctrl;
1439 struct amdvi_softc *softc;
1443 for (i = 0; i < ivhd_count; i++) {
1444 softc = device_get_softc(ivhd_devs[i]);
1445 KASSERT(softc, ("softc is NULL\n"));
1447 KASSERT(ctrl, ("ctrl is NULL\n"));
1449 val = ( AMDVI_CTRL_EN |
1452 AMDVI_CTRL_ELOGINT |
1453 AMDVI_CTRL_INV_TO_1S);
1455 if (softc->ivhd_flag & IVHD_FLAG_COH)
1456 val |= AMDVI_CTRL_COH;
1457 if (softc->ivhd_flag & IVHD_FLAG_HTT)
1458 val |= AMDVI_CTRL_HTT;
1459 if (softc->ivhd_flag & IVHD_FLAG_RPPW)
1460 val |= AMDVI_CTRL_RPPW;
1461 if (softc->ivhd_flag & IVHD_FLAG_PPW)
1462 val |= AMDVI_CTRL_PPW;
1463 if (softc->ivhd_flag & IVHD_FLAG_ISOC)
1464 val |= AMDVI_CTRL_ISOC;
1466 ctrl->control = val;
1473 struct amdvi_ctrl *ctrl;
1474 struct amdvi_softc *softc;
1477 for (i = 0; i < ivhd_count; i++) {
1478 softc = device_get_softc(ivhd_devs[i]);
1479 KASSERT(softc, ("softc is NULL\n"));
1481 KASSERT(ctrl, ("ctrl is NULL\n"));
1488 amdvi_inv_tlb(void *arg)
1490 struct amdvi_domain *domain;
1492 domain = (struct amdvi_domain *)arg;
1493 KASSERT(domain, ("domain is NULL"));
1494 amdvi_do_inv_domain(domain->id, false);
1497 struct iommu_ops iommu_ops_amd = {
1502 amdvi_create_domain,
1503 amdvi_destroy_domain,
1504 amdvi_create_mapping,
1505 amdvi_destroy_mapping,
1507 amdvi_remove_device,