From 028d9311cdfb847aa01ea7901658010ac421bd88 Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Wed, 10 Apr 2013 02:12:39 +0000 Subject: [PATCH] Improve PCI BAR emulation: - Respect the MEMEN and PORTEN bits in the command register - Allow the guest to reprogram the address decoded by the BAR Submitted by: Gopakumar T Obtained from: NetApp --- usr.sbin/bhyve/consport.c | 1 + usr.sbin/bhyve/dbgport.c | 1 + usr.sbin/bhyve/inout.c | 56 ++++++-- usr.sbin/bhyve/inout.h | 4 +- usr.sbin/bhyve/mem.c | 53 +++++++- usr.sbin/bhyve/mem.h | 1 + usr.sbin/bhyve/pci_emul.c | 264 ++++++++++++++++++++++++++++++++------ 7 files changed, 320 insertions(+), 60 deletions(-) diff --git a/usr.sbin/bhyve/consport.c b/usr.sbin/bhyve/consport.c index 3915b6d71a2..a31038a9c89 100644 --- a/usr.sbin/bhyve/consport.c +++ b/usr.sbin/bhyve/consport.c @@ -128,6 +128,7 @@ console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, static struct inout_port consport = { "bvmcons", BVM_CONSOLE_PORT, + 1, IOPORT_F_INOUT, console_handler }; diff --git a/usr.sbin/bhyve/dbgport.c b/usr.sbin/bhyve/dbgport.c index 034531c6a09..97a86ef0bab 100644 --- a/usr.sbin/bhyve/dbgport.c +++ b/usr.sbin/bhyve/dbgport.c @@ -105,6 +105,7 @@ dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, static struct inout_port dbgport = { "bvmdbg", BVM_DBG_PORT, + 1, IOPORT_F_INOUT, dbg_handler }; diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c index 5f47a89f31a..29d9f17966b 100644 --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include "inout.h" @@ -41,6 +42,9 @@ SET_DECLARE(inout_port_set, struct inout_port); #define MAX_IOPORTS (1 << 16) +#define VERIFY_IOPORT(port, size) \ + assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS) + static struct { const char *name; int flags; @@ -69,6 +73,23 @@ default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, return (0); } +static void +register_default_iohandler(int start, int size) +{ + struct inout_port iop; + + VERIFY_IOPORT(start, size); + + bzero(&iop, sizeof(iop)); + iop.name = "default"; + iop.port = start; + iop.size = size; + iop.flags = IOPORT_F_INOUT; + iop.handler = default_inout; + + register_inout(&iop); +} + int emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, int strict) @@ -113,17 +134,11 @@ void init_inout(void) { struct inout_port **iopp, *iop; - int i; /* * Set up the default handler for all ports */ - for (i = 0; i < MAX_IOPORTS; i++) { - inout_handlers[i].name = "default"; - inout_handlers[i].flags = IOPORT_F_IN | IOPORT_F_OUT; - inout_handlers[i].handler = default_inout; - inout_handlers[i].arg = NULL; - } + register_default_iohandler(0, MAX_IOPORTS); /* * Overwrite with specified handlers @@ -141,11 +156,28 @@ init_inout(void) int register_inout(struct inout_port *iop) { - assert(iop->port < MAX_IOPORTS); - inout_handlers[iop->port].name = iop->name; - inout_handlers[iop->port].flags = iop->flags; - inout_handlers[iop->port].handler = iop->handler; - inout_handlers[iop->port].arg = iop->arg; + int i; + + VERIFY_IOPORT(iop->port, iop->size); + + for (i = iop->port; i < iop->port + iop->size; i++) { + inout_handlers[i].name = iop->name; + inout_handlers[i].flags = iop->flags; + inout_handlers[i].handler = iop->handler; + inout_handlers[i].arg = iop->arg; + } + + return (0); +} + +int +unregister_inout(struct inout_port *iop) +{ + + VERIFY_IOPORT(iop->port, iop->size); + assert(inout_handlers[iop->port].name == iop->name); + + register_default_iohandler(iop->port, iop->size); return (0); } diff --git a/usr.sbin/bhyve/inout.h b/usr.sbin/bhyve/inout.h index a73b78d8080..4de58d10f58 100644 --- a/usr.sbin/bhyve/inout.h +++ b/usr.sbin/bhyve/inout.h @@ -39,6 +39,7 @@ typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port, struct inout_port { const char *name; int port; + int size; int flags; inout_func_t handler; void *arg; @@ -51,6 +52,7 @@ struct inout_port { static struct inout_port __CONCAT(__inout_port, __LINE__) = { \ #name, \ (port), \ + 1, \ (flags), \ (handler), \ 0 \ @@ -61,7 +63,7 @@ void init_inout(void); int emulate_inout(struct vmctx *, int vcpu, int in, int port, int bytes, uint32_t *eax, int strict); int register_inout(struct inout_port *iop); - +int unregister_inout(struct inout_port *iop); void init_bvmcons(void); #endif /* _INOUT_H_ */ diff --git a/usr.sbin/bhyve/mem.c b/usr.sbin/bhyve/mem.c index c2b5ab4f401..049838b34aa 100644 --- a/usr.sbin/bhyve/mem.c +++ b/usr.sbin/bhyve/mem.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include "mem.h" @@ -71,6 +72,8 @@ RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback; */ static struct mmio_rb_range *mmio_hint[VM_MAXCPU]; +static pthread_rwlock_t rwlock; + static int mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b) { @@ -125,10 +128,12 @@ mmio_rb_dump(struct mmio_rb_tree *rbt) { struct mmio_rb_range *np; + pthread_rwlock_rdlock(&rwlock); RB_FOREACH(np, mmio_rb_tree, rbt) { printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end, np->mr_param.name); } + pthread_rwlock_unlock(&rwlock); } #endif @@ -161,7 +166,8 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie) { struct mmio_rb_range *entry; int err; - + + pthread_rwlock_rdlock(&rwlock); /* * First check the per-vCPU cache */ @@ -173,10 +179,11 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie) entry = NULL; if (entry == NULL) { - if (!mmio_rb_lookup(&mmio_rb_root, paddr, &entry)) { + if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) { /* Update the per-vCPU cache */ mmio_hint[vcpu] = entry; } else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) { + pthread_rwlock_unlock(&rwlock); return (ESRCH); } } @@ -184,25 +191,29 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie) assert(entry != NULL); err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, mem_read, mem_write, &entry->mr_param); + pthread_rwlock_unlock(&rwlock); + return (err); } static int register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp) { - struct mmio_rb_range *mrp; + struct mmio_rb_range *entry, *mrp; int err; err = 0; mrp = malloc(sizeof(struct mmio_rb_range)); - + if (mrp != NULL) { mrp->mr_param = *memp; mrp->mr_base = memp->base; mrp->mr_end = memp->base + memp->size - 1; - - err = mmio_rb_add(rbt, mrp); + pthread_rwlock_wrlock(&rwlock); + if (mmio_rb_lookup(rbt, memp->base, &entry) != 0) + err = mmio_rb_add(rbt, mrp); + pthread_rwlock_unlock(&rwlock); if (err) free(mrp); } else @@ -225,10 +236,40 @@ register_mem_fallback(struct mem_range *memp) return (register_mem_int(&mmio_rb_fallback, memp)); } +int +unregister_mem(struct mem_range *memp) +{ + struct mem_range *mr; + struct mmio_rb_range *entry = NULL; + int err, i; + + pthread_rwlock_wrlock(&rwlock); + err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry); + if (err == 0) { + mr = &entry->mr_param; + assert(mr->name == memp->name); + assert(mr->base == memp->base && mr->size == memp->size); + RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry); + + /* flush Per-vCPU cache */ + for (i=0; i < VM_MAXCPU; i++) { + if (mmio_hint[i] == entry) + mmio_hint[i] = NULL; + } + } + pthread_rwlock_unlock(&rwlock); + + if (entry) + free(entry); + + return (err); +} + void init_mem(void) { RB_INIT(&mmio_rb_root); RB_INIT(&mmio_rb_fallback); + pthread_rwlock_init(&rwlock, NULL); } diff --git a/usr.sbin/bhyve/mem.h b/usr.sbin/bhyve/mem.h index 05e7508f312..264bff9e82b 100644 --- a/usr.sbin/bhyve/mem.h +++ b/usr.sbin/bhyve/mem.h @@ -54,5 +54,6 @@ int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie); int register_mem(struct mem_range *memp); int register_mem_fallback(struct mem_range *memp); +int unregister_mem(struct mem_range *memp); #endif /* _MEM_H_ */ diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index b5e923c1595..713e1976bf5 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -38,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -353,20 +355,150 @@ pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); } +/* + * Register (or unregister) the MMIO or I/O region associated with the BAR + * register 'idx' of an emulated pci device. + */ +static void +modify_bar_registration(struct pci_devinst *pi, int idx, int registration) +{ + int error; + struct inout_port iop; + struct mem_range mr; + + switch (pi->pi_bar[idx].type) { + case PCIBAR_IO: + bzero(&iop, sizeof(struct inout_port)); + iop.name = pi->pi_name; + iop.port = pi->pi_bar[idx].addr; + iop.size = pi->pi_bar[idx].size; + if (registration) { + iop.flags = IOPORT_F_INOUT; + iop.handler = pci_emul_io_handler; + iop.arg = pi; + error = register_inout(&iop); + } else + error = unregister_inout(&iop); + break; + case PCIBAR_MEM32: + case PCIBAR_MEM64: + bzero(&mr, sizeof(struct mem_range)); + mr.name = pi->pi_name; + mr.base = pi->pi_bar[idx].addr; + mr.size = pi->pi_bar[idx].size; + if (registration) { + mr.flags = MEM_F_RW; + mr.handler = pci_emul_mem_handler; + mr.arg1 = pi; + mr.arg2 = idx; + error = register_mem(&mr); + } else + error = unregister_mem(&mr); + break; + default: + error = EINVAL; + break; + } + assert(error == 0); +} + +static void +unregister_bar(struct pci_devinst *pi, int idx) +{ + + modify_bar_registration(pi, idx, 0); +} + +static void +register_bar(struct pci_devinst *pi, int idx) +{ + + modify_bar_registration(pi, idx, 1); +} + +/* Are we decoding i/o port accesses for the emulated pci device? */ +static int +porten(struct pci_devinst *pi) +{ + uint16_t cmd; + + cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); + + return (cmd & PCIM_CMD_PORTEN); +} + +/* Are we decoding memory accesses for the emulated pci device? */ +static int +memen(struct pci_devinst *pi) +{ + uint16_t cmd; + + cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); + + return (cmd & PCIM_CMD_MEMEN); +} + +/* + * Update the MMIO or I/O address that is decoded by the BAR register. + * + * If the pci device has enabled the address space decoding then intercept + * the address range decoded by the BAR register. + */ +static void +update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) +{ + int decode; + + if (pi->pi_bar[idx].type == PCIBAR_IO) + decode = porten(pi); + else + decode = memen(pi); + + if (decode) + unregister_bar(pi, idx); + + switch (type) { + case PCIBAR_IO: + case PCIBAR_MEM32: + pi->pi_bar[idx].addr = addr; + break; + case PCIBAR_MEM64: + pi->pi_bar[idx].addr &= ~0xffffffffUL; + pi->pi_bar[idx].addr |= addr; + break; + case PCIBAR_MEMHI64: + pi->pi_bar[idx].addr &= 0xffffffff; + pi->pi_bar[idx].addr |= addr; + break; + default: + assert(0); + } + + if (decode) + register_bar(pi, idx); +} + int pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, enum pcibar_type type, uint64_t size) { - int i, error; + int error; uint64_t *baseptr, limit, addr, mask, lobits, bar; - struct inout_port iop; - struct mem_range memp; assert(idx >= 0 && idx <= PCI_BARMAX); if ((size & (size - 1)) != 0) size = 1UL << flsl(size); /* round up to a power of 2 */ + /* Enforce minimum BAR sizes required by the PCI standard */ + if (type == PCIBAR_IO) { + if (size < 4) + size = 4; + } else { + if (size < 16) + size = 16; + } + switch (type) { case PCIBAR_NONE: baseptr = NULL; @@ -443,30 +575,7 @@ pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); } - /* add a handler to intercept accesses to the I/O bar */ - if (type == PCIBAR_IO) { - iop.name = pdi->pi_name; - iop.flags = IOPORT_F_INOUT; - iop.handler = pci_emul_io_handler; - iop.arg = pdi; - - for (i = 0; i < size; i++) { - iop.port = addr + i; - register_inout(&iop); - } - } else if (type == PCIBAR_MEM32 || type == PCIBAR_MEM64) { - /* add memory bar intercept handler */ - memp.name = pdi->pi_name; - memp.flags = MEM_F_RW; - memp.base = addr; - memp.size = size; - memp.handler = pci_emul_mem_handler; - memp.arg1 = pdi; - memp.arg2 = idx; - - error = register_mem(&memp); - assert(error == 0); - } + register_bar(pdi, idx); return (0); } @@ -1101,6 +1210,62 @@ pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, } INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_OUT, pci_emul_cfgaddr); +static uint32_t +bits_changed(uint32_t old, uint32_t new, uint32_t mask) +{ + + return ((old ^ new) & mask); +} + +static void +pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes) +{ + int i; + uint16_t old; + + /* + * The command register is at an offset of 4 bytes and thus the + * guest could write 1, 2 or 4 bytes starting at this offset. + */ + + old = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ + CFGWRITE(pi, PCIR_COMMAND, new, bytes); /* update config */ + new = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */ + + /* + * If the MMIO or I/O address space decoding has changed then + * register/unregister all BARs that decode that address space. + */ + for (i = 0; i < PCI_BARMAX; i++) { + switch (pi->pi_bar[i].type) { + case PCIBAR_NONE: + case PCIBAR_MEMHI64: + break; + case PCIBAR_IO: + /* I/O address space decoding changed? */ + if (bits_changed(old, new, PCIM_CMD_PORTEN)) { + if (porten(pi)) + register_bar(pi, i); + else + unregister_bar(pi, i); + } + break; + case PCIBAR_MEM32: + case PCIBAR_MEM64: + /* MMIO address space decoding changed? */ + if (bits_changed(old, new, PCIM_CMD_MEMEN)) { + if (memen(pi)) + register_bar(pi, i); + else + unregister_bar(pi, i); + } + break; + default: + assert(0); + } + } +} + static int pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) @@ -1108,7 +1273,7 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, struct pci_devinst *pi; struct pci_devemu *pe; int coff, idx, needcfg; - uint64_t mask, bar; + uint64_t addr, bar, mask; assert(bytes == 1 || bytes == 2 || bytes == 4); @@ -1175,33 +1340,48 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, if (bytes != 4 || (coff & 0x3) != 0) return (0); idx = (coff - PCIR_BAR(0)) / 4; + mask = ~(pi->pi_bar[idx].size - 1); switch (pi->pi_bar[idx].type) { case PCIBAR_NONE: - bar = 0; + pi->pi_bar[idx].addr = bar = 0; break; case PCIBAR_IO: - mask = ~(pi->pi_bar[idx].size - 1); - mask &= PCIM_BAR_IO_BASE; - bar = (*eax & mask) | PCIM_BAR_IO_SPACE; + addr = *eax & mask; + addr &= 0xffff; + bar = addr | PCIM_BAR_IO_SPACE; + /* + * Register the new BAR value for interception + */ + if (addr != pi->pi_bar[idx].addr) { + update_bar_address(pi, addr, idx, + PCIBAR_IO); + } break; case PCIBAR_MEM32: - mask = ~(pi->pi_bar[idx].size - 1); - mask &= PCIM_BAR_MEM_BASE; - bar = *eax & mask; + addr = bar = *eax & mask; bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; + if (addr != pi->pi_bar[idx].addr) { + update_bar_address(pi, addr, idx, + PCIBAR_MEM32); + } break; case PCIBAR_MEM64: - mask = ~(pi->pi_bar[idx].size - 1); - mask &= PCIM_BAR_MEM_BASE; - bar = *eax & mask; + addr = bar = *eax & mask; bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | PCIM_BAR_MEM_PREFETCH; + if (addr != (uint32_t)pi->pi_bar[idx].addr) { + update_bar_address(pi, addr, idx, + PCIBAR_MEM64); + } break; case PCIBAR_MEMHI64: mask = ~(pi->pi_bar[idx - 1].size - 1); - mask &= PCIM_BAR_MEM_BASE; - bar = ((uint64_t)*eax << 32) & mask; - bar = bar >> 32; + addr = ((uint64_t)*eax << 32) & mask; + bar = addr >> 32; + if (bar != pi->pi_bar[idx - 1].addr >> 32) { + update_bar_address(pi, addr, idx - 1, + PCIBAR_MEMHI64); + } break; default: assert(0); @@ -1210,6 +1390,8 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, } else if (pci_emul_iscap(pi, coff)) { pci_emul_capwrite(pi, coff, bytes, *eax); + } else if (coff == PCIR_COMMAND) { + pci_emul_cmdwrite(pi, *eax, bytes); } else { CFGWRITE(pi, coff, *eax, bytes); } -- 2.45.0