From 16c157baab9767e239718b67e137c81c527b66b6 Mon Sep 17 00:00:00 2001 From: mav Date: Thu, 18 Aug 2016 10:39:00 +0000 Subject: [PATCH] MFC r302484: NewBus'ify NTB subsystem. This follows NTB subsystem modularization in Linux, tuning it to FreeBSD native NewBus interfaces. This change makes it possible to support different types of hardware with different drivers, to support multiple NTB instances in a system, to use the ntb_transport module for needs other than if_ntb, etc. Sponsored by: iXsystems, Inc. git-svn-id: svn://svn.freebsd.org/base/stable/10@304380 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- sys/conf/files.amd64 | 5 +- sys/conf/files.i386 | 5 +- sys/dev/ntb/if_ntb/if_ntb.c | 1550 ++---------------------- sys/dev/ntb/ntb.c | 43 + sys/dev/ntb/ntb.h | 37 + sys/dev/ntb/ntb_hw/ntb_hw.c | 654 ++++------ sys/dev/ntb/ntb_hw/ntb_hw.h | 125 -- sys/dev/ntb/ntb_hw/ntb_regs.h | 3 + sys/dev/ntb/ntb_if.m | 497 ++++++++ sys/dev/ntb/ntb_transport.c | 1496 +++++++++++++++++++++++ sys/dev/ntb/ntb_transport.h | 60 + sys/modules/ntb/Makefile | 2 +- sys/modules/ntb/ntb/Makefile | 11 + sys/modules/ntb/ntb_hw/Makefile | 4 +- sys/modules/ntb/ntb_transport/Makefile | 11 + 15 files changed, 2498 insertions(+), 2005 deletions(-) create mode 100644 sys/dev/ntb/ntb.c create mode 100644 sys/dev/ntb/ntb.h delete mode 100644 sys/dev/ntb/ntb_hw/ntb_hw.h create mode 100644 sys/dev/ntb/ntb_if.m create mode 100644 sys/dev/ntb/ntb_transport.c create mode 100644 sys/dev/ntb/ntb_transport.h create mode 100644 sys/modules/ntb/ntb/Makefile create mode 100644 sys/modules/ntb/ntb_transport/Makefile diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 484355aee..e0bd62b08 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -285,7 +285,10 @@ dev/lindev/full.c optional lindev dev/lindev/lindev.c optional lindev dev/nfe/if_nfe.c optional nfe pci dev/ntb/if_ntb/if_ntb.c optional if_ntb -dev/ntb/ntb_hw/ntb_hw.c optional if_ntb ntb_hw +dev/ntb/ntb_transport.c optional if_ntb +dev/ntb/ntb.c optional if_ntb 
| ntb_hw +dev/ntb/ntb_if.m optional if_ntb | ntb_hw +dev/ntb/ntb_hw/ntb_hw.c optional ntb_hw dev/nvd/nvd.c optional nvd nvme dev/nve/if_nve.c optional nve pci dev/nvme/nvme.c optional nvme diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 56ce492b8..1810bfb03 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -281,7 +281,10 @@ dev/mse/mse.c optional mse dev/mse/mse_isa.c optional mse isa dev/nfe/if_nfe.c optional nfe pci dev/ntb/if_ntb/if_ntb.c optional if_ntb -dev/ntb/ntb_hw/ntb_hw.c optional if_ntb | ntb_hw +dev/ntb/ntb_transport.c optional if_ntb +dev/ntb/ntb.c optional if_ntb | ntb_hw +dev/ntb/ntb_if.m optional if_ntb | ntb_hw +dev/ntb/ntb_hw/ntb_hw.c optional ntb_hw dev/nvd/nvd.c optional nvd nvme dev/nve/if_nve.c optional nve pci dev/nvme/nvme.c optional nvme diff --git a/sys/dev/ntb/if_ntb/if_ntb.c b/sys/dev/ntb/if_ntb/if_ntb.c index 3d74c972f..37a3afaa9 100644 --- a/sys/dev/ntb/if_ntb/if_ntb.c +++ b/sys/dev/ntb/if_ntb/if_ntb.c @@ -1,4 +1,5 @@ /*- + * Copyright (c) 2016 Alexander Motin * Copyright (C) 2013 Intel Corporation * Copyright (C) 2015 EMC Corporation * All rights reserved. @@ -25,25 +26,28 @@ * SUCH DAMAGE. */ +/* + * The Non-Transparent Bridge (NTB) is a device that allows you to connect + * two or more systems using PCI-e links, providing remote memory access. + * + * This module contains a driver for a simulated Ethernet device, using the + * underlying NTB Transport device. + * + * NOTE: Much of the code in this module is shared with Linux. Any patches may + * be picked up and redistributed in Linux with a dual GPL/BSD license. 
+ */ + #include __FBSDID("$FreeBSD$"); #include #include #include -#include #include -#include #include -#include -#include #include -#include -#include #include #include -#include -#include #include #include @@ -52,254 +56,24 @@ __FBSDID("$FreeBSD$"); #include #include -#include -#include - #include -#include -#include - -#include -#include - -#include "../ntb_hw/ntb_hw.h" - -/* - * The Non-Transparent Bridge (NTB) is a device on some Intel processors that - * allows you to connect two systems using a PCI-e link. - * - * This module contains a protocol for sending and receiving messages, and - * exposes that protocol through a simulated ethernet device called ntb. - * - * NOTE: Much of the code in this module is shared with Linux. Any patches may - * be picked up and redistributed in Linux with a dual GPL/BSD license. - */ -#define QP_SETSIZE 64 -BITSET_DEFINE(_qpset, QP_SETSIZE); -#define test_bit(pos, addr) BIT_ISSET(QP_SETSIZE, (pos), (addr)) -#define set_bit(pos, addr) BIT_SET(QP_SETSIZE, (pos), (addr)) -#define clear_bit(pos, addr) BIT_CLR(QP_SETSIZE, (pos), (addr)) -#define ffs_bit(addr) BIT_FFS(QP_SETSIZE, (addr)) +#include "../ntb_transport.h" #define KTR_NTB KTR_SPARE3 -#define NTB_TRANSPORT_VERSION 4 -#define NTB_RX_MAX_PKTS 64 -#define NTB_RXQ_SIZE 300 - -enum ntb_link_event { - NTB_LINK_DOWN = 0, - NTB_LINK_UP, -}; - -static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb"); - -static unsigned g_if_ntb_debug_level; -TUNABLE_INT("hw.if_ntb.debug_level", &g_if_ntb_debug_level); -SYSCTL_UINT(_hw_if_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN, - &g_if_ntb_debug_level, 0, "if_ntb log level -- higher is more verbose"); -#define ntb_printf(lvl, ...) 
do { \ - if ((lvl) <= g_if_ntb_debug_level) { \ - if_printf(nt->ifp, __VA_ARGS__); \ - } \ -} while (0) - -static unsigned transport_mtu = IP_MAXPACKET + ETHER_HDR_LEN + ETHER_CRC_LEN; - -static uint64_t max_mw_size; -TUNABLE_QUAD("hw.if_ntb.max_mw_size", &max_mw_size); -SYSCTL_UQUAD(_hw_if_ntb, OID_AUTO, max_mw_size, CTLFLAG_RDTUN, &max_mw_size, 0, - "If enabled (non-zero), limit the size of large memory windows. " - "Both sides of the NTB MUST set the same value here."); - -static unsigned max_num_clients; -TUNABLE_INT("hw.if_ntb.max_num_clients", &max_num_clients); -SYSCTL_UINT(_hw_if_ntb, OID_AUTO, max_num_clients, CTLFLAG_RDTUN, - &max_num_clients, 0, "Maximum number of NTB transport clients. " - "0 (default) - use all available NTB memory windows; " - "positive integer N - Limit to N memory windows."); - -static unsigned enable_xeon_watchdog; -TUNABLE_INT("hw.if_ntb.enable_xeon_watchdog", &enable_xeon_watchdog); -SYSCTL_UINT(_hw_if_ntb, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN, - &enable_xeon_watchdog, 0, "If non-zero, write a register every second to " - "keep a watchdog from tearing down the NTB link"); - -STAILQ_HEAD(ntb_queue_list, ntb_queue_entry); - -typedef uint32_t ntb_q_idx_t; - -struct ntb_queue_entry { - /* ntb_queue list reference */ - STAILQ_ENTRY(ntb_queue_entry) entry; - - /* info on data to be transferred */ - void *cb_data; - void *buf; - uint32_t len; - uint32_t flags; - - struct ntb_transport_qp *qp; - struct ntb_payload_header *x_hdr; - ntb_q_idx_t index; -}; - -struct ntb_rx_info { - ntb_q_idx_t entry; -}; - -struct ntb_transport_qp { - struct ntb_transport_ctx *transport; - struct ntb_softc *ntb; - - void *cb_data; - - bool client_ready; - volatile bool link_is_up; - uint8_t qp_num; /* Only 64 QPs are allowed. 
0-63 */ - - struct ntb_rx_info *rx_info; - struct ntb_rx_info *remote_rx_info; - - void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - struct ntb_queue_list tx_free_q; - struct mtx ntb_tx_free_q_lock; - caddr_t tx_mw; - bus_addr_t tx_mw_phys; - ntb_q_idx_t tx_index; - ntb_q_idx_t tx_max_entry; - uint64_t tx_max_frame; - - void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - struct ntb_queue_list rx_post_q; - struct ntb_queue_list rx_pend_q; - /* ntb_rx_q_lock: synchronize access to rx_XXXX_q */ - struct mtx ntb_rx_q_lock; - struct task rx_completion_task; - struct task rxc_db_work; - caddr_t rx_buff; - ntb_q_idx_t rx_index; - ntb_q_idx_t rx_max_entry; - uint64_t rx_max_frame; - - void (*event_handler)(void *data, enum ntb_link_event status); - struct callout link_work; - struct callout queue_full; - struct callout rx_full; - - uint64_t last_rx_no_buf; - - /* Stats */ - uint64_t rx_bytes; - uint64_t rx_pkts; - uint64_t rx_ring_empty; - uint64_t rx_err_no_buf; - uint64_t rx_err_oflow; - uint64_t rx_err_ver; - uint64_t tx_bytes; - uint64_t tx_pkts; - uint64_t tx_ring_full; - uint64_t tx_err_no_buf; -}; - -struct ntb_queue_handlers { - void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - void (*event_handler)(void *data, enum ntb_link_event status); -}; - -struct ntb_transport_mw { - vm_paddr_t phys_addr; - size_t phys_size; - size_t xlat_align; - size_t xlat_align_size; - bus_addr_t addr_limit; - /* Tx buff is off vbase / phys_addr */ - caddr_t vbase; - size_t xlat_size; - size_t buff_size; - /* Rx buff is off virt_addr / dma_addr */ - caddr_t virt_addr; - bus_addr_t dma_addr; -}; - -struct ntb_transport_ctx { - struct ntb_softc *ntb; +struct ntb_net_ctx { + device_t *dev; struct ifnet *ifp; - struct ntb_transport_mw mw_vec[NTB_MAX_NUM_MW]; - struct ntb_transport_qp 
*qp_vec; - struct _qpset qp_bitmap; - struct _qpset qp_bitmap_free; - unsigned mw_count; - unsigned qp_count; - volatile bool link_is_up; - struct callout link_work; - struct callout link_watchdog; - struct task link_cleanup; - uint64_t bufsize; + struct ntb_transport_qp *qp; u_char eaddr[ETHER_ADDR_LEN]; struct mtx tx_lock; - struct mtx rx_lock; - - /* The hardcoded single queuepair in ntb_setup_interface() */ - struct ntb_transport_qp *qp; -}; - -static struct ntb_transport_ctx net_softc; - -enum { - IF_NTB_DESC_DONE_FLAG = 1 << 0, - IF_NTB_LINK_DOWN_FLAG = 1 << 1, -}; - -struct ntb_payload_header { - ntb_q_idx_t ver; - uint32_t len; - uint32_t flags; -}; - -enum { - /* - * The order of this enum is part of the if_ntb remote protocol. Do - * not reorder without bumping protocol version (and it's probably best - * to keep the protocol in lock-step with the Linux NTB driver. - */ - IF_NTB_VERSION = 0, - IF_NTB_QP_LINKS, - IF_NTB_NUM_QPS, - IF_NTB_NUM_MWS, - /* - * N.B.: transport_link_work assumes MW1 enums = MW0 + 2. - */ - IF_NTB_MW0_SZ_HIGH, - IF_NTB_MW0_SZ_LOW, - IF_NTB_MW1_SZ_HIGH, - IF_NTB_MW1_SZ_LOW, - IF_NTB_MAX_SPAD, - - /* - * Some NTB-using hardware have a watchdog to work around NTB hangs; if - * a register or doorbell isn't written every few seconds, the link is - * torn down. Write an otherwise unused register every few seconds to - * work around this watchdog. 
- */ - IF_NTB_WATCHDOG_SPAD = 15 + struct callout queue_full; }; -CTASSERT(IF_NTB_WATCHDOG_SPAD < XEON_SPAD_COUNT && - IF_NTB_WATCHDOG_SPAD < ATOM_SPAD_COUNT); -#define QP_TO_MW(nt, qp) ((qp) % nt->mw_count) -#define NTB_QP_DEF_NUM_ENTRIES 100 -#define NTB_LINK_DOWN_TIMEOUT 10 - -static int ntb_handle_module_events(struct module *m, int what, void *arg); -static int ntb_setup_interface(void); -static int ntb_teardown_interface(void); +static int ntb_net_probe(device_t dev); +static int ntb_net_attach(device_t dev); +static int ntb_net_detach(device_t dev); static void ntb_net_init(void *arg); static int ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data); static void ntb_start(struct ifnet *ifp); @@ -308,165 +82,72 @@ static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); static void ntb_net_event_handler(void *data, enum ntb_link_event status); -static int ntb_transport_probe(struct ntb_softc *ntb); -static void ntb_transport_free(struct ntb_transport_ctx *); -static void ntb_transport_init_queue(struct ntb_transport_ctx *nt, - unsigned int qp_num); -static void ntb_transport_free_queue(struct ntb_transport_qp *qp); -static struct ntb_transport_qp *ntb_transport_create_queue(void *data, - struct ntb_softc *pdev, const struct ntb_queue_handlers *handlers); -static void ntb_transport_link_up(struct ntb_transport_qp *qp); -static int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, - void *data, unsigned int len); -static int ntb_process_tx(struct ntb_transport_qp *qp, - struct ntb_queue_entry *entry); -static void ntb_memcpy_tx(struct ntb_transport_qp *qp, - struct ntb_queue_entry *entry, void *offset); static void ntb_qp_full(void *arg); -static void ntb_transport_rxc_db(void *arg, int pending); -static int ntb_process_rxc(struct ntb_transport_qp *qp); -static void ntb_memcpy_rx(struct ntb_transport_qp *qp, - struct 
ntb_queue_entry *entry, void *offset); -static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp, - void *data); -static void ntb_complete_rxc(void *arg, int pending); -static void ntb_transport_doorbell_callback(void *data, uint32_t vector); -static void ntb_transport_event_callback(void *data); -static void ntb_transport_link_work(void *arg); -static int ntb_set_mw(struct ntb_transport_ctx *, int num_mw, size_t size); -static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw); -static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, - unsigned int qp_num); -static void ntb_qp_link_work(void *arg); -static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt); -static void ntb_transport_link_cleanup_work(void *, int); -static void ntb_qp_link_down(struct ntb_transport_qp *qp); -static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp); -static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp); -static void ntb_transport_link_down(struct ntb_transport_qp *qp); -static void ntb_send_link_down(struct ntb_transport_qp *qp); -static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry, - struct ntb_queue_list *list); -static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock, - struct ntb_queue_list *list); -static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock, - struct ntb_queue_list *from, struct ntb_queue_list *to); static void create_random_local_eui48(u_char *eaddr); -static unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); -static void xeon_link_watchdog_hb(void *); - -static const struct ntb_ctx_ops ntb_transport_ops = { - .link_event = ntb_transport_event_callback, - .db_event = ntb_transport_doorbell_callback, -}; - -MALLOC_DEFINE(M_NTB_IF, "if_ntb", "ntb network driver"); - -static inline void -iowrite32(uint32_t val, void *addr) -{ - bus_space_write_4(X86_BUS_SPACE_MEM, 0/* HACK */, (uintptr_t)addr, - val); -} - -/* Module setup and teardown */ static int 
-ntb_handle_module_events(struct module *m, int what, void *arg) +ntb_net_probe(device_t dev) { - int err = 0; - switch (what) { - case MOD_LOAD: - err = ntb_setup_interface(); - break; - case MOD_UNLOAD: - err = ntb_teardown_interface(); - break; - default: - err = EOPNOTSUPP; - break; - } - return (err); + device_set_desc(dev, "NTB Network Interface"); + return (0); } -static moduledata_t if_ntb_mod = { - "if_ntb", - ntb_handle_module_events, - NULL -}; - -DECLARE_MODULE(if_ntb, if_ntb_mod, SI_SUB_KLD, SI_ORDER_ANY); -MODULE_DEPEND(if_ntb, ntb_hw, 1, 1, 1); - static int -ntb_setup_interface(void) +ntb_net_attach(device_t dev) { + struct ntb_net_ctx *sc = device_get_softc(dev); struct ifnet *ifp; struct ntb_queue_handlers handlers = { ntb_net_rx_handler, ntb_net_tx_handler, ntb_net_event_handler }; - int rc; - - net_softc.ntb = devclass_get_softc(devclass_find("ntb_hw"), 0); - if (net_softc.ntb == NULL) { - printf("ntb: Cannot find devclass\n"); - return (ENXIO); - } - ifp = net_softc.ifp = if_alloc(IFT_ETHER); + ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { - ntb_transport_free(&net_softc); printf("ntb: Cannot allocate ifnet structure\n"); return (ENOMEM); } - if_initname(ifp, "ntb", 0); + if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - rc = ntb_transport_probe(net_softc.ntb); - if (rc != 0) { - printf("ntb: Cannot init transport: %d\n", rc); - if_free(net_softc.ifp); - return (rc); - } + mtx_init(&sc->tx_lock, "ntb tx", NULL, MTX_DEF); + callout_init(&sc->queue_full, 1); - net_softc.qp = ntb_transport_create_queue(ifp, net_softc.ntb, + sc->qp = ntb_transport_create_queue(ifp, device_get_parent(dev), &handlers); ifp->if_init = ntb_net_init; - ifp->if_softc = &net_softc; + ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ntb_ioctl; ifp->if_start = ntb_start; IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; IFQ_SET_READY(&ifp->if_snd); - 
create_random_local_eui48(net_softc.eaddr); - ether_ifattach(ifp, net_softc.eaddr); + create_random_local_eui48(sc->eaddr); + ether_ifattach(ifp, sc->eaddr); ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_JUMBO_MTU; ifp->if_capenable = ifp->if_capabilities; - ifp->if_mtu = ntb_transport_max_size(net_softc.qp) - ETHER_HDR_LEN - + ifp->if_mtu = ntb_transport_max_size(sc->qp) - ETHER_HDR_LEN - ETHER_CRC_LEN; - ntb_transport_link_up(net_softc.qp); - net_softc.bufsize = ntb_transport_max_size(net_softc.qp) + - sizeof(struct ether_header); + ntb_transport_link_up(sc->qp); return (0); } static int -ntb_teardown_interface(void) +ntb_net_detach(device_t dev) { + struct ntb_net_ctx *sc = device_get_softc(dev); - if (net_softc.qp != NULL) { - ntb_transport_link_down(net_softc.qp); - - ntb_transport_free_queue(net_softc.qp); - ntb_transport_free(&net_softc); + if (sc->qp != NULL) { + ntb_transport_link_down(sc->qp); + ntb_transport_free_queue(sc->qp); } - if (net_softc.ifp != NULL) { - ether_ifdetach(net_softc.ifp); - if_free(net_softc.ifp); - net_softc.ifp = NULL; + if (sc->ifp != NULL) { + ether_ifdetach(sc->ifp); + if_free(sc->ifp); + sc->ifp = NULL; } + mtx_destroy(&sc->tx_lock); return (0); } @@ -476,8 +157,8 @@ ntb_teardown_interface(void) static void ntb_net_init(void *arg) { - struct ntb_transport_ctx *ntb_softc = arg; - struct ifnet *ifp = ntb_softc->ifp; + struct ntb_net_ctx *sc = arg; + struct ifnet *ifp = sc->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; @@ -488,14 +169,14 @@ ntb_net_init(void *arg) static int ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { - struct ntb_transport_ctx *nt = ifp->if_softc; + struct ntb_net_ctx *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int error = 0; switch (command) { case SIOCSIFMTU: { - if (ifr->ifr_mtu > ntb_transport_max_size(nt->qp) - + if (ifr->ifr_mtu > ntb_transport_max_size(sc->qp) - ETHER_HDR_LEN - ETHER_CRC_LEN) { error = EINVAL; break; @@ -517,16 +198,16 
@@ static void ntb_start(struct ifnet *ifp) { struct mbuf *m_head; - struct ntb_transport_ctx *nt = ifp->if_softc; + struct ntb_net_ctx *sc = ifp->if_softc; int rc; - mtx_lock(&nt->tx_lock); + mtx_lock(&sc->tx_lock); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; CTR0(KTR_NTB, "TX: ntb_start"); while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); CTR1(KTR_NTB, "TX: start mbuf %p", m_head); - rc = ntb_transport_tx_enqueue(nt->qp, m_head, m_head, + rc = ntb_transport_tx_enqueue(sc->qp, m_head, m_head, m_length(m_head, NULL)); if (rc != 0) { CTR1(KTR_NTB, @@ -535,14 +216,13 @@ ntb_start(struct ifnet *ifp) if (rc == EAGAIN) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); - callout_reset(&nt->qp->queue_full, hz / 1000, + callout_reset(&sc->queue_full, hz / 1000, ntb_qp_full, ifp); } break; } - } - mtx_unlock(&nt->tx_lock); + mtx_unlock(&sc->tx_lock); } /* Network Device Callbacks */ @@ -563,6 +243,7 @@ ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, struct ifnet *ifp = qp_data; CTR0(KTR_NTB, "RX: rx handler"); + m->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID; (*ifp->if_input)(ifp, m); } @@ -586,444 +267,6 @@ ntb_net_event_handler(void *data, enum ntb_link_event status) } } -/* Transport Init and teardown */ - -static void -xeon_link_watchdog_hb(void *arg) -{ - struct ntb_transport_ctx *nt; - - nt = arg; - ntb_spad_write(nt->ntb, IF_NTB_WATCHDOG_SPAD, 0); - callout_reset(&nt->link_watchdog, 1 * hz, xeon_link_watchdog_hb, nt); -} - -static int -ntb_transport_probe(struct ntb_softc *ntb) -{ - struct ntb_transport_ctx *nt = &net_softc; - struct ntb_transport_mw *mw; - uint64_t qp_bitmap; - int rc; - unsigned i; - - nt->mw_count = ntb_mw_count(ntb); - for (i = 0; i < nt->mw_count; i++) { - mw = &nt->mw_vec[i]; - - rc = ntb_mw_get_range(ntb, i, &mw->phys_addr, &mw->vbase, - &mw->phys_size, &mw->xlat_align, &mw->xlat_align_size, - &mw->addr_limit); - if (rc != 0) - goto err; - - 
mw->buff_size = 0; - mw->xlat_size = 0; - mw->virt_addr = NULL; - mw->dma_addr = 0; - - rc = ntb_mw_set_wc(nt->ntb, i, VM_MEMATTR_WRITE_COMBINING); - if (rc) - ntb_printf(0, "Unable to set mw%d caching\n", i); - } - - qp_bitmap = ntb_db_valid_mask(ntb); - nt->qp_count = flsll(qp_bitmap); - KASSERT(nt->qp_count != 0, ("bogus db bitmap")); - nt->qp_count -= 1; - - if (max_num_clients != 0 && max_num_clients < nt->qp_count) - nt->qp_count = max_num_clients; - else if (nt->mw_count < nt->qp_count) - nt->qp_count = nt->mw_count; - KASSERT(nt->qp_count <= QP_SETSIZE, ("invalid qp_count")); - - mtx_init(&nt->tx_lock, "ntb transport tx", NULL, MTX_DEF); - mtx_init(&nt->rx_lock, "ntb transport rx", NULL, MTX_DEF); - - nt->qp_vec = malloc(nt->qp_count * sizeof(*nt->qp_vec), M_NTB_IF, - M_WAITOK | M_ZERO); - - for (i = 0; i < nt->qp_count; i++) { - set_bit(i, &nt->qp_bitmap); - set_bit(i, &nt->qp_bitmap_free); - ntb_transport_init_queue(nt, i); - } - - callout_init(&nt->link_work, 0); - callout_init(&nt->link_watchdog, 0); - TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt); - - rc = ntb_set_ctx(ntb, nt, &ntb_transport_ops); - if (rc != 0) - goto err; - - nt->link_is_up = false; - ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); - ntb_link_event(ntb); - - callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt); - if (enable_xeon_watchdog != 0) - callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt); - return (0); - -err: - free(nt->qp_vec, M_NTB_IF); - nt->qp_vec = NULL; - return (rc); -} - -static void -ntb_transport_free(struct ntb_transport_ctx *nt) -{ - struct ntb_softc *ntb = nt->ntb; - struct _qpset qp_bitmap_alloc; - uint8_t i; - - ntb_transport_link_cleanup(nt); - taskqueue_drain(taskqueue_swi, &nt->link_cleanup); - callout_drain(&nt->link_work); - callout_drain(&nt->link_watchdog); - - BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc); - BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free); - - /* Verify that all 
the QPs are freed */ - for (i = 0; i < nt->qp_count; i++) - if (test_bit(i, &qp_bitmap_alloc)) - ntb_transport_free_queue(&nt->qp_vec[i]); - - ntb_link_disable(ntb); - ntb_clear_ctx(ntb); - - for (i = 0; i < nt->mw_count; i++) - ntb_free_mw(nt, i); - - free(nt->qp_vec, M_NTB_IF); -} - -static void -ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num) -{ - struct ntb_transport_mw *mw; - struct ntb_transport_qp *qp; - vm_paddr_t mw_base; - uint64_t mw_size, qp_offset; - size_t tx_size; - unsigned num_qps_mw, mw_num, mw_count; - - mw_count = nt->mw_count; - mw_num = QP_TO_MW(nt, qp_num); - mw = &nt->mw_vec[mw_num]; - - qp = &nt->qp_vec[qp_num]; - qp->qp_num = qp_num; - qp->transport = nt; - qp->ntb = nt->ntb; - qp->client_ready = false; - qp->event_handler = NULL; - ntb_qp_link_down_reset(qp); - - if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count) - num_qps_mw = nt->qp_count / mw_count + 1; - else - num_qps_mw = nt->qp_count / mw_count; - - mw_base = mw->phys_addr; - mw_size = mw->phys_size; - - tx_size = mw_size / num_qps_mw; - qp_offset = tx_size * (qp_num / mw_count); - - qp->tx_mw = mw->vbase + qp_offset; - KASSERT(qp->tx_mw != NULL, ("uh oh?")); - - /* XXX Assumes that a vm_paddr_t is equivalent to bus_addr_t */ - qp->tx_mw_phys = mw_base + qp_offset; - KASSERT(qp->tx_mw_phys != 0, ("uh oh?")); - - tx_size -= sizeof(struct ntb_rx_info); - qp->rx_info = (void *)(qp->tx_mw + tx_size); - - /* Due to house-keeping, there must be at least 2 buffs */ - qp->tx_max_frame = qmin(tx_size / 2, - transport_mtu + sizeof(struct ntb_payload_header)); - qp->tx_max_entry = tx_size / qp->tx_max_frame; - - callout_init(&qp->link_work, 0); - callout_init(&qp->queue_full, CALLOUT_MPSAFE); - callout_init(&qp->rx_full, CALLOUT_MPSAFE); - - mtx_init(&qp->ntb_rx_q_lock, "ntb rx q", NULL, MTX_SPIN); - mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN); - TASK_INIT(&qp->rx_completion_task, 0, ntb_complete_rxc, qp); - 
TASK_INIT(&qp->rxc_db_work, 0, ntb_transport_rxc_db, qp); - - STAILQ_INIT(&qp->rx_post_q); - STAILQ_INIT(&qp->rx_pend_q); - STAILQ_INIT(&qp->tx_free_q); - - callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp); -} - -static void -ntb_transport_free_queue(struct ntb_transport_qp *qp) -{ - struct ntb_queue_entry *entry; - - if (qp == NULL) - return; - - callout_drain(&qp->link_work); - - ntb_db_set_mask(qp->ntb, 1ull << qp->qp_num); - taskqueue_drain(taskqueue_swi, &qp->rxc_db_work); - taskqueue_drain(taskqueue_swi, &qp->rx_completion_task); - - qp->cb_data = NULL; - qp->rx_handler = NULL; - qp->tx_handler = NULL; - qp->event_handler = NULL; - - while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q))) - free(entry, M_NTB_IF); - - while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q))) - free(entry, M_NTB_IF); - - while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) - free(entry, M_NTB_IF); - - set_bit(qp->qp_num, &qp->transport->qp_bitmap_free); -} - -/** - * ntb_transport_create_queue - Create a new NTB transport layer queue - * @rx_handler: receive callback function - * @tx_handler: transmit callback function - * @event_handler: event callback function - * - * Create a new NTB transport layer queue and provide the queue with a callback - * routine for both transmit and receive. The receive callback routine will be - * used to pass up data when the transport has received it on the queue. The - * transmit callback routine will be called when the transport has completed the - * transmission of the data on the queue and the data is ready to be freed. - * - * RETURNS: pointer to newly created ntb_queue, NULL on error. 
- */ -static struct ntb_transport_qp * -ntb_transport_create_queue(void *data, struct ntb_softc *ntb, - const struct ntb_queue_handlers *handlers) -{ - struct ntb_queue_entry *entry; - struct ntb_transport_qp *qp; - struct ntb_transport_ctx *nt; - unsigned int free_queue; - int i; - - nt = ntb_get_ctx(ntb, NULL); - KASSERT(nt != NULL, ("bogus")); - - free_queue = ffs_bit(&nt->qp_bitmap); - if (free_queue == 0) - return (NULL); - - /* decrement free_queue to make it zero based */ - free_queue--; - - qp = &nt->qp_vec[free_queue]; - clear_bit(qp->qp_num, &nt->qp_bitmap_free); - qp->cb_data = data; - qp->rx_handler = handlers->rx_handler; - qp->tx_handler = handlers->tx_handler; - qp->event_handler = handlers->event_handler; - - for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { - entry = malloc(sizeof(*entry), M_NTB_IF, M_WAITOK | M_ZERO); - entry->cb_data = nt->ifp; - entry->buf = NULL; - entry->len = transport_mtu; - ntb_list_add(&qp->ntb_rx_q_lock, entry, &qp->rx_pend_q); - } - - for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { - entry = malloc(sizeof(*entry), M_NTB_IF, M_WAITOK | M_ZERO); - ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); - } - - ntb_db_clear(ntb, 1ull << qp->qp_num); - ntb_db_clear_mask(ntb, 1ull << qp->qp_num); - return (qp); -} - -/** - * ntb_transport_link_up - Notify NTB transport of client readiness to use queue - * @qp: NTB transport layer queue to be enabled - * - * Notify NTB transport layer of client readiness to use queue - */ -static void -ntb_transport_link_up(struct ntb_transport_qp *qp) -{ - struct ntb_transport_ctx *nt; - - if (qp == NULL) - return; - - qp->client_ready = true; - - nt = qp->transport; - ntb_printf(2, "qp client ready\n"); - - if (qp->transport->link_is_up) - callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp); -} - - - -/* Transport Tx */ - -/** - * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry - * @qp: NTB transport layer queue the entry is to be enqueued on - * @cb: per buffer pointer for 
callback function to use - * @data: pointer to data buffer that will be sent - * @len: length of the data buffer - * - * Enqueue a new transmit buffer onto the transport queue from which a NTB - * payload will be transmitted. This assumes that a lock is being held to - * serialize access to the qp. - * - * RETURNS: An appropriate ERRNO error value on error, or zero for success. - */ -static int -ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, - unsigned int len) -{ - struct ntb_queue_entry *entry; - int rc; - - if (qp == NULL || !qp->link_is_up || len == 0) { - CTR0(KTR_NTB, "TX: link not up"); - return (EINVAL); - } - - entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); - if (entry == NULL) { - CTR0(KTR_NTB, "TX: could not get entry from tx_free_q"); - qp->tx_err_no_buf++; - return (EBUSY); - } - CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry); - - entry->cb_data = cb; - entry->buf = data; - entry->len = len; - entry->flags = 0; - - rc = ntb_process_tx(qp, entry); - if (rc != 0) { - ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); - CTR1(KTR_NTB, - "TX: process_tx failed. Returning entry %p to tx_free_q", - entry); - } - return (rc); -} - -static int -ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry) -{ - void *offset; - - offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index; - CTR3(KTR_NTB, - "TX: process_tx: tx_pkts=%lu, tx_index=%u, remote entry=%u", - qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry); - if (qp->tx_index == qp->remote_rx_info->entry) { - CTR0(KTR_NTB, "TX: ring full"); - qp->tx_ring_full++; - return (EAGAIN); - } - - if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) { - if (qp->tx_handler != NULL) - qp->tx_handler(qp, qp->cb_data, entry->buf, - EIO); - else - m_freem(entry->buf); - - entry->buf = NULL; - ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); - CTR1(KTR_NTB, - "TX: frame too big. 
returning entry %p to tx_free_q", - entry); - return (0); - } - CTR2(KTR_NTB, "TX: copying entry %p to offset %p", entry, offset); - ntb_memcpy_tx(qp, entry, offset); - - qp->tx_index++; - qp->tx_index %= qp->tx_max_entry; - - qp->tx_pkts++; - - return (0); -} - -static void -ntb_memcpy_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry, - void *offset) -{ - struct ntb_payload_header *hdr; - - /* This piece is from Linux' ntb_async_tx() */ - hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame - - sizeof(struct ntb_payload_header)); - entry->x_hdr = hdr; - iowrite32(entry->len, &hdr->len); - iowrite32(qp->tx_pkts, &hdr->ver); - - /* This piece is ntb_memcpy_tx() */ - CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset); - if (entry->buf != NULL) { - m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset); - - /* - * Ensure that the data is fully copied before setting the - * flags - */ - wmb(); - } - - /* The rest is ntb_tx_copy_callback() */ - iowrite32(entry->flags | IF_NTB_DESC_DONE_FLAG, &hdr->flags); - CTR1(KTR_NTB, "TX: hdr %p set DESC_DONE", hdr); - - ntb_peer_db_set(qp->ntb, 1ull << qp->qp_num); - - /* - * The entry length can only be zero if the packet is intended to be a - * "link down" or similar. Since no payload is being sent in these - * cases, there is nothing to add to the completion queue. - */ - if (entry->len > 0) { - qp->tx_bytes += entry->len; - - if (qp->tx_handler) - qp->tx_handler(qp, qp->cb_data, entry->buf, - entry->len); - else - m_freem(entry->buf); - entry->buf = NULL; - } - - CTR3(KTR_NTB, - "TX: entry %p sent. 
hdr->ver = %u, hdr->flags = 0x%x, Returning " - "to tx_free_q", entry, hdr->ver, hdr->flags); - ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); -} - static void ntb_qp_full(void *arg) { @@ -1032,659 +275,6 @@ ntb_qp_full(void *arg) ntb_start(arg); } -/* Transport Rx */ -static void -ntb_transport_rxc_db(void *arg, int pending __unused) -{ - struct ntb_transport_qp *qp = arg; - ntb_q_idx_t i; - int rc; - - /* - * Limit the number of packets processed in a single interrupt to - * provide fairness to others - */ - CTR0(KTR_NTB, "RX: transport_rx"); - mtx_lock(&qp->transport->rx_lock); - for (i = 0; i < qp->rx_max_entry; i++) { - rc = ntb_process_rxc(qp); - if (rc != 0) { - CTR0(KTR_NTB, "RX: process_rxc failed"); - break; - } - } - mtx_unlock(&qp->transport->rx_lock); - - if (i == qp->rx_max_entry) - taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work); - else if ((ntb_db_read(qp->ntb) & (1ull << qp->qp_num)) != 0) { - /* If db is set, clear it and read it back to commit clear. */ - ntb_db_clear(qp->ntb, 1ull << qp->qp_num); - (void)ntb_db_read(qp->ntb); - - /* - * An interrupt may have arrived between finishing - * ntb_process_rxc and clearing the doorbell bit: there might - * be some more work to do. 
- */ - taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work); - } -} - -static int -ntb_process_rxc(struct ntb_transport_qp *qp) -{ - struct ntb_payload_header *hdr; - struct ntb_queue_entry *entry; - caddr_t offset; - - offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index; - hdr = (void *)(offset + qp->rx_max_frame - - sizeof(struct ntb_payload_header)); - - CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index); - if ((hdr->flags & IF_NTB_DESC_DONE_FLAG) == 0) { - CTR0(KTR_NTB, "RX: hdr not done"); - qp->rx_ring_empty++; - return (EAGAIN); - } - - if ((hdr->flags & IF_NTB_LINK_DOWN_FLAG) != 0) { - CTR0(KTR_NTB, "RX: link down"); - ntb_qp_link_down(qp); - hdr->flags = 0; - return (EAGAIN); - } - - if (hdr->ver != (uint32_t)qp->rx_pkts) { - CTR2(KTR_NTB,"RX: ver != rx_pkts (%x != %lx). " - "Returning entry to rx_pend_q", hdr->ver, qp->rx_pkts); - qp->rx_err_ver++; - return (EIO); - } - - entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q); - if (entry == NULL) { - qp->rx_err_no_buf++; - CTR0(KTR_NTB, "RX: No entries in rx_pend_q"); - return (EAGAIN); - } - callout_stop(&qp->rx_full); - CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry); - - entry->x_hdr = hdr; - entry->index = qp->rx_index; - - if (hdr->len > entry->len) { - CTR2(KTR_NTB, "RX: len too long. 
Wanted %ju got %ju", - (uintmax_t)hdr->len, (uintmax_t)entry->len); - qp->rx_err_oflow++; - - entry->len = -EIO; - entry->flags |= IF_NTB_DESC_DONE_FLAG; - - taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task); - } else { - qp->rx_bytes += hdr->len; - qp->rx_pkts++; - - CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts); - - entry->len = hdr->len; - - ntb_memcpy_rx(qp, entry, offset); - } - - qp->rx_index++; - qp->rx_index %= qp->rx_max_entry; - return (0); -} - -static void -ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry, - void *offset) -{ - struct ifnet *ifp = entry->cb_data; - unsigned int len = entry->len; - struct mbuf *m; - - CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset); - m = m_devget(offset, len, 0, ifp, NULL); - m->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID; - - entry->buf = (void *)m; - - /* Ensure that the data is globally visible before clearing the flag */ - wmb(); - - CTR2(KTR_NTB, "RX: copied entry %p to mbuf %p.", entry, m); - ntb_rx_copy_callback(qp, entry); -} - -static inline void -ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data) -{ - struct ntb_queue_entry *entry; - - entry = data; - entry->flags |= IF_NTB_DESC_DONE_FLAG; - taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task); -} - -static void -ntb_complete_rxc(void *arg, int pending) -{ - struct ntb_transport_qp *qp = arg; - struct ntb_queue_entry *entry; - struct mbuf *m; - unsigned len; - - CTR0(KTR_NTB, "RX: rx_completion_task"); - - mtx_lock_spin(&qp->ntb_rx_q_lock); - - while (!STAILQ_EMPTY(&qp->rx_post_q)) { - entry = STAILQ_FIRST(&qp->rx_post_q); - if ((entry->flags & IF_NTB_DESC_DONE_FLAG) == 0) - break; - - entry->x_hdr->flags = 0; - iowrite32(entry->index, &qp->rx_info->entry); - - STAILQ_REMOVE_HEAD(&qp->rx_post_q, entry); - - len = entry->len; - m = entry->buf; - - /* - * Re-initialize queue_entry for reuse; rx_handler takes - * ownership of the mbuf. 
- */ - entry->buf = NULL; - entry->len = transport_mtu; - entry->cb_data = qp->transport->ifp; - - STAILQ_INSERT_TAIL(&qp->rx_pend_q, entry, entry); - - mtx_unlock_spin(&qp->ntb_rx_q_lock); - - CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m); - if (qp->rx_handler != NULL && qp->client_ready) - qp->rx_handler(qp, qp->cb_data, m, len); - else - m_freem(m); - - mtx_lock_spin(&qp->ntb_rx_q_lock); - } - - mtx_unlock_spin(&qp->ntb_rx_q_lock); -} - -static void -ntb_transport_doorbell_callback(void *data, uint32_t vector) -{ - struct ntb_transport_ctx *nt = data; - struct ntb_transport_qp *qp; - struct _qpset db_bits; - uint64_t vec_mask; - unsigned qp_num; - - BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &db_bits); - BIT_NAND(QP_SETSIZE, &db_bits, &nt->qp_bitmap_free); - - vec_mask = ntb_db_vector_mask(nt->ntb, vector); - while (vec_mask != 0) { - qp_num = ffsll(vec_mask) - 1; - - if (test_bit(qp_num, &db_bits)) { - qp = &nt->qp_vec[qp_num]; - taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work); - } - - vec_mask &= ~(1ull << qp_num); - } -} - -/* Link Event handler */ -static void -ntb_transport_event_callback(void *data) -{ - struct ntb_transport_ctx *nt = data; - - if (ntb_link_is_up(nt->ntb, NULL, NULL)) { - ntb_printf(1, "HW link up\n"); - callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt); - } else { - ntb_printf(1, "HW link down\n"); - taskqueue_enqueue(taskqueue_swi, &nt->link_cleanup); - } -} - -/* Link bring up */ -static void -ntb_transport_link_work(void *arg) -{ - struct ntb_transport_ctx *nt = arg; - struct ntb_softc *ntb = nt->ntb; - struct ntb_transport_qp *qp; - uint64_t val64, size; - uint32_t val; - unsigned i; - int rc; - - /* send the local info, in the opposite order of the way we read it */ - for (i = 0; i < nt->mw_count; i++) { - size = nt->mw_vec[i].phys_size; - - if (max_mw_size != 0 && size > max_mw_size) - size = max_mw_size; - - ntb_peer_spad_write(ntb, IF_NTB_MW0_SZ_HIGH + (i * 2), - size >> 32); - ntb_peer_spad_write(ntb, 
IF_NTB_MW0_SZ_LOW + (i * 2), size); - } - - ntb_peer_spad_write(ntb, IF_NTB_NUM_MWS, nt->mw_count); - - ntb_peer_spad_write(ntb, IF_NTB_NUM_QPS, nt->qp_count); - - ntb_peer_spad_write(ntb, IF_NTB_VERSION, NTB_TRANSPORT_VERSION); - - /* Query the remote side for its info */ - val = 0; - ntb_spad_read(ntb, IF_NTB_VERSION, &val); - if (val != NTB_TRANSPORT_VERSION) - goto out; - - ntb_spad_read(ntb, IF_NTB_NUM_QPS, &val); - if (val != nt->qp_count) - goto out; - - ntb_spad_read(ntb, IF_NTB_NUM_MWS, &val); - if (val != nt->mw_count) - goto out; - - for (i = 0; i < nt->mw_count; i++) { - ntb_spad_read(ntb, IF_NTB_MW0_SZ_HIGH + (i * 2), &val); - val64 = (uint64_t)val << 32; - - ntb_spad_read(ntb, IF_NTB_MW0_SZ_LOW + (i * 2), &val); - val64 |= val; - - rc = ntb_set_mw(nt, i, val64); - if (rc != 0) - goto free_mws; - } - - nt->link_is_up = true; - ntb_printf(1, "transport link up\n"); - - for (i = 0; i < nt->qp_count; i++) { - qp = &nt->qp_vec[i]; - - ntb_transport_setup_qp_mw(nt, i); - - if (qp->client_ready) - callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp); - } - - return; - -free_mws: - for (i = 0; i < nt->mw_count; i++) - ntb_free_mw(nt, i); -out: - if (ntb_link_is_up(ntb, NULL, NULL)) - callout_reset(&nt->link_work, - NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt); -} - -static int -ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, size_t size) -{ - struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; - size_t xlat_size, buff_size; - int rc; - - if (size == 0) - return (EINVAL); - - xlat_size = roundup(size, mw->xlat_align_size); - buff_size = xlat_size; - - /* No need to re-setup */ - if (mw->xlat_size == xlat_size) - return (0); - - if (mw->buff_size != 0) - ntb_free_mw(nt, num_mw); - - /* Alloc memory for receiving data. 
Must be aligned */ - mw->xlat_size = xlat_size; - mw->buff_size = buff_size; - - mw->virt_addr = contigmalloc(mw->buff_size, M_NTB_IF, M_ZERO, 0, - mw->addr_limit, mw->xlat_align, 0); - if (mw->virt_addr == NULL) { - ntb_printf(0, "Unable to allocate MW buffer of size %zu/%zu\n", - mw->buff_size, mw->xlat_size); - mw->xlat_size = 0; - mw->buff_size = 0; - return (ENOMEM); - } - /* TODO: replace with bus_space_* functions */ - mw->dma_addr = vtophys(mw->virt_addr); - - /* - * Ensure that the allocation from contigmalloc is aligned as - * requested. XXX: This may not be needed -- brought in for parity - * with the Linux driver. - */ - if (mw->dma_addr % mw->xlat_align != 0) { - ntb_printf(0, - "DMA memory 0x%jx not aligned to BAR size 0x%zx\n", - (uintmax_t)mw->dma_addr, size); - ntb_free_mw(nt, num_mw); - return (ENOMEM); - } - - /* Notify HW the memory location of the receive buffer */ - rc = ntb_mw_set_trans(nt->ntb, num_mw, mw->dma_addr, mw->xlat_size); - if (rc) { - ntb_printf(0, "Unable to set mw%d translation\n", num_mw); - ntb_free_mw(nt, num_mw); - return (rc); - } - - return (0); -} - -static void -ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) -{ - struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; - - if (mw->virt_addr == NULL) - return; - - ntb_mw_clear_trans(nt->ntb, num_mw); - contigfree(mw->virt_addr, mw->xlat_size, M_NTB_IF); - mw->xlat_size = 0; - mw->buff_size = 0; - mw->virt_addr = NULL; -} - -static int -ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num) -{ - struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; - struct ntb_transport_mw *mw; - void *offset; - ntb_q_idx_t i; - size_t rx_size; - unsigned num_qps_mw, mw_num, mw_count; - - mw_count = nt->mw_count; - mw_num = QP_TO_MW(nt, qp_num); - mw = &nt->mw_vec[mw_num]; - - if (mw->virt_addr == NULL) - return (ENOMEM); - - if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count) - num_qps_mw = nt->qp_count / mw_count + 1; - else - num_qps_mw = 
nt->qp_count / mw_count; - - rx_size = mw->xlat_size / num_qps_mw; - qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count); - rx_size -= sizeof(struct ntb_rx_info); - - qp->remote_rx_info = (void*)(qp->rx_buff + rx_size); - - /* Due to house-keeping, there must be at least 2 buffs */ - qp->rx_max_frame = qmin(rx_size / 2, - transport_mtu + sizeof(struct ntb_payload_header)); - qp->rx_max_entry = rx_size / qp->rx_max_frame; - qp->rx_index = 0; - - qp->remote_rx_info->entry = qp->rx_max_entry - 1; - - /* Set up the hdr offsets with 0s */ - for (i = 0; i < qp->rx_max_entry; i++) { - offset = (void *)(qp->rx_buff + qp->rx_max_frame * (i + 1) - - sizeof(struct ntb_payload_header)); - memset(offset, 0, sizeof(struct ntb_payload_header)); - } - - qp->rx_pkts = 0; - qp->tx_pkts = 0; - qp->tx_index = 0; - - return (0); -} - -static void -ntb_qp_link_work(void *arg) -{ - struct ntb_transport_qp *qp = arg; - struct ntb_softc *ntb = qp->ntb; - struct ntb_transport_ctx *nt = qp->transport; - uint32_t val, dummy; - - ntb_spad_read(ntb, IF_NTB_QP_LINKS, &val); - - ntb_peer_spad_write(ntb, IF_NTB_QP_LINKS, val | (1ull << qp->qp_num)); - - /* query remote spad for qp ready bits */ - ntb_peer_spad_read(ntb, IF_NTB_QP_LINKS, &dummy); - - /* See if the remote side is up */ - if ((val & (1ull << qp->qp_num)) != 0) { - ntb_printf(2, "qp link up\n"); - qp->link_is_up = true; - - if (qp->event_handler != NULL) - qp->event_handler(qp->cb_data, NTB_LINK_UP); - - taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work); - } else if (nt->link_is_up) - callout_reset(&qp->link_work, - NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp); -} - -/* Link down event*/ -static void -ntb_transport_link_cleanup(struct ntb_transport_ctx *nt) -{ - struct ntb_transport_qp *qp; - struct _qpset qp_bitmap_alloc; - unsigned i; - - BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc); - BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free); - - /* Pass along the info to any clients */ - for (i = 
0; i < nt->qp_count; i++) - if (test_bit(i, &qp_bitmap_alloc)) { - qp = &nt->qp_vec[i]; - ntb_qp_link_cleanup(qp); - callout_drain(&qp->link_work); - } - - if (!nt->link_is_up) - callout_drain(&nt->link_work); - - /* - * The scratchpad registers keep the values if the remote side - * goes down, blast them now to give them a sane value the next - * time they are accessed - */ - for (i = 0; i < IF_NTB_MAX_SPAD; i++) - ntb_spad_write(nt->ntb, i, 0); -} - -static void -ntb_transport_link_cleanup_work(void *arg, int pending __unused) -{ - - ntb_transport_link_cleanup(arg); -} - -static void -ntb_qp_link_down(struct ntb_transport_qp *qp) -{ - - ntb_qp_link_cleanup(qp); -} - -static void -ntb_qp_link_down_reset(struct ntb_transport_qp *qp) -{ - - qp->link_is_up = false; - - qp->tx_index = qp->rx_index = 0; - qp->tx_bytes = qp->rx_bytes = 0; - qp->tx_pkts = qp->rx_pkts = 0; - - qp->rx_ring_empty = 0; - qp->tx_ring_full = 0; - - qp->rx_err_no_buf = qp->tx_err_no_buf = 0; - qp->rx_err_oflow = qp->rx_err_ver = 0; -} - -static void -ntb_qp_link_cleanup(struct ntb_transport_qp *qp) -{ - struct ntb_transport_ctx *nt = qp->transport; - - callout_drain(&qp->link_work); - ntb_qp_link_down_reset(qp); - - if (qp->event_handler != NULL) - qp->event_handler(qp->cb_data, NTB_LINK_DOWN); - - if (nt->link_is_up) - callout_reset(&qp->link_work, - NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp); -} - -/* Link commanded down */ -/** - * ntb_transport_link_down - Notify NTB transport to no longer enqueue data - * @qp: NTB transport layer queue to be disabled - * - * Notify NTB transport layer of client's desire to no longer receive data on - * transport queue specified. It is the client's responsibility to ensure all - * entries on queue are purged or otherwise handled appropriately. 
- */ -static void -ntb_transport_link_down(struct ntb_transport_qp *qp) -{ - uint32_t val; - - if (qp == NULL) - return; - - qp->client_ready = false; - - ntb_spad_read(qp->ntb, IF_NTB_QP_LINKS, &val); - - ntb_peer_spad_write(qp->ntb, IF_NTB_QP_LINKS, - val & ~(1 << qp->qp_num)); - - if (qp->link_is_up) - ntb_send_link_down(qp); - else - callout_drain(&qp->link_work); -} - -static void -ntb_send_link_down(struct ntb_transport_qp *qp) -{ - struct ntb_queue_entry *entry; - int i, rc; - - if (!qp->link_is_up) - return; - - for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) { - entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); - if (entry != NULL) - break; - pause("NTB Wait for link down", hz / 10); - } - - if (entry == NULL) - return; - - entry->cb_data = NULL; - entry->buf = NULL; - entry->len = 0; - entry->flags = IF_NTB_LINK_DOWN_FLAG; - - mtx_lock(&qp->transport->tx_lock); - rc = ntb_process_tx(qp, entry); - if (rc != 0) - printf("ntb: Failed to send link down\n"); - mtx_unlock(&qp->transport->tx_lock); - - ntb_qp_link_down_reset(qp); -} - - -/* List Management */ - -static void -ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry, - struct ntb_queue_list *list) -{ - - mtx_lock_spin(lock); - STAILQ_INSERT_TAIL(list, entry, entry); - mtx_unlock_spin(lock); -} - -static struct ntb_queue_entry * -ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list) -{ - struct ntb_queue_entry *entry; - - mtx_lock_spin(lock); - if (STAILQ_EMPTY(list)) { - entry = NULL; - goto out; - } - entry = STAILQ_FIRST(list); - STAILQ_REMOVE_HEAD(list, entry); -out: - mtx_unlock_spin(lock); - - return (entry); -} - -static struct ntb_queue_entry * -ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from, - struct ntb_queue_list *to) -{ - struct ntb_queue_entry *entry; - - mtx_lock_spin(lock); - if (STAILQ_EMPTY(from)) { - entry = NULL; - goto out; - } - entry = STAILQ_FIRST(from); - STAILQ_REMOVE_HEAD(from, entry); - STAILQ_INSERT_TAIL(to, entry, entry); - -out: - 
mtx_unlock_spin(lock); - return (entry); -} - /* Helper functions */ /* TODO: This too should really be part of the kernel */ #define EUI48_MULTICAST 1 << 0 @@ -1700,20 +290,18 @@ create_random_local_eui48(u_char *eaddr) eaddr[5] = counter++; } -/** - * ntb_transport_max_size - Query the max payload size of a qp - * @qp: NTB transport layer queue to be queried - * - * Query the maximum payload size permissible on the given qp - * - * RETURNS: the max payload size of a qp - */ -static unsigned int -ntb_transport_max_size(struct ntb_transport_qp *qp) -{ - - if (qp == NULL) - return (0); +static device_method_t ntb_net_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, ntb_net_probe), + DEVMETHOD(device_attach, ntb_net_attach), + DEVMETHOD(device_detach, ntb_net_detach), + DEVMETHOD_END +}; - return (qp->tx_max_frame - sizeof(struct ntb_payload_header)); -} +devclass_t ntb_net_devclass; +static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods, + sizeof(struct ntb_net_ctx)); +DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass, + NULL, NULL); +MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1); +MODULE_VERSION(if_ntb, 1); diff --git a/sys/dev/ntb/ntb.c b/sys/dev/ntb/ntb.c new file mode 100644 index 000000000..9af16ce49 --- /dev/null +++ b/sys/dev/ntb/ntb.c @@ -0,0 +1,43 @@ +/*- + * Copyright (c) 2016 Alexander Motin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include + +#include "ntb.h" + +devclass_t ntb_hw_devclass; +SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls"); + +MODULE_VERSION(ntb, 1); diff --git a/sys/dev/ntb/ntb.h b/sys/dev/ntb/ntb.h new file mode 100644 index 000000000..5218cbd7b --- /dev/null +++ b/sys/dev/ntb/ntb.h @@ -0,0 +1,37 @@ +/*- + * Copyright (c) 2016 Alexander Motin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NTB_H_ +#define _NTB_H_ + +#include "ntb_if.h" + +extern devclass_t ntb_hw_devclass; +SYSCTL_DECL(_hw_ntb); + +#endif /* _NTB_H_ */ diff --git a/sys/dev/ntb/ntb_hw/ntb_hw.c b/sys/dev/ntb/ntb_hw/ntb_hw.c index 30dc5af92..846fb9865 100644 --- a/sys/dev/ntb/ntb_hw/ntb_hw.c +++ b/sys/dev/ntb/ntb_hw/ntb_hw.c @@ -1,4 +1,5 @@ /*- + * Copyright (c) 2016 Alexander Motin * Copyright (C) 2013 Intel Corporation * Copyright (C) 2015 EMC Corporation * All rights reserved. @@ -25,6 +26,16 @@ * SUCH DAMAGE. */ +/* + * The Non-Transparent Bridge (NTB) is a device that allows you to connect + * two or more systems using a PCI-e links, providing remote memory access. + * + * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs. + * + * NOTE: Much of the code in this module is shared with Linux. Any patches may + * be picked up and redistributed in Linux with a dual GPL/BSD license. + */ + #include __FBSDID("$FreeBSD$"); @@ -51,19 +62,7 @@ __FBSDID("$FreeBSD$"); #include #include "ntb_regs.h" -#include "ntb_hw.h" - -/* - * The Non-Transparent Bridge (NTB) is a device on some Intel processors that - * allows you to connect two systems using a PCI-e link. 
- * - * This module contains the hardware abstraction layer for the NTB. It allows - * you to send and recieve interrupts, map the memory windows and send and - * receive messages in the scratch-pad registers. - * - * NOTE: Much of the code in this module is shared with Linux. Any patches may - * be picked up and redistributed in Linux with a dual GPL/BSD license. - */ +#include "../ntb.h" #define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT) @@ -71,8 +70,6 @@ __FBSDID("$FreeBSD$"); #define ATOM_LINK_RECOVERY_TIME 500 /* ms */ #define BAR_HIGH_MASK (~((1ull << 12) - 1)) -#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev)) - #define NTB_MSIX_VER_GUARD 0xaabbccdd #define NTB_MSIX_RECEIVED 0xe0f0e0f0 @@ -123,8 +120,8 @@ enum { }; /* Device features and workarounds */ -#define HAS_FEATURE(feature) \ - ((ntb->features & (feature)) != 0) +#define HAS_FEATURE(ntb, feature) \ + (((ntb)->features & (feature)) != 0) struct ntb_hw_info { uint32_t device_id; @@ -306,6 +303,17 @@ bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle, static int ntb_probe(device_t device); static int ntb_attach(device_t device); static int ntb_detach(device_t device); +static uint64_t ntb_db_valid_mask(device_t dev); +static void ntb_spad_clear(device_t dev); +static uint64_t ntb_db_vector_mask(device_t dev, uint32_t vector); +static bool ntb_link_is_up(device_t dev, enum ntb_speed *speed, + enum ntb_width *width); +static int ntb_link_enable(device_t dev, enum ntb_speed speed, + enum ntb_width width); +static int ntb_link_disable(device_t dev); +static int ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val); +static int ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val); + static unsigned ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx); static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw); static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar); @@ -359,7 +367,8 @@ static inline bool 
atom_link_is_err(struct ntb_softc *ntb); static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *); static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *); static void atom_link_hb(void *arg); -static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec); +static void ntb_link_event(device_t dev); +static void ntb_db_event(device_t dev, uint32_t vec); static void recover_atom_link(void *arg); static bool ntb_poll_link(struct ntb_softc *ntb); static void save_bar_parameters(struct ntb_pci_bar_info *bar); @@ -446,6 +455,7 @@ ntb_vm_memattr_to_str(vm_memattr_t pat) } static int g_ntb_msix_idx = 0; +TUNABLE_INT("hw.ntb.msix_mw_idx", &g_ntb_msix_idx); SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx, 0, "Use this memory window to access the peer MSIX message complex on " "certain Xeon-based NTB systems, as a workaround for a hardware errata. " @@ -461,6 +471,18 @@ SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx, "available memory window. Both sides of the NTB MUST set the same " "value here! (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)"); +/* Hardware owns the low 16 bits of features. */ +#define NTB_BAR_SIZE_4K (1 << 0) +#define NTB_SDOORBELL_LOCKUP (1 << 1) +#define NTB_SB01BASE_LOCKUP (1 << 2) +#define NTB_B2BDOORBELL_BIT14 (1 << 3) +/* Software/configuration owns the top 16 bits. */ +#define NTB_SPLIT_BAR (1ull << 16) + +#define NTB_FEATURES_STR \ + "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \ + "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K" + static struct ntb_hw_info pci_ids[] = { /* XXX: PS/SS IDs left out until they are supported. 
*/ { 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B", @@ -609,24 +631,6 @@ SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN, */ MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations"); -static device_method_t ntb_pci_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, ntb_probe), - DEVMETHOD(device_attach, ntb_attach), - DEVMETHOD(device_detach, ntb_detach), - DEVMETHOD_END -}; - -static driver_t ntb_pci_driver = { - "ntb_hw", - ntb_pci_methods, - sizeof(struct ntb_softc), -}; - -static devclass_t ntb_devclass; -DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL); -MODULE_VERSION(ntb_hw, 1); - SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls"); /* @@ -652,7 +656,7 @@ ntb_attach(device_t device) struct ntb_hw_info *p; int error; - ntb = DEVICE2SOFTC(device); + ntb = device_get_softc(device); p = ntb_get_device_info(pci_get_devid(device)); ntb->device = device; @@ -689,12 +693,16 @@ ntb_attach(device_t device) if (error != 0) goto out; - ntb_spad_clear(ntb); + ntb_spad_clear(device); ntb_poll_link(ntb); ntb_sysctl_init(ntb); + /* Attach children to this controller */ + device_add_child(device, NULL, -1); + bus_generic_attach(device); + out: if (error != 0) ntb_detach(device); @@ -706,7 +714,10 @@ ntb_detach(device_t device) { struct ntb_softc *ntb; - ntb = DEVICE2SOFTC(device); + ntb = device_get_softc(device); + + /* Detach & delete all children */ + device_delete_children(device); if (ntb->self_reg != NULL) { DB_MASK_LOCK(ntb); @@ -748,7 +759,7 @@ bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar) { /* XXX This assertion could be stronger. 
*/ KASSERT(bar < NTB_MAX_BARS, ("bogus bar")); - return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR)); + return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR)); } static inline void @@ -814,7 +825,7 @@ ntb_map_pci_bars(struct ntb_softc *ntb) ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET; - if (!HAS_FEATURE(NTB_SPLIT_BAR)) + if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR)) goto out; ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5); @@ -888,7 +899,7 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar) * but the PCI driver does not honor the size in this call, so we have * to modify it after the fact. */ - if (HAS_FEATURE(NTB_BAR_SIZE_4K)) { + if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) { if (bar->pci_resource_id == PCIR_BAR(2)) bar_size_bits = pci_read_config(ntb->device, XEON_PBAR23SZ_OFFSET, 1); @@ -1071,7 +1082,7 @@ ntb_init_isr(struct ntb_softc *ntb) num_vectors = 1; if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) { - if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) { + if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { device_printf(ntb->device, "Errata workaround does not support MSI or INTX\n"); return (EINVAL); @@ -1082,7 +1093,7 @@ ntb_init_isr(struct ntb_softc *ntb) rc = ntb_setup_legacy_interrupt(ntb); } else { if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS && - HAS_FEATURE(NTB_SB01BASE_LOCKUP)) { + HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { device_printf(ntb->device, "Errata workaround expects %d doorbell bits\n", XEON_NONLINK_DB_MSIX_BITS); @@ -1091,7 +1102,7 @@ ntb_init_isr(struct ntb_softc *ntb) ntb_create_msix_vec(ntb, num_vectors); rc = ntb_setup_msix(ntb, num_vectors); - if (rc == 0 && HAS_FEATURE(NTB_SB01BASE_LOCKUP)) + if (rc == 0 && HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) ntb_get_msix_info(ntb); } if (rc != 0) { @@ -1194,11 +1205,12 @@ db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val) ntb_reg_write(2, regoff, 
(uint16_t)val); } -void -ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits) +static void +ntb_db_set_mask(device_t dev, uint64_t bits) { + struct ntb_softc *ntb = device_get_softc(dev); - if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) + if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) return; DB_MASK_LOCK(ntb); @@ -1207,16 +1219,17 @@ ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits) DB_MASK_UNLOCK(ntb); } -void -ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits) +static void +ntb_db_clear_mask(device_t dev, uint64_t bits) { + struct ntb_softc *ntb = device_get_softc(dev); KASSERT((bits & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(bits & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); - if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) + if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) return; DB_MASK_LOCK(ntb); @@ -1225,18 +1238,19 @@ ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits) DB_MASK_UNLOCK(ntb); } -uint64_t -ntb_db_read(struct ntb_softc *ntb) +static uint64_t +ntb_db_read(device_t dev) { + struct ntb_softc *ntb = device_get_softc(dev); - if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) { + if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { uint64_t res; unsigned i; res = 0; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { if (ntb->msix_vec[i].masked != 0) - res |= ntb_db_vector_mask(ntb, i); + res |= ntb_db_vector_mask(dev, i); } return (res); } @@ -1244,20 +1258,21 @@ ntb_db_read(struct ntb_softc *ntb) return (db_ioread(ntb, ntb->self_reg->db_bell)); } -void -ntb_db_clear(struct ntb_softc *ntb, uint64_t bits) +static void +ntb_db_clear(device_t dev, uint64_t bits) { + struct ntb_softc *ntb = device_get_softc(dev); KASSERT((bits & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(bits & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); - if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) { + if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { unsigned i; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { - 
if ((bits & ntb_db_vector_mask(ntb, i)) != 0) { + if ((bits & ntb_db_vector_mask(dev, i)) != 0) { DB_MASK_LOCK(ntb); if (ntb->msix_vec[i].masked != 0) { /* XXX These need a public API. */ @@ -1295,10 +1310,10 @@ ntb_interrupt(struct ntb_softc *ntb, uint32_t vec) if ((vec_mask & ntb->db_link_mask) != 0) { if (ntb_poll_link(ntb)) - ntb_link_event(ntb); + ntb_link_event(ntb->device); } - if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) && + if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) && (vec_mask & ntb->db_link_mask) == 0) { DB_MASK_LOCK(ntb); if (ntb->msix_vec[vec].masked == 0) { @@ -1312,7 +1327,7 @@ ntb_interrupt(struct ntb_softc *ntb, uint32_t vec) } if ((vec_mask & ntb->db_valid_mask) != 0) - ntb_db_event(ntb, vec); + ntb_db_event(ntb->device, vec); } static void @@ -1406,7 +1421,7 @@ ntb_teardown_xeon(struct ntb_softc *ntb) { if (ntb->reg != NULL) - ntb_link_disable(ntb); + ntb_link_disable(ntb->device); } static void @@ -1418,7 +1433,7 @@ ntb_detect_max_mw(struct ntb_softc *ntb) return; } - if (HAS_FEATURE(NTB_SPLIT_BAR)) + if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT; else ntb->mw_count = XEON_SNB_MW_COUNT; @@ -1444,7 +1459,7 @@ ntb_detect_xeon(struct ntb_softc *ntb) * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP * errata workaround; only do one at a time. 
*/ - if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) + if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) ntb->features &= ~NTB_SDOORBELL_LOCKUP; conn_type = ppd & XEON_PPD_CONN_TYPE; @@ -1509,7 +1524,7 @@ ntb_xeon_init_dev(struct ntb_softc *ntb) ntb->peer_reg = &xeon_b2b_reg; ntb->xlat_reg = &xeon_sec_xlat; - if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) { + if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) % ntb->mw_count; ntb_printf(2, "Setting up MSIX mw idx %d means %u\n", @@ -1517,7 +1532,7 @@ ntb_xeon_init_dev(struct ntb_softc *ntb) rc = ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx, VM_MEMATTR_UNCACHEABLE); KASSERT(rc == 0, ("shouldn't fail")); - } else if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) { + } else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { /* * There is a Xeon hardware errata related to writes to SDOORBELL or * B2BDOORBELL in conjunction with inbound access to NTB MMIO space, @@ -1532,7 +1547,7 @@ ntb_xeon_init_dev(struct ntb_softc *ntb) rc = ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx, VM_MEMATTR_UNCACHEABLE); KASSERT(rc == 0, ("shouldn't fail")); - } else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14)) + } else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14)) /* * HW Errata on bit 14 of b2bdoorbell register. Writes will not be * mirrored to the remote system. 
Shrink the number of bits by one, @@ -1610,7 +1625,7 @@ ntb_atom_init_dev(struct ntb_softc *ntb) return (error); /* Initiate PCI-E link training */ - ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb); @@ -1662,7 +1677,7 @@ xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx, struct ntb_pci_bar_info *bar; uint8_t bar_sz; - if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3) + if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3) return; bar = &ntb->bar_info[idx]; @@ -1722,7 +1737,7 @@ xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx) struct ntb_pci_bar_info *bar; bar = &ntb->bar_info[idx]; - if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) { + if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) { ntb_reg_write(4, bar->pbarxlat_off, base_addr); base_addr = ntb_reg_read(4, bar->pbarxlat_off); } else { @@ -1737,7 +1752,7 @@ xeon_setup_msix_bar(struct ntb_softc *ntb) { enum ntb_bar bar_num; - if (!HAS_FEATURE(NTB_SB01BASE_LOCKUP)) + if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) return (0); bar_num = ntb_mw_to_bar(ntb, ntb->msix_mw_idx); @@ -1790,7 +1805,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr, bar_addr = addr->bar0_addr; else if (b2b_bar_num == NTB_B2B_BAR_1) bar_addr = addr->bar2_addr64; - else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR)) + else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) bar_addr = addr->bar4_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2) bar_addr = addr->bar4_addr32; @@ -1810,7 +1825,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr, */ xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1, b2b_bar_num); - if (HAS_FEATURE(NTB_SPLIT_BAR)) { + if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32, 
NTB_B2B_BAR_2, b2b_bar_num); xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32, @@ -1823,7 +1838,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr, ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0); ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0); - if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) { + if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { size_t size, xlatoffset; switch (ntb_mw_to_bar(ntb, ntb->msix_mw_idx)) { @@ -1833,7 +1848,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr, break; case NTB_B2B_BAR_2: xlatoffset = XEON_SBAR4XLAT_OFFSET; - if (HAS_FEATURE(NTB_SPLIT_BAR)) + if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) size = 4; else size = 8; @@ -1869,7 +1884,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr, /* Set outgoing translation offsets */ xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1); - if (HAS_FEATURE(NTB_SPLIT_BAR)) { + if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2); xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3); } else @@ -1881,7 +1896,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr, bar_addr = peer_addr->bar0_addr; else if (b2b_bar_num == NTB_B2B_BAR_1) bar_addr = peer_addr->bar2_addr64; - else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR)) + else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) bar_addr = peer_addr->bar4_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2) bar_addr = peer_addr->bar4_addr32; @@ -1914,7 +1929,7 @@ link_is_up(struct ntb_softc *ntb) if (ntb->type == NTB_XEON) return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good || - !HAS_FEATURE(NTB_SB01BASE_LOCKUP))); + !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))); KASSERT(ntb->type == NTB_ATOM, ("ntb type")); return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0); @@ -1955,7 +1970,7 @@ atom_link_hb(void *arg) } if (ntb_poll_link(ntb)) - ntb_link_event(ntb); + 
ntb_link_event(ntb->device); if (!link_is_up(ntb) && atom_link_is_err(ntb)) { /* Link is down with error, proceed with recovery */ @@ -2006,21 +2021,10 @@ atom_perform_link_restart(struct ntb_softc *ntb) ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status); } -/* - * ntb_set_ctx() - associate a driver context with an ntb device - * @ntb: NTB device context - * @ctx: Driver context - * @ctx_ops: Driver context operations - * - * Associate a driver context and operations with a ntb device. The context is - * provided by the client driver, and the driver may associate a different - * context with each ntb device. - * - * Return: Zero if the context is associated, otherwise an error number. - */ -int -ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops) +static int +ntb_set_ctx(device_t dev, void *ctx, const struct ntb_ctx_ops *ops) { + struct ntb_softc *ntb = device_get_softc(dev); if (ctx == NULL || ops == NULL) return (EINVAL); @@ -2043,9 +2047,10 @@ ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops) * It is expected that this will only be used from contexts where the ctx_lock * is not needed to protect ntb_ctx lifetime. */ -void * -ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops) +static void * +ntb_get_ctx(device_t dev, const struct ntb_ctx_ops **ops) { + struct ntb_softc *ntb = device_get_softc(dev); KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus")); if (ops != NULL) @@ -2053,16 +2058,10 @@ ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops) return (ntb->ntb_ctx); } -/* - * ntb_clear_ctx() - disassociate any driver context from an ntb device - * @ntb: NTB device context - * - * Clear any association that may exist between a driver context and the ntb - * device. 
- */ -void -ntb_clear_ctx(struct ntb_softc *ntb) +static void +ntb_clear_ctx(device_t dev) { + struct ntb_softc *ntb = device_get_softc(dev); CTX_LOCK(ntb); ntb->ntb_ctx = NULL; @@ -2077,9 +2076,10 @@ ntb_clear_ctx(struct ntb_softc *ntb) * Notify the driver context that the link status may have changed. The driver * should call ntb_link_is_up() to get the current status. */ -void -ntb_link_event(struct ntb_softc *ntb) +static void +ntb_link_event(device_t dev) { + struct ntb_softc *ntb = device_get_softc(dev); CTX_LOCK(ntb); if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL) @@ -2101,8 +2101,9 @@ ntb_link_event(struct ntb_softc *ntb) * those bits are associated with the vector number. */ static void -ntb_db_event(struct ntb_softc *ntb, uint32_t vec) +ntb_db_event(device_t dev, uint32_t vec) { + struct ntb_softc *ntb = device_get_softc(dev); CTX_LOCK(ntb); if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL) @@ -2110,26 +2111,11 @@ ntb_db_event(struct ntb_softc *ntb, uint32_t vec) CTX_UNLOCK(ntb); } -/* - * ntb_link_enable() - enable the link on the secondary side of the ntb - * @ntb: NTB device context - * @max_speed: The maximum link speed expressed as PCIe generation number[0] - * @max_width: The maximum link width expressed as the number of PCIe lanes[0] - * - * Enable the link on the secondary side of the ntb. This can only be done - * from the primary side of the ntb in primary or b2b topology. The ntb device - * should train the link to its maximum speed and width, or the requested speed - * and width, whichever is smaller, if supported. - * - * Return: Zero on success, otherwise an error number. - * - * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed - * and width input will be ignored. 
- */ -int -ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused, - enum ntb_width w __unused) +static int +ntb_link_enable(device_t dev, enum ntb_speed speed __unused, + enum ntb_width width __unused) { + struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; ntb_printf(2, "%s\n", __func__); @@ -2141,7 +2127,7 @@ ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused, } if (ntb->conn_type == NTB_CONN_TRANSPARENT) { - ntb_link_event(ntb); + ntb_link_event(dev); return (0); } @@ -2149,49 +2135,39 @@ ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused, cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK); cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP; cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP; - if (HAS_FEATURE(NTB_SPLIT_BAR)) + if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP; ntb_reg_write(4, ntb->reg->ntb_ctl, cntl); return (0); } -/* - * ntb_link_disable() - disable the link on the secondary side of the ntb - * @ntb: NTB device context - * - * Disable the link on the secondary side of the ntb. This can only be done - * from the primary side of the ntb in primary or b2b topology. The ntb device - * should disable the link. Returning from this call must indicate that a - * barrier has passed, though with no more writes may pass in either direction - * across the link, except if this call returns an error number. - * - * Return: Zero on success, otherwise an error number. 
- */ -int -ntb_link_disable(struct ntb_softc *ntb) +static int +ntb_link_disable(device_t dev) { + struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; ntb_printf(2, "%s\n", __func__); if (ntb->conn_type == NTB_CONN_TRANSPARENT) { - ntb_link_event(ntb); + ntb_link_event(dev); return (0); } cntl = ntb_reg_read(4, ntb->reg->ntb_ctl); cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP); cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP); - if (HAS_FEATURE(NTB_SPLIT_BAR)) + if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP); cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK; ntb_reg_write(4, ntb->reg->ntb_ctl, cntl); return (0); } -bool -ntb_link_enabled(struct ntb_softc *ntb) +static bool +ntb_link_enabled(device_t dev) { + struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; if (ntb->type == NTB_ATOM) { @@ -2275,7 +2251,7 @@ ntb_poll_link(struct ntb_softc *ntb) ntb->lnk_sta = reg_val; - if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) { + if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { if (_xeon_link_is_up(ntb)) { if (!ntb->peer_msix_good) { callout_reset(&ntb->peer_msix_work, 0, @@ -2426,7 +2402,7 @@ ntb_sysctl_init(struct ntb_softc *ntb) CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_xlat, sysctl_handle_register, "QU", "Incoming XLAT23 register"); - if (HAS_FEATURE(NTB_SPLIT_BAR)) { + if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_xlat, @@ -2446,7 +2422,7 @@ ntb_sysctl_init(struct ntb_softc *ntb) CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_limit, sysctl_handle_register, "QU", "Incoming LMT23 register"); - if (HAS_FEATURE(NTB_SPLIT_BAR)) { + if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_limit, @@ -2537,7 +2513,7 @@ 
ntb_sysctl_init(struct ntb_softc *ntb) CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off, sysctl_handle_register, "QU", "Outgoing XLAT23 register"); - if (HAS_FEATURE(NTB_SPLIT_BAR)) { + if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off, @@ -2557,7 +2533,7 @@ ntb_sysctl_init(struct ntb_softc *ntb) CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | XEON_PBAR2LMT_OFFSET, sysctl_handle_register, "QU", "Outgoing LMT23 register"); - if (HAS_FEATURE(NTB_SPLIT_BAR)) { + if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | XEON_PBAR4LMT_OFFSET, @@ -2581,7 +2557,7 @@ ntb_sysctl_init(struct ntb_softc *ntb) CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_base, sysctl_handle_register, "QU", "Secondary BAR23 base register"); - if (HAS_FEATURE(NTB_SPLIT_BAR)) { + if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_base, @@ -2626,7 +2602,7 @@ sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS) unsigned old, new; int error; - old = ntb_link_enabled(ntb); + old = ntb_link_enabled(ntb->device); error = SYSCTL_OUT(req, &old, sizeof(old)); if (error != 0 || req->newptr == NULL) @@ -2640,9 +2616,9 @@ sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS) (new != 0)? 
"en" : "dis"); if (new != 0) - error = ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + error = ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); else - error = ntb_link_disable(ntb); + error = ntb_link_disable(ntb->device); return (error); } @@ -2657,7 +2633,7 @@ sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS) sbuf_new_for_sysctl(&sb, NULL, 32, req); - if (ntb_link_is_up(ntb, &speed, &width)) + if (ntb_link_is_up(ntb->device, &speed, &width)) sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u", (unsigned)speed, (unsigned)width); else @@ -2678,7 +2654,7 @@ sysctl_handle_link_status(SYSCTL_HANDLER_ARGS) unsigned res; int error; - res = ntb_link_is_up(ntb, NULL, NULL); + res = ntb_link_is_up(ntb->device, NULL, NULL); error = SYSCTL_OUT(req, &res, sizeof(res)); if (error || !req->newptr) @@ -2787,22 +2763,22 @@ ntb_exchange_msix(void *ctx) goto msix_done; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { - ntb_peer_spad_write(ntb, NTB_MSIX_DATA0 + i, + ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i, ntb->msix_data[i].nmd_data); - ntb_peer_spad_write(ntb, NTB_MSIX_OFS0 + i, + ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i, ntb->msix_data[i].nmd_ofs - ntb->msix_xlat); } - ntb_peer_spad_write(ntb, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD); + ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD); - ntb_spad_read(ntb, NTB_MSIX_GUARD, &val); + ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val); if (val != NTB_MSIX_VER_GUARD) goto reschedule; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { - ntb_spad_read(ntb, NTB_MSIX_DATA0 + i, &val); + ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val); ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val); ntb->peer_msix_data[i].nmd_data = val; - ntb_spad_read(ntb, NTB_MSIX_OFS0 + i, &val); + ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val); ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val); ntb->peer_msix_data[i].nmd_ofs = val; } @@ -2810,8 +2786,8 @@ ntb_exchange_msix(void *ctx) 
ntb->peer_msix_done = true; msix_done: - ntb_peer_spad_write(ntb, NTB_MSIX_DONE, NTB_MSIX_RECEIVED); - ntb_spad_read(ntb, NTB_MSIX_DONE, &val); + ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED); + ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val); if (val != NTB_MSIX_RECEIVED) goto reschedule; @@ -2821,7 +2797,7 @@ ntb_exchange_msix(void *ctx) msix_good: ntb_poll_link(ntb); - ntb_link_event(ntb); + ntb_link_event(ntb->device); return; reschedule: @@ -2831,38 +2807,25 @@ ntb_exchange_msix(void *ctx) hz * (ntb->peer_msix_good ? 2 : 1) / 100, ntb_exchange_msix, ntb); } else - ntb_spad_clear(ntb); + ntb_spad_clear(ntb->device); } /* * Public API to the rest of the OS */ -/** - * ntb_get_max_spads() - get the total scratch regs usable - * @ntb: pointer to ntb_softc instance - * - * This function returns the max 32bit scratchpad registers usable by the - * upper layer. - * - * RETURNS: total number of scratch pad registers available - */ -uint8_t -ntb_get_max_spads(struct ntb_softc *ntb) +static uint8_t +ntb_spad_count(device_t dev) { + struct ntb_softc *ntb = device_get_softc(dev); return (ntb->spad_count); } -/* - * ntb_mw_count() - Get the number of memory windows available for KPI - * consumers. - * - * (Excludes any MW wholly reserved for register access.) - */ -uint8_t -ntb_mw_count(struct ntb_softc *ntb) +static uint8_t +ntb_mw_count(device_t dev) { + struct ntb_softc *ntb = device_get_softc(dev); uint8_t res; res = ntb->mw_count; @@ -2873,20 +2836,10 @@ ntb_mw_count(struct ntb_softc *ntb) return (res); } -/** - * ntb_spad_write() - write to the secondary scratchpad register - * @ntb: pointer to ntb_softc instance - * @idx: index to the scratchpad register, 0 based - * @val: the data value to put into the register - * - * This function allows writing of a 32bit value to the indexed scratchpad - * register. The register resides on the secondary (external) side. - * - * RETURNS: An appropriate ERRNO error value on error, or zero for success. 
- */ -int -ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val) +static int +ntb_spad_write(device_t dev, unsigned int idx, uint32_t val) { + struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); @@ -2899,29 +2852,20 @@ ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val) /* * Zeros the local scratchpad. */ -void -ntb_spad_clear(struct ntb_softc *ntb) +static void +ntb_spad_clear(device_t dev) { + struct ntb_softc *ntb = device_get_softc(dev); unsigned i; for (i = 0; i < ntb->spad_count; i++) - ntb_spad_write(ntb, i, 0); + ntb_spad_write(dev, i, 0); } -/** - * ntb_spad_read() - read from the primary scratchpad register - * @ntb: pointer to ntb_softc instance - * @idx: index to scratchpad register, 0 based - * @val: pointer to 32bit integer for storing the register value - * - * This function allows reading of the 32bit scratchpad register on - * the primary (internal) side. - * - * RETURNS: An appropriate ERRNO error value on error, or zero for success. - */ -int -ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val) +static int +ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val) { + struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); @@ -2931,25 +2875,15 @@ ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val) return (0); } -/** - * ntb_peer_spad_write() - write to the secondary scratchpad register - * @ntb: pointer to ntb_softc instance - * @idx: index to the scratchpad register, 0 based - * @val: the data value to put into the register - * - * This function allows writing of a 32bit value to the indexed scratchpad - * register. The register resides on the secondary (external) side. - * - * RETURNS: An appropriate ERRNO error value on error, or zero for success. 
- */ -int -ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val) +static int +ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val) { + struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); - if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) + if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val); else ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val); @@ -2957,25 +2891,15 @@ ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val) return (0); } -/** - * ntb_peer_spad_read() - read from the primary scratchpad register - * @ntb: pointer to ntb_softc instance - * @idx: index to scratchpad register, 0 based - * @val: pointer to 32bit integer for storing the register value - * - * This function allows reading of the 32bit scratchpad register on - * the primary (internal) side. - * - * RETURNS: An appropriate ERRNO error value on error, or zero for success. - */ -int -ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val) +static int +ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val) { + struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); - if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) + if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) *val = ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4); else *val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4); @@ -2983,34 +2907,18 @@ ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val) return (0); } -/* - * ntb_mw_get_range() - get the range of a memory window - * @ntb: NTB device context - * @idx: Memory window number - * @base: OUT - the base address for mapping the memory window - * @size: OUT - the size for mapping the memory window - * @align: OUT - the base alignment for translating the memory window - * @align_size: OUT - the size alignment for translating the memory window - * - * Get the range of a memory window. 
NULL may be given for any output - * parameter if the value is not needed. The base and size may be used for - * mapping the memory window, to access the peer memory. The alignment and - * size may be used for translating the memory window, for the peer to access - * memory on the local system. - * - * Return: Zero on success, otherwise an error number. - */ -int -ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base, +static int +ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base, caddr_t *vbase, size_t *size, size_t *align, size_t *align_size, bus_addr_t *plimit) { + struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; bus_addr_t limit; size_t bar_b2b_off; enum ntb_bar bar_num; - if (mw_idx >= ntb_mw_count(ntb)) + if (mw_idx >= ntb_mw_count(dev)) return (EINVAL); mw_idx = ntb_user_mw_to_idx(ntb, mw_idx); @@ -3043,33 +2951,17 @@ ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base, return (0); } -/* - * ntb_mw_set_trans() - set the translation of a memory window - * @ntb: NTB device context - * @idx: Memory window number - * @addr: The dma address local memory to expose to the peer - * @size: The size of the local memory to expose to the peer - * - * Set the translation of a memory window. The peer may access local memory - * through the window starting at the address, up to the size. The address - * must be aligned to the alignment specified by ntb_mw_get_range(). The size - * must be aligned to the size alignment specified by ntb_mw_get_range(). The - * address must be below the plimit specified by ntb_mw_get_range() (i.e. for - * 32-bit BARs). - * - * Return: Zero on success, otherwise an error number. 
- */ -int -ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr, - size_t size) +static int +ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size) { + struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; uint64_t base, limit, reg_val; size_t bar_size, mw_size; uint32_t base_reg, xlat_reg, limit_reg; enum ntb_bar bar_num; - if (idx >= ntb_mw_count(ntb)) + if (idx >= ntb_mw_count(dev)) return (EINVAL); idx = ntb_user_mw_to_idx(ntb, idx); @@ -3147,37 +3039,20 @@ ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr, return (0); } -/* - * ntb_mw_clear_trans() - clear the translation of a memory window - * @ntb: NTB device context - * @idx: Memory window number - * - * Clear the translation of a memory window. The peer may no longer access - * local memory through the window. - * - * Return: Zero on success, otherwise an error number. - */ -int -ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx) +static int +ntb_mw_clear_trans(device_t dev, unsigned mw_idx) { - return (ntb_mw_set_trans(ntb, mw_idx, 0, 0)); + return (ntb_mw_set_trans(dev, mw_idx, 0, 0)); } -/* - * ntb_mw_get_wc - Get the write-combine status of a memory window - * - * Returns: Zero on success, setting *wc; otherwise an error number (e.g. if - * idx is an invalid memory window). - * - * Mode is a VM_MEMATTR_* type. - */ -int -ntb_mw_get_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t *mode) +static int +ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode) { + struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; - if (idx >= ntb_mw_count(ntb)) + if (idx >= ntb_mw_count(dev)) return (EINVAL); idx = ntb_user_mw_to_idx(ntb, idx); @@ -3186,21 +3061,12 @@ ntb_mw_get_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t *mode) return (0); } -/* - * ntb_mw_set_wc - Set the write-combine status of a memory window - * - * If 'mode' matches the current status, this does nothing and succeeds. 
Mode - * is a VM_MEMATTR_* type. - * - * Returns: Zero on success, setting the caching attribute on the virtual - * mapping of the BAR; otherwise an error number (e.g. if idx is an invalid - * memory window, or if changing the caching attribute fails). - */ -int -ntb_mw_set_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode) +static int +ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode) { + struct ntb_softc *ntb = device_get_softc(dev); - if (idx >= ntb_mw_count(ntb)) + if (idx >= ntb_mw_count(dev)) return (EINVAL); idx = ntb_user_mw_to_idx(ntb, idx); @@ -3224,26 +3090,19 @@ ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode) return (rc); } -/** - * ntb_peer_db_set() - Set the doorbell on the secondary/external side - * @ntb: pointer to ntb_softc instance - * @bit: doorbell bits to ring - * - * This function allows triggering of a doorbell on the secondary/external - * side that will initiate an interrupt on the remote host - */ -void -ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit) +static void +ntb_peer_db_set(device_t dev, uint64_t bit) { + struct ntb_softc *ntb = device_get_softc(dev); - if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) { + if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { struct ntb_pci_bar_info *lapic; unsigned i; lapic = ntb->peer_lapic_bar; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { - if ((bit & ntb_db_vector_mask(ntb, i)) != 0) + if ((bit & ntb_db_vector_mask(dev, i)) != 0) bus_space_write_4(lapic->pci_bus_tag, lapic->pci_bus_handle, ntb->peer_msix_data[i].nmd_ofs, @@ -3252,7 +3111,7 @@ ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit) return; } - if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) { + if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit); return; } @@ -3260,25 +3119,16 @@ ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit) db_iowrite(ntb, ntb->peer_reg->db_bell, bit); } -/* - * ntb_get_peer_db_addr() - Return the address of the remote doorbell register, - 
* as well as the size of the register (via *sz_out). - * - * This function allows a caller using I/OAT DMA to chain the remote doorbell - * ring to its memory window write. - * - * Note that writing the peer doorbell via a memory window will *not* generate - * an interrupt on the remote host; that must be done seperately. - */ -bus_addr_t -ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out) +static int +ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size) { + struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; uint64_t regoff; - KASSERT(sz_out != NULL, ("must be non-NULL")); + KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL")); - if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) { + if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { bar = &ntb->bar_info[NTB_CONFIG_BAR]; regoff = ntb->peer_reg->db_bell; } else { @@ -3290,56 +3140,42 @@ ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out) } KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh")); - *sz_out = ntb->reg->db_size; /* HACK: Specific to current x86 bus implementation. */ - return ((uint64_t)bar->pci_bus_handle + regoff); + *db_addr = ((uint64_t)bar->pci_bus_handle + regoff); + *db_size = ntb->reg->db_size; + return (0); } -/* - * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb - * @ntb: NTB device context - * - * Hardware may support different number or arrangement of doorbell bits. - * - * Return: A mask of doorbell bits supported by the ntb. - */ -uint64_t -ntb_db_valid_mask(struct ntb_softc *ntb) +static uint64_t +ntb_db_valid_mask(device_t dev) { + struct ntb_softc *ntb = device_get_softc(dev); return (ntb->db_valid_mask); } -/* - * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector - * @ntb: NTB device context - * @vector: Doorbell vector number - * - * Each interrupt vector may have a different number or arrangement of bits. - * - * Return: A mask of doorbell bits serviced by a vector. 
- */ -uint64_t -ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector) +static int +ntb_db_vector_count(device_t dev) +{ + struct ntb_softc *ntb = device_get_softc(dev); + + return (ntb->db_vec_count); +} + +static uint64_t +ntb_db_vector_mask(device_t dev, uint32_t vector) { + struct ntb_softc *ntb = device_get_softc(dev); if (vector > ntb->db_vec_count) return (0); return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector)); } -/** - * ntb_link_is_up() - get the current ntb link state - * @ntb: NTB device context - * @speed: OUT - The link speed expressed as PCIe generation number - * @width: OUT - The link width expressed as the number of PCIe lanes - * - * RETURNS: true or false based on the hardware link state - */ -bool -ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed, - enum ntb_width *width) +static bool +ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width) { + struct ntb_softc *ntb = device_get_softc(dev); if (speed != NULL) *speed = ntb_link_sta_speed(ntb); @@ -3359,17 +3195,45 @@ save_bar_parameters(struct ntb_pci_bar_info *bar) bar->vbase = rman_get_virtual(bar->pci_resource); } -device_t -ntb_get_device(struct ntb_softc *ntb) -{ - - return (ntb->device); -} - -/* Export HW-specific errata information. 
*/ -bool -ntb_has_feature(struct ntb_softc *ntb, uint32_t feature) -{ +static device_method_t ntb_intel_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, ntb_probe), + DEVMETHOD(device_attach, ntb_attach), + DEVMETHOD(device_detach, ntb_detach), + /* NTB interface */ + DEVMETHOD(ntb_link_is_up, ntb_link_is_up), + DEVMETHOD(ntb_link_enable, ntb_link_enable), + DEVMETHOD(ntb_link_disable, ntb_link_disable), + DEVMETHOD(ntb_link_enabled, ntb_link_enabled), + DEVMETHOD(ntb_set_ctx, ntb_set_ctx), + DEVMETHOD(ntb_get_ctx, ntb_get_ctx), + DEVMETHOD(ntb_clear_ctx, ntb_clear_ctx), + DEVMETHOD(ntb_mw_count, ntb_mw_count), + DEVMETHOD(ntb_mw_get_range, ntb_mw_get_range), + DEVMETHOD(ntb_mw_set_trans, ntb_mw_set_trans), + DEVMETHOD(ntb_mw_clear_trans, ntb_mw_clear_trans), + DEVMETHOD(ntb_mw_get_wc, ntb_mw_get_wc), + DEVMETHOD(ntb_mw_set_wc, ntb_mw_set_wc), + DEVMETHOD(ntb_spad_count, ntb_spad_count), + DEVMETHOD(ntb_spad_clear, ntb_spad_clear), + DEVMETHOD(ntb_spad_write, ntb_spad_write), + DEVMETHOD(ntb_spad_read, ntb_spad_read), + DEVMETHOD(ntb_peer_spad_write, ntb_peer_spad_write), + DEVMETHOD(ntb_peer_spad_read, ntb_peer_spad_read), + DEVMETHOD(ntb_db_valid_mask, ntb_db_valid_mask), + DEVMETHOD(ntb_db_vector_count, ntb_db_vector_count), + DEVMETHOD(ntb_db_vector_mask, ntb_db_vector_mask), + DEVMETHOD(ntb_db_clear, ntb_db_clear), + DEVMETHOD(ntb_db_clear_mask, ntb_db_clear_mask), + DEVMETHOD(ntb_db_read, ntb_db_read), + DEVMETHOD(ntb_db_set_mask, ntb_db_set_mask), + DEVMETHOD(ntb_peer_db_addr, ntb_peer_db_addr), + DEVMETHOD(ntb_peer_db_set, ntb_peer_db_set), + DEVMETHOD_END +}; - return (HAS_FEATURE(feature)); -} +static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods, + sizeof(struct ntb_softc)); +DRIVER_MODULE(ntb_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL); +MODULE_DEPEND(ntb_intel, ntb, 1, 1, 1); +MODULE_VERSION(ntb_intel, 1); diff --git a/sys/dev/ntb/ntb_hw/ntb_hw.h b/sys/dev/ntb/ntb_hw/ntb_hw.h deleted file mode 100644 index 
f05acda6a..000000000 --- a/sys/dev/ntb/ntb_hw/ntb_hw.h +++ /dev/null @@ -1,125 +0,0 @@ -/*- - * Copyright (C) 2013 Intel Corporation - * Copyright (C) 2015 EMC Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _NTB_HW_H_ -#define _NTB_HW_H_ - -struct ntb_softc; - -#define NTB_MAX_NUM_MW 3 - -enum ntb_speed { - NTB_SPEED_AUTO = -1, - NTB_SPEED_NONE = 0, - NTB_SPEED_GEN1 = 1, - NTB_SPEED_GEN2 = 2, - NTB_SPEED_GEN3 = 3, -}; - -enum ntb_width { - NTB_WIDTH_AUTO = -1, - NTB_WIDTH_NONE = 0, - NTB_WIDTH_1 = 1, - NTB_WIDTH_2 = 2, - NTB_WIDTH_4 = 4, - NTB_WIDTH_8 = 8, - NTB_WIDTH_12 = 12, - NTB_WIDTH_16 = 16, - NTB_WIDTH_32 = 32, -}; - -SYSCTL_DECL(_hw_ntb); - -typedef void (*ntb_db_callback)(void *data, uint32_t vector); -typedef void (*ntb_event_callback)(void *data); - -struct ntb_ctx_ops { - ntb_event_callback link_event; - ntb_db_callback db_event; -}; - -device_t ntb_get_device(struct ntb_softc *); - -bool ntb_link_is_up(struct ntb_softc *, enum ntb_speed *, enum ntb_width *); -void ntb_link_event(struct ntb_softc *); -int ntb_link_enable(struct ntb_softc *, enum ntb_speed, enum ntb_width); -int ntb_link_disable(struct ntb_softc *); -bool ntb_link_enabled(struct ntb_softc *); - -int ntb_set_ctx(struct ntb_softc *, void *, const struct ntb_ctx_ops *); -void *ntb_get_ctx(struct ntb_softc *, const struct ntb_ctx_ops **); -void ntb_clear_ctx(struct ntb_softc *); - -uint8_t ntb_mw_count(struct ntb_softc *); -int ntb_mw_get_range(struct ntb_softc *, unsigned mw_idx, vm_paddr_t *base, - caddr_t *vbase, size_t *size, size_t *align, size_t *align_size, - bus_addr_t *plimit); -int ntb_mw_set_trans(struct ntb_softc *, unsigned mw_idx, bus_addr_t, size_t); -int ntb_mw_clear_trans(struct ntb_softc *, unsigned mw_idx); - -int ntb_mw_get_wc(struct ntb_softc *, unsigned mw_idx, vm_memattr_t *mode); -int ntb_mw_set_wc(struct ntb_softc *, unsigned mw_idx, vm_memattr_t mode); - -uint8_t ntb_get_max_spads(struct ntb_softc *ntb); -void ntb_spad_clear(struct ntb_softc *ntb); -int ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val); -int ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val); -int ntb_peer_spad_write(struct 
ntb_softc *ntb, unsigned int idx, - uint32_t val); -int ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, - uint32_t *val); - -uint64_t ntb_db_valid_mask(struct ntb_softc *); -uint64_t ntb_db_vector_mask(struct ntb_softc *, uint32_t vector); -bus_addr_t ntb_get_peer_db_addr(struct ntb_softc *, vm_size_t *sz_out); - -void ntb_db_clear(struct ntb_softc *, uint64_t bits); -void ntb_db_clear_mask(struct ntb_softc *, uint64_t bits); -uint64_t ntb_db_read(struct ntb_softc *); -void ntb_db_set_mask(struct ntb_softc *, uint64_t bits); -void ntb_peer_db_set(struct ntb_softc *, uint64_t bits); - -#define XEON_SPAD_COUNT 16 -#define ATOM_SPAD_COUNT 16 - -/* Hardware owns the low 16 bits of features. */ -#define NTB_BAR_SIZE_4K (1 << 0) -#define NTB_SDOORBELL_LOCKUP (1 << 1) -#define NTB_SB01BASE_LOCKUP (1 << 2) -#define NTB_B2BDOORBELL_BIT14 (1 << 3) -/* Software/configuration owns the top 16 bits. */ -#define NTB_SPLIT_BAR (1ull << 16) - -#define NTB_FEATURES_STR \ - "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \ - "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K" - -bool ntb_has_feature(struct ntb_softc *, uint32_t); - -#endif /* _NTB_HW_H_ */ diff --git a/sys/dev/ntb/ntb_hw/ntb_regs.h b/sys/dev/ntb/ntb_hw/ntb_regs.h index fb445d7d8..a03773627 100644 --- a/sys/dev/ntb/ntb_hw/ntb_regs.h +++ b/sys/dev/ntb/ntb_hw/ntb_regs.h @@ -1,4 +1,5 @@ /*- + * Copyright (c) 2016 Alexander Motin * Copyright (C) 2013 Intel Corporation * Copyright (C) 2015 EMC Corporation * All rights reserved. 
@@ -76,6 +77,7 @@ #define XEON_SDBMSK_OFFSET 0x0066 #define XEON_USMEMMISS_OFFSET 0x0070 #define XEON_SPAD_OFFSET 0x0080 +#define XEON_SPAD_COUNT 16 #define XEON_SPADSEMA4_OFFSET 0x00c0 #define XEON_WCCNTRL_OFFSET 0x00e0 #define XEON_UNCERRSTS_OFFSET 0x014c @@ -104,6 +106,7 @@ #define ATOM_NTBCNTL_OFFSET 0x0060 #define ATOM_EBDF_OFFSET 0x0064 #define ATOM_SPAD_OFFSET 0x0080 +#define ATOM_SPAD_COUNT 16 #define ATOM_SPADSEMA_OFFSET 0x00c0 #define ATOM_STKYSPAD_OFFSET 0x00c4 #define ATOM_PBAR2XLAT_OFFSET 0x8008 diff --git a/sys/dev/ntb/ntb_if.m b/sys/dev/ntb/ntb_if.m new file mode 100644 index 000000000..bb3b7bd10 --- /dev/null +++ b/sys/dev/ntb/ntb_if.m @@ -0,0 +1,497 @@ +#- +# Copyright (c) 2016 Alexander Motin +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD$ +# + +#include +#include + +INTERFACE ntb; + +HEADER { + enum ntb_speed { + NTB_SPEED_AUTO = -1, + NTB_SPEED_NONE = 0, + NTB_SPEED_GEN1 = 1, + NTB_SPEED_GEN2 = 2, + NTB_SPEED_GEN3 = 3, + }; + + enum ntb_width { + NTB_WIDTH_AUTO = -1, + NTB_WIDTH_NONE = 0, + NTB_WIDTH_1 = 1, + NTB_WIDTH_2 = 2, + NTB_WIDTH_4 = 4, + NTB_WIDTH_8 = 8, + NTB_WIDTH_12 = 12, + NTB_WIDTH_16 = 16, + NTB_WIDTH_32 = 32, + }; + + typedef void (*ntb_db_callback)(void *data, uint32_t vector); + typedef void (*ntb_event_callback)(void *data); + struct ntb_ctx_ops { + ntb_event_callback link_event; + ntb_db_callback db_event; + }; +}; + +# +# ntb_link_is_up() - get the current ntb link state +# @ntb: NTB device context +# @speed: OUT - The link speed expressed as PCIe generation number +# @width: OUT - The link width expressed as the number of PCIe lanes +# +# RETURNS: true or false based on the hardware link state +# +METHOD bool link_is_up { + device_t ntb; + enum ntb_speed *speed; + enum ntb_width *width; +}; + +# +# ntb_link_enable() - enable the link on the secondary side of the ntb +# @ntb: NTB device context +# @max_speed: The maximum link speed expressed as PCIe generation number[0] +# @max_width: The maximum link width expressed as the number of PCIe lanes[0] +# +# Enable the link on the secondary side of the ntb. This can only be done +# from the primary side of the ntb in primary or b2b topology. 
The ntb device +# should train the link to its maximum speed and width, or the requested speed +# and width, whichever is smaller, if supported. +# +# Return: Zero on success, otherwise an error number. +# +# [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed +# and width input will be ignored. +# +METHOD int link_enable { + device_t ntb; + enum ntb_speed speed; + enum ntb_width width; +}; + +# +# ntb_link_disable() - disable the link on the secondary side of the ntb +# @ntb: NTB device context +# +# Disable the link on the secondary side of the ntb. This can only be done +# from the primary side of the ntb in primary or b2b topology. The ntb device +# should disable the link. Returning from this call must indicate that a +# barrier has passed, after which no more writes may pass in either direction +# across the link, except if this call returns an error number. +# +# Return: Zero on success, otherwise an error number. +# +METHOD int link_disable { + device_t ntb; +}; + +# +# get enable status of the link on the secondary side of the ntb +# +METHOD bool link_enabled { + device_t ntb; +}; + +# +# ntb_set_ctx() - associate a driver context with an ntb device +# @ntb: NTB device context +# @ctx: Driver context +# @ctx_ops: Driver context operations +# +# Associate a driver context and operations with an ntb device. The context is +# provided by the client driver, and the driver may associate a different +# context with each ntb device. +# +# Return: Zero if the context is associated, otherwise an error number. +# +METHOD int set_ctx { + device_t ntb; + void *ctx; + const struct ntb_ctx_ops *ctx_ops; +}; + +# +# ntb_get_ctx() - get a driver context associated with an ntb device +# @ntb: NTB device context +# @ctx_ops: Driver context operations +# +# Get a driver context and operations associated with an ntb device.
+# +METHOD void * get_ctx { + device_t ntb; + const struct ntb_ctx_ops **ctx_ops; +}; + +# +# ntb_clear_ctx() - disassociate any driver context from an ntb device +# @ntb: NTB device context +# +# Clear any association that may exist between a driver context and the ntb +# device. +# +METHOD void clear_ctx { + device_t ntb; +}; + +# +# ntb_mw_count() - Get the number of memory windows available for KPI +# consumers. +# +# (Excludes any MW wholly reserved for register access.) +# +METHOD uint8_t mw_count { + device_t ntb; +}; + +# +# ntb_mw_get_range() - get the range of a memory window +# @ntb: NTB device context +# @idx: Memory window number +# @base: OUT - the base address for mapping the memory window +# @size: OUT - the size for mapping the memory window +# @align: OUT - the base alignment for translating the memory window +# @align_size: OUT - the size alignment for translating the memory window +# +# Get the range of a memory window. NULL may be given for any output +# parameter if the value is not needed. The base and size may be used for +# mapping the memory window, to access the peer memory. The alignment and +# size may be used for translating the memory window, for the peer to access +# memory on the local system. +# +# Return: Zero on success, otherwise an error number. +# +METHOD int mw_get_range { + device_t ntb; + unsigned mw_idx; + vm_paddr_t *base; + caddr_t *vbase; + size_t *size; + size_t *align; + size_t *align_size; + bus_addr_t *plimit; +}; + +# +# ntb_mw_set_trans() - set the translation of a memory window +# @ntb: NTB device context +# @idx: Memory window number +# @addr: The dma address local memory to expose to the peer +# @size: The size of the local memory to expose to the peer +# +# Set the translation of a memory window. The peer may access local memory +# through the window starting at the address, up to the size. The address +# must be aligned to the alignment specified by ntb_mw_get_range(). 
The size +# must be aligned to the size alignment specified by ntb_mw_get_range(). The +# address must be below the plimit specified by ntb_mw_get_range() (i.e. for +# 32-bit BARs). +# +# Return: Zero on success, otherwise an error number. +# +METHOD int mw_set_trans { + device_t ntb; + unsigned mw_idx; + bus_addr_t addr; + size_t size; +}; + +# +# ntb_mw_clear_trans() - clear the translation of a memory window +# @ntb: NTB device context +# @mw_idx: Memory window number +# +# Clear the translation of a memory window. The peer may no longer access +# local memory through the window. +# +# Return: Zero on success, otherwise an error number. +# +METHOD int mw_clear_trans { + device_t ntb; + unsigned mw_idx; +}; + +# +# ntb_mw_get_wc - Get the write-combine status of a memory window +# +# Returns: Zero on success, setting *mode; otherwise an error number (e.g. if +# mw_idx is an invalid memory window). +# +# Mode is a VM_MEMATTR_* type. +# +METHOD int mw_get_wc { + device_t ntb; + unsigned mw_idx; + vm_memattr_t *mode; +}; + +# +# ntb_mw_set_wc - Set the write-combine status of a memory window +# +# If 'mode' matches the current status, this does nothing and succeeds. Mode +# is a VM_MEMATTR_* type. +# +# Returns: Zero on success, setting the caching attribute on the virtual +# mapping of the BAR; otherwise an error number (e.g. if mw_idx is an invalid +# memory window, or if changing the caching attribute fails). +# +METHOD int mw_set_wc { + device_t ntb; + unsigned mw_idx; + vm_memattr_t mode; +}; + +# +# ntb_spad_count() - get the total scratch regs usable +# @ntb: pointer to ntb_softc instance +# +# This function returns the max 32bit scratchpad registers usable by the +# upper layer. +# +# RETURNS: total number of scratch pad registers available +# +METHOD uint8_t spad_count { + device_t ntb; +}; + +# +# ntb_spad_clear() - zero local scratch registers +# @ntb: pointer to ntb_softc instance +# +# This function overwrites all local scratchpad registers with zeroes.
+# +METHOD void spad_clear { + device_t ntb; +}; + +# +# ntb_spad_write() - write to the primary scratchpad register +# @ntb: pointer to ntb_softc instance +# @idx: index to the scratchpad register, 0 based +# @val: the data value to put into the register +# +# This function allows writing of a 32bit value to the indexed scratchpad +# register. The register resides on the primary (internal) side. +# +# RETURNS: An appropriate ERRNO error value on error, or zero for success. +# +METHOD int spad_write { + device_t ntb; + unsigned int idx; + uint32_t val; +}; + +# +# ntb_spad_read() - read from the primary scratchpad register +# @ntb: pointer to ntb_softc instance +# @idx: index to scratchpad register, 0 based +# @val: pointer to 32bit integer for storing the register value +# +# This function allows reading of the 32bit scratchpad register on +# the primary (internal) side. +# +# RETURNS: An appropriate ERRNO error value on error, or zero for success. +# +METHOD int spad_read { + device_t ntb; + unsigned int idx; + uint32_t *val; +}; + +# +# ntb_peer_spad_write() - write to the secondary scratchpad register +# @ntb: pointer to ntb_softc instance +# @idx: index to the scratchpad register, 0 based +# @val: the data value to put into the register +# +# This function allows writing of a 32bit value to the indexed scratchpad +# register. The register resides on the secondary (external) side. +# +# RETURNS: An appropriate ERRNO error value on error, or zero for success. +# +METHOD int peer_spad_write { + device_t ntb; + unsigned int idx; + uint32_t val; +}; + +# +# ntb_peer_spad_read() - read from the secondary scratchpad register +# @ntb: pointer to ntb_softc instance +# @idx: index to scratchpad register, 0 based +# @val: pointer to 32bit integer for storing the register value +# +# This function allows reading of the 32bit scratchpad register on +# the secondary (external) side. +# +# RETURNS: An appropriate ERRNO error value on error, or zero for success.
+# +METHOD int peer_spad_read { + device_t ntb; + unsigned int idx; + uint32_t *val; +}; + +# +# ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb +# @ntb: NTB device context +# +# Hardware may support different number or arrangement of doorbell bits. +# +# Return: A mask of doorbell bits supported by the ntb. +# +METHOD uint64_t db_valid_mask { + device_t ntb; +}; + +# +# ntb_db_vector_count() - get the number of doorbell interrupt vectors +# @ntb: NTB device context. +# +# Hardware may support different number of interrupt vectors. +# +# Return: The number of doorbell interrupt vectors. +# +METHOD int db_vector_count { + device_t ntb; +}; + +# +# ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector +# @ntb: NTB device context +# @vector: Doorbell vector number +# +# Each interrupt vector may have a different number or arrangement of bits. +# +# Return: A mask of doorbell bits serviced by a vector. +# +METHOD uint64_t db_vector_mask { + device_t ntb; + uint32_t vector; +}; + +# +# ntb_peer_db_addr() - address and size of the peer doorbell register +# @ntb: NTB device context. +# @db_addr: OUT - The address of the peer doorbell register. +# @db_size: OUT - The number of bytes to write the peer doorbell register. +# +# Return the address of the peer doorbell register. This may be used, for +# example, by drivers that offload memory copy operations to a dma engine. +# The drivers may wish to ring the peer doorbell at the completion of memory +# copy operations. For efficiency, and to simplify ordering of operations +# between the dma memory copies and the ringing doorbell, the driver may +# append one additional dma memory copy with the doorbell register as the +# destination, after the memory copy operations. +# +# Return: Zero on success, otherwise an error number. +# +# Note that writing the peer doorbell via a memory window will *not* generate +# an interrupt on the remote host; that must be done separately. 
+# +METHOD int peer_db_addr { + device_t ntb; + bus_addr_t *db_addr; + vm_size_t *db_size; +}; + +# +# ntb_db_clear() - clear bits in the local doorbell register +# @ntb: NTB device context. +# @bits: Doorbell bits to clear. +# +# Clear bits in the local doorbell register, arming the bits for the next +# doorbell. +# +# Return: None. +# +METHOD void db_clear { + device_t ntb; + uint64_t bits; +}; + +# +# ntb_db_clear_mask() - clear bits in the local doorbell mask +# @ntb: NTB device context. +# @bits: Doorbell bits to clear. +# +# Clear bits in the local doorbell mask register, allowing doorbell interrupts +# to be generated for those doorbell bits. If a doorbell bit is already +# set at the time the mask is cleared, and the corresponding mask bit is +# changed from set to clear, then the ntb driver must ensure that +# ntb_db_event() is called. If the hardware does not generate the interrupt +# on clearing the mask bit, then the driver must call ntb_db_event() anyway. +# +# Return: None. +# +METHOD void db_clear_mask { + device_t ntb; + uint64_t bits; +}; + +# +# ntb_db_read() - read the local doorbell register +# @ntb: NTB device context. +# +# Read the local doorbell register, and return the bits that are set. +# +# Return: The bits currently set in the local doorbell register. +# +METHOD uint64_t db_read { + device_t ntb; +}; + +# +# ntb_db_set_mask() - set bits in the local doorbell mask +# @ntb: NTB device context. +# @bits: Doorbell mask bits to set. +# +# Set bits in the local doorbell mask register, preventing doorbell interrupts +# from being generated for those doorbell bits. Bits that were already set +# must remain set. +# +# Return: None.
+# +METHOD void db_set_mask { + device_t ntb; + uint64_t bits; +}; + +# +# ntb_peer_db_set() - Set the doorbell on the secondary/external side +# @ntb: pointer to ntb_softc instance +# @bit: doorbell bits to ring +# +# This function allows triggering of a doorbell on the secondary/external +# side that will initiate an interrupt on the remote host +# +METHOD void peer_db_set { + device_t ntb; + uint64_t bits; +}; + diff --git a/sys/dev/ntb/ntb_transport.c b/sys/dev/ntb/ntb_transport.c new file mode 100644 index 000000000..e79051f8f --- /dev/null +++ b/sys/dev/ntb/ntb_transport.c @@ -0,0 +1,1496 @@ +/*- + * Copyright (c) 2016 Alexander Motin + * Copyright (C) 2013 Intel Corporation + * Copyright (C) 2015 EMC Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * The Non-Transparent Bridge (NTB) is a device that allows you to connect + * two or more systems using PCI-e links, providing remote memory access. + * + * This module contains a transport for sending and receiving messages by + * writing to remote memory window(s) provided by the underlying NTB device. + * + * NOTE: Much of the code in this module is shared with Linux. Any patches may + * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "ntb.h" +#include "ntb_transport.h" + +#define QP_SETSIZE 64 +BITSET_DEFINE(_qpset, QP_SETSIZE); +#define test_bit(pos, addr) BIT_ISSET(QP_SETSIZE, (pos), (addr)) +#define set_bit(pos, addr) BIT_SET(QP_SETSIZE, (pos), (addr)) +#define clear_bit(pos, addr) BIT_CLR(QP_SETSIZE, (pos), (addr)) +#define ffs_bit(addr) BIT_FFS(QP_SETSIZE, (addr)) + +#define KTR_NTB KTR_SPARE3 + +#define NTB_TRANSPORT_VERSION 4 + +static SYSCTL_NODE(_hw, OID_AUTO, ntb_transport, CTLFLAG_RW, 0, "ntb_transport"); + +static unsigned g_ntb_transport_debug_level; +TUNABLE_INT("hw.ntb_transport.debug_level", &g_ntb_transport_debug_level); +SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, debug_level, CTLFLAG_RWTUN, + &g_ntb_transport_debug_level, 0, + "ntb_transport log level -- higher is more verbose"); +#define ntb_printf(lvl, ...) do { \ + if ((lvl) <= g_ntb_transport_debug_level) { \ + printf(__VA_ARGS__); \ + } \ +} while (0) + +static unsigned transport_mtu = 0x10000; + +static uint64_t max_mw_size; +TUNABLE_QUAD("hw.ntb_transport.max_mw_size", &max_mw_size); +SYSCTL_UQUAD(_hw_ntb_transport, OID_AUTO, max_mw_size, CTLFLAG_RDTUN, &max_mw_size, 0, + "If enabled (non-zero), limit the size of large memory windows. " + "Both sides of the NTB MUST set the same value here."); + +static unsigned max_num_clients; +TUNABLE_INT("hw.ntb_transport.max_num_clients", &max_num_clients); +SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, max_num_clients, CTLFLAG_RDTUN, + &max_num_clients, 0, "Maximum number of NTB transport clients. 
" + "0 (default) - use all available NTB memory windows; " + "positive integer N - Limit to N memory windows."); + +static unsigned enable_xeon_watchdog; +TUNABLE_INT("hw.ntb_transport.enable_xeon_watchdog", &enable_xeon_watchdog); +SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN, + &enable_xeon_watchdog, 0, "If non-zero, write a register every second to " + "keep a watchdog from tearing down the NTB link"); + +STAILQ_HEAD(ntb_queue_list, ntb_queue_entry); + +typedef uint32_t ntb_q_idx_t; + +struct ntb_queue_entry { + /* ntb_queue list reference */ + STAILQ_ENTRY(ntb_queue_entry) entry; + + /* info on data to be transferred */ + void *cb_data; + void *buf; + uint32_t len; + uint32_t flags; + + struct ntb_transport_qp *qp; + struct ntb_payload_header *x_hdr; + ntb_q_idx_t index; +}; + +struct ntb_rx_info { + ntb_q_idx_t entry; +}; + +struct ntb_transport_qp { + struct ntb_transport_ctx *transport; + device_t ntb; + + void *cb_data; + + bool client_ready; + volatile bool link_is_up; + uint8_t qp_num; /* Only 64 QPs are allowed. 
0-63 */ + + struct ntb_rx_info *rx_info; + struct ntb_rx_info *remote_rx_info; + + void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + struct ntb_queue_list tx_free_q; + struct mtx ntb_tx_free_q_lock; + caddr_t tx_mw; + bus_addr_t tx_mw_phys; + ntb_q_idx_t tx_index; + ntb_q_idx_t tx_max_entry; + uint64_t tx_max_frame; + + void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + struct ntb_queue_list rx_post_q; + struct ntb_queue_list rx_pend_q; + /* ntb_rx_q_lock: synchronize access to rx_XXXX_q */ + struct mtx ntb_rx_q_lock; + struct task rx_completion_task; + struct task rxc_db_work; + caddr_t rx_buff; + ntb_q_idx_t rx_index; + ntb_q_idx_t rx_max_entry; + uint64_t rx_max_frame; + + void (*event_handler)(void *data, enum ntb_link_event status); + struct callout link_work; + struct callout rx_full; + + uint64_t last_rx_no_buf; + + /* Stats */ + uint64_t rx_bytes; + uint64_t rx_pkts; + uint64_t rx_ring_empty; + uint64_t rx_err_no_buf; + uint64_t rx_err_oflow; + uint64_t rx_err_ver; + uint64_t tx_bytes; + uint64_t tx_pkts; + uint64_t tx_ring_full; + uint64_t tx_err_no_buf; +}; + +struct ntb_transport_mw { + vm_paddr_t phys_addr; + size_t phys_size; + size_t xlat_align; + size_t xlat_align_size; + bus_addr_t addr_limit; + /* Tx buff is off vbase / phys_addr */ + caddr_t vbase; + size_t xlat_size; + size_t buff_size; + /* Rx buff is off virt_addr / dma_addr */ + caddr_t virt_addr; + bus_addr_t dma_addr; +}; + +struct ntb_transport_ctx { + device_t ntb; + struct ntb_transport_mw *mw_vec; + struct ntb_transport_qp *qp_vec; + struct _qpset qp_bitmap; + struct _qpset qp_bitmap_free; + unsigned mw_count; + unsigned qp_count; + volatile bool link_is_up; + struct callout link_work; + struct callout link_watchdog; + struct task link_cleanup; + struct mtx tx_lock; + struct mtx rx_lock; +}; + +enum { + NTBT_DESC_DONE_FLAG = 1 << 0, + NTBT_LINK_DOWN_FLAG = 1 << 1, +}; + +struct ntb_payload_header { + ntb_q_idx_t 
ver; + uint32_t len; + uint32_t flags; +}; + +enum { + /* + * The order of this enum is part of the remote protocol. Do not + * reorder without bumping protocol version (and it's probably best + * to keep the protocol in lock-step with the Linux NTB driver. + */ + NTBT_VERSION = 0, + NTBT_QP_LINKS, + NTBT_NUM_QPS, + NTBT_NUM_MWS, + /* + * N.B.: transport_link_work assumes MW1 enums = MW0 + 2. + */ + NTBT_MW0_SZ_HIGH, + NTBT_MW0_SZ_LOW, + NTBT_MW1_SZ_HIGH, + NTBT_MW1_SZ_LOW, + NTBT_MAX_SPAD, + + /* + * Some NTB-using hardware have a watchdog to work around NTB hangs; if + * a register or doorbell isn't written every few seconds, the link is + * torn down. Write an otherwise unused register every few seconds to + * work around this watchdog. + */ + NTBT_WATCHDOG_SPAD = 15 +}; + +#define QP_TO_MW(nt, qp) ((qp) % nt->mw_count) +#define NTB_QP_DEF_NUM_ENTRIES 100 +#define NTB_LINK_DOWN_TIMEOUT 10 + +static int ntb_transport_probe(device_t dev); +static int ntb_transport_attach(device_t dev); +static int ntb_transport_detach(device_t dev); +static void ntb_transport_init_queue(struct ntb_transport_ctx *nt, + unsigned int qp_num); +static int ntb_process_tx(struct ntb_transport_qp *qp, + struct ntb_queue_entry *entry); +static void ntb_memcpy_tx(struct ntb_transport_qp *qp, + struct ntb_queue_entry *entry, void *offset); +static void ntb_transport_rxc_db(void *arg, int pending); +static int ntb_process_rxc(struct ntb_transport_qp *qp); +static void ntb_memcpy_rx(struct ntb_transport_qp *qp, + struct ntb_queue_entry *entry, void *offset); +static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp, + void *data); +static void ntb_complete_rxc(void *arg, int pending); +static void ntb_transport_doorbell_callback(void *data, uint32_t vector); +static void ntb_transport_event_callback(void *data); +static void ntb_transport_link_work(void *arg); +static int ntb_set_mw(struct ntb_transport_ctx *, int num_mw, size_t size); +static void ntb_free_mw(struct 
ntb_transport_ctx *nt, int num_mw); +static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, + unsigned int qp_num); +static void ntb_qp_link_work(void *arg); +static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt); +static void ntb_transport_link_cleanup_work(void *, int); +static void ntb_qp_link_down(struct ntb_transport_qp *qp); +static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp); +static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp); +static void ntb_send_link_down(struct ntb_transport_qp *qp); +static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry, + struct ntb_queue_list *list); +static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock, + struct ntb_queue_list *list); +static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock, + struct ntb_queue_list *from, struct ntb_queue_list *to); +static void xeon_link_watchdog_hb(void *); + +static const struct ntb_ctx_ops ntb_transport_ops = { + .link_event = ntb_transport_event_callback, + .db_event = ntb_transport_doorbell_callback, +}; + +MALLOC_DEFINE(M_NTB_T, "ntb_transport", "ntb transport driver"); + +static inline void +iowrite32(uint32_t val, void *addr) +{ + + bus_space_write_4(X86_BUS_SPACE_MEM, 0/* HACK */, (uintptr_t)addr, + val); +} + +/* Transport Init and teardown */ + +static void +xeon_link_watchdog_hb(void *arg) +{ + struct ntb_transport_ctx *nt; + + nt = arg; + NTB_SPAD_WRITE(nt->ntb, NTBT_WATCHDOG_SPAD, 0); + callout_reset(&nt->link_watchdog, 1 * hz, xeon_link_watchdog_hb, nt); +} + +static int +ntb_transport_probe(device_t dev) +{ + + device_set_desc(dev, "NTB Transport"); + return (0); +} + +static int +ntb_transport_attach(device_t dev) +{ + struct ntb_transport_ctx *nt = device_get_softc(dev); + device_t ntb = device_get_parent(dev); + struct ntb_transport_mw *mw; + uint64_t qp_bitmap; + int rc; + unsigned i; + + nt->ntb = ntb; + nt->mw_count = NTB_MW_COUNT(ntb); + nt->mw_vec = malloc(nt->mw_count * sizeof(*nt->mw_vec), 
M_NTB_T, + M_WAITOK | M_ZERO); + for (i = 0; i < nt->mw_count; i++) { + mw = &nt->mw_vec[i]; + + rc = NTB_MW_GET_RANGE(ntb, i, &mw->phys_addr, &mw->vbase, + &mw->phys_size, &mw->xlat_align, &mw->xlat_align_size, + &mw->addr_limit); + if (rc != 0) + goto err; + + mw->buff_size = 0; + mw->xlat_size = 0; + mw->virt_addr = NULL; + mw->dma_addr = 0; + + rc = NTB_MW_SET_WC(nt->ntb, i, VM_MEMATTR_WRITE_COMBINING); + if (rc) + ntb_printf(0, "Unable to set mw%d caching\n", i); + } + + qp_bitmap = NTB_DB_VALID_MASK(ntb); + nt->qp_count = flsll(qp_bitmap); + KASSERT(nt->qp_count != 0, ("bogus db bitmap")); + nt->qp_count -= 1; + + if (max_num_clients != 0 && max_num_clients < nt->qp_count) + nt->qp_count = max_num_clients; + else if (nt->mw_count < nt->qp_count) + nt->qp_count = nt->mw_count; + KASSERT(nt->qp_count <= QP_SETSIZE, ("invalid qp_count")); + + mtx_init(&nt->tx_lock, "ntb transport tx", NULL, MTX_DEF); + mtx_init(&nt->rx_lock, "ntb transport rx", NULL, MTX_DEF); + + nt->qp_vec = malloc(nt->qp_count * sizeof(*nt->qp_vec), M_NTB_T, + M_WAITOK | M_ZERO); + + for (i = 0; i < nt->qp_count; i++) { + set_bit(i, &nt->qp_bitmap); + set_bit(i, &nt->qp_bitmap_free); + ntb_transport_init_queue(nt, i); + } + + callout_init(&nt->link_work, 0); + callout_init(&nt->link_watchdog, 0); + TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt); + + rc = NTB_SET_CTX(ntb, nt, &ntb_transport_ops); + if (rc != 0) + goto err; + + nt->link_is_up = false; + NTB_LINK_ENABLE(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + + callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt); + if (enable_xeon_watchdog != 0) + callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt); + + /* Attach children to this transport */ + device_add_child(dev, NULL, -1); + bus_generic_attach(dev); + + return (0); + +err: + free(nt->qp_vec, M_NTB_T); + free(nt->mw_vec, M_NTB_T); + return (rc); +} + +static int +ntb_transport_detach(device_t dev) +{ + struct ntb_transport_ctx *nt = 
device_get_softc(dev); + device_t ntb = nt->ntb; + struct _qpset qp_bitmap_alloc; + uint8_t i; + + /* Detach & delete all children */ + device_delete_children(dev); + + ntb_transport_link_cleanup(nt); + taskqueue_drain(taskqueue_swi, &nt->link_cleanup); + callout_drain(&nt->link_work); + callout_drain(&nt->link_watchdog); + + BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc); + BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free); + + /* Verify that all the QPs are freed */ + for (i = 0; i < nt->qp_count; i++) + if (test_bit(i, &qp_bitmap_alloc)) + ntb_transport_free_queue(&nt->qp_vec[i]); + + NTB_LINK_DISABLE(ntb); + NTB_CLEAR_CTX(ntb); + + for (i = 0; i < nt->mw_count; i++) + ntb_free_mw(nt, i); + + free(nt->qp_vec, M_NTB_T); + free(nt->mw_vec, M_NTB_T); + return (0); +} + +static void +ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num) +{ + struct ntb_transport_mw *mw; + struct ntb_transport_qp *qp; + vm_paddr_t mw_base; + uint64_t mw_size, qp_offset; + size_t tx_size; + unsigned num_qps_mw, mw_num, mw_count; + + mw_count = nt->mw_count; + mw_num = QP_TO_MW(nt, qp_num); + mw = &nt->mw_vec[mw_num]; + + qp = &nt->qp_vec[qp_num]; + qp->qp_num = qp_num; + qp->transport = nt; + qp->ntb = nt->ntb; + qp->client_ready = false; + qp->event_handler = NULL; + ntb_qp_link_down_reset(qp); + + if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count) + num_qps_mw = nt->qp_count / mw_count + 1; + else + num_qps_mw = nt->qp_count / mw_count; + + mw_base = mw->phys_addr; + mw_size = mw->phys_size; + + tx_size = mw_size / num_qps_mw; + qp_offset = tx_size * (qp_num / mw_count); + + qp->tx_mw = mw->vbase + qp_offset; + KASSERT(qp->tx_mw != NULL, ("uh oh?")); + + /* XXX Assumes that a vm_paddr_t is equivalent to bus_addr_t */ + qp->tx_mw_phys = mw_base + qp_offset; + KASSERT(qp->tx_mw_phys != 0, ("uh oh?")); + + tx_size -= sizeof(struct ntb_rx_info); + qp->rx_info = (void *)(qp->tx_mw + tx_size); + + /* Due to house-keeping, there 
must be at least 2 buffs */ + qp->tx_max_frame = qmin(tx_size / 2, + transport_mtu + sizeof(struct ntb_payload_header)); + qp->tx_max_entry = tx_size / qp->tx_max_frame; + + callout_init(&qp->link_work, 0); + callout_init(&qp->rx_full, 1); + + mtx_init(&qp->ntb_rx_q_lock, "ntb rx q", NULL, MTX_SPIN); + mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN); + TASK_INIT(&qp->rx_completion_task, 0, ntb_complete_rxc, qp); + TASK_INIT(&qp->rxc_db_work, 0, ntb_transport_rxc_db, qp); + + STAILQ_INIT(&qp->rx_post_q); + STAILQ_INIT(&qp->rx_pend_q); + STAILQ_INIT(&qp->tx_free_q); + + callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp); +} + +void +ntb_transport_free_queue(struct ntb_transport_qp *qp) +{ + struct ntb_queue_entry *entry; + + if (qp == NULL) + return; + + callout_drain(&qp->link_work); + + NTB_DB_SET_MASK(qp->ntb, 1ull << qp->qp_num); + taskqueue_drain(taskqueue_swi, &qp->rxc_db_work); + taskqueue_drain(taskqueue_swi, &qp->rx_completion_task); + + qp->cb_data = NULL; + qp->rx_handler = NULL; + qp->tx_handler = NULL; + qp->event_handler = NULL; + + while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q))) + free(entry, M_NTB_T); + + while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q))) + free(entry, M_NTB_T); + + while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) + free(entry, M_NTB_T); + + set_bit(qp->qp_num, &qp->transport->qp_bitmap_free); +} + +/** + * ntb_transport_create_queue - Create a new NTB transport layer queue + * @rx_handler: receive callback function + * @tx_handler: transmit callback function + * @event_handler: event callback function + * + * Create a new NTB transport layer queue and provide the queue with a callback + * routine for both transmit and receive. The receive callback routine will be + * used to pass up data when the transport has received it on the queue. 
The + * transmit callback routine will be called when the transport has completed the + * transmission of the data on the queue and the data is ready to be freed. + * + * RETURNS: pointer to newly created ntb_queue, NULL on error. + */ +struct ntb_transport_qp * +ntb_transport_create_queue(void *data, device_t dev, + const struct ntb_queue_handlers *handlers) +{ + struct ntb_transport_ctx *nt = device_get_softc(dev); + device_t ntb = device_get_parent(dev); + struct ntb_queue_entry *entry; + struct ntb_transport_qp *qp; + unsigned int free_queue; + int i; + + free_queue = ffs_bit(&nt->qp_bitmap); + if (free_queue == 0) + return (NULL); + + /* decrement free_queue to make it zero based */ + free_queue--; + + qp = &nt->qp_vec[free_queue]; + clear_bit(qp->qp_num, &nt->qp_bitmap_free); + qp->cb_data = data; + qp->rx_handler = handlers->rx_handler; + qp->tx_handler = handlers->tx_handler; + qp->event_handler = handlers->event_handler; + + for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { + entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO); + entry->cb_data = data; + entry->buf = NULL; + entry->len = transport_mtu; + ntb_list_add(&qp->ntb_rx_q_lock, entry, &qp->rx_pend_q); + } + + for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { + entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO); + ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); + } + + NTB_DB_CLEAR(ntb, 1ull << qp->qp_num); + NTB_DB_CLEAR_MASK(ntb, 1ull << qp->qp_num); + return (qp); +} + +/** + * ntb_transport_link_up - Notify NTB transport of client readiness to use queue + * @qp: NTB transport layer queue to be enabled + * + * Notify NTB transport layer of client readiness to use queue + */ +void +ntb_transport_link_up(struct ntb_transport_qp *qp) +{ + struct ntb_transport_ctx *nt = qp->transport; + + qp->client_ready = true; + + ntb_printf(2, "qp client ready\n"); + + if (nt->link_is_up) + callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp); +} + + + +/* Transport Tx */ + +/** + * 
ntb_transport_tx_enqueue - Enqueue a new NTB queue entry + * @qp: NTB transport layer queue the entry is to be enqueued on + * @cb: per buffer pointer for callback function to use + * @data: pointer to data buffer that will be sent + * @len: length of the data buffer + * + * Enqueue a new transmit buffer onto the transport queue from which a NTB + * payload will be transmitted. This assumes that a lock is being held to + * serialize access to the qp. + * + * RETURNS: An appropriate ERRNO error value on error, or zero for success. + */ +int +ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, + unsigned int len) +{ + struct ntb_queue_entry *entry; + int rc; + + if (qp == NULL || !qp->link_is_up || len == 0) { + CTR0(KTR_NTB, "TX: link not up"); + return (EINVAL); + } + + entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); + if (entry == NULL) { + CTR0(KTR_NTB, "TX: could not get entry from tx_free_q"); + qp->tx_err_no_buf++; + return (EBUSY); + } + CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry); + + entry->cb_data = cb; + entry->buf = data; + entry->len = len; + entry->flags = 0; + + mtx_lock(&qp->transport->tx_lock); + rc = ntb_process_tx(qp, entry); + mtx_unlock(&qp->transport->tx_lock); + if (rc != 0) { + ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); + CTR1(KTR_NTB, + "TX: process_tx failed. 
Returning entry %p to tx_free_q", + entry); + } + return (rc); +} + +static int +ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry) +{ + void *offset; + + offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index; + CTR3(KTR_NTB, + "TX: process_tx: tx_pkts=%lu, tx_index=%u, remote entry=%u", + qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry); + if (qp->tx_index == qp->remote_rx_info->entry) { + CTR0(KTR_NTB, "TX: ring full"); + qp->tx_ring_full++; + return (EAGAIN); + } + + if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) { + if (qp->tx_handler != NULL) + qp->tx_handler(qp, qp->cb_data, entry->buf, + EIO); + else + m_freem(entry->buf); + + entry->buf = NULL; + ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); + CTR1(KTR_NTB, + "TX: frame too big. returning entry %p to tx_free_q", + entry); + return (0); + } + CTR2(KTR_NTB, "TX: copying entry %p to offset %p", entry, offset); + ntb_memcpy_tx(qp, entry, offset); + + qp->tx_index++; + qp->tx_index %= qp->tx_max_entry; + + qp->tx_pkts++; + + return (0); +} + +static void +ntb_memcpy_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry, + void *offset) +{ + struct ntb_payload_header *hdr; + + /* This piece is from Linux' ntb_async_tx() */ + hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame - + sizeof(struct ntb_payload_header)); + entry->x_hdr = hdr; + iowrite32(entry->len, &hdr->len); + iowrite32(qp->tx_pkts, &hdr->ver); + + /* This piece is ntb_memcpy_tx() */ + CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset); + if (entry->buf != NULL) { + m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset); + + /* + * Ensure that the data is fully copied before setting the + * flags + */ + wmb(); + } + + /* The rest is ntb_tx_copy_callback() */ + iowrite32(entry->flags | NTBT_DESC_DONE_FLAG, &hdr->flags); + CTR1(KTR_NTB, "TX: hdr %p set DESC_DONE", hdr); + + NTB_PEER_DB_SET(qp->ntb, 1ull << qp->qp_num); + + /* + * The 
entry length can only be zero if the packet is intended to be a + * "link down" or similar. Since no payload is being sent in these + * cases, there is nothing to add to the completion queue. + */ + if (entry->len > 0) { + qp->tx_bytes += entry->len; + + if (qp->tx_handler) + qp->tx_handler(qp, qp->cb_data, entry->buf, + entry->len); + else + m_freem(entry->buf); + entry->buf = NULL; + } + + CTR3(KTR_NTB, + "TX: entry %p sent. hdr->ver = %u, hdr->flags = 0x%x, Returning " + "to tx_free_q", entry, hdr->ver, hdr->flags); + ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); +} + +/* Transport Rx */ +static void +ntb_transport_rxc_db(void *arg, int pending __unused) +{ + struct ntb_transport_qp *qp = arg; + ntb_q_idx_t i; + int rc; + + /* + * Limit the number of packets processed in a single interrupt to + * provide fairness to others + */ + CTR0(KTR_NTB, "RX: transport_rx"); + mtx_lock(&qp->transport->rx_lock); + for (i = 0; i < qp->rx_max_entry; i++) { + rc = ntb_process_rxc(qp); + if (rc != 0) { + CTR0(KTR_NTB, "RX: process_rxc failed"); + break; + } + } + mtx_unlock(&qp->transport->rx_lock); + + if (i == qp->rx_max_entry) + taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work); + else if ((NTB_DB_READ(qp->ntb) & (1ull << qp->qp_num)) != 0) { + /* If db is set, clear it and read it back to commit clear. */ + NTB_DB_CLEAR(qp->ntb, 1ull << qp->qp_num); + (void)NTB_DB_READ(qp->ntb); + + /* + * An interrupt may have arrived between finishing + * ntb_process_rxc and clearing the doorbell bit: there might + * be some more work to do. 
+ */ + taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work); + } +} + +static int +ntb_process_rxc(struct ntb_transport_qp *qp) +{ + struct ntb_payload_header *hdr; + struct ntb_queue_entry *entry; + caddr_t offset; + + offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index; + hdr = (void *)(offset + qp->rx_max_frame - + sizeof(struct ntb_payload_header)); + + CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index); + if ((hdr->flags & NTBT_DESC_DONE_FLAG) == 0) { + CTR0(KTR_NTB, "RX: hdr not done"); + qp->rx_ring_empty++; + return (EAGAIN); + } + + if ((hdr->flags & NTBT_LINK_DOWN_FLAG) != 0) { + CTR0(KTR_NTB, "RX: link down"); + ntb_qp_link_down(qp); + hdr->flags = 0; + return (EAGAIN); + } + + if (hdr->ver != (uint32_t)qp->rx_pkts) { + CTR2(KTR_NTB,"RX: ver != rx_pkts (%x != %lx). " + "Returning entry to rx_pend_q", hdr->ver, qp->rx_pkts); + qp->rx_err_ver++; + return (EIO); + } + + entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q); + if (entry == NULL) { + qp->rx_err_no_buf++; + CTR0(KTR_NTB, "RX: No entries in rx_pend_q"); + return (EAGAIN); + } + callout_stop(&qp->rx_full); + CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry); + + entry->x_hdr = hdr; + entry->index = qp->rx_index; + + if (hdr->len > entry->len) { + CTR2(KTR_NTB, "RX: len too long. 
Wanted %ju got %ju", + (uintmax_t)hdr->len, (uintmax_t)entry->len); + qp->rx_err_oflow++; + + entry->len = -EIO; + entry->flags |= NTBT_DESC_DONE_FLAG; + + taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task); + } else { + qp->rx_bytes += hdr->len; + qp->rx_pkts++; + + CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts); + + entry->len = hdr->len; + + ntb_memcpy_rx(qp, entry, offset); + } + + qp->rx_index++; + qp->rx_index %= qp->rx_max_entry; + return (0); +} + +static void +ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry, + void *offset) +{ + struct ifnet *ifp = entry->cb_data; + unsigned int len = entry->len; + + CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset); + + entry->buf = (void *)m_devget(offset, len, 0, ifp, NULL); + + /* Ensure that the data is globally visible before clearing the flag */ + wmb(); + + CTR2(KTR_NTB, "RX: copied entry %p to mbuf %p.", entry, m); + ntb_rx_copy_callback(qp, entry); +} + +static inline void +ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data) +{ + struct ntb_queue_entry *entry; + + entry = data; + entry->flags |= NTBT_DESC_DONE_FLAG; + taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task); +} + +static void +ntb_complete_rxc(void *arg, int pending) +{ + struct ntb_transport_qp *qp = arg; + struct ntb_queue_entry *entry; + struct mbuf *m; + unsigned len; + + CTR0(KTR_NTB, "RX: rx_completion_task"); + + mtx_lock_spin(&qp->ntb_rx_q_lock); + + while (!STAILQ_EMPTY(&qp->rx_post_q)) { + entry = STAILQ_FIRST(&qp->rx_post_q); + if ((entry->flags & NTBT_DESC_DONE_FLAG) == 0) + break; + + entry->x_hdr->flags = 0; + iowrite32(entry->index, &qp->rx_info->entry); + + STAILQ_REMOVE_HEAD(&qp->rx_post_q, entry); + + len = entry->len; + m = entry->buf; + + /* + * Re-initialize queue_entry for reuse; rx_handler takes + * ownership of the mbuf. 
+ */ + entry->buf = NULL; + entry->len = transport_mtu; + entry->cb_data = qp->cb_data; + + STAILQ_INSERT_TAIL(&qp->rx_pend_q, entry, entry); + + mtx_unlock_spin(&qp->ntb_rx_q_lock); + + CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m); + if (qp->rx_handler != NULL && qp->client_ready) + qp->rx_handler(qp, qp->cb_data, m, len); + else + m_freem(m); + + mtx_lock_spin(&qp->ntb_rx_q_lock); + } + + mtx_unlock_spin(&qp->ntb_rx_q_lock); +} + +static void +ntb_transport_doorbell_callback(void *data, uint32_t vector) +{ + struct ntb_transport_ctx *nt = data; + struct ntb_transport_qp *qp; + struct _qpset db_bits; + uint64_t vec_mask; + unsigned qp_num; + + BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &db_bits); + BIT_NAND(QP_SETSIZE, &db_bits, &nt->qp_bitmap_free); + + vec_mask = NTB_DB_VECTOR_MASK(nt->ntb, vector); + while (vec_mask != 0) { + qp_num = ffsll(vec_mask) - 1; + + if (test_bit(qp_num, &db_bits)) { + qp = &nt->qp_vec[qp_num]; + taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work); + } + + vec_mask &= ~(1ull << qp_num); + } +} + +/* Link Event handler */ +static void +ntb_transport_event_callback(void *data) +{ + struct ntb_transport_ctx *nt = data; + + if (NTB_LINK_IS_UP(nt->ntb, NULL, NULL)) { + ntb_printf(1, "HW link up\n"); + callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt); + } else { + ntb_printf(1, "HW link down\n"); + taskqueue_enqueue(taskqueue_swi, &nt->link_cleanup); + } +} + +/* Link bring up */ +static void +ntb_transport_link_work(void *arg) +{ + struct ntb_transport_ctx *nt = arg; + device_t ntb = nt->ntb; + struct ntb_transport_qp *qp; + uint64_t val64, size; + uint32_t val; + unsigned i; + int rc; + + /* send the local info, in the opposite order of the way we read it */ + for (i = 0; i < nt->mw_count; i++) { + size = nt->mw_vec[i].phys_size; + + if (max_mw_size != 0 && size > max_mw_size) + size = max_mw_size; + + NTB_PEER_SPAD_WRITE(ntb, NTBT_MW0_SZ_HIGH + (i * 2), + size >> 32); + NTB_PEER_SPAD_WRITE(ntb, NTBT_MW0_SZ_LOW + (i * 
2), size); + } + + NTB_PEER_SPAD_WRITE(ntb, NTBT_NUM_MWS, nt->mw_count); + + NTB_PEER_SPAD_WRITE(ntb, NTBT_NUM_QPS, nt->qp_count); + + NTB_PEER_SPAD_WRITE(ntb, NTBT_VERSION, NTB_TRANSPORT_VERSION); + + /* Query the remote side for its info */ + val = 0; + NTB_SPAD_READ(ntb, NTBT_VERSION, &val); + if (val != NTB_TRANSPORT_VERSION) + goto out; + + NTB_SPAD_READ(ntb, NTBT_NUM_QPS, &val); + if (val != nt->qp_count) + goto out; + + NTB_SPAD_READ(ntb, NTBT_NUM_MWS, &val); + if (val != nt->mw_count) + goto out; + + for (i = 0; i < nt->mw_count; i++) { + NTB_SPAD_READ(ntb, NTBT_MW0_SZ_HIGH + (i * 2), &val); + val64 = (uint64_t)val << 32; + + NTB_SPAD_READ(ntb, NTBT_MW0_SZ_LOW + (i * 2), &val); + val64 |= val; + + rc = ntb_set_mw(nt, i, val64); + if (rc != 0) + goto free_mws; + } + + nt->link_is_up = true; + ntb_printf(1, "transport link up\n"); + + for (i = 0; i < nt->qp_count; i++) { + qp = &nt->qp_vec[i]; + + ntb_transport_setup_qp_mw(nt, i); + + if (qp->client_ready) + callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp); + } + + return; + +free_mws: + for (i = 0; i < nt->mw_count; i++) + ntb_free_mw(nt, i); +out: + if (NTB_LINK_IS_UP(ntb, NULL, NULL)) + callout_reset(&nt->link_work, + NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt); +} + +static int +ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, size_t size) +{ + struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; + size_t xlat_size, buff_size; + int rc; + + if (size == 0) + return (EINVAL); + + xlat_size = roundup(size, mw->xlat_align_size); + buff_size = xlat_size; + + /* No need to re-setup */ + if (mw->xlat_size == xlat_size) + return (0); + + if (mw->buff_size != 0) + ntb_free_mw(nt, num_mw); + + /* Alloc memory for receiving data. 
Must be aligned */ + mw->xlat_size = xlat_size; + mw->buff_size = buff_size; + + mw->virt_addr = contigmalloc(mw->buff_size, M_NTB_T, M_ZERO, 0, + mw->addr_limit, mw->xlat_align, 0); + if (mw->virt_addr == NULL) { + ntb_printf(0, "Unable to allocate MW buffer of size %zu/%zu\n", + mw->buff_size, mw->xlat_size); + mw->xlat_size = 0; + mw->buff_size = 0; + return (ENOMEM); + } + /* TODO: replace with bus_space_* functions */ + mw->dma_addr = vtophys(mw->virt_addr); + + /* + * Ensure that the allocation from contigmalloc is aligned as + * requested. XXX: This may not be needed -- brought in for parity + * with the Linux driver. + */ + if (mw->dma_addr % mw->xlat_align != 0) { + ntb_printf(0, + "DMA memory 0x%jx not aligned to BAR size 0x%zx\n", + (uintmax_t)mw->dma_addr, size); + ntb_free_mw(nt, num_mw); + return (ENOMEM); + } + + /* Notify HW the memory location of the receive buffer */ + rc = NTB_MW_SET_TRANS(nt->ntb, num_mw, mw->dma_addr, mw->xlat_size); + if (rc) { + ntb_printf(0, "Unable to set mw%d translation\n", num_mw); + ntb_free_mw(nt, num_mw); + return (rc); + } + + return (0); +} + +static void +ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) +{ + struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; + + if (mw->virt_addr == NULL) + return; + + NTB_MW_CLEAR_TRANS(nt->ntb, num_mw); + contigfree(mw->virt_addr, mw->xlat_size, M_NTB_T); + mw->xlat_size = 0; + mw->buff_size = 0; + mw->virt_addr = NULL; +} + +static int +ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num) +{ + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; + struct ntb_transport_mw *mw; + void *offset; + ntb_q_idx_t i; + size_t rx_size; + unsigned num_qps_mw, mw_num, mw_count; + + mw_count = nt->mw_count; + mw_num = QP_TO_MW(nt, qp_num); + mw = &nt->mw_vec[mw_num]; + + if (mw->virt_addr == NULL) + return (ENOMEM); + + if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count) + num_qps_mw = nt->qp_count / mw_count + 1; + else + num_qps_mw = nt->qp_count 
/ mw_count; + + rx_size = mw->xlat_size / num_qps_mw; + qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count); + rx_size -= sizeof(struct ntb_rx_info); + + qp->remote_rx_info = (void*)(qp->rx_buff + rx_size); + + /* Due to house-keeping, there must be at least 2 buffs */ + qp->rx_max_frame = qmin(rx_size / 2, + transport_mtu + sizeof(struct ntb_payload_header)); + qp->rx_max_entry = rx_size / qp->rx_max_frame; + qp->rx_index = 0; + + qp->remote_rx_info->entry = qp->rx_max_entry - 1; + + /* Set up the hdr offsets with 0s */ + for (i = 0; i < qp->rx_max_entry; i++) { + offset = (void *)(qp->rx_buff + qp->rx_max_frame * (i + 1) - + sizeof(struct ntb_payload_header)); + memset(offset, 0, sizeof(struct ntb_payload_header)); + } + + qp->rx_pkts = 0; + qp->tx_pkts = 0; + qp->tx_index = 0; + + return (0); +} + +static void +ntb_qp_link_work(void *arg) +{ + struct ntb_transport_qp *qp = arg; + device_t ntb = qp->ntb; + struct ntb_transport_ctx *nt = qp->transport; + uint32_t val, dummy; + + NTB_SPAD_READ(ntb, NTBT_QP_LINKS, &val); + + NTB_PEER_SPAD_WRITE(ntb, NTBT_QP_LINKS, val | (1ull << qp->qp_num)); + + /* query remote spad for qp ready bits */ + NTB_PEER_SPAD_READ(ntb, NTBT_QP_LINKS, &dummy); + + /* See if the remote side is up */ + if ((val & (1ull << qp->qp_num)) != 0) { + ntb_printf(2, "qp link up\n"); + qp->link_is_up = true; + + if (qp->event_handler != NULL) + qp->event_handler(qp->cb_data, NTB_LINK_UP); + + taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work); + } else if (nt->link_is_up) + callout_reset(&qp->link_work, + NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp); +} + +/* Link down event*/ +static void +ntb_transport_link_cleanup(struct ntb_transport_ctx *nt) +{ + struct ntb_transport_qp *qp; + struct _qpset qp_bitmap_alloc; + unsigned i; + + BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc); + BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free); + + /* Pass along the info to any clients */ + for (i = 0; i < nt->qp_count; i++) + 
if (test_bit(i, &qp_bitmap_alloc)) { + qp = &nt->qp_vec[i]; + ntb_qp_link_cleanup(qp); + callout_drain(&qp->link_work); + } + + if (!nt->link_is_up) + callout_drain(&nt->link_work); + + /* + * The scratchpad registers keep the values if the remote side + * goes down, blast them now to give them a sane value the next + * time they are accessed + */ + for (i = 0; i < NTBT_MAX_SPAD; i++) + NTB_SPAD_WRITE(nt->ntb, i, 0); +} + +static void +ntb_transport_link_cleanup_work(void *arg, int pending __unused) +{ + + ntb_transport_link_cleanup(arg); +} + +static void +ntb_qp_link_down(struct ntb_transport_qp *qp) +{ + + ntb_qp_link_cleanup(qp); +} + +static void +ntb_qp_link_down_reset(struct ntb_transport_qp *qp) +{ + + qp->link_is_up = false; + + qp->tx_index = qp->rx_index = 0; + qp->tx_bytes = qp->rx_bytes = 0; + qp->tx_pkts = qp->rx_pkts = 0; + + qp->rx_ring_empty = 0; + qp->tx_ring_full = 0; + + qp->rx_err_no_buf = qp->tx_err_no_buf = 0; + qp->rx_err_oflow = qp->rx_err_ver = 0; +} + +static void +ntb_qp_link_cleanup(struct ntb_transport_qp *qp) +{ + struct ntb_transport_ctx *nt = qp->transport; + + callout_drain(&qp->link_work); + ntb_qp_link_down_reset(qp); + + if (qp->event_handler != NULL) + qp->event_handler(qp->cb_data, NTB_LINK_DOWN); + + if (nt->link_is_up) + callout_reset(&qp->link_work, + NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp); +} + +/* Link commanded down */ +/** + * ntb_transport_link_down - Notify NTB transport to no longer enqueue data + * @qp: NTB transport layer queue to be disabled + * + * Notify NTB transport layer of client's desire to no longer receive data on + * transport queue specified. It is the client's responsibility to ensure all + * entries on queue are purged or otherwise handled appropriately. 
+ */
+void
+ntb_transport_link_down(struct ntb_transport_qp *qp)
+{
+	uint32_t val;
+
+	if (qp == NULL)
+		return;
+
+	qp->client_ready = false;
+
+	NTB_SPAD_READ(qp->ntb, NTBT_QP_LINKS, &val);
+
+	/*
+	 * Clear this queue's ready bit in the peer's scratchpad.  The mask is
+	 * built with 1ull, as everywhere else in this file, because shifting
+	 * a plain int is undefined for queue numbers of 31 and above.
+	 */
+	NTB_PEER_SPAD_WRITE(qp->ntb, NTBT_QP_LINKS,
+	    val & ~(1ull << qp->qp_num));
+
+	if (qp->link_is_up)
+		ntb_send_link_down(qp);
+	else
+		callout_drain(&qp->link_work);
+}
+
+/**
+ * ntb_transport_link_query - Query transport link state
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query connectivity to the remote system of the NTB transport queue
+ *
+ * RETURNS: true for link up or false for link down
+ */
+bool
+ntb_transport_link_query(struct ntb_transport_qp *qp)
+{
+	if (qp == NULL)
+		return (false);
+
+	return (qp->link_is_up);
+}
+
+/*
+ * Tell the peer that this queue is going down by sending a zero-length frame
+ * flagged NTBT_LINK_DOWN_FLAG, then reset the local queue state.  Waits for a
+ * free transmit entry for up to NTB_LINK_DOWN_TIMEOUT attempts and gives up
+ * silently if none becomes available.
+ */
+static void
+ntb_send_link_down(struct ntb_transport_qp *qp)
+{
+	struct ntb_queue_entry *entry = NULL;
+	int i, rc;
+
+	if (!qp->link_is_up)
+		return;
+
+	for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) {
+		entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+		if (entry != NULL)
+			break;
+		pause("NTB Wait for link down", hz / 10);
+	}
+
+	if (entry == NULL)
+		return;
+
+	entry->cb_data = NULL;
+	entry->buf = NULL;
+	entry->len = 0;
+	entry->flags = NTBT_LINK_DOWN_FLAG;
+
+	mtx_lock(&qp->transport->tx_lock);
+	rc = ntb_process_tx(qp, entry);
+	mtx_unlock(&qp->transport->tx_lock);
+	if (rc != 0) {
+		printf("ntb: Failed to send link down\n");
+		/*
+		 * ntb_process_tx() did not consume the entry; return it to
+		 * the free list as ntb_transport_tx_enqueue() does, so that
+		 * it is neither lost to later senders nor leaked when the
+		 * queue is freed.
+		 */
+		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+	}
+
+	ntb_qp_link_down_reset(qp);
+}
+
+
+/* List Management */
+
+/* Append 'entry' to the tail of 'list' while holding spin mutex 'lock'. */
+static void
+ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
+    struct ntb_queue_list *list)
+{
+
+	mtx_lock_spin(lock);
+	STAILQ_INSERT_TAIL(list, entry, entry);
+	mtx_unlock_spin(lock);
+}
+
+/*
+ * Remove and return the head of 'list' while holding spin mutex 'lock', or
+ * return NULL when the list is empty.
+ */
+static struct ntb_queue_entry *
+ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list)
+{
+	struct ntb_queue_entry *entry;
+
+	mtx_lock_spin(lock);
+	if (STAILQ_EMPTY(list)) {
+		entry = NULL;
+		goto out;
+	}
+	entry = STAILQ_FIRST(list);
+	STAILQ_REMOVE_HEAD(list, entry);
+out:
+	mtx_unlock_spin(lock);
+
+	return (entry);
+}
+ +static struct ntb_queue_entry * +ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from, + struct ntb_queue_list *to) +{ + struct ntb_queue_entry *entry; + + mtx_lock_spin(lock); + if (STAILQ_EMPTY(from)) { + entry = NULL; + goto out; + } + entry = STAILQ_FIRST(from); + STAILQ_REMOVE_HEAD(from, entry); + STAILQ_INSERT_TAIL(to, entry, entry); + +out: + mtx_unlock_spin(lock); + return (entry); +} + +/** + * ntb_transport_qp_num - Query the qp number + * @qp: NTB transport layer queue to be queried + * + * Query qp number of the NTB transport queue + * + * RETURNS: a zero based number specifying the qp number + */ +unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp) +{ + if (qp == NULL) + return 0; + + return (qp->qp_num); +} + +/** + * ntb_transport_max_size - Query the max payload size of a qp + * @qp: NTB transport layer queue to be queried + * + * Query the maximum payload size permissible on the given qp + * + * RETURNS: the max payload size of a qp + */ +unsigned int +ntb_transport_max_size(struct ntb_transport_qp *qp) +{ + + if (qp == NULL) + return (0); + + return (qp->tx_max_frame - sizeof(struct ntb_payload_header)); +} + +unsigned int +ntb_transport_tx_free_entry(struct ntb_transport_qp *qp) +{ + unsigned int head = qp->tx_index; + unsigned int tail = qp->remote_rx_info->entry; + + return (tail >= head ? 
tail - head : qp->tx_max_entry + tail - head); +} + +static device_method_t ntb_transport_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, ntb_transport_probe), + DEVMETHOD(device_attach, ntb_transport_attach), + DEVMETHOD(device_detach, ntb_transport_detach), + DEVMETHOD_END +}; + +devclass_t ntb_transport_devclass; +static DEFINE_CLASS_0(ntb_transport, ntb_transport_driver, + ntb_transport_methods, sizeof(struct ntb_transport_ctx)); +DRIVER_MODULE(ntb_transport, ntb_hw, ntb_transport_driver, + ntb_transport_devclass, NULL, NULL); +MODULE_DEPEND(ntb_transport, ntb, 1, 1, 1); +MODULE_VERSION(ntb_transport, 1); diff --git a/sys/dev/ntb/ntb_transport.h b/sys/dev/ntb/ntb_transport.h new file mode 100644 index 000000000..deb85688f --- /dev/null +++ b/sys/dev/ntb/ntb_transport.h @@ -0,0 +1,60 @@ +/*- + * Copyright (c) 2016 Alexander Motin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +struct ntb_transport_qp; + +extern devclass_t ntb_transport_devclass; + +enum ntb_link_event { + NTB_LINK_DOWN = 0, + NTB_LINK_UP, +}; + +struct ntb_queue_handlers { + void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*event_handler)(void *data, enum ntb_link_event status); +}; + +unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp); +unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); +struct ntb_transport_qp * +ntb_transport_create_queue(void *data, device_t dev, + const struct ntb_queue_handlers *handlers); +void ntb_transport_free_queue(struct ntb_transport_qp *qp); +int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, + unsigned int len); +int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, + unsigned int len); +void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len); +void ntb_transport_link_up(struct ntb_transport_qp *qp); +void ntb_transport_link_down(struct ntb_transport_qp *qp); +bool ntb_transport_link_query(struct ntb_transport_qp *qp); +unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp); diff --git a/sys/modules/ntb/Makefile b/sys/modules/ntb/Makefile index a5169a00e..3eaf7516a 100644 --- a/sys/modules/ntb/Makefile +++ b/sys/modules/ntb/Makefile @@ -1,5 
+1,5 @@ # $FreeBSD$ -SUBDIR= ntb_hw if_ntb +SUBDIR= ntb ntb_hw ntb_transport if_ntb .include <bsd.subdir.mk> diff --git a/sys/modules/ntb/ntb/Makefile b/sys/modules/ntb/ntb/Makefile new file mode 100644 index 000000000..a343f28e8 --- /dev/null +++ b/sys/modules/ntb/ntb/Makefile @@ -0,0 +1,11 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../../dev/ntb + +KMOD = ntb +SRCS = ntb.c ntb_if.c +SRCS += device_if.h bus_if.h ntb_if.h + +MFILES= kern/bus_if.m kern/device_if.m dev/ntb/ntb_if.m + +.include <bsd.kmod.mk> diff --git a/sys/modules/ntb/ntb_hw/Makefile b/sys/modules/ntb/ntb_hw/Makefile index fc46b4682..5240411f8 100644 --- a/sys/modules/ntb/ntb_hw/Makefile +++ b/sys/modules/ntb/ntb_hw/Makefile @@ -4,6 +4,8 @@ KMOD = ntb_hw SRCS = ntb_hw.c -SRCS += device_if.h bus_if.h pci_if.h +SRCS += device_if.h bus_if.h pci_if.h ntb_if.h + +MFILES= kern/bus_if.m kern/device_if.m dev/pci/pci_if.m dev/ntb/ntb_if.m .include <bsd.kmod.mk> diff --git a/sys/modules/ntb/ntb_transport/Makefile b/sys/modules/ntb/ntb_transport/Makefile new file mode 100644 index 000000000..505560049 --- /dev/null +++ b/sys/modules/ntb/ntb_transport/Makefile @@ -0,0 +1,11 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../../dev/ntb + +KMOD = ntb_transport +SRCS = ntb_transport.c +SRCS += device_if.h bus_if.h ntb_if.h + +MFILES= kern/bus_if.m kern/device_if.m dev/ntb/ntb_if.m + +.include <bsd.kmod.mk> -- 2.45.0