2 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
3 * Copyright (C) 2013 Intel Corporation
4 * Copyright (C) 2015 EMC Corporation
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
31 * two or more systems using PCI-e links, providing remote memory access.
33 * This module contains a driver for a simulated Ethernet device, using
34 * underlying NTB Transport device.
36 * NOTE: Much of the code in this module is shared with Linux. Any patches may
37 * be picked up and redistributed in Linux with a dual GPL/BSD license.
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
43 #include <sys/param.h>
44 #include <sys/kernel.h>
45 #include <sys/systm.h>
46 #include <sys/buf_ring.h>
48 #include <sys/limits.h>
49 #include <sys/module.h>
50 #include <sys/socket.h>
51 #include <sys/sockio.h>
52 #include <sys/sysctl.h>
53 #include <sys/taskqueue.h>
56 #include <net/if_media.h>
57 #include <net/if_types.h>
58 #include <net/if_media.h>
59 #include <net/if_var.h>
61 #include <net/ethernet.h>
63 #include <machine/bus.h>
65 #include "../ntb_transport.h"
/* Trace class passed to the CTR0/CTR1/CTR2 trace points in this file. */
67 #define KTR_NTB KTR_SPARE3
/*
 * Media word registered with ifmedia_add()/ifmedia_set() in ntb_net_attach()
 * and reported as the active media by ntb_ifmedia_sts().
 */
68 #define NTB_MEDIATYPE (IFM_ETHER | IFM_AUTO | IFM_FDX)
/* if_hwassist bits switched together with IFCAP_TXCSUM in ntb_ioctl(). */
70 #define NTB_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
/* if_hwassist bits switched together with IFCAP_TXCSUM_IPV6 in ntb_ioctl(). */
71 #define NTB_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
/*
 * csum_flags value that ntb_net_rx_handler() stores in received mbufs when
 * RX checksum offload is enabled.
 * NOTE(review): some continuation lines of this macro are not visible in
 * this listing.
 */
72 #define NTB_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
74 CSUM_IP_CHECKED | CSUM_IP_VALID | \
/* Root node of the hw.if_ntb sysctl subtree. */
77 static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb");
/*
 * Upper bound on the number of queues per interface.  ntb_net_attach() uses
 * the minimum of this value and ntb_transport_queue_count(), so the UINT_MAX
 * default leaves the transport's queue count as the effective limit.
 */
79 static unsigned g_if_ntb_num_queues = UINT_MAX;
80 SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN,
81 &g_if_ntb_num_queues, 0, "Number of queues per interface");
/*
 * Per-queue state; ntb_net_attach() allocates an array of these, one per
 * transport queue pair in use.
 * NOTE(review): this listing omits some members.  ifp, br, tx_lock and
 * tx_task are referenced through this structure elsewhere in the file but
 * their declarations are not visible here.
 */
83 struct ntb_net_queue {
/* Owning interface context; read by ntb_net_rx_handler(). */
84 struct ntb_net_ctx *sc;
/* Transport queue pair returned by ntb_transport_create_queue(). */
86 struct ntb_transport_qp *qp;
/* Taskqueue on which tx_task (handler ntb_handle_tx) is enqueued. */
89 struct taskqueue *tx_tq;
/*
 * Callout armed by ntb_transmit_locked() after putting an mbuf back on the
 * ring, and rescheduled from ntb_qp_full().
 */
91 struct callout queue_full;
/*
 * The members below are accessed as sc->eaddr and sc->queues, where sc is a
 * struct ntb_net_ctx; the opening line of that structure is not visible in
 * this listing.
 */
/* Ethernet address filled in by create_random_local_eui48() at attach. */
97 u_char eaddr[ETHER_ADDR_LEN];
/* Array of sc->num_queues per-queue structures. */
99 struct ntb_net_queue *queues;
/*
 * Forward declarations: device methods, ifnet callbacks, NTB transport queue
 * handlers, TX helpers and the MAC address generator defined later in this
 * file.
 */
103 static int ntb_net_probe(device_t dev);
104 static int ntb_net_attach(device_t dev);
105 static int ntb_net_detach(device_t dev);
106 static void ntb_net_init(void *arg);
107 static int ntb_ifmedia_upd(struct ifnet *);
108 static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *);
109 static int ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
110 static int ntb_transmit(struct ifnet *ifp, struct mbuf *m);
111 static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
112 void *data, int len);
113 static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
114 void *data, int len);
115 static void ntb_net_event_handler(void *data, enum ntb_link_event status);
116 static void ntb_handle_tx(void *arg, int pending);
117 static void ntb_qp_full(void *arg);
118 static void ntb_qflush(struct ifnet *ifp);
119 static void create_random_local_eui48(u_char *eaddr);
/*
 * Device probe method: sets the device description string.
 * NOTE(review): the return statement is not visible in this listing.
 */
122 ntb_net_probe(device_t dev)
125 device_set_desc(dev, "NTB Network Interface");
/*
 * Device attach method: allocates the ifnet, creates one transport queue
 * pair plus TX machinery (lock, buf_ring, task, taskqueue, callout) per
 * queue, attaches the Ethernet interface with a random address, registers
 * the media type and finally brings the transport links up.
 * NOTE(review): error-path and return statements are not visible in this
 * listing.
 */
130 ntb_net_attach(device_t dev)
132 struct ntb_net_ctx *sc = device_get_softc(dev);
133 struct ntb_net_queue *q;
/* RX, TX and link-event handlers registered with every queue pair. */
135 struct ntb_queue_handlers handlers = { ntb_net_rx_handler,
136 ntb_net_tx_handler, ntb_net_event_handler };
139 ifp = sc->ifp = if_alloc(IFT_ETHER);
141 printf("ntb: Cannot allocate ifnet structure\n");
144 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
/* Use no more queues than the tunable allows or the transport offers. */
146 sc->num_queues = min(g_if_ntb_num_queues,
147 ntb_transport_queue_count(dev));
148 sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue),
149 M_DEVBUF, M_WAITOK | M_ZERO);
151 for (i = 0; i < sc->num_queues; i++) {
/* The queue structure is the per-queue argument later passed to the handlers. */
155 q->qp = ntb_transport_create_queue(dev, i, &handlers, q);
/* Lower sc->mtu to the largest transfer this queue pair supports. */
158 sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp));
159 mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF);
/* The buf_ring is associated with the same tx_lock that guards transmission. */
160 q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock);
161 TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q);
/*
 * NOTE(review): M_NOWAIT allocation; no NULL check on q->tx_tq is visible in
 * this listing before it is used below - confirm.
 */
162 q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT,
163 taskqueue_thread_enqueue, &q->tx_tq);
164 taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d",
165 device_get_nameunit(dev), i);
166 callout_init(&q->queue_full, 1);
169 device_printf(dev, "%d queue(s)\n", sc->num_queues);
171 ifp->if_init = ntb_net_init;
173 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
174 ifp->if_ioctl = ntb_ioctl;
175 ifp->if_transmit = ntb_transmit;
176 ifp->if_qflush = ntb_qflush;
/* There is no hardware address; a random locally administered one is generated. */
177 create_random_local_eui48(sc->eaddr);
178 ether_ifattach(ifp, sc->eaddr);
/*
 * Checksum offload is advertised as a capability but not enabled here; only
 * jumbo MTU and link-state reporting start enabled.
 */
179 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
180 IFCAP_JUMBO_MTU | IFCAP_LINKSTATE;
181 ifp->if_capenable = IFCAP_JUMBO_MTU | IFCAP_LINKSTATE;
/* sc->mtu is bounded by the transport size, so the Ethernet header is subtracted. */
182 ifp->if_mtu = sc->mtu - ETHER_HDR_LEN;
/* A single fixed media type is registered and selected. */
184 ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd,
186 ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL);
187 ifmedia_set(&sc->media, NTB_MEDIATYPE);
/* Links are brought up last, after the interface has been attached. */
189 for (i = 0; i < sc->num_queues; i++)
190 ntb_transport_link_up(sc->queues[i].qp);
/*
 * Device detach method: takes every queue pair's transport link down,
 * detaches the Ethernet interface and its media, releases each queue's
 * resources and frees the queue array.
 * NOTE(review): the return statement is not visible in this listing.
 */
195 ntb_net_detach(device_t dev)
197 struct ntb_net_ctx *sc = device_get_softc(dev);
198 struct ntb_net_queue *q;
/* Take the links down before tearing the interface apart. */
201 for (i = 0; i < sc->num_queues; i++)
202 ntb_transport_link_down(sc->queues[i].qp);
203 ether_ifdetach(sc->ifp);
205 ifmedia_removeall(&sc->media);
206 for (i = 0; i < sc->num_queues; i++) {
/*
 * NOTE(review): as listed, the buf_ring is freed before the queue_full
 * callout and the TX taskqueue are drained, and no taskqueue_free() call is
 * visible - confirm nothing can still reach q->br at this point and that
 * the taskqueue is released.
 */
208 ntb_transport_free_queue(q->qp);
209 buf_ring_free(q->br, M_DEVBUF);
210 callout_drain(&q->queue_full);
211 taskqueue_drain_all(q->tx_tq);
212 mtx_destroy(&q->tx_lock);
214 free(sc->queues, M_DEVBUF);
218 /* Network device interface */
/*
 * if_init callback (arg is the struct ntb_net_ctx): marks the interface
 * running, clears OACTIVE and reports the link state, which is taken from
 * the first queue pair's transport link.
 */
221 ntb_net_init(void *arg)
223 struct ntb_net_ctx *sc = arg;
224 struct ifnet *ifp = sc->ifp;
226 ifp->if_drv_flags |= IFF_DRV_RUNNING;
227 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
228 if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ?
229 LINK_STATE_UP : LINK_STATE_DOWN);
/*
 * if_ioctl callback.  The visible handling covers an MTU change, media
 * requests (forwarded to ifmedia_ioctl()), checksum-offload capability
 * changes, and a fallback to ether_ioctl().
 * NOTE(review): the switch statement, its case labels, the `else` lines and
 * the return statement are not visible in this listing.
 */
233 ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
235 struct ntb_net_ctx *sc = ifp->if_softc;
236 struct ifreq *ifr = (struct ifreq *)data;
/*
 * The requested MTU is compared against sc->mtu less the Ethernet header;
 * what happens for an oversized request is not visible here.
 */
247 if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) {
252 ifp->if_mtu = ifr->ifr_mtu;
258 error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
/*
 * Each checksum capability bit in ifr_reqcap is set or cleared in
 * if_capenable; the two TX bits also set or clear the matching
 * NTB_CSUM_FEATURES / NTB_CSUM_FEATURES6 flags in if_hwassist.
 */
262 if (ifr->ifr_reqcap & IFCAP_RXCSUM)
263 ifp->if_capenable |= IFCAP_RXCSUM;
265 ifp->if_capenable &= ~IFCAP_RXCSUM;
266 if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
267 ifp->if_capenable |= IFCAP_TXCSUM;
268 ifp->if_hwassist |= NTB_CSUM_FEATURES;
270 ifp->if_capenable &= ~IFCAP_TXCSUM;
271 ifp->if_hwassist &= ~NTB_CSUM_FEATURES;
273 if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6)
274 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
276 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
277 if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) {
278 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
279 ifp->if_hwassist |= NTB_CSUM_FEATURES6;
281 ifp->if_capenable &= ~IFCAP_TXCSUM_IPV6;
282 ifp->if_hwassist &= ~NTB_CSUM_FEATURES6;
287 error = ether_ioctl(ifp, command, data);
/*
 * Media change callback registered through ifmedia_init(): tests whether
 * the selected media word is of Ethernet type.
 * NOTE(review): the return statements are not visible in this listing.
 */
295 ntb_ifmedia_upd(struct ifnet *ifp)
297 struct ntb_net_ctx *sc = ifp->if_softc;
298 struct ifmedia *ifm = &sc->media;
300 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
/*
 * Media status callback: always reports a valid status with the fixed
 * NTB_MEDIATYPE as active media, and adds IFM_ACTIVE when the first queue
 * pair's transport link is up.
 */
307 ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
309 struct ntb_net_ctx *sc = ifp->if_softc;
311 ifmr->ifm_status = IFM_AVALID;
312 ifmr->ifm_active = NTB_MEDIATYPE;
313 if (ntb_transport_link_query(sc->queues[0].qp))
314 ifmr->ifm_status |= IFM_ACTIVE;
/*
 * Moves packets from the queue's buf_ring to its transport queue pair.
 * Both callers in this file (ntb_transmit() and ntb_handle_tx()) hold
 * q->tx_lock around the call.
 * NOTE(review): the conditions on `rc` that select between the paths below,
 * and the assignment of `mflags`, are not visible in this listing.
 */
318 ntb_transmit_locked(struct ntb_net_queue *q)
320 struct ifnet *ifp = q->ifp;
325 CTR0(KTR_NTB, "TX: ntb_transmit_locked");
/* Peek instead of dequeue so the mbuf can be put back if it cannot be sent. */
326 while ((m = drbr_peek(ifp, q->br)) != NULL) {
327 CTR1(KTR_NTB, "TX: start mbuf %p", m);
328 ETHER_BPF_MTAP(ifp, m);
/* Saved before the enqueue; used for the byte counter after a successful send. */
329 len = m->m_pkthdr.len;
331 rc = ntb_transport_tx_enqueue(q->qp, m, m, len);
333 CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc);
/* Retry path: the mbuf goes back on the ring and the queue_full callout is armed. */
335 drbr_putback(ifp, q->br, m);
336 callout_reset_sbt(&q->queue_full,
337 SBT_1MS / 4, SBT_1MS / 4,
/* Drop path: the ring entry is consumed and counted as an output error. */
341 drbr_advance(ifp, q->br);
342 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
/* Success path: consume the ring entry and update the output statistics. */
346 drbr_advance(ifp, q->br);
347 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
348 if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
349 if (mflags & M_MCAST)
350 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
/*
 * if_transmit callback: selects a queue, puts the mbuf on that queue's
 * buf_ring, then drains the ring from this context if tx_lock can be taken
 * without blocking; the TX task is enqueued to do the work otherwise.
 * NOTE(review): the `else` lines, the handling of a drbr_enqueue() error
 * and the return statement are not visible in this listing.
 */
355 ntb_transmit(struct ifnet *ifp, struct mbuf *m)
357 struct ntb_net_ctx *sc = ifp->if_softc;
358 struct ntb_net_queue *q;
361 CTR0(KTR_NTB, "TX: ntb_transmit");
/* Queue index comes from the mbuf's flowid when it has a hash type, or from the current CPU. */
362 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
363 i = m->m_pkthdr.flowid % sc->num_queues;
365 i = curcpu % sc->num_queues;
368 error = drbr_enqueue(ifp, q->br, m);
/* Non-blocking lock attempt: transmit inline only when the lock is free. */
372 if (mtx_trylock(&q->tx_lock)) {
373 ntb_transmit_locked(q);
374 mtx_unlock(&q->tx_lock);
376 taskqueue_enqueue(q->tx_tq, &q->tx_task);
/*
 * Handler for tx_task (registered with TASK_INIT in ntb_net_attach()):
 * drains the queue's ring with tx_lock held.  `arg` is the struct
 * ntb_net_queue; `pending` is not used in the visible lines.
 */
381 ntb_handle_tx(void *arg, int pending)
383 struct ntb_net_queue *q = arg;
385 mtx_lock(&q->tx_lock);
386 ntb_transmit_locked(q);
387 mtx_unlock(&q->tx_lock);
/*
 * Handler traced as the "qp_full callout"; `arg` is the struct
 * ntb_net_queue.  When the transport queue pair reports free TX entries the
 * TX task is enqueued; the queue_full callout is rescheduled with a period
 * of SBT_1MS / 4.
 * NOTE(review): the line between the two statements (presumably `else`) is
 * not visible in this listing, so confirm the rescheduling is conditional.
 */
391 ntb_qp_full(void *arg)
393 struct ntb_net_queue *q = arg;
395 CTR0(KTR_NTB, "TX: qp_full callout");
396 if (ntb_transport_tx_free_entry(q->qp) > 0)
397 taskqueue_enqueue(q->tx_tq, &q->tx_task);
399 callout_schedule_sbt(&q->queue_full,
400 SBT_1MS / 4, SBT_1MS / 4, 0);
/*
 * if_qflush callback: for every queue, dequeues all mbufs from the buf_ring
 * while holding that queue's tx_lock.
 * NOTE(review): the statement that disposes of each dequeued mbuf, and
 * anything following the outer loop, are not visible in this listing.
 */
404 ntb_qflush(struct ifnet *ifp)
406 struct ntb_net_ctx *sc = ifp->if_softc;
407 struct ntb_net_queue *q;
411 for (i = 0; i < sc->num_queues; i++) {
413 mtx_lock(&q->tx_lock);
414 while ((m = buf_ring_dequeue_sc(q->br)) != NULL)
416 mtx_unlock(&q->tx_lock);
421 /* Network Device Callbacks */
/*
 * TX handler registered with the transport queue pairs.  The trace point
 * describes `data` as the mbuf being freed.
 * NOTE(review): the rest of the parameter list and the freeing call itself
 * are not visible in this listing.
 */
423 ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
428 CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
/*
 * RX handler registered with the transport queue pairs: treats `data` as a
 * received mbuf, tags it with the interface (and a flow id when several
 * queues exist), optionally marks its checksums as verified, and passes it
 * to ifp->if_input().  `qp_data` is the struct ntb_net_queue supplied to
 * ntb_transport_create_queue() in ntb_net_attach().
 * NOTE(review): the rest of the parameter list, the condition guarding the
 * IERRORS increment and the case labels of the switch are not visible in
 * this listing.
 */
432 ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
435 struct ntb_net_queue *q = qp_data;
436 struct ntb_net_ctx *sc = q->sc;
437 struct mbuf *m = data;
438 struct ifnet *ifp = q->ifp;
441 CTR1(KTR_NTB, "RX: rx handler (%d)", len);
443 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
447 m->m_pkthdr.rcvif = ifp;
/* With more than one queue, the receiving queue's index becomes an opaque flow id. */
448 if (sc->num_queues > 1) {
449 m->m_pkthdr.flowid = q - sc->queues;
450 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
/*
 * When an RX checksum capability is enabled, the two bytes at offset 12 of
 * the frame are read as the protocol value and switched on; the branches
 * mark the mbuf with NTB_CSUM_SET and csum_data 0xffff if the corresponding
 * IPv4 or IPv6 capability is enabled.
 */
452 if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
453 m_copydata(m, 12, 2, (void *)&proto);
454 switch (ntohs(proto)) {
456 if (ifp->if_capenable & IFCAP_RXCSUM) {
457 m->m_pkthdr.csum_data = 0xffff;
458 m->m_pkthdr.csum_flags = NTB_CSUM_SET;
462 if (ifp->if_capenable & IFCAP_RXCSUM_IPV6) {
463 m->m_pkthdr.csum_data = 0xffff;
464 m->m_pkthdr.csum_flags = NTB_CSUM_SET;
469 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
470 ifp->if_input(ifp, m);
/*
 * Link event handler registered with the transport queue pairs: translates
 * `status` into LINK_STATE_DOWN, LINK_STATE_UP or LINK_STATE_UNKNOWN and
 * reports it through if_link_state_change().  `data` is the struct
 * ntb_net_queue.
 * NOTE(review): the switch and its case labels are not visible in this
 * listing, so the exact event-to-state mapping cannot be confirmed here.
 */
474 ntb_net_event_handler(void *data, enum ntb_link_event status)
476 struct ntb_net_queue *q = data;
481 new_state = LINK_STATE_DOWN;
484 new_state = LINK_STATE_UP;
487 new_state = LINK_STATE_UNKNOWN;
490 if_link_state_change(q->ifp, new_state);
493 /* Helper functions */
494 /* TODO: This too should really be part of the kernel */
/*
 * Flag bits for the first octet of an EUI-48 address.  The replacement
 * lists are parenthesized so each macro behaves as a single value inside a
 * larger expression; unparenthesized, `EUI48_LOCALLY_ADMINISTERED + 1`
 * would expand to `1 << 1 + 1`, i.e. 4 instead of 3.
 */
#define EUI48_MULTICAST (1 << 0)
#define EUI48_LOCALLY_ADMINISTERED (1 << 1)
/*
 * Fills the six-byte buffer `eaddr` with a generated address: byte 0 is set
 * to EUI48_LOCALLY_ADMINISTERED, bytes 1-4 are filled by arc4rand(), and
 * byte 5 takes the value of a function-static counter that is incremented
 * on every call (it is a uint8_t, so it wraps after 256 calls).
 */
498 create_random_local_eui48(u_char *eaddr)
500 static uint8_t counter = 0;
502 eaddr[0] = EUI48_LOCALLY_ADMINISTERED;
503 arc4rand(&eaddr[1], 4, 0);
504 eaddr[5] = counter++;
/*
 * Device method table binding the probe, attach and detach entry points
 * defined in this file.
 * NOTE(review): the table terminator and closing brace are not visible in
 * this listing.
 */
507 static device_method_t ntb_net_methods[] = {
508 /* Device interface */
509 DEVMETHOD(device_probe, ntb_net_probe),
510 DEVMETHOD(device_attach, ntb_net_attach),
511 DEVMETHOD(device_detach, ntb_net_detach),
/*
 * Driver registration: defines the driver class named "ntb" with
 * ntb_net_methods and a softc of sizeof(struct ntb_net_ctx), registers the
 * if_ntb module on the ntb_transport bus, declares its dependency on
 * ntb_transport (version 1) and sets the module version to 1.
 * NOTE(review): the final arguments of DRIVER_MODULE are not visible in
 * this listing.
 */
515 devclass_t ntb_net_devclass;
516 static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods,
517 sizeof(struct ntb_net_ctx));
518 DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass,
520 MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1);
521 MODULE_VERSION(if_ntb, 1);