2 * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include "opt_inet6.h"
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/devctl.h>
35 #include <sys/eventhandler.h>
36 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/socket.h>
40 #include <sys/sysctl.h>
43 #include <net/ethernet.h>
44 #include <net/infiniband.h>
46 #include <net/if_var.h>
47 #include <net/if_dl.h>
48 #include <net/if_media.h>
49 #include <net/if_lagg.h>
50 #include <net/if_llatbl.h>
51 #include <net/if_types.h>
52 #include <net/netisr.h>
53 #include <net/route.h>
54 #include <netinet/if_ether.h>
55 #include <netinet/in.h>
56 #include <netinet/ip6.h>
57 #include <netinet6/in6_var.h>
58 #include <netinet6/nd6.h>
60 #include <security/mac/mac_framework.h>
62 /* if_lagg(4) support */
/*
 * Input hook for lagg member ports.  infiniband_input() calls through this
 * pointer when the receiving interface has if_type IFT_INFINIBANDLAG, after
 * asserting that the pointer is non-NULL; the mbuf it returns replaces the
 * original and is checked for NULL.
 * NOTE(review): presumably assigned by the if_lagg module when it loads;
 * the assignment is not in this file section -- confirm.
 */
63 struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *);
/*
 * Build a link-layer multicast address in buf from an IPv4 address.
 *
 * addr      - 32-bit IPv4 address; its four bytes end up in buf[16..19],
 *             most significant byte first.
 * broadcast - interface broadcast address; the low four bits of byte 5
 *             (the scope) and bytes 8-9 are carried over into buf.
 * buf       - output buffer, written at least up to index 19.
 *
 * NOTE(review): this listing shows only part of the body.  The stores to
 * the remaining buf[] bytes and any byte-order conversion of addr (callers
 * pass sin_addr.s_addr) are not visible here -- confirm against the full
 * source before relying on the exact layout.
 */
67 infiniband_ipv4_multicast_map(uint32_t addr,
68 const uint8_t *broadcast, uint8_t *buf)
/* Scope is the low nibble of the broadcast address byte 5. */
73 scope = broadcast[5] & 0xF;
80 buf[5] = 0x10 | scope;
/* Bytes 8-9 are copied unchanged from the broadcast address. */
83 buf[8] = broadcast[8];
84 buf[9] = broadcast[9];
/* IPv4 address bytes, most significant first. */
91 buf[16] = (addr >> 24) & 0xff;
92 buf[17] = (addr >> 16) & 0xff;
93 buf[18] = (addr >> 8) & 0xff;
94 buf[19] = addr & 0xff;
/*
 * Build a link-layer multicast address in buf from an IPv6 address.
 *
 * addr      - IPv6 address; its last 10 bytes (s6_addr[6..15]) are copied
 *             into buf[10..19].
 * broadcast - interface broadcast address; the low four bits of byte 5
 *             (the scope) and bytes 8-9 are carried over into buf.
 * buf       - output buffer, written at least up to index 19.
 *
 * NOTE(review): this listing shows only part of the body; stores to the
 * other buf[] bytes are not visible here -- confirm against the full source.
 */
100 infiniband_ipv6_multicast_map(const struct in6_addr *addr,
101 const uint8_t *broadcast, uint8_t *buf)
/* Scope is the low nibble of the broadcast address byte 5. */
105 scope = broadcast[5] & 0xF;
112 buf[5] = 0x10 | scope;
/* Bytes 8-9 are copied unchanged from the broadcast address. */
115 buf[8] = broadcast[8];
116 buf[9] = broadcast[9];
/* Low 10 bytes of the IPv6 address fill the tail of the result. */
117 memcpy(&buf[10], &addr->s6_addr[6], 10);
/*
 * Pass a packet that begins with a struct infiniband_header to BPF,
 * presenting it with a synthesized Ethernet header instead.
 *
 * ifp - interface whose if_bpf tap receives the packet.
 * mb  - mbuf whose first bytes are the Infiniband header.  Its data
 *       pointer and lengths are adjusted for the tap call and then put
 *       back, so the caller sees it unchanged afterwards.
 *
 * The first mbuf is compared against the header size before the header is
 * read; the action taken when it is too short is not visible in this
 * listing.
 */
122 * This is for clients that have an infiniband_header in the mbuf.
125 infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
127 struct infiniband_header *ibh;
128 struct ether_header eh;
130 if (mb->m_len < sizeof(*ibh))
133 ibh = mtod(mb, struct infiniband_header *);
/*
 * Synthesize the Ethernet header: protocol copied as-is, zero source
 * address, and ETHER_ADDR_LEN bytes of the hardware address starting at
 * offset 4 as the destination.
 */
134 eh.ether_type = ibh->ib_protocol;
135 memset(eh.ether_shost, 0, ETHER_ADDR_LEN);
136 memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN);
/* Temporarily skip the Infiniband header so BPF sees eh + payload. */
137 mb->m_data += sizeof(*ibh);
138 mb->m_len -= sizeof(*ibh);
139 mb->m_pkthdr.len -= sizeof(*ibh);
140 bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
/* Undo the adjustment; these three lines must mirror the three above. */
141 mb->m_data -= sizeof(*ibh);
142 mb->m_len += sizeof(*ibh);
143 mb->m_pkthdr.len += sizeof(*ibh);
/*
 * Resolve the link-layer destination for an outgoing packet, prepend the
 * Infiniband header and hand the packet to the interface transmit routine.
 *
 * ifp - outgoing interface.
 * m   - packet to send.
 * dst - destination; dst->sa_family selects the resolution method.
 *
 * Visible steps, in order:
 *  - a MAC framework transmit check, an IFF_MONITOR check, and a check
 *    that the interface is both IFF_UP and IFF_DRV_RUNNING;
 *  - per-family resolution of the destination into edst:
 *      IPv4: a valid cached llentry, else the multicast mapping for
 *            M_MCAST packets, else arpresolve();
 *      ARP:  header lengths are validated, ar_hrd is rewritten, and the
 *            destination is the broadcast address for M_BCAST packets or
 *            the ARP target hardware address otherwise;
 *      IPv6: a valid cached llentry, else the multicast mapping for
 *            M_MCAST packets, else the broadcast address for ICMPv6,
 *            else nd6_resolve();
 *      anything else yields EAFNOSUPPORT;
 *  - M_PREPEND of INFINIBAND_HDR_LEN bytes, filling in ib_protocol and
 *    ib_hwaddr, then ifp->if_transmit().
 *
 * Returns the if_transmit() result on the success path.
 * NOTE(review): the case labels, the error/cleanup paths and the actions
 * taken by the early checks are not visible in this listing -- confirm
 * them against the full source.
 */
147 * Infiniband output routine.
150 infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
153 uint8_t edst[INFINIBAND_ADDR_LEN];
154 #if defined(INET) || defined(INET6)
155 struct llentry *lle = NULL;
157 struct infiniband_header *ibh;
/* is_gw: a route was supplied and it has RT_HAS_GW set. */
164 is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0);
167 error = mac_ifnet_check_transmit(ifp, m);
173 if (ifp->if_flags & IFF_MONITOR) {
177 if (!((ifp->if_flags & IFF_UP) &&
178 (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
183 switch (dst->sa_family) {
/* IPv4: cached entry, then multicast mapping, then ARP resolution. */
188 if (lle != NULL && (lle->la_flags & LLE_VALID)) {
189 memcpy(edst, lle->ll_addr, sizeof(edst));
190 } else if (m->m_flags & M_MCAST) {
191 infiniband_ipv4_multicast_map(
192 ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
193 ifp->if_broadcastaddr, edst);
195 error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
197 if (error == EWOULDBLOCK)
199 m = NULL; /* mbuf is consumed by resolver */
203 type = htons(ETHERTYPE_IP);
/* ARP: make sure the fixed header, then the full header, is contiguous. */
208 if (m->m_len < sizeof(*ah)) {
213 ah = mtod(m, struct arphdr *);
215 if (m->m_len < arphdr_len(ah)) {
/* Force the hardware type of the outgoing ARP header to Infiniband. */
219 ah->ar_hrd = htons(ARPHRD_INFINIBAND);
221 switch (ntohs(ah->ar_op)) {
222 case ARPOP_REVREQUEST:
224 type = htons(ETHERTYPE_REVARP);
229 type = htons(ETHERTYPE_ARP);
233 if (m->m_flags & M_BCAST) {
234 memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
/* The target hardware address is only usable if it is Infiniband-sized. */
236 if (ah->ar_hln != INFINIBAND_ADDR_LEN) {
240 memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN);
/* IPv6: cached entry, multicast mapping, ICMPv6 broadcast, then ND. */
247 const struct ip6_hdr *ip6;
249 ip6 = mtod(m, const struct ip6_hdr *);
250 if (m->m_len < sizeof(*ip6)) {
253 } else if (lle != NULL && (lle->la_flags & LLE_VALID)) {
254 memcpy(edst, lle->ll_addr, sizeof(edst));
255 } else if (m->m_flags & M_MCAST) {
256 infiniband_ipv6_multicast_map(
257 &((const struct sockaddr_in6 *)dst)->sin6_addr,
258 ifp->if_broadcastaddr, edst);
259 } else if (ip6->ip6_nxt == IPPROTO_ICMPV6) {
260 memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
262 error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
264 if (error == EWOULDBLOCK)
266 m = NULL; /* mbuf is consumed by resolver */
270 type = htons(ETHERTYPE_IPV6);
275 error = EAFNOSUPPORT;
280 * Add local net header. If no space in first mbuf,
283 M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
288 ibh = mtod(m, struct infiniband_header *);
/* type is already in network byte order (assigned via htons above). */
290 ibh->ib_protocol = type;
291 memcpy(ibh->ib_hwaddr, edst, sizeof(edst));
294 * Queue message on interface, update output statistics if
295 * successful, and start output if interface not yet active.
298 return (ifp->if_transmit(ifp, m));
/*
 * Receive path: classify an incoming Infiniband frame, let BPF and the
 * lagg hook see it, strip the link-layer header and dispatch the payload
 * to the matching netisr.
 *
 * ifp - receiving interface.
 * m   - received packet, starting with a struct infiniband_header.
 *
 * Visible steps, in order:
 *  - switch to the interface vnet;
 *  - count an input error if the interface is not IFF_UP;
 *  - clear M_VLANTAG, and for multicast destination addresses set
 *    M_BCAST / M_MCAST and bump IFCOUNTER_IMCASTS;
 *  - tap the packet to BPF with the header still in place;
 *  - honour IFF_MONITOR, then set the FIB from ifp->if_fib;
 *  - for IFT_INFINIBANDLAG interfaces run the lagg input hook and
 *    continue with the receive interface recorded in the returned mbuf;
 *  - choose the upper layer from ib_protocol (IP, ARP, IPv6; another
 *    visible line counts an input error), strip INFINIBAND_HDR_LEN
 *    bytes, label the mbuf for MAC and call netisr_dispatch().
 *
 * NOTE(review): ibh is derived from m before the lagg hook may replace m,
 * and ibh->ib_protocol is read afterwards; confirm the hook never returns
 * a different mbuf or frees the original header storage.
 * NOTE(review): the drop/exit paths and several else/goto lines are not
 * visible in this listing.
 */
306 * Process a received Infiniband packet.
309 infiniband_input(struct ifnet *ifp, struct mbuf *m)
311 struct infiniband_header *ibh;
312 struct epoch_tracker et;
315 CURVNET_SET_QUIET(ifp->if_vnet);
317 if ((ifp->if_flags & IFF_UP) == 0) {
318 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
323 ibh = mtod(m, struct infiniband_header *);
326 * Reset layer specific mbuf flags to avoid confusing upper
329 m->m_flags &= ~M_VLANTAG;
/* A multicast address equal to the interface broadcast address is M_BCAST. */
332 if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) {
333 if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr,
334 ifp->if_addrlen) == 0)
335 m->m_flags |= M_BCAST;
337 m->m_flags |= M_MCAST;
338 if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
341 /* Let BPF have it before we strip the header. */
342 INFINIBAND_BPF_MTAP(ifp, m);
344 /* Allow monitor mode to claim this frame, after stats are updated. */
345 if (ifp->if_flags & IFF_MONITOR) {
350 /* Direct packet to correct FIB based on interface config. */
351 M_SETFIB(m, ifp->if_fib);
353 /* Handle input from a lagg<N> port */
354 if (ifp->if_type == IFT_INFINIBANDLAG) {
355 KASSERT(lagg_input_infiniband_p != NULL,
356 ("%s: if_lagg not loaded!", __func__));
357 m = (*lagg_input_infiniband_p)(ifp, m);
358 if (__predict_false(m == NULL))
/* From here on, account the packet to the interface lagg selected. */
360 ifp = m->m_pkthdr.rcvif;
364 * Dispatch frame to upper layer.
/* ib_protocol is compared in network byte order, hence htons() labels. */
366 switch (ibh->ib_protocol) {
368 case htons(ETHERTYPE_IP):
372 case htons(ETHERTYPE_ARP):
373 if (ifp->if_flags & IFF_NOARP) {
374 /* Discard packet if ARP is disabled on interface */
382 case htons(ETHERTYPE_IPV6):
387 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
392 /* Strip off the Infiniband header. */
393 m_adj(m, INFINIBAND_HDR_LEN);
397 * Tag the mbuf with an appropriate MAC label before any other
398 * consumers can get to it.
400 mac_ifnet_create_mbuf(ifp, m);
402 /* Allow monitor mode to claim this frame, after stats are updated. */
/*
 * NOTE(review): the comment above repeats the earlier IFF_MONITOR comment
 * and does not appear to describe the netisr dispatch below -- confirm
 * and reword in the full source.
 */
404 netisr_dispatch(isr, m);
/*
 * if_resolvemulti handler: validate a multicast address and, for IPv4 and
 * IPv6, translate it into a link-layer address.
 *
 * ifp  - interface the address is being resolved for; its broadcast
 *        address feeds the IPv4 and IPv6 mappings.
 * llsa - in/out: *llsa is passed to link_init_sdl() and is then replaced
 *        with the resulting sockaddr_dl on the IPv4 and IPv6 paths.
 * sa   - address to resolve; sa->sa_family selects the handling.
 *
 * Visible behaviour per address form:
 *  - sockaddr_dl:  no mapping; EADDRNOTAVAIL unless the link-layer
 *                  address satisfies INFINIBAND_IS_MULTICAST().
 *  - sockaddr_in:  EADDRNOTAVAIL unless IN_MULTICAST(); otherwise a
 *                  sockaddr_dl of length INFINIBAND_ADDR_LEN is filled in
 *                  by infiniband_ipv4_multicast_map().
 *  - sockaddr_in6: EADDRNOTAVAIL for the unspecified address or any
 *                  non-multicast address; otherwise filled in by
 *                  infiniband_ipv6_multicast_map().
 *  - a further visible path returns EAFNOSUPPORT.
 *
 * NOTE(review): the case labels and the success return values are not
 * visible in this listing -- confirm against the full source.
 */
411 infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
414 struct sockaddr_dl *sdl;
416 struct sockaddr_in *sin;
419 struct sockaddr_in6 *sin6;
423 switch (sa->sa_family) {
426 * No mapping needed. Just check that it's a valid MC address.
428 sdl = (struct sockaddr_dl *)sa;
429 e_addr = LLADDR(sdl);
430 if (!INFINIBAND_IS_MULTICAST(e_addr))
431 return (EADDRNOTAVAIL);
437 sin = (struct sockaddr_in *)sa;
438 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
439 return (EADDRNOTAVAIL);
440 sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
441 sdl->sdl_alen = INFINIBAND_ADDR_LEN;
442 e_addr = LLADDR(sdl);
443 infiniband_ipv4_multicast_map(
444 sin->sin_addr.s_addr, ifp->if_broadcastaddr, e_addr);
445 *llsa = (struct sockaddr *)sdl;
450 sin6 = (struct sockaddr_in6 *)sa;
452 * An IP6 address of 0 means listen to all of the
453 * multicast address used for IP6. This has no meaning
456 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
457 return (EADDRNOTAVAIL);
458 if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
459 return (EADDRNOTAVAIL);
460 sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
461 sdl->sdl_alen = INFINIBAND_ADDR_LEN;
462 e_addr = LLADDR(sdl);
463 infiniband_ipv6_multicast_map(
464 &sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
465 *llsa = (struct sockaddr *)sdl;
469 return (EAFNOSUPPORT);
/*
 * Perform the common setup for attaching an Infiniband interface.
 *
 * ifp - interface being attached.
 * lla - link-layer address of the interface (if_addrlen bytes); copied
 *       into the interface link-level sockaddr and, when present, into
 *       if_hw_addr.
 * llb - link-layer broadcast address; the pointer itself is stored in
 *       ifp->if_broadcastaddr, so the caller must keep it valid.
 *
 * Visible steps: set address/header lengths and MTU, install the output,
 * input and resolvemulti handlers, default the baudrate to 10 Gbit/s when
 * it is zero, fill in the link-level address, attach BPF as an Ethernet
 * (DLT_EN10MB) device, print the address when it is not all zero, then
 * announce the attach through the infiniband_ifattach_event handler and,
 * in the default vnet only, through devctl.
 *
 * NOTE(review): some lines (for example where ifa is obtained and the
 * body of the zero-address scan loop) are not visible in this listing.
 */
474 infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
476 struct sockaddr_dl *sdl;
480 ifp->if_addrlen = INFINIBAND_ADDR_LEN;
481 ifp->if_hdrlen = INFINIBAND_HDR_LEN;
482 ifp->if_mtu = INFINIBAND_MTU;
484 ifp->if_output = infiniband_output;
485 ifp->if_input = infiniband_input;
486 ifp->if_resolvemulti = infiniband_resolvemulti;
/* Keep a baudrate the driver already set; only fill in a default. */
488 if (ifp->if_baudrate == 0)
489 ifp->if_baudrate = IF_Gbps(10); /* default value */
491 ifp->if_broadcastaddr = llb;
494 KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
495 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
496 sdl->sdl_type = IFT_INFINIBAND;
497 sdl->sdl_alen = ifp->if_addrlen;
500 memcpy(LLADDR(sdl), lla, ifp->if_addrlen);
502 if (ifp->if_hw_addr != NULL)
503 memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen);
508 /* Attach ethernet compatible network device */
509 bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
511 /* Announce Infiniband MAC address if non-zero. */
512 for (i = 0; i < ifp->if_addrlen; i++)
/* i stops short of if_addrlen only if the scan above ended early. */
515 if (i != ifp->if_addrlen)
516 if_printf(ifp, "Infiniband address: %20D\n", lla, ":");
518 /* All necessary bits are set up; announce it now. */
519 EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp);
/* The devctl notification is only sent from the default vnet. */
521 if (IS_DEFAULT_VNET(curvnet))
522 devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL);
526 * Perform common duties while detaching an Infiniband interface
529 infiniband_ifdetach(struct ifnet *ifp)
536 infiniband_modevent(module_t mod, int type, void *data)
/*
 * Kernel module glue: the descriptor names the module "if_infiniband" and
 * routes module events to infiniband_modevent().  The module is declared
 * at SI_SUB_INIT_IF / SI_ORDER_ANY and exported as version 1.
 * NOTE(review): the remaining initializer fields and the closing brace of
 * the descriptor are not visible in this listing.
 */
547 static moduledata_t infiniband_mod = {
548 .name = "if_infiniband",
549 .evhand = &infiniband_modevent,
552 DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
553 MODULE_VERSION(if_infiniband, 1);