2 * Copyright (c) 1998 Luigi Rizzo
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * This code implements bridging in FreeBSD. It only acts on ethernet
29 * type of interfaces (others are still usable for routing).
30 * A bridging table holds the source MAC address/dest. interface for each
31 * known node. The table is indexed using a hash of the source address.
33 * Input packets are tapped near the end of the input routine in each
34 * driver (near the call to bpf_mtap, or before the call to ether_input)
35 * and analysed calling bridge_in(). Depending on the result, the packet
36 * can be forwarded to one or more output interfaces using bdg_forward(),
37 * and/or sent to the upper layer (e.g. in case of multicast).
39 * Output packets are intercepted near the end of ether_output(),
40 * the correct destination is selected calling bdg_dst_lookup(),
41 * and then forwarding is done using bdg_forward().
42 * Bridging is controlled by the sysctl variable net.link.ether.bridge
44 * The ARP code is also modified to let a machine answer requests
45 * irrespective of the port the request came from.
47 * In case of loops in the bridging topology, the bridge detects this
48 * event and temporarily mutes output bridging on one of the ports.
49 * Periodically, interfaces are unmuted by bdg_timeout(). (For the
50 * mute flag I am temporarily using IFF_LINK2, but this has to
51 * change.) Muting is only implemented as a safety measure, and also as
52 * a mechanism to support a user-space implementation of the spanning
53 * tree algorithm. In the final release, unmuting will only occur
54 * because of explicit action of the user-level daemon.
56 * To build a bridging kernel, use the following option
58 * and then at runtime set the sysctl variable to enable bridging.
60 * Only one interface is supposed to have addresses set (but
61 * there are no problems in practice if you set addresses for more
62 * than one interface).
63 * Bridging will act before routing, but nothing prevents a machine
64 * from doing both (modulo bugs in the implementation...).
67 * - bridging requires some (small) modifications to the interface
68 * driver. Currently (980911) the "ed", "de", "tx", "lnc" drivers
69 * have been modified and tested. "fxp", "ep", "fe" have been
70 * modified but not tested. See the "ed" and "de" drivers as
71 * examples on how to operate.
72 * - bridging is incompatible with multicast routing on the same
73 * machine. There is no easy fix for this.
74 * - loop detection is still not very robust.
75 * - the interface of bdg_forward() could be improved.
78 #include <sys/param.h>
80 #include <sys/malloc.h>
81 #include <sys/systm.h>
82 #include <sys/socket.h> /* for net/if.h */
83 #include <sys/kernel.h>
84 #include <sys/sysctl.h>
87 #include <net/if_types.h>
89 #include <netinet/in.h> /* for struct arpcom */
90 #include <netinet/in_systm.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip.h>
93 #include <netinet/if_ether.h> /* for struct arpcom */
98 #if defined(IPFIREWALL)
99 #include <net/route.h>
100 #include <netinet/ip_fw.h>
101 #if defined(DUMMYNET)
102 #include <netinet/ip_dummynet.h>
106 #include <net/bridge.h>
109 * For debugging, you can use the following macros.
110 * remember, rdtsc() only works on Pentium-class machines
113 DDB(ticks = rdtsc();)
114 ... interesting code ...
115 DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;)
124 * System initialization
127 static void bdginit(void *);
128 static void flush_table(void);
130 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_SECOND, bdginit, NULL)
132 static int bdg_ipfw = 0 ;
134 bdg_hash_table *bdg_table = NULL ;
137 * we need additional info for the bridge. The bdg_ifp2sc[] array
138 * provides a pointer to this struct using the if_index.
139 * bdg_softc has a backpointer to the struct ifnet, the bridge
140 * flags, and a group (bridging occurs only between ports of the
145 /* ((struct arpcom *)ifp)->ac_enaddr is the eth. addr */
150 static struct bdg_softc **ifp2sc = NULL ;
152 #if 0 /* new code using ifp2sc */
153 #define SAMEGROUP(ifp,src) (src == NULL || \
154 ifp2sc[ifp->if_index]->group == ifp2sc[src->if_index]->group )
155 #define MUTED(ifp) (ifp2sc[ifp->if_index]->flags & IFF_MUTE)
156 #define MUTE(ifp) ifp2sc[ifp->if_index]->flags |= IFF_MUTE
157 #define UNMUTE(ifp) ifp2sc[ifp->if_index]->flags &= ~IFF_MUTE
159 #define SAMEGROUP(a,b) 1
160 #define MUTED(ifp) (ifp->if_flags & IFF_MUTE)
161 #define MUTE(ifp) ifp->if_flags |= IFF_MUTE
162 #define UNMUTE(ifp) ifp->if_flags &= ~IFF_MUTE
166 sysctl_bdg SYSCTL_HANDLER_ARGS
168 int error, oldval = do_bridge ;
170 error = sysctl_handle_int(oidp,
171 oidp->oid_arg1, oidp->oid_arg2, req);
172 printf("called sysctl for bridge name %s arg2 %d val %d->%d\n",
173 oidp->oid_name, oidp->oid_arg2,
175 if (bdg_table == NULL)
177 if (oldval != do_bridge) {
183 SYSCTL_DECL(_net_link_ether);
184 SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW,
185 &do_bridge, 0, &sysctl_bdg, "I", "Bridging");
187 SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW, &bdg_ipfw,0,"");
188 #if 1 /* diagnostic vars */
189 int bdg_in_count = 0 , bdg_in_ticks = 0 , bdg_fw_count = 0, bdg_fw_ticks = 0 ;
190 SYSCTL_INT(_net_link_ether, OID_AUTO, bdginc, CTLFLAG_RW, &bdg_in_count,0,"");
191 SYSCTL_INT(_net_link_ether, OID_AUTO, bdgint, CTLFLAG_RW, &bdg_in_ticks,0,"");
192 SYSCTL_INT(_net_link_ether, OID_AUTO, bdgfwc, CTLFLAG_RW, &bdg_fw_count,0,"");
193 SYSCTL_INT(_net_link_ether, OID_AUTO, bdgfwt, CTLFLAG_RW, &bdg_fw_ticks,0,"");
195 static struct bdg_stats bdg_stats ;
196 SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats,
197 CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics");
199 static int bdg_loops ;
202 * completely flush the bridge table.
209 if (bdg_table == NULL)
212 for (i=0; i< HASH_SIZE; i++)
213 bdg_table[i].name= NULL; /* clear table */
218 * called periodically to flush entries etc.
221 bdg_timeout(void *dummy)
225 static int slowtimer = 0 ;
228 static int age_index = 0 ; /* index of table position to age */
229 int l = age_index + HASH_SIZE/4 ;
231 * age entries in the forwarding table.
235 for (; age_index < l ; age_index++)
236 if (bdg_table[age_index].used)
237 bdg_table[age_index].used = 0 ;
238 else if (bdg_table[age_index].name) {
239 /* printf("xx flushing stale entry %d\n", age_index); */
240 bdg_table[age_index].name = NULL ;
242 if (age_index >= HASH_SIZE)
245 if (--slowtimer <= 0 ) {
248 for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next) {
249 if (ifp->if_type != IFT_ETHER)
251 if ( 0 == ( ifp->if_flags & IFF_UP) ) {
256 if ( 0 == ( ifp->if_flags & IFF_PROMISC) ) {
259 ret = ifpromisc(ifp, 1);
261 printf(">> now %s%d flags 0x%x promisc %d\n",
262 ifp->if_name, ifp->if_unit,
266 printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit);
273 timeout(bdg_timeout, (void *)0, 2*hz );
277 * local MAC addresses are held in a small array. This makes comparisons
280 unsigned char bdg_addresses[6*BDG_MAX_PORTS];
284 * initialization of bridge code.
286 * This will have to change to support kldload.
298 * initialization of bridge code
300 s = splimp(); /* XXX does this matter? */
301 if (bdg_table == NULL)
302 bdg_table = (struct hash_table *)
303 malloc(HASH_SIZE * sizeof(struct hash_table),
307 ifp2sc = malloc(if_index * sizeof(struct bdg_softc *), M_IFADDR, M_WAITOK );
308 bzero(ifp2sc, if_index * sizeof(struct bdg_softc *) );
310 bzero(&bdg_stats, sizeof(bdg_stats) );
312 eth_addr = bdg_addresses ;
314 printf("BRIDGE 981214, have %d interfaces\n", if_index);
315 for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ;
316 i++, ifp = ifp->if_link.tqe_next)
317 if (ifp->if_type == IFT_ETHER) { /* ethernet ? */
318 ac = (struct arpcom *)ifp;
319 sprintf(bdg_stats.s[ifp->if_index].name,
320 "%s%d", ifp->if_name, ifp->if_unit);
321 printf("-- index %d %s type %d phy %d addrl %d addr %6D\n",
323 bdg_stats.s[ifp->if_index].name,
324 (int)ifp->if_type, (int) ifp->if_physical,
325 (int)ifp->if_addrlen,
326 ac->ac_enaddr, "." );
327 bcopy(ac->ac_enaddr, eth_addr, 6);
330 ifp2sc[bdg_ports] = malloc(sizeof(struct bdg_softc),
331 M_IFADDR, M_WAITOK );
332 ifp2sc[bdg_ports]->ifp = ifp ;
333 ifp2sc[bdg_ports]->flags = 0 ;
334 ifp2sc[bdg_ports]->group = 0 ;
343 * bridge_in() is invoked to perform bridging decision on input packets.
345 * m packet to be bridged. The mbuf need not hold the
346 * whole packet; the first 14 bytes (the Ethernet header) suffice. We
347 * assume them to be contiguous. No alignment assumptions are made
348 * because they are not a problem on i386-class machines.
350 * On Return: destination of packet, one of
351 * BDG_BCAST broadcast
352 * BDG_MCAST multicast
353 * BDG_LOCAL is only for a local address (do not forward)
354 * BDG_DROP drop the packet
355 * ifp ifp of the destination interface.
357 * Forwarding is not done directly to give a chance to some drivers
358 * to fetch more of the packet, or simply drop it completely.
363 bridge_in(struct mbuf *m)
366 struct ifnet *ifp = m->m_pkthdr.rcvif, *dst , *old ;
367 int dropit = MUTED(ifp) ;
368 struct ether_header *eh;
370 eh = mtod(m, struct ether_header *);
373 * hash the source address
375 index= HASH_FN(eh->ether_shost);
376 bdg_table[index].used = 1 ;
377 old = bdg_table[index].name ;
378 if ( old ) { /* the entry is valid. */
379 if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) {
380 printf("collision at %d\n", index);
381 bdg_table[index].name = NULL ;
382 } else if (old != ifp) {
384 * found a loop. Either a machine has moved, or there
385 * is a misconfiguration/reconfiguration of the network.
386 * First, do not forward this packet!
387 * Record the relocation anyways; then, if loops persist,
388 * suspect a reconfiguration and disable forwarding
389 * from the old interface.
391 bdg_table[index].name = ifp ; /* relocate address */
392 printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n",
393 bdg_loops, eh->ether_shost, ".",
394 ifp->if_name, ifp->if_unit,
395 old->if_name, old->if_unit,
396 old->if_flags & IFF_MUTE ? "muted":"ignore");
399 if (++bdg_loops > 10)
406 * now write the source address into the table
408 if (bdg_table[index].name == NULL) {
409 DEB(printf("new addr %6D at %d for %s%d\n",
410 eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);)
411 bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6);
412 bdg_table[index].name = ifp ;
414 dst = bridge_dst_lookup(m);
416 * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
417 * For muted interfaces, the first 3 are changed to BDG_LOCAL,
418 * and others to BDG_DROP. Also, for incoming packets, ifp is changed
419 * to BDG_DROP in case ifp == src . These mods are not necessary
420 * for outgoing packets from ether_output().
422 BDG_STAT(ifp, BDG_IN);
427 case (int)BDG_UNKNOWN:
432 if (dst == ifp || dropit )
433 BDG_STAT(ifp, BDG_DROP);
435 BDG_STAT(ifp, BDG_FORWARD);
440 if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL)
445 return (dst == ifp ? BDG_DROP : dst ) ;
450 * Forward to dst, excluding src port and (if not a single interface)
451 * muted interfaces. The packet is freed if marked as such
452 * and not for a local destination.
453 * A cleaner implementation would be to make bdg_forward()
454 * always consume the packet, leaving to the caller the task
455 * to make a copy if it needs it. As it is now, bdg_forward()
456 * can keep a copy alive in some cases.
459 bdg_forward (struct mbuf **m0, struct ifnet *dst)
461 struct ifnet *src = (*m0)->m_pkthdr.rcvif; /* could be NULL in output */
464 int once = 0; /* execute the loop only once */
465 int canfree = 1 ; /* can free the buf at the end */
469 struct ether_header *eh = mtod(*m0, struct ether_header *); /* XXX */
472 if (dst == BDG_DROP) { /* this should not happen */
473 printf("xx bdg_forward for BDG_DROP)\n");
478 if (dst == BDG_LOCAL) { /* this should not happen as well */
479 printf("xx ouch, bdg_forward for local pkt\n");
482 if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) {
483 ifp = ifnet.tqh_first ;
485 if (dst != BDG_UNKNOWN)
489 once = 1 ; /* and also canfree */
493 * do filtering in a very similar way to what is done
494 * in ip_output. Only for IP packets, and only pass/fail/dummynet
495 * is supported. The tricky thing is to make sure that enough of
496 * the packet (basically, Eth+IP+TCP/UDP headers) is contiguous
497 * so that calls to m_pullup in ip_fw_chk will not kill the
502 struct ip_fw_chain *rule;
506 if (m->m_type == MT_DUMMYNET) {
508 * the packet was already tagged, so part of the
509 * processing was already done, and we need to go down.
511 rule = (struct ip_fw_chain *)(m->m_data) ;
512 (*m0) = m = m->m_next ;
514 src = m->m_pkthdr.rcvif; /* could be NULL in output */
515 eh = mtod(m, struct ether_header *); /* XXX */
516 canfree = 1 ; /* for sure, a copy is not needed later. */
517 goto forward; /* HACK! */
523 goto forward ; /* do not apply to packets from ether_output */
524 if (canfree == 0 ) /* need to make a copy */
525 m = m_copypacket(*m0, M_DONTWAIT);
533 * Before calling the firewall, swap fields the same way IP does.
534 * Here we assume the pkt is an IP one and the header is contiguous
536 eh = mtod(m, struct ether_header *);
537 ip = (struct ip *)(eh + 1 ) ;
543 * The third parameter to the firewall code is the dst. interface.
544 * Since we apply checks only on input pkts we use NULL.
546 off = (*ip_fw_chk_ptr)(NULL, 0, NULL, &dummy, &m, &rule, NULL) ;
547 if (m == NULL) { /* pkt discarded by firewall */
553 * on return, the mbuf pointer might have changed. Restore
554 * *m0 (if it was the same as m), eh, ip and then
555 * restore original ordering.
557 eh = mtod(m, struct ether_header *);
558 ip = (struct ip *)(eh + 1 ) ;
559 if (canfree) /* m was a reference to *m0, so update *m0 */
572 * pass the pkt to dummynet. Need to include m, dst, rule.
573 * Dummynet consumes the packet in all cases.
575 dummynet_io((off & 0xffff), DN_TO_BDG_FWD, m, dst, NULL, 0, rule);
576 if (canfree) /* dummynet has consumed the original one */
581 /* if none of the above matches, we have to drop the pkt */
584 if (canfree && m != *m0) {
591 #endif /* IPFIREWALL */
597 for ( ; ifp ; ifp = ifp->if_link.tqe_next ) {
598 if (ifp != src && ifp->if_type == IFT_ETHER &&
599 (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) &&
600 SAMEGROUP(ifp, src) && !MUTED(ifp) ) {
601 if (m == NULL) { /* do i need to make a copy ? */
602 if (canfree && ifp->if_link.tqe_next == NULL) /* last one! */
604 else /* on a P5-90, m_packetcopy takes 540 ticks */
605 m = m_copypacket(*m0, M_DONTWAIT);
607 printf("bdg_forward: sorry, m_copy failed!\n");
612 * execute last part of ether_output.
616 * Queue message on interface, and start output if interface
619 if (IF_QFULL(&ifp->if_snd)) {
620 IF_DROP(&ifp->if_snd);
621 MUTE(ifp); /* good measure... */
625 ifp->if_obytes += m->m_pkthdr.len ;
626 if (m->m_flags & M_MCAST)
628 IF_ENQUEUE(&ifp->if_snd, m);
629 if ((ifp->if_flags & IFF_OACTIVE) == 0)
630 (*ifp->if_start)(ifp);
633 *m0 = NULL ; /* the packet is gone... */
636 BDG_STAT(ifp, BDG_OUT);
642 /* cleanup any mbuf leftover. */
647 if (canfree && *m0) {