2 * Copyright (c) 1998 Luigi Rizzo
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * This code implements bridging in FreeBSD. It only acts on ethernet
30 * type of interfaces (others are still usable for routing).
31 * A bridging table holds the source MAC address/dest. interface for each
32 * known node. The table is indexed using a hash of the source address.
34 * Input packets are tapped near the end of the input routine in each
35 * driver (near the call to bpf_mtap, or before the call to ether_input)
36 * and analysed by calling bridge_in(). Depending on the result, the packet
37 * can be forwarded to one or more output interfaces using bdg_forward(),
38 * and/or sent to the upper layer (e.g. in case of multicast).
40 * Output packets are intercepted near the end of ether_output(),
41 * the correct destination is selected by calling bridge_dst_lookup(),
42 * and then forwarding is done using bdg_forward().
43 * Bridging is controlled by the sysctl variable net.link.ether.bridge
45 * The arp code is also modified to let a machine answer requests
46 * irrespective of the port the request came from.
48 * In case of loops in the bridging topology, the bridge detects this
49 * event and temporarily mutes output bridging on one of the ports.
50 * Periodically, interfaces are unmuted by bdg_timeout(). (For the
51 * mute flag I am temporarily using IFF_LINK2 but this has to
52 * change.) Muting is only implemented as a safety measure, and also as
53 * a mechanism to support a user-space implementation of the spanning
54 * tree algorithm. In the final release, unmuting will only occur
55 * because of explicit action of the user-level daemon.
57 * To build a bridging kernel, use the following option
59 * and then at runtime set the sysctl variable to enable bridging.
61 * Only one interface is supposed to have addresses set (but
62 * there are no problems in practice if you set addresses for more
63 * than one interface).
64 * Bridging will act before routing, but nothing prevents a machine
65 * from doing both (modulo bugs in the implementation...).
68 * - bridging requires some (small) modifications to the interface
69 * driver. Currently (980911) the "ed", "de", "tx", "lnc" drivers
70 * have been modified and tested. "fxp", "ep", "fe" have been
71 * modified but not tested. See the "ed" and "de" drivers as
72 * examples on how to operate.
73 * - bridging is incompatible with multicast routing on the same
74 * machine. There is not an easy fix to this.
75 * - loop detection is still not very robust.
76 * - the interface of bdg_forward() could be improved.
79 #include <sys/param.h>
81 #include <sys/malloc.h>
82 #include <sys/systm.h>
83 #include <sys/socket.h> /* for net/if.h */
84 #include <sys/kernel.h>
85 #include <sys/sysctl.h>
88 #include <net/if_types.h>
90 #include <netinet/in.h> /* for struct arpcom */
91 #include <netinet/in_systm.h>
92 #include <netinet/in_var.h>
93 #include <netinet/ip.h>
94 #include <netinet/if_ether.h> /* for struct arpcom */
99 #if defined(IPFIREWALL)
100 #include <net/route.h>
101 #include <netinet/ip_fw.h>
102 #if defined(DUMMYNET)
103 #include <netinet/ip_dummynet.h>
107 #include <net/bridge.h>
110 * For debugging, you can use the following macros.
111 * remember, rdtsc() only works on Pentium-class machines
114 DDB(ticks = rdtsc();)
115 ... interesting code ...
116 DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;)
125 * System initialization
128 static void bdginit(void *);
129 static void flush_table(void);
131 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_SECOND, bdginit, NULL)
133 static int bdg_ipfw = 0 ;
135 bdg_hash_table *bdg_table = NULL ;
138 * we need additional info for the bridge. The bdg_ifp2sc[] array
139 * provides a pointer to this struct using the if_index.
140 * bdg_softc has a backpointer to the struct ifnet, the bridge
141 * flags, and a group (bridging occurs only between ports of the
146 /* ((struct arpcom *)ifp)->ac_enaddr is the eth. addr */
151 static struct bdg_softc **ifp2sc = NULL ;
153 #if 0 /* new code using ifp2sc */
154 #define SAMEGROUP(ifp,src) (src == NULL || \
155 ifp2sc[ifp->if_index]->group == ifp2sc[src->if_index]->group )
156 #define MUTED(ifp) (ifp2sc[ifp->if_index]->flags & IFF_MUTE)
157 #define MUTE(ifp) ifp2sc[ifp->if_index]->flags |= IFF_MUTE
158 #define UNMUTE(ifp) ifp2sc[ifp->if_index]->flags &= ~IFF_MUTE
160 #define SAMEGROUP(a,b) 1
161 #define MUTED(ifp) (ifp->if_flags & IFF_MUTE)
162 #define MUTE(ifp) ifp->if_flags |= IFF_MUTE
163 #define UNMUTE(ifp) ifp->if_flags &= ~IFF_MUTE
167 sysctl_bdg SYSCTL_HANDLER_ARGS
169 int error, oldval = do_bridge ;
171 error = sysctl_handle_int(oidp,
172 oidp->oid_arg1, oidp->oid_arg2, req);
173 printf("called sysctl for bridge name %s arg2 %d val %d->%d\n",
174 oidp->oid_name, oidp->oid_arg2,
176 if (bdg_table == NULL)
178 if (oldval != do_bridge) {
184 SYSCTL_DECL(_net_link_ether);
185 SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW,
186 &do_bridge, 0, &sysctl_bdg, "I", "Bridging");
188 SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW, &bdg_ipfw,0,"");
189 #if 1 /* diagnostic vars */
190 int bdg_in_count = 0 , bdg_in_ticks = 0 , bdg_fw_count = 0, bdg_fw_ticks = 0 ;
191 SYSCTL_INT(_net_link_ether, OID_AUTO, bdginc, CTLFLAG_RW, &bdg_in_count,0,"");
192 SYSCTL_INT(_net_link_ether, OID_AUTO, bdgint, CTLFLAG_RW, &bdg_in_ticks,0,"");
193 SYSCTL_INT(_net_link_ether, OID_AUTO, bdgfwc, CTLFLAG_RW, &bdg_fw_count,0,"");
194 SYSCTL_INT(_net_link_ether, OID_AUTO, bdgfwt, CTLFLAG_RW, &bdg_fw_ticks,0,"");
196 static struct bdg_stats bdg_stats ;
197 SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats,
198 CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics");
200 static int bdg_loops ;
203 * completely flush the bridge table.
210 if (bdg_table == NULL)
213 for (i=0; i< HASH_SIZE; i++)
214 bdg_table[i].name= NULL; /* clear table */
219 * called periodically to flush entries etc.
222 bdg_timeout(void *dummy)
226 static int slowtimer = 0 ;
229 static int age_index = 0 ; /* index of table position to age */
230 int l = age_index + HASH_SIZE/4 ;
232 * age entries in the forwarding table.
236 for (; age_index < l ; age_index++)
237 if (bdg_table[age_index].used)
238 bdg_table[age_index].used = 0 ;
239 else if (bdg_table[age_index].name) {
240 /* printf("xx flushing stale entry %d\n", age_index); */
241 bdg_table[age_index].name = NULL ;
243 if (age_index >= HASH_SIZE)
246 if (--slowtimer <= 0 ) {
249 for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next) {
250 if (ifp->if_type != IFT_ETHER)
252 if ( 0 == ( ifp->if_flags & IFF_UP) ) {
257 if ( 0 == ( ifp->if_flags & IFF_PROMISC) ) {
260 ret = ifpromisc(ifp, 1);
262 printf(">> now %s%d flags 0x%x promisc %d\n",
263 ifp->if_name, ifp->if_unit,
267 printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit);
274 timeout(bdg_timeout, (void *)0, 2*hz );
278 * local MAC addresses are held in a small array. This makes comparisons
281 unsigned char bdg_addresses[6*BDG_MAX_PORTS];
285 * initialization of bridge code.
287 * This will have to change to support kldload.
299 * initialization of bridge code
301 s = splimp(); /* XXX does this matter? */
302 if (bdg_table == NULL)
303 bdg_table = (struct hash_table *)
304 malloc(HASH_SIZE * sizeof(struct hash_table),
308 ifp2sc = malloc(if_index * sizeof(struct bdg_softc *), M_IFADDR, M_WAITOK );
309 bzero(ifp2sc, if_index * sizeof(struct bdg_softc *) );
311 bzero(&bdg_stats, sizeof(bdg_stats) );
313 eth_addr = bdg_addresses ;
315 printf("BRIDGE 981214, have %d interfaces\n", if_index);
316 for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ;
317 i++, ifp = ifp->if_link.tqe_next)
318 if (ifp->if_type == IFT_ETHER) { /* ethernet ? */
319 ac = (struct arpcom *)ifp;
320 sprintf(bdg_stats.s[ifp->if_index].name,
321 "%s%d", ifp->if_name, ifp->if_unit);
322 printf("-- index %d %s type %d phy %d addrl %d addr %6D\n",
324 bdg_stats.s[ifp->if_index].name,
325 (int)ifp->if_type, (int) ifp->if_physical,
326 (int)ifp->if_addrlen,
327 ac->ac_enaddr, "." );
328 bcopy(ac->ac_enaddr, eth_addr, 6);
331 ifp2sc[bdg_ports] = malloc(sizeof(struct bdg_softc),
332 M_IFADDR, M_WAITOK );
333 ifp2sc[bdg_ports]->ifp = ifp ;
334 ifp2sc[bdg_ports]->flags = 0 ;
335 ifp2sc[bdg_ports]->group = 0 ;
344 * bridge_in() is invoked to perform bridging decision on input packets.
346 * m packet to be bridged. The mbuf need not hold the
347 * whole packet, only the first 14 bytes suffice. We
348 * assume them to be contiguous. No alignment assumptions
349 * because they are not a problem on i386 class machines.
351 * On Return: destination of packet, one of
352 * BDG_BCAST broadcast
353 * BDG_MCAST multicast
354 * BDG_LOCAL is only for a local address (do not forward)
355 * BDG_DROP drop the packet
356 * ifp ifp of the destination interface.
358 * Forwarding is not done directly to give a chance to some drivers
359 * to fetch more of the packet, or simply drop it completely.
364 bridge_in(struct mbuf *m)
367 struct ifnet *ifp = m->m_pkthdr.rcvif, *dst , *old ;
368 int dropit = MUTED(ifp) ;
369 struct ether_header *eh;
371 eh = mtod(m, struct ether_header *);
374 * hash the source address
376 index= HASH_FN(eh->ether_shost);
377 bdg_table[index].used = 1 ;
378 old = bdg_table[index].name ;
379 if ( old ) { /* the entry is valid. */
380 if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) {
381 printf("collision at %d\n", index);
382 bdg_table[index].name = NULL ;
383 } else if (old != ifp) {
385 * found a loop. Either a machine has moved, or there
386 * is a misconfiguration/reconfiguration of the network.
387 * First, do not forward this packet!
388 * Record the relocation anyways; then, if loops persist,
389 * suspect a reconfiguration and disable forwarding
390 * from the old interface.
392 bdg_table[index].name = ifp ; /* relocate address */
393 printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n",
394 bdg_loops, eh->ether_shost, ".",
395 ifp->if_name, ifp->if_unit,
396 old->if_name, old->if_unit,
397 old->if_flags & IFF_MUTE ? "muted":"ignore");
400 if (++bdg_loops > 10)
407 * now write the source address into the table
409 if (bdg_table[index].name == NULL) {
410 DEB(printf("new addr %6D at %d for %s%d\n",
411 eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);)
412 bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6);
413 bdg_table[index].name = ifp ;
415 dst = bridge_dst_lookup(m);
417 * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
418 * For muted interfaces, the first 3 are changed to BDG_LOCAL,
419 * and others to BDG_DROP. Also, for incoming packets, ifp is changed
420 * to BDG_DROP in case ifp == src . These mods are not necessary
421 * for outgoing packets from ether_output().
423 BDG_STAT(ifp, BDG_IN);
428 case (int)BDG_UNKNOWN:
433 if (dst == ifp || dropit )
434 BDG_STAT(ifp, BDG_DROP);
436 BDG_STAT(ifp, BDG_FORWARD);
441 if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL)
446 return (dst == ifp ? BDG_DROP : dst ) ;
451 * Forward to dst, excluding src port and (if not a single interface)
452 * muted interfaces. The packet is freed if marked as such
453 * and not for a local destination.
454 * A cleaner implementation would be to make bdg_forward()
455 * always consume the packet, leaving to the caller the task
456 * to make a copy if it needs it. As it is now, bdg_forward()
457 * can keep a copy alive in some cases.
460 bdg_forward (struct mbuf **m0, struct ifnet *dst)
462 struct ifnet *src = (*m0)->m_pkthdr.rcvif; /* could be NULL in output */
465 int once = 0; /* execute the loop only once */
466 int canfree = 1 ; /* can free the buf at the end */
470 struct ether_header *eh = mtod(*m0, struct ether_header *); /* XXX */
473 if (dst == BDG_DROP) { /* this should not happen */
474 printf("xx bdg_forward for BDG_DROP)\n");
479 if (dst == BDG_LOCAL) { /* this should not happen as well */
480 printf("xx ouch, bdg_forward for local pkt\n");
483 if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) {
484 ifp = ifnet.tqh_first ;
486 if (dst != BDG_UNKNOWN)
490 once = 1 ; /* and also canfree */
494 * do filtering in a very similar way to what is done
495 * in ip_output. Only for IP packets, and only pass/fail/dummynet
496 * is supported. The tricky thing is to make sure that enough of
497 * the packet (basically, Eth+IP+TCP/UDP headers) is contiguous
498 * so that calls to m_pullup in ip_fw_chk will not kill the
503 struct ip_fw_chain *rule;
508 if (m->m_type == MT_DUMMYNET) {
510 * the packet was already tagged, so part of the
511 * processing was already done, and we need to go down.
513 rule = (struct ip_fw_chain *)(m->m_data) ;
514 (*m0) = m = m->m_next ;
516 src = m->m_pkthdr.rcvif; /* could be NULL in output */
517 eh = mtod(m, struct ether_header *); /* XXX */
518 canfree = 1 ; /* for sure, a copy is not needed later. */
519 goto forward; /* HACK! */
526 goto forward ; /* do not apply to packets from ether_output */
527 if (canfree == 0 ) /* need to make a copy */
528 m = m_copypacket(*m0, M_DONTWAIT);
536 * before calling the firewall, swap fields the same as IP does.
537 * here we assume the pkt is an IP one and the header is contiguous
539 eh = mtod(m, struct ether_header *);
540 ip = (struct ip *)(eh + 1 ) ;
546 * The third parameter to the firewall code is the dst. interface.
547 * Since we apply checks only on input pkts we use NULL.
549 off = (*ip_fw_chk_ptr)(NULL, 0, NULL, &dummy, &m, &rule, NULL) ;
550 if (m == NULL) { /* pkt discarded by firewall */
556 * on return, the mbuf pointer might have changed. Restore
557 * *m0 (if it was the same as m), eh, ip and then
558 * restore original ordering.
560 eh = mtod(m, struct ether_header *);
561 ip = (struct ip *)(eh + 1 ) ;
562 if (canfree) /* m was a reference to *m0, so update *m0 */
575 * pass the pkt to dummynet. Need to include m, dst, rule.
576 * Dummynet consumes the packet in all cases.
578 dummynet_io((off & 0xffff), DN_TO_BDG_FWD, m, dst, NULL, 0, rule,
580 if (canfree) /* dummynet has consumed the original one */
585 /* if none of the above matches, we have to drop the pkt */
588 if (canfree && m != *m0) {
595 #endif /* IPFIREWALL */
601 for ( ; ifp ; ifp = ifp->if_link.tqe_next ) {
602 if (ifp != src && ifp->if_type == IFT_ETHER &&
603 (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) &&
604 SAMEGROUP(ifp, src) && !MUTED(ifp) ) {
605 if (m == NULL) { /* do i need to make a copy ? */
606 if (canfree && ifp->if_link.tqe_next == NULL) /* last one! */
608 else /* on a P5-90, m_packetcopy takes 540 ticks */
609 m = m_copypacket(*m0, M_DONTWAIT);
611 printf("bdg_forward: sorry, m_copy failed!\n");
616 * execute last part of ether_output.
620 * Queue message on interface, and start output if interface
623 if (IF_QFULL(&ifp->if_snd)) {
624 IF_DROP(&ifp->if_snd);
625 MUTE(ifp); /* good measure... */
629 ifp->if_obytes += m->m_pkthdr.len ;
630 if (m->m_flags & M_MCAST)
632 IF_ENQUEUE(&ifp->if_snd, m);
633 if ((ifp->if_flags & IFF_OACTIVE) == 0)
634 (*ifp->if_start)(ifp);
637 *m0 = NULL ; /* the packet is gone... */
640 BDG_STAT(ifp, BDG_OUT);
646 /* cleanup any mbuf leftover. */
651 if (canfree && *m0) {