2 * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved.
3 * Copyright (c) 2004-2008 Qing Li. All rights reserved.
4 * Copyright (c) 2008 Kip Macy. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
32 #include "opt_inet6.h"
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
38 #include <sys/syslog.h>
39 #include <sys/sysctl.h>
40 #include <sys/socket.h>
41 #include <sys/kernel.h>
43 #include <sys/mutex.h>
44 #include <sys/rwlock.h>
52 #include <netinet/in.h>
53 #include <net/if_llatbl.h>
55 #include <net/if_dl.h>
56 #include <net/if_var.h>
57 #include <net/route.h>
59 #include <netinet/if_ether.h>
60 #include <netinet6/in6_var.h>
61 #include <netinet6/nd6.h>
/* Malloc type used in this file for lltable structures and their bucket arrays. */
63 MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables");
/* Per-vnet singly-linked list of lltables; always accessed through V_lltables. */
65 static VNET_DEFINE(SLIST_HEAD(, lltable), lltables) =
66 SLIST_HEAD_INITIALIZER(lltables);
67 #define V_lltables VNET(lltables)
/*
 * Global rwlock, set up through RW_SYSINIT.
 * NOTE(review): presumably the lock behind the LLTABLE_* lock macros that
 * guard V_lltables -- confirm against the header.
 */
69 struct rwlock lltable_rwlock;
70 RW_SYSINIT(lltable_rwlock, &lltable_rwlock, "lltable_rwlock");
72 static void lltable_unlink(struct lltable *llt);
73 static void llentries_unlink(struct lltable *llt, struct llentries *head);
75 static void htable_unlink_entry(struct llentry *lle);
76 static void htable_link_entry(struct lltable *llt, struct llentry *lle);
77 static int htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f,
81 * Dump lle state for a specific address family.
/*
 * Dumps the entries of a single table: runs the table's llt_dump_entry
 * method over every entry of @llt, passing @wr through as the callback
 * argument.  The caller's locking is checked with LLTABLE_LOCK_ASSERT();
 * the interface AFDATA read lock is held only for the duration of the walk.
 */
84 lltable_dump_af(struct lltable *llt, struct sysctl_req *wr)
88 LLTABLE_LOCK_ASSERT();
/* NOTE(review): loopback interfaces are special-cased; the statement taken
 * when this test is true is not visible in this listing. */
90 if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
94 IF_AFDATA_RLOCK(llt->llt_ifp);
95 error = lltable_foreach_lle(llt,
96 (llt_foreach_cb_t *)llt->llt_dump_entry, wr);
97 IF_AFDATA_RUNLOCK(llt->llt_ifp);
103 * Dump arp state for a specific address family.
/*
 * Walks the per-vnet list of lltables and calls lltable_dump_af() on each
 * table whose address family equals @af, forwarding @wr.
 */
106 lltable_sysctl_dumparp(int af, struct sysctl_req *wr)
112 SLIST_FOREACH(llt, &V_lltables, llt_link) {
113 if (llt->llt_af == af) {
114 error = lltable_dump_af(llt, wr);
125 * Common function helpers for chained hash table.
129 * Runs specified callback for each entry in @llt.
130 * Caller does the locking.
/*
 * Hash-table implementation of the llt_foreach_entry method (installed by
 * lltable_allocate_htbl()).  Visits each of the llt_hsize buckets and calls
 * @f(llt, lle, farg) on every entry found there.
 */
134 htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg)
136 struct llentry *lle, *next;
141 for (i = 0; i < llt->llt_hsize; i++) {
/* The _SAFE iterator fetches the successor up front, so the walk survives
 * the callback removing the current entry from its bucket. */
142 LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
143 error = f(llt, lle, farg);
/*
 * Hash-table implementation of the llt_link_entry method.  Picks the bucket
 * with the table's llt_hash function, records that bucket in lle->lle_head,
 * sets LLE_LINKED and inserts @lle at the head of the bucket.  The AFDATA
 * write lock of the table's interface is asserted.
 */
153 htable_link_entry(struct lltable *llt, struct llentry *lle)
155 struct llentries *lleh;
/* NOTE(review): already-linked entries are detected here; the statement
 * taken is not visible in this listing (presumably an early return). */
158 if ((lle->la_flags & LLE_LINKED) != 0)
161 IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp);
163 hashidx = llt->llt_hash(lle, llt->llt_hsize);
164 lleh = &llt->lle_head[hashidx];
167 lle->lle_head = lleh;
168 lle->la_flags |= LLE_LINKED;
169 LIST_INSERT_HEAD(lleh, lle, lle_next);
/*
 * Hash-table implementation of the llt_unlink_entry method.  A linked entry
 * is removed from its bucket list and loses both LLE_VALID and LLE_LINKED;
 * the AFDATA write lock of the owning table's interface is asserted for
 * that path.  The cached bucket pointer lle_head is reset to NULL.
 */
173 htable_unlink_entry(struct llentry *lle)
176 if ((lle->la_flags & LLE_LINKED) != 0) {
177 IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp);
178 LIST_REMOVE(lle, lle_next);
179 lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
182 lle->lle_head = NULL;
/*
 * Argument bundle passed through lltable_foreach_lle() to
 * htable_prefix_free_cb(): the address/mask pair to match against and the
 * chain on which matching entries are collected.
 */
187 struct prefix_match_data {
188 const struct sockaddr *addr;
189 const struct sockaddr *mask;
/* Entries selected for deletion, linked through their lle_chain field. */
190 struct llentries dchain;
/*
 * Foreach callback used by htable_prefix_free().  @farg is a
 * struct prefix_match_data; when the table's llt_match_prefix method accepts
 * @lle for the stored addr/mask/flags, the entry is pushed onto pmd->dchain.
 */
195 htable_prefix_free_cb(struct lltable *llt, struct llentry *lle, void *farg)
197 struct prefix_match_data *pmd;
199 pmd = (struct prefix_match_data *)farg;
201 if (llt->llt_match_prefix(pmd->addr, pmd->mask, pmd->flags, lle)) {
203 LIST_INSERT_HEAD(&pmd->dchain, lle, lle_chain);
/*
 * Hash-table implementation of the llt_prefix_free method.  Works in two
 * phases: with the AFDATA write lock held, matching entries are gathered on
 * a local chain and unlinked from the table; once the lock is released,
 * each gathered entry is passed to lltable_free_entry().
 */
210 htable_prefix_free(struct lltable *llt, const struct sockaddr *addr,
211 const struct sockaddr *mask, u_int flags)
213 struct llentry *lle, *next;
214 struct prefix_match_data pmd;
216 bzero(&pmd, sizeof(pmd));
220 LIST_INIT(&pmd.dchain);
222 IF_AFDATA_WLOCK(llt->llt_ifp);
223 /* Push matching lles to chain */
224 lltable_foreach_lle(llt, htable_prefix_free_cb, &pmd);
226 llentries_unlink(llt, &pmd.dchain);
227 IF_AFDATA_WUNLOCK(llt->llt_ifp);
/* Freeing happens after the AFDATA lock has been dropped. */
229 LIST_FOREACH_SAFE(lle, &pmd.dchain, lle_chain, next)
230 lltable_free_entry(llt, lle);
/*
 * Hash-table implementation of the llt_free_tbl method: releases the bucket
 * array first, then the table structure itself (both M_LLTABLE).
 */
234 htable_free_tbl(struct lltable *llt)
237 free(llt->lle_head, M_LLTABLE);
238 free(llt, M_LLTABLE);
/*
 * Calls the table's llt_unlink_entry method on every entry queued on @head.
 * The entries are chained through lle_chain, not through the bucket
 * linkage.
 */
242 llentries_unlink(struct lltable *llt, struct llentries *head)
244 struct llentry *lle, *next;
246 LIST_FOREACH_SAFE(lle, head, lle_chain, next)
247 llt->llt_unlink_entry(lle);
251 * Helper function used to drop all mbufs in hold queue.
253 * Returns the number of held packets, if any, that were dropped.
/*
 * Frees packets queued on lle->la_hold, following m_nextpkt, for as long as
 * la_numheld is positive and the queue is non-empty.  @lle must be
 * write-locked (asserted).  Afterwards la_numheld is asserted to be zero and
 * the pkts_dropped counter is returned.
 */
256 lltable_drop_entry_queue(struct llentry *lle)
261 LLE_WLOCK_ASSERT(lle);
264 while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) {
/* Remember the successor before the head packet is freed. */
265 next = lle->la_hold->m_nextpkt;
266 m_freem(lle->la_hold);
272 KASSERT(lle->la_numheld == 0,
273 ("%s: la_numheld %d > 0, pkts_droped %zd", __func__,
274 lle->la_numheld, pkts_dropped));
276 return (pkts_dropped);
/*
 * Installs a prebuilt link-layer header into @lle: copies @linkhdrsize bytes
 * of @linkhdr into lle->r_linkdata, records the length in r_hdrlen, points
 * ll_addr at offset @lladdr_off inside the copied header, and marks the
 * entry valid in both la_flags (LLE_VALID) and r_flags (RLLE_VALID).
 * NOTE(review): no bound check on @linkhdrsize is visible here; callers in
 * this file pass buffers of LLE_MAX_LINKHDR bytes.
 */
280 lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
281 const char *linkhdr, size_t linkhdrsize, int lladdr_off)
284 memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
285 lle->r_hdrlen = linkhdrsize;
286 lle->ll_addr = &lle->r_linkdata[lladdr_off];
287 lle->la_flags |= LLE_VALID;
288 lle->r_flags |= RLLE_VALID;
292 * Tries to update @lle link-level address.
293 * Since update requires AFDATA WLOCK, function
294 * drops @lle lock, acquires AFDATA lock and then acquires
295 * @lle lock to maintain lock order.
297 * Returns 1 on success.
300 lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
301 const char *linkhdr, size_t linkhdrsize, int lladdr_off)
304 /* Perform real LLE update */
305 /* use afdata WLOCK to update fields */
306 LLE_WLOCK_ASSERT(lle);
309 IF_AFDATA_WLOCK(ifp);
313 * Since we dropped the LLE lock, another thread might have deleted
314 * this lle. Check and return.
316 if ((lle->la_flags & LLE_DELETED) != 0) {
317 IF_AFDATA_WUNLOCK(ifp);
318 LLE_FREE_LOCKED(lle);
323 lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off);
325 IF_AFDATA_WUNLOCK(ifp);
333 * Helper function used to pre-compute full/partial link-layer
334 * header data suitable for feeding into if_output().
/*
 * Asks the interface to build a link-layer header.  @buf (of *bufsize bytes)
 * and a local if_encap_req are zeroed, the request is filled in as an
 * IFENCAP_LL request for @family with @lladdr of ifp->if_addrlen bytes, and
 * ifp->if_requestencap() is invoked.  *bufsize and *lladdr_off are then
 * updated from the request.
 * NOTE(review): lines between the visible statements are missing from this
 * listing, so whether the outputs are written on error cannot be confirmed.
 */
337 lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
338 char *buf, size_t *bufsize, int *lladdr_off)
340 struct if_encap_req ereq;
343 bzero(buf, *bufsize);
344 bzero(&ereq, sizeof(ereq));
346 ereq.bufsize = *bufsize;
347 ereq.rtype = IFENCAP_LL;
348 ereq.family = family;
349 ereq.lladdr = lladdr;
350 ereq.lladdr_len = ifp->if_addrlen;
351 error = ifp->if_requestencap(ifp, &ereq);
353 *bufsize = ereq.bufsize;
354 *lladdr_off = ereq.lladdr_off;
361 * Update link-layer header for given @lle after
362 * interface lladdr was changed.
/*
 * Foreach callback used by lltable_update_ifaddr(); @farg is the ifnet.
 * Recomputes the link-layer header for @lle with lltable_calc_llheader()
 * and copies the result into lle->r_linkdata.
 */
365 llentry_update_ifaddr(struct lltable *llt, struct llentry *lle, void *farg)
368 u_char linkhdr[LLE_MAX_LINKHDR];
373 ifp = (struct ifnet *)farg;
375 lladdr = lle->ll_addr;
/* NOTE(review): entries without LLE_VALID are special-cased; the statements
 * inside this branch are not visible in this listing. */
378 if ((lle->la_flags & LLE_VALID) == 0) {
/* Entries flagged LLE_IFADDR use the interface's current link-layer address
 * rather than the one stored in the entry. */
383 if ((lle->la_flags & LLE_IFADDR) != 0)
384 lladdr = IF_LLADDR(ifp);
386 linkhdrsize = sizeof(linkhdr);
387 lltable_calc_llheader(ifp, llt->llt_af, lladdr, linkhdr, &linkhdrsize,
389 memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
396 * Update all calculated headers for given @llt
/*
 * Runs llentry_update_ifaddr() over every entry of @llt while holding the
 * interface's AFDATA write lock; the ifnet is passed as callback argument.
 */
399 lltable_update_ifaddr(struct lltable *llt)
/* NOTE(review): loopback interfaces are special-cased; the statement taken
 * is not visible in this listing. */
402 if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
405 IF_AFDATA_WLOCK(llt->llt_ifp);
406 lltable_foreach_lle(llt, llentry_update_ifaddr, llt->llt_ifp);
407 IF_AFDATA_WUNLOCK(llt->llt_ifp);
412 * Performs generic cleanup routines and frees lle.
414 * Called for non-linked entries, with callouts and
415 * other AF-specific cleanups performed.
417 * @lle must be passed WLOCK'ed
419 * Returns the number of held packets, if any, that were dropped.
/*
 * Releases an entry that is write-locked (asserted) and no longer linked
 * (asserted): drops its held packets, stops the entry's lle_timer callout
 * and gives the entry up through LLE_FREE_LOCKED().  Returns the count
 * reported by lltable_drop_entry_queue().
 */
422 llentry_free(struct llentry *lle)
426 LLE_WLOCK_ASSERT(lle);
428 KASSERT((lle->la_flags & LLE_LINKED) == 0, ("freeing linked lle"));
430 pkts_dropped = lltable_drop_entry_queue(lle);
/* NOTE(review): the statement executed when callout_stop() returns > 0 is
 * not visible in this listing. */
433 if (callout_stop(&lle->lle_timer) > 0)
435 LLE_FREE_LOCKED(lle);
437 return (pkts_dropped);
441 * (al)locate an llentry for address dst (equivalent to rtalloc for new-arp).
443 * If found the llentry * is returned referenced and unlocked.
/*
 * Looks up @dst in @lt and, when the interface permits it, creates an entry.
 * Visible flow: an LLE_EXCLUSIVE lookup under the AFDATA read lock; then,
 * unless the interface has IFF_NOARP or IFF_STATICARP set, a new entry is
 * allocated, the lookup is repeated under the AFDATA write lock, and the
 * newly allocated entry is freed again when that second lookup found one.
 * NOTE(review): several lines (NULL checks, reference handling, the return)
 * are missing from this listing.
 */
446 llentry_alloc(struct ifnet *ifp, struct lltable *lt,
447 struct sockaddr_storage *dst)
449 struct llentry *la, *la_tmp;
451 IF_AFDATA_RLOCK(ifp);
452 la = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst);
453 IF_AFDATA_RUNLOCK(ifp);
461 if ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
462 la = lltable_alloc_entry(lt, 0, (struct sockaddr *)dst);
465 IF_AFDATA_WLOCK(ifp);
467 /* Prefer any existing LLE over newly-created one */
468 la_tmp = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst);
/* NOTE(review): a line is missing just before the link call; confirm the new
 * entry is linked only when la_tmp is NULL. */
470 lltable_link_entry(lt, la);
471 IF_AFDATA_WUNLOCK(ifp);
/* An existing entry was found, so the freshly allocated one is discarded. */
472 if (la_tmp != NULL) {
473 lltable_free_entry(lt, la);
484 * Callback for lltable_free(): pushes @lle onto the deletion chain passed in @farg.
/*
 * Foreach callback used by lltable_free().  @farg is a struct llentries list
 * head; @lle is pushed onto it through its lle_chain linkage.
 */
488 lltable_free_cb(struct lltable *llt, struct llentry *lle, void *farg)
490 struct llentries *dchain;
492 dchain = (struct llentries *)farg;
495 LIST_INSERT_HEAD(dchain, lle, lle_chain);
501 * Free all entries from given table and free itself.
/*
 * Tears down @llt.  With the AFDATA write lock held, every entry is pushed
 * onto a local chain and unlinked from the table; after the lock is dropped
 * the chain is walked and finally the table's llt_free_tbl method is called.
 * NOTE(review): the body of the per-entry loop and the initialisation of
 * dchain are not visible in this listing.
 */
504 lltable_free(struct lltable *llt)
506 struct llentry *lle, *next;
507 struct llentries dchain;
509 KASSERT(llt != NULL, ("%s: llt is NULL", __func__));
514 IF_AFDATA_WLOCK(llt->llt_ifp);
515 /* Push all lles to @dchain */
516 lltable_foreach_lle(llt, lltable_free_cb, &dchain);
517 llentries_unlink(llt, &dchain);
518 IF_AFDATA_WUNLOCK(llt->llt_ifp);
520 LIST_FOREACH_SAFE(lle, &dchain, lle_chain, next) {
524 llt->llt_free_tbl(llt);
/*
 * Releases held packets for address family @af: for each table on the
 * per-vnet list, every bucket is walked and the la_hold chain of each entry
 * is passed to m_freem().
 * NOTE(review): the statement taken for tables of a different family, and
 * any per-entry locking, are not visible in this listing.
 */
529 lltable_drain(int af)
536 SLIST_FOREACH(llt, &V_lltables, llt_link) {
537 if (llt->llt_af != af)
540 for (i=0; i < llt->llt_hsize; i++) {
541 LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
544 m_freem(lle->la_hold);
556 * Deletes an address from given lltable.
557 * Used for userland interaction to remove
558 * individual entries. Skips entries added by OS.
/*
 * Removes the entry for @l3addr from @llt.  The lookup (LLE_EXCLUSIVE) and
 * the unlink are done under the AFDATA write lock; the table's
 * llt_delete_entry method is called after that lock has been released.
 * NOTE(review): the conditions and return values of the early exits are
 * only partly visible in this listing.
 */
561 lltable_delete_addr(struct lltable *llt, u_int flags,
562 const struct sockaddr *l3addr)
568 IF_AFDATA_WLOCK(ifp);
569 lle = lla_lookup(llt, LLE_EXCLUSIVE, l3addr);
572 IF_AFDATA_WUNLOCK(ifp);
/* Entries flagged LLE_IFADDR are only touched when the caller passes
 * LLE_IFADDR in @flags. */
575 if ((lle->la_flags & LLE_IFADDR) != 0 && (flags & LLE_IFADDR) == 0) {
576 IF_AFDATA_WUNLOCK(ifp);
581 lltable_unlink_entry(llt, lle);
582 IF_AFDATA_WUNLOCK(ifp);
584 llt->llt_delete_entry(llt, lle);
/*
 * Walks the per-vnet table list and invokes the llt_prefix_free method with
 * @addr, @mask and flags; tables are first tested against @af.
 * NOTE(review): the statement taken for a non-matching family is not
 * visible in this listing (presumably a continue).
 */
590 lltable_prefix_free(int af, struct sockaddr *addr, struct sockaddr *mask,
596 SLIST_FOREACH(llt, &V_lltables, llt_link) {
597 if (llt->llt_af != af)
600 llt->llt_prefix_free(llt, addr, mask, flags);
/*
 * Allocates a zero-filled lltable backed by a chained hash table of @hsize
 * buckets.  Both allocations use M_WAITOK.  Every bucket list is
 * initialised and the htable_* functions are installed as the default
 * link/unlink/prefix_free/foreach/free_tbl methods.
 */
606 lltable_allocate_htbl(uint32_t hsize)
611 llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK | M_ZERO);
612 llt->llt_hsize = hsize;
613 llt->lle_head = malloc(sizeof(struct llentries) * hsize,
614 M_LLTABLE, M_WAITOK | M_ZERO);
616 for (i = 0; i < llt->llt_hsize; i++)
617 LIST_INIT(&llt->lle_head[i]);
619 /* Set some default callbacks */
620 llt->llt_link_entry = htable_link_entry;
621 llt->llt_unlink_entry = htable_unlink_entry;
622 llt->llt_prefix_free = htable_prefix_free;
623 llt->llt_foreach_entry = htable_foreach_lle;
624 llt->llt_free_tbl = htable_free_tbl;
630 * Links lltable to global llt list.
/* Inserts @llt at the head of the per-vnet table list V_lltables. */
633 lltable_link(struct lltable *llt)
637 SLIST_INSERT_HEAD(&V_lltables, llt, llt_link);
/* Removes @llt from the per-vnet table list V_lltables. */
642 lltable_unlink(struct lltable *llt)
646 SLIST_REMOVE(&V_lltables, llt, lltable, llt_link);
652 * External methods used by lltable consumers
/* Dispatches to the table's llt_foreach_entry method and returns its result. */
656 lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg)
659 return (llt->llt_foreach_entry(llt, f, farg));
/* Dispatches to the table's llt_alloc_entry method and returns the new entry. */
663 lltable_alloc_entry(struct lltable *llt, u_int flags,
664 const struct sockaddr *l3addr)
667 return (llt->llt_alloc_entry(llt, flags, l3addr));
/* Dispatches to the table's llt_free_entry method. */
671 lltable_free_entry(struct lltable *llt, struct llentry *lle)
674 llt->llt_free_entry(llt, lle);
/* Dispatches to the table's llt_link_entry method. */
678 lltable_link_entry(struct lltable *llt, struct llentry *lle)
681 llt->llt_link_entry(llt, lle);
/* Dispatches to the table's llt_unlink_entry method, which takes only the entry. */
685 lltable_unlink_entry(struct lltable *llt, struct llentry *lle)
688 llt->llt_unlink_entry(lle);
/*
 * Dispatches to a table's llt_fill_sa_entry method with @lle and @sa.
 * NOTE(review): the line that obtains llt is not visible in this listing
 * (presumably lle->lle_tbl).
 */
692 lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
697 llt->llt_fill_sa_entry(lle, sa);
/* Accessor: returns the interface the table is attached to (llt_ifp). */
701 lltable_get_ifp(const struct lltable *llt)
704 return (llt->llt_ifp);
/* Accessor: returns the table's address family (llt_af). */
708 lltable_get_af(const struct lltable *llt)
711 return (llt->llt_af);
715 * Called in route_output when rtm_flags contains RTF_LLDATA.
/*
 * Routing-socket handler for link-layer entries.  The interface is resolved
 * from the sdl_index of the AF_LINK gateway sockaddr, the matching lltable
 * is searched for on the per-vnet list by the destination's address family,
 * and the request is then dispatched on rtm->rtm_type.
 * NOTE(review): the case labels, error returns and some conditions are not
 * visible in this listing.  The visible paths are (a) building and linking
 * a new entry, replacing any existing one, and (b) deleting an entry via
 * lltable_delete_addr().
 */
718 lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
720 struct sockaddr_dl *dl =
721 (struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY];
722 struct sockaddr *dst = (struct sockaddr *)info->rti_info[RTAX_DST];
725 struct llentry *lle, *lle_tmp;
726 uint8_t linkhdr[LLE_MAX_LINKHDR];
732 KASSERT(dl != NULL && dl->sdl_family == AF_LINK,
733 ("%s: invalid dl\n", __func__));
/* The gateway's interface index selects the ifnet. */
735 ifp = ifnet_byindex(dl->sdl_index);
737 log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n",
738 __func__, dl->sdl_index);
742 /* XXX linked list may be too expensive */
744 SLIST_FOREACH(llt, &V_lltables, llt_link) {
745 if (llt->llt_af == dst->sa_family &&
750 KASSERT(llt != NULL, ("Yep, ugly hacks are bad\n"));
754 switch (rtm->rtm_type) {
/* A zero expiry in the route metrics requests a static entry. */
758 if (rtm->rtm_rmx.rmx_expire == 0)
759 laflags = LLE_STATIC;
760 lle = lltable_alloc_entry(llt, laflags, dst);
/* Precompute the link-layer header for the supplied hardware address and
 * store it in the new entry. */
764 linkhdrsize = sizeof(linkhdr);
765 if (lltable_calc_llheader(ifp, dst->sa_family, LLADDR(dl),
766 linkhdr, &linkhdrsize, &lladdr_off) != 0)
768 lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
/* RTF_ANNOUNCE marks the entry as published (LLE_PUB). */
770 if ((rtm->rtm_flags & RTF_ANNOUNCE))
771 lle->la_flags |= LLE_PUB;
772 lle->la_expire = rtm->rtm_rmx.rmx_expire;
/* laflags now carries the entry's flags; it is tested again for LLE_PUB
 * further down. */
774 laflags = lle->la_flags;
776 /* Try to link new entry */
778 IF_AFDATA_WLOCK(ifp);
780 lle_tmp = lla_lookup(llt, LLE_EXCLUSIVE, dst);
781 if (lle_tmp != NULL) {
782 /* Check if we are trying to replace immutable entry */
783 if ((lle_tmp->la_flags & LLE_IFADDR) != 0) {
784 IF_AFDATA_WUNLOCK(ifp);
785 LLE_WUNLOCK(lle_tmp);
786 lltable_free_entry(llt, lle);
789 /* Unlink existing entry from table */
790 lltable_unlink_entry(llt, lle_tmp);
792 lltable_link_entry(llt, lle);
793 IF_AFDATA_WUNLOCK(ifp);
/* The replaced entry is reported to lle_event listeners as expired and then
 * freed, after the AFDATA lock has been released. */
795 if (lle_tmp != NULL) {
796 EVENTHANDLER_INVOKE(lle_event, lle_tmp,LLENTRY_EXPIRED);
797 lltable_free_entry(llt, lle_tmp);
801 * By invoking LLE handler here we might get
802 * two events on static LLE entry insertion
803 * in routing socket. However, since we might have
804 * other subscribers we need to generate this event.
806 EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED);
/* NOTE(review): for published AF_INET entries a call follows whose callee
 * name is not visible in this listing; only its arguments are shown. */
810 if ((laflags & LLE_PUB) && dst->sa_family == AF_INET)
812 &((struct sockaddr_in *)dst)->sin_addr,
813 &((struct sockaddr_in *)dst)->sin_addr,
814 (u_char *)LLADDR(dl));
/* Delete path: called with flags 0, so lltable_delete_addr() leaves
 * LLE_IFADDR entries in place. */
820 return (lltable_delete_addr(llt, 0, dst));
832 struct sockaddr l3_addr;
/*
 * DDB helper: prints the fields of one link-layer entry with db_printf(),
 * then its L3 address from la->l3_addr.  The address is formatted with
 * inet_ntoa_r() when handled as a sockaddr_in and with ip6_sprintf() when
 * handled as a sockaddr_in6; otherwise only the family number is shown.
 * NOTE(review): the line deriving lle from @la and the case labels are not
 * visible in this listing.
 */
836 llatbl_lle_show(struct llentry_sa *la)
842 db_printf("lle=%p\n", lle);
843 db_printf(" lle_next=%p\n", lle->lle_next.le_next);
844 db_printf(" lle_lock=%p\n", &lle->lle_lock);
845 db_printf(" lle_tbl=%p\n", lle->lle_tbl);
846 db_printf(" lle_head=%p\n", lle->lle_head);
847 db_printf(" la_hold=%p\n", lle->la_hold);
848 db_printf(" la_numheld=%d\n", lle->la_numheld);
849 db_printf(" la_expire=%ju\n", (uintmax_t)lle->la_expire);
850 db_printf(" la_flags=0x%04x\n", lle->la_flags);
851 db_printf(" la_asked=%u\n", lle->la_asked);
852 db_printf(" la_preempt=%u\n", lle->la_preempt);
853 db_printf(" ln_state=%d\n", lle->ln_state);
854 db_printf(" ln_router=%u\n", lle->ln_router);
855 db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick);
856 db_printf(" lle_refcnt=%d\n", lle->lle_refcnt);
/* The link-layer address is copied out and printed as six colon-separated
 * hex octets. */
857 bcopy(lle->ll_addr, octet, sizeof(octet));
858 db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n",
859 octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
860 db_printf(" lle_timer=%p\n", &lle->lle_timer);
862 switch (la->l3_addr.sa_family) {
866 struct sockaddr_in *sin;
867 char l3s[INET_ADDRSTRLEN];
869 sin = (struct sockaddr_in *)&la->l3_addr;
870 inet_ntoa_r(sin->sin_addr, l3s);
871 db_printf(" l3_addr=%s\n", l3s);
878 struct sockaddr_in6 *sin6;
879 char l3s[INET6_ADDRSTRLEN];
881 sin6 = (struct sockaddr_in6 *)&la->l3_addr;
882 ip6_sprintf(l3s, &sin6->sin6_addr);
883 db_printf(" l3_addr=%s\n", l3s);
888 db_printf(" l3_addr=N/A (af=%d)\n", la->l3_addr.sa_family);
/*
 * DDB "show llentry" command: treats the address argument as a
 * struct llentry_sa pointer and dumps it with llatbl_lle_show().
 * NOTE(review): the condition guarding the usage message is not visible in
 * this listing (presumably a missing address argument).
 */
893 DB_SHOW_COMMAND(llentry, db_show_llentry)
897 db_printf("usage: show llentry <struct llentry *>\n");
901 llatbl_lle_show((struct llentry_sa *)addr);
/*
 * DDB helper: prints a one-line summary of @llt (pointer, address family,
 * interface pointer) and then dumps every entry in every hash bucket with
 * llatbl_lle_show().
 */
905 llatbl_llt_show(struct lltable *llt)
910 db_printf("llt=%p llt_af=%d llt_ifp=%p\n",
911 llt, llt->llt_af, llt->llt_ifp);
913 for (i = 0; i < llt->llt_hsize; i++) {
914 LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
916 llatbl_lle_show((struct llentry_sa *)lle);
/*
 * DDB "show lltable" command: treats the address argument as a
 * struct lltable pointer and dumps it with llatbl_llt_show().
 * NOTE(review): the condition guarding the usage message is not visible in
 * this listing (presumably a missing address argument).
 */
923 DB_SHOW_COMMAND(lltable, db_show_lltable)
927 db_printf("usage: show lltable <struct lltable *>\n");
931 llatbl_llt_show((struct lltable *)addr);
/*
 * DDB "show all lltables" command: iterates over every vnet, switches to it
 * with CURVNET_SET_QUIET() and prints one summary line per lltable on that
 * vnet's list.  The interface name is shown as "?" when llt_ifp is NULL.
 */
934 DB_SHOW_ALL_COMMAND(lltables, db_show_all_lltables)
936 VNET_ITERATOR_DECL(vnet_iter);
939 VNET_FOREACH(vnet_iter) {
940 CURVNET_SET_QUIET(vnet_iter);
942 db_printf("vnet=%p\n", curvnet);
944 SLIST_FOREACH(llt, &V_lltables, llt_link) {
945 db_printf("llt=%p llt_af=%d llt_ifp=%p(%s)\n",
946 llt, llt->llt_af, llt->llt_ifp,
947 (llt->llt_ifp != NULL) ?
948 llt->llt_ifp->if_xname : "?");
/* A non-zero address argument switches on the per-entry dump. */
949 if (have_addr && addr != 0) /* verbose */
950 llatbl_llt_show(llt);