2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved.
5 * Copyright (c) 2004-2008 Qing Li. All rights reserved.
6 * Copyright (c) 2008 Kip Macy. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
34 #include "opt_inet6.h"
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/eventhandler.h>
39 #include <sys/malloc.h>
41 #include <sys/syslog.h>
42 #include <sys/sysctl.h>
43 #include <sys/socket.h>
44 #include <sys/kernel.h>
46 #include <sys/mutex.h>
47 #include <sys/rwlock.h>
55 #include <netinet/in.h>
56 #include <net/if_llatbl.h>
58 #include <net/if_dl.h>
59 #include <net/if_var.h>
60 #include <net/route.h>
61 #include <net/route/route_ctl.h>
63 #include <netinet/if_ether.h>
64 #include <netinet6/in6_var.h>
65 #include <netinet6/nd6.h>
/* Malloc type used for lltable allocations in this file. */
67 MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables");
/* VNET-scoped singly-linked list of lltables, accessed as V_lltables. */
69 VNET_DEFINE_STATIC(SLIST_HEAD(, lltable), lltables) =
70 SLIST_HEAD_INITIALIZER(lltables);
71 #define V_lltables VNET(lltables)
/*
 * Read/write lock for the lltable list, with shorthand macros for
 * acquiring, releasing and asserting it. The assert macro uses RA_LOCKED,
 * so it accepts the lock being held in either mode.
 */
73 static struct rwlock lltable_list_lock;
74 RW_SYSINIT(lltable_list_lock, &lltable_list_lock, "lltable_list_lock");
75 #define LLTABLE_LIST_RLOCK() rw_rlock(&lltable_list_lock)
76 #define LLTABLE_LIST_RUNLOCK() rw_runlock(&lltable_list_lock)
77 #define LLTABLE_LIST_WLOCK() rw_wlock(&lltable_list_lock)
78 #define LLTABLE_LIST_WUNLOCK() rw_wunlock(&lltable_list_lock)
79 #define LLTABLE_LIST_LOCK_ASSERT() rw_assert(&lltable_list_lock, RA_LOCKED)
/* Forward declarations of static helpers defined later in this file. */
81 static void lltable_unlink(struct lltable *llt);
82 static void llentries_unlink(struct lltable *llt, struct llentries *head);
85 * Dump lle state for a specific address family.
/*
 * Runs the table's llt_dump_entry callback over every entry of @llt,
 * passing the sysctl request @wr as the callback argument.
 * The lltable list lock must already be held (asserted below).
 */
88 lltable_dump_af(struct lltable *llt, struct sysctl_req *wr)
90 struct epoch_tracker et;
93 LLTABLE_LIST_LOCK_ASSERT();
/* Tables whose interface is a loopback are tested for here. */
95 if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
/* llt_dump_entry is cast to the generic foreach-callback type. */
100 error = lltable_foreach_lle(llt,
101 (llt_foreach_cb_t *)llt->llt_dump_entry, wr);
108 * Dump arp state for a specific address family.
/*
 * Walks V_lltables under the list read lock and calls lltable_dump_af()
 * on each table whose llt_af equals @af, storing its result in error.
 */
111 lltable_sysctl_dumparp(int af, struct sysctl_req *wr)
116 LLTABLE_LIST_RLOCK();
117 SLIST_FOREACH(llt, &V_lltables, llt_link) {
118 if (llt->llt_af == af) {
119 error = lltable_dump_af(llt, wr);
125 LLTABLE_LIST_RUNLOCK();
130 * Common function helpers for chained hash table.
134 * Runs specified callback for each entry in @llt.
135 * Caller does the locking.
/*
 * Hash-table implementation of the foreach method: visits buckets
 * 0 .. llt_hsize-1 and calls @f(llt, lle, farg) for each entry found.
 */
139 htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg)
141 struct llentry *lle, *next;
146 for (i = 0; i < llt->llt_hsize; i++) {
/* The _SAFE variant iterates using the separately kept @next pointer. */
147 CK_LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
148 error = f(llt, lle, farg);
158 * The htable_[un]link_entry() functions return:
159 * 0 if the entry was (un)linked already and nothing changed,
160 * 1 if the entry was added/removed to/from the table, and
161 * -1 on error (e.g., not being able to add the entry due to limits reached).
162 * While the "unlink" operation should never error, callers of
163 * lltable_link_entry() need to check for errors and handle them.
/*
 * Inserts @lle into the hash bucket chosen by the table's llt_hash
 * callback. The interface AFDATA write lock must be held (asserted).
 */
166 htable_link_entry(struct lltable *llt, struct llentry *lle)
168 struct llentries *lleh;
/* Entry is already marked as linked. */
171 if ((lle->la_flags & LLE_LINKED) != 0)
174 IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp);
/* The entry limit only applies when llt_maxentries is positive. */
176 if (llt->llt_maxentries > 0 &&
177 llt->llt_entries >= llt->llt_maxentries)
/* Select the bucket for this entry. */
180 hashidx = llt->llt_hash(lle, llt->llt_hsize);
181 lleh = &llt->lle_head[hashidx];
/* Remember the bucket on the entry, mark it linked, insert at bucket head. */
184 lle->lle_head = lleh;
185 lle->la_flags |= LLE_LINKED;
186 CK_LIST_INSERT_HEAD(lleh, lle, lle_next);
/*
 * Removes @lle from its hash bucket. The interface AFDATA write lock must
 * be held, and the table's entry count is asserted to be positive.
 */
193 htable_unlink_entry(struct llentry *lle)
/* Entry is not marked as linked. */
197 if ((lle->la_flags & LLE_LINKED) == 0)
201 IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp);
202 KASSERT(llt->llt_entries > 0, ("%s: lltable %p (%s) entries %d <= 0",
203 __func__, llt, if_name(llt->llt_ifp), llt->llt_entries));
/* Unhook from the bucket list and clear both LLE_VALID and LLE_LINKED. */
205 CK_LIST_REMOVE(lle, lle_next);
206 lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
/* The entry no longer belongs to any bucket. */
209 lle->lle_head = NULL;
/*
 * Argument bundle passed to htable_prefix_free_cb() through the foreach
 * @farg pointer.
 */
216 struct prefix_match_data {
/* Address and mask handed to the table's llt_match_prefix callback. */
217 const struct sockaddr *addr;
218 const struct sockaddr *mask;
/* Chain (linked via lle_chain) on which matching entries are collected. */
219 struct llentries dchain;
/*
 * Foreach callback used by htable_prefix_free(): @farg is a
 * struct prefix_match_data. Entries for which llt_match_prefix returns
 * non-zero are pushed onto pmd->dchain.
 */
224 htable_prefix_free_cb(struct lltable *llt, struct llentry *lle, void *farg)
226 struct prefix_match_data *pmd;
228 pmd = (struct prefix_match_data *)farg;
230 if (llt->llt_match_prefix(pmd->addr, pmd->mask, pmd->flags, lle)) {
232 CK_LIST_INSERT_HEAD(&pmd->dchain, lle, lle_chain);
/*
 * Hash-table implementation of the prefix_free method. Works in two
 * phases: matching entries are collected and unlinked while the AFDATA
 * write lock is held, then freed after the lock has been released.
 */
239 htable_prefix_free(struct lltable *llt, const struct sockaddr *addr,
240 const struct sockaddr *mask, u_int flags)
242 struct llentry *lle, *next;
243 struct prefix_match_data pmd;
/* Start from a zeroed match descriptor with an empty chain. */
245 bzero(&pmd, sizeof(pmd));
249 CK_LIST_INIT(&pmd.dchain);
251 IF_AFDATA_WLOCK(llt->llt_ifp);
252 /* Push matching lles to chain */
253 lltable_foreach_lle(llt, htable_prefix_free_cb, &pmd);
/* Unlink everything that was collected, still under the lock. */
255 llentries_unlink(llt, &pmd.dchain);
256 IF_AFDATA_WUNLOCK(llt->llt_ifp);
/* Free the collected entries through the table's free method. */
258 CK_LIST_FOREACH_SAFE(lle, &pmd.dchain, lle_chain, next)
259 lltable_free_entry(llt, lle);
/*
 * Hash-table implementation of the free_tbl method: releases the bucket
 * array and then the table structure itself, both of type M_LLTABLE.
 */
263 htable_free_tbl(struct lltable *llt)
266 free(llt->lle_head, M_LLTABLE);
267 free(llt, M_LLTABLE);
/*
 * Calls the table's llt_unlink_entry method on every entry found on the
 * chain @head (entries are chained through their lle_chain member).
 */
271 llentries_unlink(struct lltable *llt, struct llentries *head)
273 struct llentry *lle, *next;
275 CK_LIST_FOREACH_SAFE(lle, head, lle_chain, next)
276 llt->llt_unlink_entry(lle);
280 * Helper function used to drop all mbufs in hold queue.
282 * Returns the number of held packets, if any, that were dropped.
/*
 * Frees the mbufs queued on @lle->la_hold and returns pkts_dropped.
 * The @lle write lock must be held (asserted).
 */
285 lltable_drop_entry_queue(struct llentry *lle)
290 LLE_WLOCK_ASSERT(lle);
/* Loop while the counter is positive and the hold queue is non-empty. */
293 while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) {
/* Save the following packet before the current head is freed. */
294 next = lle->la_hold->m_nextpkt;
295 m_freem(lle->la_hold);
/* The held-packet counter is expected to have reached zero. */
301 KASSERT(lle->la_numheld == 0,
302 ("%s: la_numheld %d > 0, pkts_droped %zd", __func__,
303 lle->la_numheld, pkts_dropped));
305 return (pkts_dropped);
/*
 * Stores a pre-computed link-layer header in @lle and marks the entry
 * valid. @linkhdr/@linkhdrsize give the header bytes and their length;
 * @lladdr_off is the offset of the link-layer address within them.
 */
309 lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
310 const char *linkhdr, size_t linkhdrsize, int lladdr_off)
/* Copy the header into the entry and record its length. */
313 memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
314 lle->r_hdrlen = linkhdrsize;
/* ll_addr points into the copied header, not at separate storage. */
315 lle->ll_addr = &lle->r_linkdata[lladdr_off];
/* Mark valid in both flag words. */
316 lle->la_flags |= LLE_VALID;
317 lle->r_flags |= RLLE_VALID;
321 * Tries to update @lle link-level address.
322 * Since update requires AFDATA WLOCK, function
323 * drops @lle lock, acquires AFDATA lock and then acquires
324 * @lle lock to maintain lock order.
326 * Returns 1 on success.
/*
 * Locking: the caller holds the @lle write lock (asserted). The interface
 * AFDATA write lock is acquired for the update and released before return.
 */
329 lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
330 const char *linkhdr, size_t linkhdrsize, int lladdr_off)
333 /* Perform real LLE update */
334 /* use afdata WLOCK to update fields */
335 LLE_WLOCK_ASSERT(lle);
338 IF_AFDATA_WLOCK(ifp);
342 * Since we droppped LLE lock, other thread might have deleted
343 * this lle. Check and return
/* Deleted entry: drop the AFDATA lock and release the locked entry. */
345 if ((lle->la_flags & LLE_DELETED) != 0) {
346 IF_AFDATA_WUNLOCK(ifp);
347 LLE_FREE_LOCKED(lle);
/* Entry is still live: install the new link-layer header. */
352 lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off);
354 IF_AFDATA_WUNLOCK(ifp);
362 * Helper function used to pre-compute full/partial link-layer
363 * header data suitable for feeding into if_output().
/*
 * Builds an encapsulation request for (@family, @lladdr) and hands it to
 * the interface's if_requestencap method.
 * @bufsize is in/out: capacity on entry, ereq.bufsize after the call.
 * @lladdr_off receives ereq.lladdr_off after the call.
 */
366 lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
367 char *buf, size_t *bufsize, int *lladdr_off)
369 struct if_encap_req ereq;
/* Clear the output buffer and the request before filling them in. */
372 bzero(buf, *bufsize);
373 bzero(&ereq, sizeof(ereq));
375 ereq.bufsize = *bufsize;
376 ereq.rtype = IFENCAP_LL;
377 ereq.family = family;
378 ereq.lladdr = lladdr;
/* The address length comes from the interface, not from the caller. */
379 ereq.lladdr_len = ifp->if_addrlen;
380 error = ifp->if_requestencap(ifp, &ereq);
/* Report the resulting size and address offset back to the caller. */
382 *bufsize = ereq.bufsize;
383 *lladdr_off = ereq.lladdr_off;
390 * Update link-layer header for given @lle after
391 * interface lladdr was changed.
/*
 * Foreach callback used by lltable_update_ifaddr(): @farg is the
 * struct ifnet. Recomputes the link-layer header for @lle and copies it
 * into lle->r_linkdata.
 */
394 llentry_update_ifaddr(struct lltable *llt, struct llentry *lle, void *farg)
397 u_char linkhdr[LLE_MAX_LINKHDR];
402 ifp = (struct ifnet *)farg;
/* Default to the address currently stored in the entry. */
404 lladdr = lle->ll_addr;
/* Entries without LLE_VALID are tested for first. */
407 if ((lle->la_flags & LLE_VALID) == 0) {
/* LLE_IFADDR entries use the interface's own link-layer address instead. */
412 if ((lle->la_flags & LLE_IFADDR) != 0)
413 lladdr = IF_LLADDR(ifp);
/* Rebuild the header into a local buffer, then copy it into the entry. */
415 linkhdrsize = sizeof(linkhdr);
416 lltable_calc_llheader(ifp, llt->llt_af, lladdr, linkhdr, &linkhdrsize,
418 memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
425 * Update all calculated headers for given @llt
/*
 * Runs llentry_update_ifaddr() over every entry of @llt while holding the
 * interface AFDATA write lock; the interface is passed as callback argument.
 */
428 lltable_update_ifaddr(struct lltable *llt)
/* Tables whose interface is a loopback are tested for here. */
431 if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
434 IF_AFDATA_WLOCK(llt->llt_ifp);
435 lltable_foreach_lle(llt, llentry_update_ifaddr, llt->llt_ifp);
436 IF_AFDATA_WUNLOCK(llt->llt_ifp);
441 * Performs generic cleanup routines and frees lle.
443 * Called for non-linked entries, with callouts and
444 * other AF-specific cleanups performed.
446 * @lle must be passed WLOCK'ed
448 * Returns the number of held packets, if any, that were dropped.
/*
 * Drops the hold queue of @lle, stops its timer and releases the locked
 * entry. Returns the count reported by lltable_drop_entry_queue().
 * Preconditions (both asserted): @lle is write-locked and not linked.
 */
451 llentry_free(struct llentry *lle)
455 LLE_WLOCK_ASSERT(lle);
457 KASSERT((lle->la_flags & LLE_LINKED) == 0, ("freeing linked lle"));
459 pkts_dropped = lltable_drop_entry_queue(lle);
/* NOTE(review): a positive result presumably means a pending timer was cancelled - confirm against callout(9). */
462 if (callout_stop(&lle->lle_timer) > 0)
464 LLE_FREE_LOCKED(lle);
466 return (pkts_dropped);
470 * Foreach callback for lltable_free(): pushes @lle onto the chain passed in @farg.
/*
 * Foreach callback: @farg is a struct llentries chain head, and @lle is
 * inserted at its head through the lle_chain linkage.
 */
474 lltable_free_cb(struct lltable *llt, struct llentry *lle, void *farg)
476 struct llentries *dchain;
478 dchain = (struct llentries *)farg;
481 CK_LIST_INSERT_HEAD(dchain, lle, lle_chain);
487 * Free all entries from given table and free itself.
/*
 * Tears down @llt: every entry is collected on a local chain and unlinked
 * under the AFDATA write lock, the chain is then walked after the lock is
 * released, and finally the table's llt_free_tbl method is called.
 * @llt must not be NULL (asserted).
 */
490 lltable_free(struct lltable *llt)
492 struct llentry *lle, *next;
493 struct llentries dchain;
495 KASSERT(llt != NULL, ("%s: llt is NULL", __func__));
499 CK_LIST_INIT(&dchain);
500 IF_AFDATA_WLOCK(llt->llt_ifp);
501 /* Push all lles to @dchain */
502 lltable_foreach_lle(llt, lltable_free_cb, &dchain);
503 llentries_unlink(llt, &dchain);
504 IF_AFDATA_WUNLOCK(llt->llt_ifp);
/* Process the collected entries outside the AFDATA lock. */
506 CK_LIST_FOREACH_SAFE(lle, &dchain, lle_chain, next) {
/* After unlinking everything the entry count is expected to be zero. */
510 KASSERT(llt->llt_entries == 0, ("%s: lltable %p (%s) entires not 0: %d",
511 __func__, llt, llt->llt_ifp->if_xname, llt->llt_entries));
513 llt->llt_free_tbl(llt);
517 * Deletes an address from given lltable.
518 * Used for userland interaction to remove
519 * individual entries. Skips entries added by OS.
/*
 * Looks up @l3addr in @llt under the AFDATA write lock, unlinks the entry
 * found and passes it to the table's llt_delete_entry method once the
 * lock has been released.
 */
522 lltable_delete_addr(struct lltable *llt, u_int flags,
523 const struct sockaddr *l3addr)
529 IF_AFDATA_WLOCK(ifp);
/* LLE_EXCLUSIVE is passed as the lookup flag. */
530 lle = lla_lookup(llt, LLE_EXCLUSIVE, l3addr);
533 IF_AFDATA_WUNLOCK(ifp);
/*
 * An LLE_IFADDR entry takes this early-unlock path unless the caller also
 * set LLE_IFADDR in @flags (presumably the "entries added by OS" case).
 */
536 if ((lle->la_flags & LLE_IFADDR) != 0 && (flags & LLE_IFADDR) == 0) {
537 IF_AFDATA_WUNLOCK(ifp);
/* Unlink under the lock; the delete method runs after the unlock. */
542 lltable_unlink_entry(llt, lle);
543 IF_AFDATA_WUNLOCK(ifp);
545 llt->llt_delete_entry(llt, lle);
/*
 * Walks V_lltables under the list read lock and invokes the
 * llt_prefix_free method of each table; llt_af is compared against @af
 * to select tables.
 */
551 lltable_prefix_free(int af, struct sockaddr *addr, struct sockaddr *mask,
556 LLTABLE_LIST_RLOCK();
557 SLIST_FOREACH(llt, &V_lltables, llt_link) {
558 if (llt->llt_af != af)
561 llt->llt_prefix_free(llt, addr, mask, flags);
563 LLTABLE_LIST_RUNLOCK();
/*
 * Allocates a hash-based lltable with @hsize buckets and installs the
 * htable_* functions as its link, unlink, prefix_free, foreach and
 * free_tbl methods.
 */
567 lltable_allocate_htbl(uint32_t hsize)
/* Both allocations are requested with M_WAITOK | M_ZERO. */
572 llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK | M_ZERO);
573 llt->llt_hsize = hsize;
574 llt->lle_head = malloc(sizeof(struct llentries) * hsize,
575 M_LLTABLE, M_WAITOK | M_ZERO);
/* Initialize every bucket as an empty list. */
577 for (i = 0; i < llt->llt_hsize; i++)
578 CK_LIST_INIT(&llt->lle_head[i]);
580 /* Set some default callbacks */
581 llt->llt_link_entry = htable_link_entry;
582 llt->llt_unlink_entry = htable_unlink_entry;
583 llt->llt_prefix_free = htable_prefix_free;
584 llt->llt_foreach_entry = htable_foreach_lle;
585 llt->llt_free_tbl = htable_free_tbl;
591 * Links lltable to global llt list.
/* Inserts @llt at the head of V_lltables under the list write lock. */
594 lltable_link(struct lltable *llt)
597 LLTABLE_LIST_WLOCK();
598 SLIST_INSERT_HEAD(&V_lltables, llt, llt_link);
599 LLTABLE_LIST_WUNLOCK();
/* Removes @llt from V_lltables under the list write lock. */
603 lltable_unlink(struct lltable *llt)
606 LLTABLE_LIST_WLOCK();
607 SLIST_REMOVE(&V_lltables, llt, lltable, llt_link);
608 LLTABLE_LIST_WUNLOCK();
613 * External methods used by lltable consumers
/* Dispatches to the table's llt_foreach_entry method and returns its result. */
617 lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg)
620 return (llt->llt_foreach_entry(llt, f, farg));
/* Dispatches to the table's llt_alloc_entry method and returns its result. */
624 lltable_alloc_entry(struct lltable *llt, u_int flags,
625 const struct sockaddr *l3addr)
628 return (llt->llt_alloc_entry(llt, flags, l3addr));
/* Dispatches to the table's llt_free_entry method. */
632 lltable_free_entry(struct lltable *llt, struct llentry *lle)
635 llt->llt_free_entry(llt, lle);
/* Dispatches to the table's llt_link_entry method and returns its result. */
639 lltable_link_entry(struct lltable *llt, struct llentry *lle)
642 return (llt->llt_link_entry(llt, lle));
/*
 * Dispatches to the table's llt_unlink_entry method and returns its
 * result. Note that the method itself receives only @lle.
 */
646 lltable_unlink_entry(struct lltable *llt, struct llentry *lle)
649 return (llt->llt_unlink_entry(lle));
/* Fills @sa from @lle through the table's llt_fill_sa_entry method. */
653 lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
658 llt->llt_fill_sa_entry(lle, sa);
/* Accessor: returns the llt_ifp member of @llt. */
662 lltable_get_ifp(const struct lltable *llt)
665 return (llt->llt_ifp);
/* Accessor: returns the llt_af member of @llt. */
669 lltable_get_af(const struct lltable *llt)
672 return (llt->llt_af);
676 * Called in route_output when rtm_flags contains RTF_LLDATA.
/*
 * Processes a routing message that carries link-layer data. The gateway
 * in @info supplies the link-level address and interface index, and the
 * destination supplies the L3 address. Depending on rtm->rtm_type, a new
 * entry is created and linked (replacing any existing one) or the address
 * is deleted through lltable_delete_addr().
 */
679 lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
681 struct sockaddr_dl *dl =
682 (struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY];
683 struct sockaddr *dst = (struct sockaddr *)info->rti_info[RTAX_DST];
686 struct llentry *lle, *lle_tmp;
687 uint8_t linkhdr[LLE_MAX_LINKHDR];
/* The gateway must be present and of family AF_LINK. */
693 if (dl == NULL || dl->sdl_family != AF_LINK)
/* Resolve the interface from the index stored in the gateway sockaddr. */
696 ifp = ifnet_byindex(dl->sdl_index);
698 log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n",
699 __func__, dl->sdl_index);
703 /* XXX linked list may be too expensive */
/* Search the table list, under the read lock, by destination address family. */
704 LLTABLE_LIST_RLOCK();
705 SLIST_FOREACH(llt, &V_lltables, llt_link) {
706 if (llt->llt_af == dst->sa_family &&
710 LLTABLE_LIST_RUNLOCK();
716 switch (rtm->rtm_type) {
/* An expiry of zero requests a static entry. */
720 if (rtm->rtm_rmx.rmx_expire == 0)
721 laflags = LLE_STATIC;
722 lle = lltable_alloc_entry(llt, laflags, dst);
/* Compute the link-layer header for the supplied address and store it. */
726 linkhdrsize = sizeof(linkhdr);
727 if (lltable_calc_llheader(ifp, dst->sa_family, LLADDR(dl),
728 linkhdr, &linkhdrsize, &lladdr_off) != 0)
730 lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
/* RTF_ANNOUNCE marks the entry as published. */
732 if ((rtm->rtm_flags & RTF_ANNOUNCE))
733 lle->la_flags |= LLE_PUB;
734 lle->la_expire = rtm->rtm_rmx.rmx_expire;
/* Keep a copy of the flags; laflags is consulted again further down. */
736 laflags = lle->la_flags;
738 /* Try to link new entry */
740 IF_AFDATA_WLOCK(ifp);
/* Look for an entry that already exists for this destination. */
742 lle_tmp = lla_lookup(llt, LLE_EXCLUSIVE, dst);
743 if (lle_tmp != NULL) {
744 /* Check if we are trying to replace immutable entry */
745 if ((lle_tmp->la_flags & LLE_IFADDR) != 0) {
746 IF_AFDATA_WUNLOCK(ifp);
747 LLE_WUNLOCK(lle_tmp);
/* The freshly allocated entry is discarded on this path. */
748 lltable_free_entry(llt, lle);
751 /* Unlink existing entry from table */
752 lltable_unlink_entry(llt, lle_tmp);
754 lltable_link_entry(llt, lle);
755 IF_AFDATA_WUNLOCK(ifp);
/* The replaced entry is reported as expired and freed outside the lock. */
757 if (lle_tmp != NULL) {
758 EVENTHANDLER_INVOKE(lle_event, lle_tmp,LLENTRY_EXPIRED);
759 lltable_free_entry(llt, lle_tmp);
763 * By invoking LLE handler here we might get
764 * two events on static LLE entry insertion
765 * in routing socket. However, since we might have
766 * other subscribers we need to generate this event.
768 EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED);
/* Published IPv4 entries get extra handling with the destination and link-layer addresses. */
772 if ((laflags & LLE_PUB) && dst->sa_family == AF_INET)
774 &((struct sockaddr_in *)dst)->sin_addr,
775 &((struct sockaddr_in *)dst)->sin_addr,
776 (u_char *)LLADDR(dl));
/* Deletion is delegated to lltable_delete_addr() with no flags set. */
782 return (lltable_delete_addr(llt, 0, dst));
794 struct sockaddr l3_addr;
/*
 * Debugger helper: prints the llentry fields one per line with
 * db_printf(), followed by the L3 address stored in @la, formatted
 * according to its address family.
 */
798 llatbl_lle_show(struct llentry_sa *la)
804 db_printf("lle=%p\n", lle);
805 db_printf(" lle_next=%p\n", lle->lle_next.cle_next);
806 db_printf(" lle_lock=%p\n", &lle->lle_lock);
807 db_printf(" lle_tbl=%p\n", lle->lle_tbl);
808 db_printf(" lle_head=%p\n", lle->lle_head);
809 db_printf(" la_hold=%p\n", lle->la_hold);
810 db_printf(" la_numheld=%d\n", lle->la_numheld);
811 db_printf(" la_expire=%ju\n", (uintmax_t)lle->la_expire);
812 db_printf(" la_flags=0x%04x\n", lle->la_flags);
813 db_printf(" la_asked=%u\n", lle->la_asked);
814 db_printf(" la_preempt=%u\n", lle->la_preempt);
815 db_printf(" ln_state=%d\n", lle->ln_state);
816 db_printf(" ln_router=%u\n", lle->ln_router);
817 db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick);
818 db_printf(" lle_refcnt=%d\n", lle->lle_refcnt);
/* The link-layer address is copied out and printed as six hex octets. */
819 bcopy(lle->ll_addr, octet, sizeof(octet));
820 db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n",
821 octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
822 db_printf(" lle_timer=%p\n", &lle->lle_timer);
/* Format the L3 address according to its address family. */
824 switch (la->l3_addr.sa_family) {
/* IPv4-style address: formatted with inet_ntoa_r(). */
828 struct sockaddr_in *sin;
829 char l3s[INET_ADDRSTRLEN];
831 sin = (struct sockaddr_in *)&la->l3_addr;
832 inet_ntoa_r(sin->sin_addr, l3s);
833 db_printf(" l3_addr=%s\n", l3s);
/* IPv6-style address: formatted with ip6_sprintf(). */
840 struct sockaddr_in6 *sin6;
841 char l3s[INET6_ADDRSTRLEN];
843 sin6 = (struct sockaddr_in6 *)&la->l3_addr;
844 ip6_sprintf(l3s, &sin6->sin6_addr);
845 db_printf(" l3_addr=%s\n", l3s);
/* Any other family: only the family number is printed. */
850 db_printf(" l3_addr=N/A (af=%d)\n", la->l3_addr.sa_family);
/*
 * DDB command "show llentry": interprets the command address as a
 * struct llentry_sa pointer and dumps it with llatbl_lle_show().
 * A usage line is printed on the other path.
 */
855 DB_SHOW_COMMAND(llentry, db_show_llentry)
859 db_printf("usage: show llentry <struct llentry *>\n");
863 llatbl_lle_show((struct llentry_sa *)addr);
/*
 * Debugger helper: prints a one-line summary of @llt, then dumps every
 * entry in every hash bucket through llatbl_lle_show().
 */
867 llatbl_llt_show(struct lltable *llt)
872 db_printf("llt=%p llt_af=%d llt_ifp=%p\n",
873 llt, llt->llt_af, llt->llt_ifp);
875 for (i = 0; i < llt->llt_hsize; i++) {
876 CK_LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
/* Each entry is passed on cast to struct llentry_sa. */
877 llatbl_lle_show((struct llentry_sa *)lle);
/*
 * DDB command "show lltable": interprets the command address as a
 * struct lltable pointer and dumps it with llatbl_llt_show().
 * A usage line is printed on the other path.
 */
884 DB_SHOW_COMMAND(lltable, db_show_lltable)
888 db_printf("usage: show lltable <struct lltable *>\n");
892 llatbl_llt_show((struct lltable *)addr);
/*
 * DDB command "show all lltables": iterates over every vnet and prints a
 * summary line for each lltable on that vnet's V_lltables list.
 */
895 DB_SHOW_ALL_COMMAND(lltables, db_show_all_lltables)
897 VNET_ITERATOR_DECL(vnet_iter);
900 VNET_FOREACH(vnet_iter) {
/* Switch the current vnet so that V_lltables refers to this vnet's list. */
901 CURVNET_SET_QUIET(vnet_iter);
903 db_printf("vnet=%p\n", curvnet);
905 SLIST_FOREACH(llt, &V_lltables, llt_link) {
/* "?" is printed in place of the name when the table has no interface. */
906 db_printf("llt=%p llt_af=%d llt_ifp=%p(%s)\n",
907 llt, llt->llt_af, llt->llt_ifp,
908 (llt->llt_ifp != NULL) ?
909 llt->llt_ifp->if_xname : "?");
/* A non-zero address argument additionally dumps every entry of the table. */
910 if (have_addr && addr != 0) /* verbose */
911 llatbl_llt_show(llt);