1 /**************************************************************************
3 Copyright (c) 2008-2010, BitGravity Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the BitGravity Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include "opt_route.h"
31 #include "opt_mpath.h"
34 #include "opt_inet6.h"
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
39 #include <sys/param.h>
40 #include <sys/types.h>
41 #include <sys/bitstring.h>
42 #include <sys/condvar.h>
43 #include <sys/callout.h>
44 #include <sys/kernel.h>
45 #include <sys/kthread.h>
46 #include <sys/limits.h>
47 #include <sys/malloc.h>
51 #include <sys/sched.h>
53 #include <sys/socket.h>
54 #include <sys/syslog.h>
55 #include <sys/sysctl.h>
58 #include <net/if_llatbl.h>
59 #include <net/if_var.h>
60 #include <net/route.h>
61 #include <net/flowtable.h>
64 #include <netinet/in.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in_var.h>
67 #include <netinet/if_ether.h>
68 #include <netinet/ip.h>
70 #include <netinet/ip6.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 #include <netinet/sctp.h>
76 #include <libkern/jenkins.h>
80 uint16_t ip_sport; /* source port */
81 uint16_t ip_dport; /* destination port */
82 in_addr_t ip_saddr; /* source address */
83 in_addr_t ip_daddr; /* destination address */
87 struct ipv4_tuple ipf_ipt;
92 uint16_t ip_sport; /* source port */
93 uint16_t ip_dport; /* destination port */
94 struct in6_addr ip_saddr; /* source address */
95 struct in6_addr ip_daddr; /* destination address */
99 struct ipv6_tuple ipf_ipt;
104 volatile uint32_t f_fhash; /* hash flowing forward */
105 uint16_t f_flags; /* flow flags */
107 uint8_t f_proto; /* protocol */
108 uint32_t f_fibnum; /* fib index */
109 uint32_t f_uptime; /* uptime at last access */
110 struct flentry *f_next; /* pointer to collision entry */
111 volatile struct rtentry *f_rt; /* rtentry for flow */
112 volatile struct llentry *f_lle; /* llentry for flow */
116 struct flentry fl_entry;
117 union ipv4_flow fl_flow;
121 struct flentry fl_entry;
122 union ipv6_flow fl_flow;
125 #define fl_fhash fl_entry.fl_fhash
126 #define fl_flags fl_entry.fl_flags
127 #define fl_proto fl_entry.fl_proto
128 #define fl_uptime fl_entry.fl_uptime
129 #define fl_rt fl_entry.fl_rt
130 #define fl_lle fl_entry.fl_lle
132 #define SECS_PER_HOUR 3600
133 #define SECS_PER_DAY (24*SECS_PER_HOUR)
137 #define FIN_WAIT_IDLE 600
138 #define TCP_IDLE SECS_PER_DAY
141 typedef void fl_lock_t(struct flowtable *, uint32_t);
142 typedef void fl_rtalloc_t(struct route *, uint32_t, u_int);
145 struct flentry **global;
146 struct flentry **pcpu[MAXCPU];
149 struct flowtable_stats {
150 uint64_t ft_collisions;
151 uint64_t ft_allocated;
153 uint64_t ft_max_depth;
154 uint64_t ft_free_checks;
158 } __aligned(CACHE_LINE_SIZE);
161 struct flowtable_stats ft_stats[MAXCPU];
167 fl_lock_t *ft_unlock;
168 fl_rtalloc_t *ft_rtalloc;
170 * XXX need to pad out
172 struct mtx *ft_locks;
173 union flentryp ft_table;
174 bitstr_t *ft_masks[MAXCPU];
175 bitstr_t *ft_tmpmask;
176 struct flowtable *ft_next;
178 uint32_t ft_count __aligned(CACHE_LINE_SIZE);
179 uint32_t ft_udp_idle __aligned(CACHE_LINE_SIZE);
180 uint32_t ft_fin_wait_idle;
181 uint32_t ft_syn_idle;
182 uint32_t ft_tcp_idle;
184 } __aligned(CACHE_LINE_SIZE);
186 static struct proc *flowcleanerproc;
187 static VNET_DEFINE(struct flowtable *, flow_list_head);
188 static VNET_DEFINE(uint32_t, flow_hashjitter);
189 static VNET_DEFINE(uma_zone_t, flow_ipv4_zone);
190 static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);
192 #define V_flow_list_head VNET(flow_list_head)
193 #define V_flow_hashjitter VNET(flow_hashjitter)
194 #define V_flow_ipv4_zone VNET(flow_ipv4_zone)
195 #define V_flow_ipv6_zone VNET(flow_ipv6_zone)
198 static struct cv flowclean_cv;
199 static struct mtx flowclean_lock;
200 static uint32_t flowclean_cycles;
201 static uint32_t flowclean_freq;
203 #ifdef FLOWTABLE_DEBUG
204 #define FLDPRINTF(ft, flags, fmt, ...) \
206 if ((ft)->ft_flags & (flags)) \
207 printf((fmt), __VA_ARGS__); \
211 #define FLDPRINTF(ft, flags, fmt, ...)
218 * - Make flowtable stats per-cpu, aggregated at sysctl call time,
219 * to avoid extra cache evictions caused by incrementing a shared
221 * - add sysctls to resize && flush flow tables
222 * - Add per flowtable sysctls for statistics and configuring timeouts
223 * - add saturation counter to rtentry to support per-packet load-balancing
224 * add flag to indicate round-robin flow, add list lookup from head
226 * - add sysctl / device node / syscall to support exporting and importing
227 * of flows with flag to indicate that a flow was imported so should
228 * not be considered for auto-cleaning
229 * - support explicit connection state (currently only ad-hoc for DSR)
230 * - idetach() cleanup for options VIMAGE builds.
232 VNET_DEFINE(int, flowtable_enable) = 1;
233 static VNET_DEFINE(int, flowtable_debug);
234 static VNET_DEFINE(int, flowtable_syn_expire) = SYN_IDLE;
235 static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
236 static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE;
237 static VNET_DEFINE(int, flowtable_tcp_expire) = TCP_IDLE;
238 static VNET_DEFINE(int, flowtable_nmbflows);
239 static VNET_DEFINE(int, flowtable_ready) = 0;
241 #define V_flowtable_enable VNET(flowtable_enable)
242 #define V_flowtable_debug VNET(flowtable_debug)
243 #define V_flowtable_syn_expire VNET(flowtable_syn_expire)
244 #define V_flowtable_udp_expire VNET(flowtable_udp_expire)
245 #define V_flowtable_fin_wait_expire VNET(flowtable_fin_wait_expire)
246 #define V_flowtable_tcp_expire VNET(flowtable_tcp_expire)
247 #define V_flowtable_nmbflows VNET(flowtable_nmbflows)
248 #define V_flowtable_ready VNET(flowtable_ready)
250 SYSCTL_NODE(_net_inet, OID_AUTO, flowtable, CTLFLAG_RD, NULL, "flowtable");
251 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, debug, CTLFLAG_RW,
252 &VNET_NAME(flowtable_debug), 0, "print debug info.");
253 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, enable, CTLFLAG_RW,
254 &VNET_NAME(flowtable_enable), 0, "enable flowtable caching.");
257 * XXX This does not end up updating timeouts at runtime
258 * and only reflects the value for the last table added :-/
260 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, syn_expire, CTLFLAG_RW,
261 &VNET_NAME(flowtable_syn_expire), 0,
262 "seconds after which to remove syn allocated flow.");
263 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, udp_expire, CTLFLAG_RW,
264 &VNET_NAME(flowtable_udp_expire), 0,
265 "seconds after which to remove flow allocated to UDP.");
266 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, fin_wait_expire, CTLFLAG_RW,
267 &VNET_NAME(flowtable_fin_wait_expire), 0,
268 "seconds after which to remove a flow in FIN_WAIT.");
269 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, tcp_expire, CTLFLAG_RW,
270 &VNET_NAME(flowtable_tcp_expire), 0,
271 "seconds after which to remove flow allocated to a TCP connection.");
275 * Maximum number of flows that can be allocated of a given type.
277 * The table is allocated at boot time (for the pure caching case
278 * there is no reason why this could not be changed at runtime)
279 * and thus (currently) needs to be set with a tunable.
/*
 * Sysctl handler for net.inet.flowtable.nmbflows: the global cap on the
 * number of flow entries.  The limit may only be raised at runtime (the
 * UMA zones were sized at boot); the grow path below pushes the new cap
 * into both the v4 and v6 flow zones.  NOTE(review): the rejection path
 * for a shrink request falls in elided lines — presumably returns EINVAL;
 * confirm against the full source.
 */
282 sysctl_nmbflows(SYSCTL_HANDLER_ARGS)
284 int error, newnmbflows;
286 newnmbflows = V_flowtable_nmbflows;
287 error = sysctl_handle_int(oidp, &newnmbflows, 0, req);
288 if (error == 0 && req->newptr) {
/* only allow the limit to grow */
289 if (newnmbflows > V_flowtable_nmbflows) {
290 V_flowtable_nmbflows = newnmbflows;
291 uma_zone_set_max(V_flow_ipv4_zone,
292 V_flowtable_nmbflows);
293 uma_zone_set_max(V_flow_ipv6_zone,
294 V_flowtable_nmbflows);
300 SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
301 CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_nmbflows, "IU",
302 "Maximum number of flows allowed");
/*
 * FS_PRINT emits one "\t<field>: <value>\n" line for a flowtable_stats
 * member via token pasting; fs_print dumps the counters of one stats
 * record into the caller-supplied sbuf.
 */
306 #define FS_PRINT(sb, field) sbuf_printf((sb), "\t%s: %jd\n", #field, fs->ft_##field)
309 fs_print(struct sbuf *sb, struct flowtable_stats *fs)
312 FS_PRINT(sb, collisions);
313 FS_PRINT(sb, allocated);
314 FS_PRINT(sb, misses);
315 FS_PRINT(sb, max_depth);
316 FS_PRINT(sb, free_checks);
319 FS_PRINT(sb, lookups);
/*
 * Render a table's statistics into an sbuf.  For per-CPU tables the
 * counters are summed across all CPUs into a local aggregate (ft_max_depth
 * takes the maximum rather than the sum); for global tables slot 0 is
 * reported directly.  NOTE(review): pfs must be pointed at &fs before the
 * accumulation loop — that assignment falls in elided lines; verify.
 */
323 flowtable_show_stats(struct sbuf *sb, struct flowtable *ft)
326 struct flowtable_stats fs, *pfs;
328 if (ft->ft_flags & FL_PCPU) {
329 bzero(&fs, sizeof(fs));
331 for (i = 0; i <= mp_maxid; i++) {
334 pfs->ft_collisions += ft->ft_stats[i].ft_collisions;
335 pfs->ft_allocated += ft->ft_stats[i].ft_allocated;
336 pfs->ft_misses += ft->ft_stats[i].ft_misses;
337 pfs->ft_free_checks += ft->ft_stats[i].ft_free_checks;
338 pfs->ft_frees += ft->ft_stats[i].ft_frees;
339 pfs->ft_hits += ft->ft_stats[i].ft_hits;
340 pfs->ft_lookups += ft->ft_stats[i].ft_lookups;
/* max depth is a high-water mark, not additive */
341 if (ft->ft_stats[i].ft_max_depth > pfs->ft_max_depth)
342 pfs->ft_max_depth = ft->ft_stats[i].ft_max_depth;
345 pfs = &ft->ft_stats[0];
/*
 * Sysctl handler for net.inet.flowtable.stats: walk the per-vnet list of
 * flowtables and dump each table's name and counters into a fixed 64 KiB
 * sbuf, then copy it (including NUL, hence len+1) out to userland.
 */
351 sysctl_flowtable_stats(SYSCTL_HANDLER_ARGS)
353 struct flowtable *ft;
357 sb = sbuf_new(NULL, NULL, 64*1024, SBUF_FIXEDLEN);
359 ft = V_flow_list_head;
361 sbuf_printf(sb, "\ntable name: %s\n", ft->ft_name);
362 flowtable_show_stats(sb, ft);
366 error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
371 SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
372 NULL, 0, sysctl_flowtable_stats, "A", "flowtable statistics");
/*
 * Adapt rtalloc_ign_fib() to the fl_rtalloc_t signature; the hash
 * argument is unused here (it exists for the multipath variant).
 */
377 in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
380 rtalloc_ign_fib(ro, 0, fibnum);
/*
 * Lock the bucket for `hash` in a global (shared) table.  The hash is
 * mapped onto one of ft_lock_count mutexes; ft_lock_count is a power of
 * two, so the mask picks the low-order bits.
 */
385 flowtable_global_lock(struct flowtable *table, uint32_t hash)
387 int lock_index = (hash)&(table->ft_lock_count - 1);
389 mtx_lock(&table->ft_locks[lock_index]);
/* Release the bucket mutex taken by flowtable_global_lock() for `hash`. */
393 flowtable_global_unlock(struct flowtable *table, uint32_t hash)
395 int lock_index = (hash)&(table->ft_lock_count - 1);
397 mtx_unlock(&table->ft_locks[lock_index]);
401 flowtable_pcpu_lock(struct flowtable *table, uint32_t hash)
408 flowtable_pcpu_unlock(struct flowtable *table, uint32_t hash)
414 #define FL_ENTRY_INDEX(table, hash)((hash) % (table)->ft_size)
415 #define FL_ENTRY(table, hash) *flowtable_entry((table), (hash))
416 #define FL_ENTRY_LOCK(table, hash) (table)->ft_lock((table), (hash))
417 #define FL_ENTRY_UNLOCK(table, hash) (table)->ft_unlock((table), (hash))
419 #define FL_STALE (1<<8)
420 #define FL_IPV6 (1<<9)
421 #define FL_OVERWRITE (1<<10)
/*
 * Mark a flow entry stale so flow_stale() reports it for reclamation on
 * the next cleaner pass or lookup.
 */
424 flow_invalidate(struct flentry *fle)
427 fle->f_flags |= FL_STALE;
431 proto_to_flags(uint8_t proto)
454 flags_to_proto(int flags)
456 int proto, protoflags;
458 protoflags = flags & (FL_TCP|FL_SCTP|FL_UDP);
459 switch (protoflags) {
464 proto = IPPROTO_SCTP;
477 #ifdef FLOWTABLE_DEBUG
479 ipv4_flow_print_tuple(int flags, int proto, struct sockaddr_in *ssin,
480 struct sockaddr_in *dsin)
482 char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
484 if (flags & FL_HASH_ALL) {
485 inet_ntoa_r(ssin->sin_addr, saddr);
486 inet_ntoa_r(dsin->sin_addr, daddr);
487 printf("proto=%d %s:%d->%s:%d\n",
488 proto, saddr, ntohs(ssin->sin_port), daddr,
489 ntohs(dsin->sin_port));
491 inet_ntoa_r(*(struct in_addr *) &dsin->sin_addr, daddr);
492 printf("proto=%d %s\n", proto, daddr);
/*
 * Extract the IPv4 5-tuple from an mbuf into sockaddr_in pairs.  Address
 * fields are always filled; ports are only extracted when the table hashes
 * the full tuple (FL_HASH_ALL).  Ports are kept in network byte order.
 * The switch(proto) framing and case labels fall in elided lines.
 */
499 ipv4_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
500 struct sockaddr_in *ssin, struct sockaddr_in *dsin, uint16_t *flags)
508 uint16_t sport, dport;
510 proto = sport = dport = 0;
511 ip = mtod(m, struct ip *);
512 dsin->sin_family = AF_INET;
513 dsin->sin_len = sizeof(*dsin);
514 dsin->sin_addr = ip->ip_dst;
515 ssin->sin_family = AF_INET;
516 ssin->sin_len = sizeof(*ssin);
517 ssin->sin_addr = ip->ip_src;
/* address-only hashing: no need to look past the IP header */
520 if ((*flags & FL_HASH_ALL) == 0) {
521 FLDPRINTF(ft, FL_DEBUG_ALL, "skip port check flags=0x%x ",
526 iphlen = ip->ip_hl << 2; /* XXX options? */
/* TCP: RST/FIN marks the flow for early teardown (elided lines set that) */
530 th = (struct tcphdr *)((caddr_t)ip + iphlen);
531 sport = th->th_sport;
532 dport = th->th_dport;
533 if ((*flags & FL_HASH_ALL) &&
534 (th->th_flags & (TH_RST|TH_FIN)))
538 uh = (struct udphdr *)((caddr_t)ip + iphlen);
539 sport = uh->uh_sport;
540 dport = uh->uh_dport;
543 sh = (struct sctphdr *)((caddr_t)ip + iphlen);
544 sport = sh->src_port;
545 dport = sh->dest_port;
548 FLDPRINTF(ft, FL_DEBUG_ALL, "proto=0x%x not supported\n", proto);
550 /* no port - hence not a protocol we care about */
556 *flags |= proto_to_flags(proto);
557 ssin->sin_port = sport;
558 dsin->sin_port = dport;
/*
 * Build the 3-word IPv4 flow key (ports packed into key[0], source address
 * in key[1], destination in key[2]) and hash it with Jenkins' lookup3,
 * salted with the per-vnet jitter plus protocol.  Returns 0 (no flow) when
 * the flowtable is disabled or not yet initialized.
 */
563 ipv4_flow_lookup_hash_internal(
564 struct sockaddr_in *ssin, struct sockaddr_in *dsin,
565 uint32_t *key, uint16_t flags)
567 uint16_t sport, dport;
571 if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
573 proto = flags_to_proto(flags);
574 sport = dport = key[2] = key[1] = key[0] = 0;
/* source address/port only participate in full-tuple hashing */
575 if ((ssin != NULL) && (flags & FL_HASH_ALL)) {
576 key[1] = ssin->sin_addr.s_addr;
577 sport = ssin->sin_port;
580 key[2] = dsin->sin_addr.s_addr;
581 dport = dsin->sin_port;
583 if (flags & FL_HASH_ALL) {
584 ((uint16_t *)key)[0] = sport;
585 ((uint16_t *)key)[1] = dport;
587 offset = V_flow_hashjitter + proto;
589 return (jenkins_hashword(key, 3, offset));
/*
 * IPv4 front end for flowtable_lookup_mbuf(): demarshal the packet's
 * tuple into stack sockaddrs and hand off to the generic lookup with the
 * mbuf's FIB.  Returns NULL if the packet cannot be demarshalled.
 */
592 static struct flentry *
593 flowtable_lookup_mbuf4(struct flowtable *ft, struct mbuf *m)
595 struct sockaddr_storage ssa, dsa;
597 struct sockaddr_in *dsin, *ssin;
599 dsin = (struct sockaddr_in *)&dsa;
600 ssin = (struct sockaddr_in *)&ssa;
601 flags = ft->ft_flags;
602 if (ipv4_mbuf_demarshal(ft, m, ssin, dsin, &flags) != 0)
605 return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
/*
 * Populate a struct route from a cached IPv4 flow entry: destination
 * address comes from key word 2 of the stored hash key, and the cached
 * rtentry/llentry pointers are de-volatilized into the route.
 */
609 flow_to_route(struct flentry *fle, struct route *ro)
611 uint32_t *hashkey = NULL;
612 struct sockaddr_in *sin;
614 sin = (struct sockaddr_in *)&ro->ro_dst;
615 sin->sin_family = AF_INET;
616 sin->sin_len = sizeof(*sin);
617 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
/* key layout: [0]=ports, [1]=src addr, [2]=dst addr */
618 sin->sin_addr.s_addr = hashkey[2];
619 ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
620 ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
626 * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
627 * then it sets p to point at the offset "len" in the mbuf. WARNING: the
628 * pointer might become stale after other pullups (but we never use it
631 #define PULLUP_TO(_len, p, T) \
633 int x = (_len) + sizeof(T); \
634 if ((m)->m_len < x) { \
635 goto receive_failed; \
637 p = (mtod(m, char *) + (_len)); \
640 #define TCP(p) ((struct tcphdr *)(p))
641 #define SCTP(p) ((struct sctphdr *)(p))
642 #define UDP(p) ((struct udphdr *)(p))
/*
 * Extract the IPv6 tuple from an mbuf into sockaddr_in6 pairs, walking
 * the extension-header chain (hop-by-hop, routing, fragment, dstopts, AH)
 * to reach the upper-layer protocol when full-tuple hashing is requested.
 * PULLUP_TO bails to an elided receive_failed label if the header is not
 * contiguous.  Ports stay in network byte order.  Several case labels and
 * the switch framing fall in elided lines.
 */
645 ipv6_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
646 struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, uint16_t *flags)
651 uint16_t src_port, dst_port;
655 offset = hlen = src_port = dst_port = 0;
657 ip6 = mtod(m, struct ip6_hdr *);
658 hlen = sizeof(struct ip6_hdr);
659 proto = ip6->ip6_nxt;
/* address-only hashing: skip the extension-header walk */
661 if ((*flags & FL_HASH_ALL) == 0)
664 while (ulp == NULL) {
667 case IPPROTO_OSPFIGP:
675 PULLUP_TO(hlen, ulp, struct tcphdr);
676 dst_port = TCP(ulp)->th_dport;
677 src_port = TCP(ulp)->th_sport;
678 if ((*flags & FL_HASH_ALL) &&
679 (TCP(ulp)->th_flags & (TH_RST|TH_FIN)))
683 PULLUP_TO(hlen, ulp, struct sctphdr);
684 src_port = SCTP(ulp)->src_port;
685 dst_port = SCTP(ulp)->dest_port;
688 PULLUP_TO(hlen, ulp, struct udphdr);
689 dst_port = UDP(ulp)->uh_dport;
690 src_port = UDP(ulp)->uh_sport;
/* extension headers: advance hlen/proto and loop again (ulp reset elided) */
692 case IPPROTO_HOPOPTS: /* RFC 2460 */
693 PULLUP_TO(hlen, ulp, struct ip6_hbh);
694 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
695 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
698 case IPPROTO_ROUTING: /* RFC 2460 */
699 PULLUP_TO(hlen, ulp, struct ip6_rthdr);
700 hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
701 proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
704 case IPPROTO_FRAGMENT: /* RFC 2460 */
705 PULLUP_TO(hlen, ulp, struct ip6_frag);
706 hlen += sizeof (struct ip6_frag);
707 proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
708 offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
712 case IPPROTO_DSTOPTS: /* RFC 2460 */
713 PULLUP_TO(hlen, ulp, struct ip6_hbh);
714 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
715 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
718 case IPPROTO_AH: /* RFC 2402 */
719 PULLUP_TO(hlen, ulp, struct ip6_ext);
/* AH length is in 4-byte units, unlike the 8-byte units above */
720 hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
721 proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
725 PULLUP_TO(hlen, ulp, struct ip6_ext);
/* fill in the caller's sockaddrs from the fixed IPv6 header */
736 dsin6->sin6_family = AF_INET6;
737 dsin6->sin6_len = sizeof(*dsin6);
738 dsin6->sin6_port = dst_port;
739 memcpy(&dsin6->sin6_addr, &ip6->ip6_dst, sizeof(struct in6_addr));
741 ssin6->sin6_family = AF_INET6;
742 ssin6->sin6_len = sizeof(*ssin6);
743 ssin6->sin6_port = src_port;
744 memcpy(&ssin6->sin6_addr, &ip6->ip6_src, sizeof(struct in6_addr));
745 *flags |= proto_to_flags(proto);
750 #define zero_key(key) \
/*
 * Build the 9-word IPv6 flow key (ports packed into key[0], destination
 * address in key[1..4], source address in key[5..8]) and hash it with
 * Jenkins' lookup3, salted with the per-vnet jitter plus protocol.
 * Returns 0 (no flow) when the flowtable is disabled or not ready.
 */
764 ipv6_flow_lookup_hash_internal(
765 struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6,
766 uint32_t *key, uint16_t flags)
768 uint16_t sport, dport;
772 if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
775 proto = flags_to_proto(flags);
779 memcpy(&key[1], &dsin6->sin6_addr, sizeof(struct in6_addr));
780 dport = dsin6->sin6_port;
/* source address/port only participate in full-tuple hashing */
782 if ((ssin6 != NULL) && (flags & FL_HASH_ALL)) {
783 memcpy(&key[5], &ssin6->sin6_addr, sizeof(struct in6_addr));
784 sport = ssin6->sin6_port;
786 if (flags & FL_HASH_ALL) {
787 ((uint16_t *)key)[0] = sport;
788 ((uint16_t *)key)[1] = dport;
790 offset = V_flow_hashjitter + proto;
792 return (jenkins_hashword(key, 9, offset));
/*
 * IPv6 front end for flowtable_lookup_mbuf(): demarshal the packet's
 * tuple into stack sockaddrs and hand off to the generic lookup with the
 * mbuf's FIB.  Returns NULL if the packet cannot be demarshalled.
 */
795 static struct flentry *
796 flowtable_lookup_mbuf6(struct flowtable *ft, struct mbuf *m)
798 struct sockaddr_storage ssa, dsa;
799 struct sockaddr_in6 *dsin6, *ssin6;
802 dsin6 = (struct sockaddr_in6 *)&dsa;
803 ssin6 = (struct sockaddr_in6 *)&ssa;
804 flags = ft->ft_flags;
806 if (ipv6_mbuf_demarshal(ft, m, ssin6, dsin6, &flags) != 0)
809 return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
/*
 * Populate a struct route_in6 from a cached IPv6 flow entry: destination
 * address comes from key words 5..8 of the stored hash key, and the cached
 * rtentry/llentry pointers are de-volatilized into the route.
 */
813 flow_to_route_in6(struct flentry *fle, struct route_in6 *ro)
815 uint32_t *hashkey = NULL;
816 struct sockaddr_in6 *sin6;
818 sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
820 sin6->sin6_family = AF_INET6;
821 sin6->sin6_len = sizeof(*sin6);
822 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
/* key layout: [0]=ports, [1..4]=dst addr, [5..8]=src addr */
823 memcpy(&sin6->sin6_addr, &hashkey[5], sizeof (struct in6_addr));
824 ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
825 ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
/*
 * Return the occupancy bitmask for the caller's view of the table: the
 * current CPU's mask for per-CPU tables, slot 0 for global tables.
 */
831 flowtable_mask(struct flowtable *ft)
835 if (ft->ft_flags & FL_PCPU)
836 mask = ft->ft_masks[curcpu];
838 mask = ft->ft_masks[0];
/*
 * Return the address of the bucket head slot for `hash`, selecting the
 * current CPU's array for per-CPU tables or the shared array otherwise.
 * NOTE(review): both KASSERTs test the address of element 0, which can
 * never be NULL — they do not actually verify the arrays are allocated.
 */
843 static struct flentry **
844 flowtable_entry(struct flowtable *ft, uint32_t hash)
846 struct flentry **fle;
847 int index = (hash % ft->ft_size);
849 if (ft->ft_flags & FL_PCPU) {
850 KASSERT(&ft->ft_table.pcpu[curcpu][0] != NULL, ("pcpu not set"));
851 fle = &ft->ft_table.pcpu[curcpu][index];
853 KASSERT(&ft->ft_table.global[0] != NULL, ("global not set"));
854 fle = &ft->ft_table.global[index];
/*
 * Decide whether a flow entry is stale and may be reclaimed.  An entry is
 * stale when it was never filled in (f_fhash == 0), its cached route is a
 * down or interface-less host route, it was explicitly invalidated
 * (FL_STALE), or it has idled past the timeout matching its TCP handshake
 * state recorded in f_flags: no SYN/ACK/FIN -> UDP timeout, FIN seen ->
 * FIN_WAIT timeout, SYN without ACK -> SYN timeout, SYN+ACK -> full TCP
 * timeout.  One line of the first route test is elided (the != RTF_UP
 * comparison presumably completes it — confirm against full source).
 */
861 flow_stale(struct flowtable *ft, struct flentry *fle)
865 if ((fle->f_fhash == 0)
866 || ((fle->f_rt->rt_flags & RTF_HOST) &&
867 ((fle->f_rt->rt_flags & (RTF_UP))
869 || (fle->f_rt->rt_ifp == NULL))
872 idle_time = time_uptime - fle->f_uptime;
874 if ((fle->f_flags & FL_STALE) ||
875 ((fle->f_flags & (TH_SYN|TH_ACK|TH_FIN)) == 0
876 && (idle_time > ft->ft_udp_idle)) ||
877 ((fle->f_flags & TH_FIN)
878 && (idle_time > ft->ft_fin_wait_idle)) ||
879 ((fle->f_flags & (TH_SYN|TH_ACK)) == TH_SYN
880 && (idle_time > ft->ft_syn_idle)) ||
881 ((fle->f_flags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)
882 && (idle_time > ft->ft_tcp_idle)) ||
883 ((fle->f_rt->rt_flags & RTF_UP) == 0 ||
884 (fle->f_rt->rt_ifp == NULL)))
/*
 * Copy the computed lookup key into the flow entry's stored hash key.
 * BUG FIX: the variant casts were inverted — an entry flagged FL_IPV6 is
 * a struct flentry_v6 (9-word key) and must not be type-punned as the
 * 3-word flentry_v4, and vice versa.  The casts below now match the flag.
 * The word count (nwords) is set on elided lines of each branch.
 */
891 flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
896 if (fle->f_flags & FL_IPV6) {
898 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
901 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
/* copy nwords key words into the entry */
904 for (i = 0; i < nwords; i++)
/*
 * Allocate a zeroed flow entry from the v4 or v6 UMA zone matching the
 * table's address family, and bump the table's entry count atomically.
 * M_NOWAIT: may return NULL under memory pressure (NULL path elided).
 */
908 static struct flentry *
909 flow_alloc(struct flowtable *ft)
911 struct flentry *newfle;
915 zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
917 newfle = uma_zalloc(zone, M_NOWAIT | M_ZERO);
919 atomic_add_int(&ft->ft_count, 1);
/*
 * Return a flow entry to its UMA zone and decrement the table's entry
 * count.  Must pair with flow_alloc() on the same table so the zone and
 * counter stay consistent.
 */
924 flow_free(struct flentry *fle, struct flowtable *ft)
928 zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
929 atomic_add_int(&ft->ft_count, -1);
930 uma_zfree(zone, fle);
/*
 * Hysteresis check on table occupancy against the global nmbflows cap:
 * clears "full" once count drops below 7/8 of the cap, sets it above
 * 31/32.  On the transition to full the cleaner is woken, its period is
 * shortened to 4*hz, and (for transmit-cache tables) all idle timeouts
 * collapse to 5 s; the transition back restores 20*hz and 30 s timeouts.
 * Returns the (possibly updated) ft_full state.
 */
934 flow_full(struct flowtable *ft)
940 count = ft->ft_count;
942 if (full && (count < (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 3))))
944 else if (!full && (count > (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 5))))
947 if (full && !ft->ft_full) {
948 flowclean_freq = 4*hz;
949 if ((ft->ft_flags & FL_HASH_ALL) == 0)
950 ft->ft_udp_idle = ft->ft_fin_wait_idle =
951 ft->ft_syn_idle = ft->ft_tcp_idle = 5;
952 cv_broadcast(&flowclean_cv);
953 } else if (!full && ft->ft_full) {
954 flowclean_freq = 20*hz;
955 if ((ft->ft_flags & FL_HASH_ALL) == 0)
956 ft->ft_udp_idle = ft->ft_fin_wait_idle =
957 ft->ft_syn_idle = ft->ft_tcp_idle = 30;
960 return (ft->ft_full);
/*
 * Insert a new flow entry for (hash, key, fibnum) caching the route and
 * llentry from `ro`.  Under the bucket lock: an empty bucket sets the
 * occupancy bit and links the new entry at the head; otherwise the chain
 * is walked to detect a concurrent insert or collision (in which case the
 * new entry is freed and, unless FL_OVERWRITE, the insert is abandoned)
 * and the new entry is appended at the tail, updating the max-depth stat.
 * Finally the entry's key, protocol, route, llentry, hash, fib and
 * timestamp are filled in before the bucket is unlocked.
 */
964 flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
965 uint32_t fibnum, struct route *ro, uint16_t flags)
967 struct flentry *fle, *fletail, *newfle, **flep;
968 struct flowtable_stats *fs = &ft->ft_stats[curcpu];
973 newfle = flow_alloc(ft);
977 newfle->f_flags |= (flags & FL_IPV6);
978 proto = flags_to_proto(flags);
980 FL_ENTRY_LOCK(ft, hash);
981 mask = flowtable_mask(ft);
982 flep = flowtable_entry(ft, hash);
983 fletail = fle = *flep;
/* empty bucket: mark it occupied and install the new entry */
986 bit_set(mask, FL_ENTRY_INDEX(ft, hash));
987 *flep = fle = newfle;
994 * find end of list and make sure that we were not
995 * preempted by another thread handling this flow
997 while (fle != NULL) {
998 if (fle->f_fhash == hash && !flow_stale(ft, fle)) {
1000 * there was either a hash collision
1001 * or we lost a race to insert
1003 FL_ENTRY_UNLOCK(ft, hash);
1004 flow_free(newfle, ft);
1006 if (flags & FL_OVERWRITE)
1011 * re-visit this double condition XXX
1013 if (fletail->f_next != NULL)
1014 fletail = fle->f_next;
1020 if (depth > fs->ft_max_depth)
1021 fs->ft_max_depth = depth;
/* append at the tail of the collision chain */
1022 fletail->f_next = newfle;
1025 flowtable_set_hashkey(fle, key);
1027 fle->f_proto = proto;
1028 fle->f_rt = ro->ro_rt;
1029 fle->f_lle = ro->ro_lle;
1030 fle->f_fhash = hash;
1031 fle->f_fibnum = fibnum;
1032 fle->f_uptime = time_uptime;
1033 FL_ENTRY_UNLOCK(ft, hash);
/*
 * Kernel API to insert a pre-resolved flow (both route and llentry must
 * already be set in `ro`).  Computes the family-appropriate key/hash and
 * delegates to flowtable_insert() with FL_OVERWRITE forced so an existing
 * entry for the same flow is replaced rather than abandoned.
 */
1038 kern_flowtable_insert(struct flowtable *ft,
1039 struct sockaddr_storage *ssa, struct sockaddr_storage *dsa,
1040 struct route *ro, uint32_t fibnum, int flags)
1042 uint32_t key[9], hash;
1044 flags = (ft->ft_flags | flags | FL_OVERWRITE);
1048 if (ssa->ss_family == AF_INET)
1049 hash = ipv4_flow_lookup_hash_internal((struct sockaddr_in *)ssa,
1050 (struct sockaddr_in *)dsa, key, flags);
1053 if (ssa->ss_family == AF_INET6)
1054 hash = ipv6_flow_lookup_hash_internal((struct sockaddr_in6 *)ssa,
1055 (struct sockaddr_in6 *)dsa, key, flags);
/* refuse to cache an unresolved flow */
1057 if (ro->ro_rt == NULL || ro->ro_lle == NULL)
1060 FLDPRINTF(ft, FL_DEBUG,
1061 "kern_flowtable_insert: key=%x:%x:%x hash=%x fibnum=%d flags=%x\n",
1062 key[0], key[1], key[2], hash, fibnum, flags);
1063 return (flowtable_insert(ft, hash, key, fibnum, ro, flags));
/*
 * Compare an entry's stored hash key with a candidate key, word by word.
 * Returns 0 on the first mismatch (the all-equal return path is elided).
 * BUG FIX: the variant casts were inverted — an entry flagged FL_IPV6 is
 * a struct flentry_v6 (9-word key) and must not be type-punned as the
 * 3-word flentry_v4, and vice versa.  The casts below now match the flag;
 * nwords is set on elided lines of each branch.
 */
1067 flowtable_key_equal(struct flentry *fle, uint32_t *key)
1072 if (fle->f_flags & FL_IPV6) {
1074 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
1077 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
1080 for (i = 0; i < nwords; i++)
1081 if (hashkey[i] != key[i])
/*
 * Family dispatcher for mbuf-driven lookups.  On a hit, the flow hash is
 * stashed in the mbuf as its flowid (M_FLOWID) unless one is already set,
 * so downstream queueing can reuse it.
 */
1088 flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af)
1090 struct flentry *fle = NULL;
1094 fle = flowtable_lookup_mbuf4(ft, m);
1098 fle = flowtable_lookup_mbuf6(ft, m);
1100 if (fle != NULL && m != NULL && (m->m_flags & M_FLOWID) == 0) {
1101 m->m_flags |= M_FLOWID;
1102 m->m_pkthdr.flowid = fle->f_fhash;
/*
 * Core lookup: hash the (ssa, dsa, fibnum, proto) tuple, probe the table,
 * and on a valid hit refresh the entry's timestamp and return it.  On a
 * miss (unless FL_NOAUTO or the table is full) resolve the route via the
 * table's rtalloc hook, resolve the L2 entry via llentry_update(), insert
 * the new flow and return it.  Loopback/self-addressed IPv4 and IPv6
 * site-local destinations are never cached.  Many control-flow lines
 * (returns, else branches, labels) are elided in this listing.
 */
1108 flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
1109 struct sockaddr_storage *dsa, uint32_t fibnum, int flags)
1111 uint32_t key[9], hash;
1112 struct flentry *fle;
1113 struct flowtable_stats *fs = &ft->ft_stats[curcpu];
1117 struct llentry *lle;
1118 struct route sro, *ro;
1119 struct route_in6 sro6;
1121 sro.ro_rt = sro6.ro_rt = NULL;
1122 sro.ro_lle = sro6.ro_lle = NULL;
1125 flags |= ft->ft_flags;
1126 proto = flags_to_proto(flags);
1128 if (ssa->ss_family == AF_INET) {
1129 struct sockaddr_in *ssin, *dsin;
1132 memcpy(&ro->ro_dst, dsa, sizeof(struct sockaddr_in));
1133 dsin = (struct sockaddr_in *)dsa;
1134 ssin = (struct sockaddr_in *)ssa;
/* don't cache self-addressed or loopback (127/8) flows */
1135 if ((dsin->sin_addr.s_addr == ssin->sin_addr.s_addr) ||
1136 (ntohl(dsin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1137 (ntohl(ssin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
1140 hash = ipv4_flow_lookup_hash_internal(ssin, dsin, key, flags);
1144 if (ssa->ss_family == AF_INET6) {
1145 struct sockaddr_in6 *ssin6, *dsin6;
1147 ro = (struct route *)&sro6;
1148 memcpy(&sro6.ro_dst, dsa,
1149 sizeof(struct sockaddr_in6));
1150 dsin6 = (struct sockaddr_in6 *)dsa;
1151 ssin6 = (struct sockaddr_in6 *)ssa;
1154 hash = ipv6_flow_lookup_hash_internal(ssin6, dsin6, key, flags);
1158 * Ports are zero and this isn't a transmit cache
1159 * - thus not a protocol for which we need to keep
1161 * FL_HASH_ALL => key[0] != 0 for TCP || UDP || SCTP
1163 if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_ALL)))
1167 FL_ENTRY_LOCK(ft, hash);
1168 if ((fle = FL_ENTRY(ft, hash)) == NULL) {
1169 FL_ENTRY_UNLOCK(ft, hash);
1173 rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
1174 lle = __DEVOLATILE(struct llentry *, fle->f_lle);
/* hit requires matching hash, key, proto, fib, and a live route */
1176 && fle->f_fhash == hash
1177 && flowtable_key_equal(fle, key)
1178 && (proto == fle->f_proto)
1179 && (fibnum == fle->f_fibnum)
1180 && (rt->rt_flags & RTF_UP)
1181 && (rt->rt_ifp != NULL)) {
1183 fle->f_uptime = time_uptime;
1184 fle->f_flags |= flags;
1185 FL_ENTRY_UNLOCK(ft, hash);
1187 } else if (fle->f_next != NULL) {
1191 FL_ENTRY_UNLOCK(ft, hash);
1193 if (flags & FL_NOAUTO || flow_full(ft))
1198 * This bit of code ends up locking the
1199 * same route 3 times (just like ip_output + ether_output)
1201 * - in rt_check when called by arpresolve
1202 * - dropping the refcount for the rtentry
1204 * This could be consolidated to one if we wrote a variant
1205 * of arpresolve with an rt_check variant that expected to
1206 * receive the route locked
1210 if ((ro->ro_dst.sa_family != AF_INET) &&
1211 (ro->ro_dst.sa_family != AF_INET6))
1212 panic("sa_family == %d\n", ro->ro_dst.sa_family);
1215 ft->ft_rtalloc(ro, hash, fibnum);
1216 if (ro->ro_rt == NULL)
1217 error = ENETUNREACH;
1219 struct llentry *lle = NULL;
1220 struct sockaddr_storage *l3addr;
1221 struct rtentry *rt = ro->ro_rt;
1222 struct ifnet *ifp = rt->rt_ifp;
/* p2p and loopback interfaces have no L2 resolution to cache */
1224 if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
1230 if (ssa->ss_family == AF_INET6) {
1231 struct sockaddr_in6 *dsin6;
1233 dsin6 = (struct sockaddr_in6 *)dsa;
1234 if (in6_localaddr(&dsin6->sin6_addr)) {
/* resolve L2 against the gateway for indirect routes */
1240 if (rt->rt_flags & RTF_GATEWAY)
1241 l3addr = (struct sockaddr_storage *)rt->rt_gateway;
1244 l3addr = (struct sockaddr_storage *)&ro->ro_dst;
1245 llentry_update(&lle, LLTABLE6(ifp), l3addr, ifp);
1249 if (ssa->ss_family == AF_INET) {
1250 if (rt->rt_flags & RTF_GATEWAY)
1251 l3addr = (struct sockaddr_storage *)rt->rt_gateway;
1253 l3addr = (struct sockaddr_storage *)&ro->ro_dst;
1254 llentry_update(&lle, LLTABLE(ifp), l3addr, ifp);
1265 error = flowtable_insert(ft, hash, key, fibnum, ro, flags);
1275 return ((error) ? NULL : fle);
1279 * used by the bit_alloc macro
1281 #define calloc(count, size) malloc((count)*(size), M_DEVBUF, M_WAITOK|M_ZERO)
/*
 * Allocate and initialize a flowtable with `nentry` buckets, wiring up
 * the per-CPU or global locking/storage variant, the (multipath-aware)
 * route allocator, the idle timeouts, and linking the table onto the
 * per-vnet cleaner list.  The hash jitter is seeded lazily on the first
 * table created in the vnet.
 * BUG FIX: ft_tcp_idle was initialized from V_flowtable_fin_wait_expire,
 * so established-TCP flows expired on the FIN_WAIT timeout (600 s)
 * instead of the configured TCP timeout (TCP_IDLE, one day, per the
 * net.inet.flowtable.tcp_expire sysctl).  It now uses
 * V_flowtable_tcp_expire, which was previously never read here.
 */
1284 flowtable_alloc(char *name, int nentry, int flags)
1286 struct flowtable *ft, *fttail;
1289 if (V_flow_hashjitter == 0)
1290 V_flow_hashjitter = arc4random();
1292 KASSERT(nentry > 0, ("nentry must be > 0, is %d\n", nentry));
1294 ft = malloc(sizeof(struct flowtable),
1295 M_RTABLE, M_WAITOK | M_ZERO);
1298 ft->ft_flags = flags;
1299 ft->ft_size = nentry;
1301 ft->ft_rtalloc = rtalloc_mpath_fib;
1303 ft->ft_rtalloc = in_rtalloc_ign_wrapper;
1305 if (flags & FL_PCPU) {
1306 ft->ft_lock = flowtable_pcpu_lock;
1307 ft->ft_unlock = flowtable_pcpu_unlock;
1309 for (i = 0; i <= mp_maxid; i++) {
1310 ft->ft_table.pcpu[i] =
1311 malloc(nentry*sizeof(struct flentry *),
1312 M_RTABLE, M_WAITOK | M_ZERO);
1313 ft->ft_masks[i] = bit_alloc(nentry);
1316 ft->ft_lock_count = 2*(powerof2(mp_maxid + 1) ? (mp_maxid + 1):
1317 (fls(mp_maxid + 1) << 1));
1319 ft->ft_lock = flowtable_global_lock;
1320 ft->ft_unlock = flowtable_global_unlock;
1321 ft->ft_table.global =
1322 malloc(nentry*sizeof(struct flentry *),
1323 M_RTABLE, M_WAITOK | M_ZERO);
1324 ft->ft_locks = malloc(ft->ft_lock_count*sizeof(struct mtx),
1325 M_RTABLE, M_WAITOK | M_ZERO);
1326 for (i = 0; i < ft->ft_lock_count; i++)
1327 mtx_init(&ft->ft_locks[i], "flow", NULL, MTX_DEF|MTX_DUPOK);
1329 ft->ft_masks[0] = bit_alloc(nentry);
1331 ft->ft_tmpmask = bit_alloc(nentry);
1334 * In the local transmit case the table truly is
1335 * just a cache - so everything is eligible for
1336 * replacement after 5s of non-use
1338 if (flags & FL_HASH_ALL) {
1339 ft->ft_udp_idle = V_flowtable_udp_expire;
1340 ft->ft_syn_idle = V_flowtable_syn_expire;
1341 ft->ft_fin_wait_idle = V_flowtable_fin_wait_expire;
1342 ft->ft_tcp_idle = V_flowtable_tcp_expire;
1344 ft->ft_udp_idle = ft->ft_fin_wait_idle =
1345 ft->ft_syn_idle = ft->ft_tcp_idle = 30;
1350 * hook in to the cleaner list
1352 if (V_flow_list_head == NULL)
1353 V_flow_list_head = ft;
1355 fttail = V_flow_list_head;
1356 while (fttail->ft_next != NULL)
1357 fttail = fttail->ft_next;
1358 fttail->ft_next = ft;
1365 * The rest of the code is devoted to garbage collection of expired entries.
1366 * It is a new addition made necessary by the switch to dynamically allocating
/*
 * Release a flow entry's cached references: de-volatilize the rtentry
 * and llentry pointers (the actual RTFREE/LLE_FREE and flow_free calls
 * fall in elided lines).
 */
1371 fle_free(struct flentry *fle, struct flowtable *ft)
1374 struct llentry *lle;
1376 rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
1377 lle = __DEVOLATILE(struct llentry *, fle->f_lle);
/*
 * Reclaim stale entries from the caller's view of the table.  A snapshot
 * of the occupancy mask drives a bit_ffs scan over occupied buckets;
 * within each bucket (held under its lock) stale entries — optionally
 * only those referencing route `rt` when rt != NULL — are unlinked onto
 * a private free list, clearing the occupancy bit when a bucket empties.
 * The free list is destroyed with fle_free() after all locks are dropped.
 */
1384 flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
1386 int curbit = 0, count;
1387 struct flentry *fle, **flehead, *fleprev;
1388 struct flentry *flefreehead, *flefreetail, *fletmp;
1389 bitstr_t *mask, *tmpmask;
1390 struct flowtable_stats *fs = &ft->ft_stats[curcpu];
1392 flefreehead = flefreetail = NULL;
1393 mask = flowtable_mask(ft);
1394 tmpmask = ft->ft_tmpmask;
/* scratch copy so bits can be cleared as buckets are visited */
1395 memcpy(tmpmask, mask, ft->ft_size/8);
1397 * XXX Note to self, bit_ffs operates at the byte level
1398 * and thus adds gratuitous overhead
1400 bit_ffs(tmpmask, ft->ft_size, &curbit);
1401 while (curbit != -1) {
1402 if (curbit >= ft->ft_size || curbit < -1) {
1404 "warning: bad curbit value %d \n",
1409 FL_ENTRY_LOCK(ft, curbit);
1410 flehead = flowtable_entry(ft, curbit);
1411 fle = fleprev = *flehead;
1413 fs->ft_free_checks++;
1415 if (fle == NULL && curbit > 0) {
1417 "warning bit=%d set, but no fle found\n",
1421 while (fle != NULL) {
/* rt filter: when flushing a specific route, skip other entries */
1423 if (__DEVOLATILE(struct rtentry *, fle->f_rt) != rt) {
1428 } else if (!flow_stale(ft, fle)) {
1434 * delete head of the list
1436 if (fleprev == *flehead) {
1438 if (fle == fleprev) {
1439 fleprev = *flehead = fle->f_next;
1441 fleprev = *flehead = fle;
1445 * don't advance fleprev
1448 fleprev->f_next = fle->f_next;
1449 fle = fleprev->f_next;
/* append the victim to the private free list */
1452 if (flefreehead == NULL)
1453 flefreehead = flefreetail = fletmp;
1455 flefreetail->f_next = fletmp;
1456 flefreetail = fletmp;
1458 fletmp->f_next = NULL;
1460 if (*flehead == NULL)
1461 bit_clear(mask, curbit);
1462 FL_ENTRY_UNLOCK(ft, curbit);
1463 bit_clear(tmpmask, curbit);
1464 bit_ffs(tmpmask, ft->ft_size, &curbit);
/* destroy reclaimed entries outside the bucket locks */
1467 while ((fle = flefreehead) != NULL) {
1468 flefreehead = fle->f_next;
1473 if (V_flowtable_debug && count)
1474 log(LOG_DEBUG, "freed %d flow entries\n", count);
/*
 * Purge every entry referencing route 'rt' from flowtable 'ft'.  For
 * per-CPU tables the calling thread binds itself to each CPU in turn so
 * flowtable_free_stale() sees that CPU's private table; for global
 * tables a single pass suffices.  Binding is skipped before SMP start.
 * NOTE(review): the declaration of 'i' and some braces fall outside the
 * visible lines of this chunk.
 */
flowtable_route_flush(struct flowtable *ft, struct rtentry *rt)
	if (ft->ft_flags & FL_PCPU) {
		for (i = 0; i <= mp_maxid; i++) {
			/* Migrate to CPU i to access its table slice. */
			if (smp_started == 1) {
				thread_lock(curthread);
				sched_bind(curthread, i);
				thread_unlock(curthread);
			flowtable_free_stale(ft, rt);
			/* Release the CPU binding again. */
			if (smp_started == 1) {
				thread_lock(curthread);
				sched_unbind(curthread);
				thread_unlock(curthread);
		flowtable_free_stale(ft, rt);
/*
 * Walk every flowtable registered in the current vnet and free stale
 * entries.  Passing rt == NULL to flowtable_free_stale() means "any
 * stale entry" rather than entries for a specific route.  Per-CPU
 * tables are visited by binding to each CPU, exactly as in
 * flowtable_route_flush().
 * NOTE(review): 'i' declaration, list advance (ft = ft->ft_next) and
 * closing braces are outside the visible lines.
 */
flowtable_clean_vnet(void)
	struct flowtable *ft;

	ft = V_flow_list_head;
	while (ft != NULL) {
		if (ft->ft_flags & FL_PCPU) {
			for (i = 0; i <= mp_maxid; i++) {
				/* Bind to CPU i for its table slice. */
				if (smp_started == 1) {
					thread_lock(curthread);
					sched_bind(curthread, i);
					thread_unlock(curthread);
				flowtable_free_stale(ft, NULL);
				/* Unbind before moving to the next CPU. */
				if (smp_started == 1) {
					thread_lock(curthread);
					sched_unbind(curthread);
					thread_unlock(curthread);
			flowtable_free_stale(ft, NULL);
/*
 * Main loop of the flowtable cleaner kernel process: periodically
 * iterate over all vnets, garbage-collect their flowtables, then sleep
 * on flowclean_cv.  The cv_broadcast() before the timed wait wakes any
 * flowtable_flush() caller blocked waiting for a cycle to complete.
 */
flowtable_cleaner(void)
	VNET_ITERATOR_DECL(vnet_iter);

	log(LOG_INFO, "flowtable cleaner started\n");
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		flowtable_clean_vnet();
	VNET_LIST_RUNLOCK();
	/*
	 * The 10 second interval between cleaning checks
	 * NOTE(review): flowtable_init() sets flowclean_freq = 20*hz, so
	 * the "10 second" figure above looks stale — confirm.
	 */
	mtx_lock(&flowclean_lock);
	cv_broadcast(&flowclean_cv);
	cv_timedwait(&flowclean_cv, &flowclean_lock, flowclean_freq);
	mtx_unlock(&flowclean_lock);
/*
 * Block until the cleaner thread completes at least one full cleaning
 * cycle, detected by flowclean_cycles advancing.  Registered as the
 * ifnet departure event handler in flowtable_init().
 * NOTE(review): flowclean_cycles is not incremented in the visible
 * lines — presumably the cleaner bumps it each pass; confirm against
 * the full file.
 */
flowtable_flush(void *unused __unused)
	mtx_lock(&flowclean_lock);
	start = flowclean_cycles;
	while (start == flowclean_cycles) {
		/* Kick the cleaner, then wait for it to signal. */
		cv_broadcast(&flowclean_cv);
		cv_wait(&flowclean_cv, &flowclean_lock);
	mtx_unlock(&flowclean_lock);
/* Descriptor for the cleaner kernel process; initializer fields are not
 * visible in this chunk. */
static struct kproc_desc flow_kp = {
/* Launch the cleaner once idle kthreads can be started. */
SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp);
/*
 * Per-vnet initialization: derive the flow-entry limit from system size
 * (maxusers, CPU count), create the IPv4 and IPv6 UMA zones, cap both
 * zones at that limit, and mark the subsystem ready for use.
 */
flowtable_init_vnet(const void *unused __unused)
	/* Scale the maximum number of flows with machine size. */
	V_flowtable_nmbflows = 1024 + maxusers * 64 * mp_ncpus;
	V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
	    NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
	V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
	    NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
	uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
	uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
	/* Gate: lookups/insertions may proceed from here on. */
	V_flowtable_ready = 1;
VNET_SYSINIT(flowtable_init_vnet, SI_SUB_SMP, SI_ORDER_ANY,
    flowtable_init_vnet, NULL);
/*
 * One-time global initialization: the condvar/mutex pair used for the
 * cleaner handshake, the ifnet-departure hook that flushes flows when
 * an interface disappears, and the cleaning interval (20 seconds).
 */
flowtable_init(const void *unused __unused)
	cv_init(&flowclean_cv, "flowcleanwait");
	mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
	/* Ensure stale flows are purged when their ifnet goes away. */
	EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
	    EVENTHANDLER_PRI_ANY);
	flowclean_freq = 20*hz;
SYSINIT(flowtable_init, SI_SUB_SMP, SI_ORDER_MIDDLE,
    flowtable_init, NULL);
/*
 * Per-vnet teardown: mark the subsystem unavailable before destroying
 * the UMA zones so no new flow entries can be allocated from them.
 */
flowtable_uninit(const void *unused __unused)
	V_flowtable_ready = 0;
	uma_zdestroy(V_flow_ipv4_zone);
	uma_zdestroy(V_flow_ipv6_zone);
VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
    flowtable_uninit, NULL);
1635 flowtable_get_hashkey(struct flentry *fle)
1639 if (fle->f_flags & FL_IPV6)
1640 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
1642 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
/*
 * Return the occupancy bitmap for the given CPU's slice of the table;
 * global (non-FL_PCPU) tables keep a single mask at index 0.
 */
flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
	if (ft->ft_flags & FL_PCPU)
		mask = ft->ft_masks[cpuid];
	/* Global table: one shared mask. */
		mask = ft->ft_masks[0];
/*
 * Return the address of the hash-bucket head for 'hash' within the
 * table slice belonging to 'cpuid', or within the single global table
 * when the flowtable is not per-CPU.
 */
static struct flentry **
flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
	struct flentry **fle;
	int index = (hash % ft->ft_size);

	if (ft->ft_flags & FL_PCPU) {
		fle = &ft->ft_table.pcpu[cpuid][index];
		fle = &ft->ft_table.global[index];
/*
 * DDB helper: pretty-print one flow entry — addresses/ports decoded
 * from the hash key, flag bits, route/interface state, raw key words,
 * hash value and idle time.  Runs from the kernel debugger, hence the
 * db_printf() output.
 *
 * NOTE(review): the assignments to 'rt' and 'ifp', declarations of
 * 'idle_time'/'hashkey' and several braces are not visible in this
 * chunk, so 'rt_valid'/'ifp_valid' appear to read uninitialized values
 * here — confirm against the full file.
 */
flow_show(struct flowtable *ft, struct flentry *fle)
	int rt_valid, ifp_valid;
	uint16_t sport, dport;
	char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
	volatile struct rtentry *rt;
	struct ifnet *ifp = NULL;

	idle_time = (int)(time_uptime - fle->f_uptime);
	rt_valid = rt != NULL;
	ifp_valid = ifp != NULL;
	hashkey = flowtable_get_hashkey(fle);
	if (fle->f_flags & FL_IPV6)
	/* IPv4: destination address is in key word 2. */
	inet_ntoa_r(*(struct in_addr *) &hashkey[2], daddr);
	if (ft->ft_flags & FL_HASH_ALL) {
		/* Full 4-tuple hashing: also decode source and ports. */
		inet_ntoa_r(*(struct in_addr *) &hashkey[1], saddr);
		sport = ntohs(((uint16_t *)hashkey)[0]);
		dport = ntohs(((uint16_t *)hashkey)[1]);
		db_printf("%s:%d->%s:%d",
		    saddr, sport, daddr,
		db_printf("%s ", daddr);
	/* Decode per-flow flag bits. */
	if (fle->f_flags & FL_STALE)
		db_printf(" FL_STALE ");
	if (fle->f_flags & FL_TCP)
		db_printf(" FL_TCP ");
	if (fle->f_flags & FL_UDP)
		db_printf(" FL_UDP ");
	if (rt->rt_flags & RTF_UP)
		db_printf(" RTF_UP ");
	if (ifp->if_flags & IFF_LOOPBACK)
		db_printf(" IFF_LOOPBACK ");
	if (ifp->if_flags & IFF_UP)
		db_printf(" IFF_UP ");
	if (ifp->if_flags & IFF_POINTOPOINT)
		db_printf(" IFF_POINTOPOINT ");
	/* Dump the raw key: 9 words for IPv6, 3 for IPv4. */
	if (fle->f_flags & FL_IPV6)
		db_printf("\n\tkey=%08x:%08x:%08x%08x:%08x:%08x%08x:%08x:%08x",
		    hashkey[0], hashkey[1], hashkey[2],
		    hashkey[3], hashkey[4], hashkey[5],
		    hashkey[6], hashkey[7], hashkey[8]);
		db_printf("\n\tkey=%08x:%08x:%08x ",
		    hashkey[0], hashkey[1], hashkey[2]);
	db_printf("hash=%08x idle_time=%03d"
	    "\n\tfibnum=%02d rt=%p",
	    fle->f_fhash, idle_time, fle->f_fibnum, fle->f_rt);
/*
 * DDB helper: dump every live flow entry in the given CPU's slice of
 * flowtable 'ft', using the same copy-the-mask / bit_ffs iteration
 * pattern as flowtable_free_stale().
 * NOTE(review): 'curbit' declaration, the db_printf() continuation, the
 * fle initialization from *flehead and the per-entry flow_show() call
 * are not visible in this chunk.
 */
flowtable_show(struct flowtable *ft, int cpuid)
	struct flentry *fle, **flehead;
	bitstr_t *mask, *tmpmask;

	db_printf("cpu: %d\n", cpuid);
	mask = flowtable_mask_pcpu(ft, cpuid);
	tmpmask = ft->ft_tmpmask;
	/* Iterate over a scratch copy of the occupancy bitmap. */
	memcpy(tmpmask, mask, ft->ft_size/8);
	/*
	 * XXX Note to self, bit_ffs operates at the byte level
	 * and thus adds gratuitous overhead
	 */
	bit_ffs(tmpmask, ft->ft_size, &curbit);
	while (curbit != -1) {
		if (curbit >= ft->ft_size || curbit < -1) {
			db_printf("warning: bad curbit value %d \n",
		/* curbit doubles as the bucket index (hash % size). */
		flehead = flowtable_entry_pcpu(ft, curbit, cpuid);
		while (fle != NULL) {
		bit_clear(tmpmask, curbit);
		bit_ffs(tmpmask, ft->ft_size, &curbit);
/*
 * DDB helper: dump every flowtable in the current vnet.  Per-CPU tables
 * are dumped one CPU slice at a time; global tables are dumped with
 * cpuid -1.
 * NOTE(review): 'i' declaration, the list advance (ft = ft->ft_next)
 * and closing braces are outside the visible lines.
 */
flowtable_show_vnet(void)
	struct flowtable *ft;

	ft = V_flow_list_head;
	while (ft != NULL) {
		printf("name: %s\n", ft->ft_name);
		if (ft->ft_flags & FL_PCPU) {
			for (i = 0; i <= mp_maxid; i++) {
				flowtable_show(ft, i);
			flowtable_show(ft, -1);
1799 DB_SHOW_COMMAND(flowtables, db_show_flowtables)
1801 VNET_ITERATOR_DECL(vnet_iter);
1803 VNET_FOREACH(vnet_iter) {
1804 CURVNET_SET(vnet_iter);
1805 flowtable_show_vnet();