1 /**************************************************************************
3 Copyright (c) 2008-2010, BitGravity Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the BitGravity Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include "opt_route.h"
31 #include "opt_mpath.h"
34 #include "opt_inet6.h"
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
39 #include <sys/param.h>
40 #include <sys/types.h>
41 #include <sys/bitstring.h>
42 #include <sys/condvar.h>
43 #include <sys/callout.h>
44 #include <sys/kernel.h>
45 #include <sys/kthread.h>
46 #include <sys/limits.h>
47 #include <sys/malloc.h>
51 #include <sys/sched.h>
53 #include <sys/socket.h>
54 #include <sys/syslog.h>
55 #include <sys/sysctl.h>
58 #include <net/if_llatbl.h>
59 #include <net/if_var.h>
60 #include <net/route.h>
61 #include <net/flowtable.h>
64 #include <netinet/in.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in_var.h>
67 #include <netinet/if_ether.h>
68 #include <netinet/ip.h>
70 #include <netinet/ip6.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 #include <netinet/sctp.h>
76 #include <libkern/jenkins.h>
80 uint16_t ip_sport; /* source port */
81 uint16_t ip_dport; /* destination port */
82 in_addr_t ip_saddr; /* source address */
83 in_addr_t ip_daddr; /* destination address */
87 struct ipv4_tuple ipf_ipt;
92 uint16_t ip_sport; /* source port */
93 uint16_t ip_dport; /* destination port */
94 struct in6_addr ip_saddr; /* source address */
95 struct in6_addr ip_daddr; /* destination address */
99 struct ipv6_tuple ipf_ipt;
104 volatile uint32_t f_fhash; /* hash flowing forward */
105 uint16_t f_flags; /* flow flags */
107 uint8_t f_proto; /* protocol */
108 uint32_t f_fibnum; /* fib index */
109 uint32_t f_uptime; /* uptime at last access */
110 struct flentry *f_next; /* pointer to collision entry */
111 volatile struct rtentry *f_rt; /* rtentry for flow */
112 volatile struct llentry *f_lle; /* llentry for flow */
116 struct flentry fl_entry;
117 union ipv4_flow fl_flow;
121 struct flentry fl_entry;
122 union ipv6_flow fl_flow;
/*
 * Convenience aliases so a struct flentry_v4/flentry_v6 can access the
 * fields of its embedded struct flentry directly.
 *
 * Fix: the aliases previously expanded to fl_entry.fl_fhash etc., but the
 * members of struct flentry are named f_fhash/f_flags/f_proto/f_uptime/
 * f_rt/f_lle (see the flentry field list above), so the old expansions
 * could never resolve.  Point them at the real member names.
 */
125 #define fl_fhash fl_entry.f_fhash
126 #define fl_flags fl_entry.f_flags
127 #define fl_proto fl_entry.f_proto
128 #define fl_uptime fl_entry.f_uptime
129 #define fl_rt fl_entry.f_rt
130 #define fl_lle fl_entry.f_lle
/* Flow idle timeouts, in seconds. */
132 #define SECS_PER_HOUR 3600
133 #define SECS_PER_DAY (24*SECS_PER_HOUR)
/*
 * NOTE(review): SYN_IDLE and UDP_IDLE are referenced below but their
 * definitions are not visible in this excerpt.
 */
137 #define FIN_WAIT_IDLE 600
138 #define TCP_IDLE SECS_PER_DAY
/*
 * Per-table function pointer types: bucket lock/unlock keyed by hash,
 * and the route allocator used to resolve a flow's rtentry.
 */
141 typedef void fl_lock_t(struct flowtable *, uint32_t);
142 typedef void fl_rtalloc_t(struct route *, uint32_t, u_int);
145 struct flentry **global;
146 struct flentry **pcpu[MAXCPU];
/*
 * Per-CPU flowtable counters; the struct is cache-line aligned so each
 * CPU's slot in ft_stats[] does not share a line with its neighbor.
 * NOTE(review): counters referenced elsewhere (ft_misses, ft_frees,
 * ft_hits, ft_lookups) are not visible here -- struct appears truncated
 * in this excerpt.
 */
149 struct flowtable_stats {
150 uint64_t ft_collisions;
151 uint64_t ft_allocated;
153 uint64_t ft_max_depth;
154 uint64_t ft_free_checks;
158 } __aligned(CACHE_LINE_SIZE);
161 struct flowtable_stats ft_stats[MAXCPU];
167 fl_lock_t *ft_unlock;
168 fl_rtalloc_t *ft_rtalloc;
170 * XXX need to pad out
172 struct mtx *ft_locks;
173 union flentryp ft_table;
174 bitstr_t *ft_masks[MAXCPU];
175 bitstr_t *ft_tmpmask;
176 struct flowtable *ft_next;
178 uint32_t ft_count __aligned(CACHE_LINE_SIZE);
179 uint32_t ft_udp_idle __aligned(CACHE_LINE_SIZE);
180 uint32_t ft_fin_wait_idle;
181 uint32_t ft_syn_idle;
182 uint32_t ft_tcp_idle;
184 } __aligned(CACHE_LINE_SIZE);
186 static struct proc *flowcleanerproc;
187 static VNET_DEFINE(struct flowtable *, flow_list_head);
188 static VNET_DEFINE(uint32_t, flow_hashjitter);
189 static VNET_DEFINE(uma_zone_t, flow_ipv4_zone);
190 static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);
192 #define V_flow_list_head VNET(flow_list_head)
193 #define V_flow_hashjitter VNET(flow_hashjitter)
194 #define V_flow_ipv4_zone VNET(flow_ipv4_zone)
195 #define V_flow_ipv6_zone VNET(flow_ipv6_zone)
198 static struct cv flowclean_cv;
199 static struct mtx flowclean_lock;
200 static uint32_t flowclean_cycles;
201 static uint32_t flowclean_freq;
203 #ifdef FLOWTABLE_DEBUG
204 #define FLDPRINTF(ft, flags, fmt, ...) \
206 if ((ft)->ft_flags & (flags)) \
207 printf((fmt), __VA_ARGS__); \
211 #define FLDPRINTF(ft, flags, fmt, ...)
218 * - Make flowtable stats per-cpu, aggregated at sysctl call time,
219 * to avoid extra cache evictions caused by incrementing a shared
221 * - add sysctls to resize && flush flow tables
222 * - Add per flowtable sysctls for statistics and configuring timeouts
223 * - add saturation counter to rtentry to support per-packet load-balancing
224 * add flag to indicate round-robin flow, add list lookup from head
226 * - add sysctl / device node / syscall to support exporting and importing
227 * of flows with flag to indicate that a flow was imported so should
228 * not be considered for auto-cleaning
229 * - support explicit connection state (currently only ad-hoc for DSR)
230 * - idetach() cleanup for options VIMAGE builds.
232 VNET_DEFINE(int, flowtable_enable) = 1;
233 static VNET_DEFINE(int, flowtable_debug);
234 static VNET_DEFINE(int, flowtable_syn_expire) = SYN_IDLE;
235 static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
236 static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE;
237 static VNET_DEFINE(int, flowtable_tcp_expire) = TCP_IDLE;
238 static VNET_DEFINE(int, flowtable_nmbflows);
239 static VNET_DEFINE(int, flowtable_ready) = 0;
241 #define V_flowtable_enable VNET(flowtable_enable)
242 #define V_flowtable_debug VNET(flowtable_debug)
243 #define V_flowtable_syn_expire VNET(flowtable_syn_expire)
244 #define V_flowtable_udp_expire VNET(flowtable_udp_expire)
245 #define V_flowtable_fin_wait_expire VNET(flowtable_fin_wait_expire)
246 #define V_flowtable_tcp_expire VNET(flowtable_tcp_expire)
247 #define V_flowtable_nmbflows VNET(flowtable_nmbflows)
248 #define V_flowtable_ready VNET(flowtable_ready)
250 SYSCTL_NODE(_net_inet, OID_AUTO, flowtable, CTLFLAG_RD, NULL, "flowtable");
251 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, debug, CTLFLAG_RW,
252 &VNET_NAME(flowtable_debug), 0, "print debug info.");
253 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, enable, CTLFLAG_RW,
254 &VNET_NAME(flowtable_enable), 0, "enable flowtable caching.");
257 * XXX This does not end up updating timeouts at runtime
258 * and only reflects the value for the last table added :-/
260 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, syn_expire, CTLFLAG_RW,
261 &VNET_NAME(flowtable_syn_expire), 0,
262 "seconds after which to remove syn allocated flow.");
263 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, udp_expire, CTLFLAG_RW,
264 &VNET_NAME(flowtable_udp_expire), 0,
265 "seconds after which to remove flow allocated to UDP.");
266 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, fin_wait_expire, CTLFLAG_RW,
267 &VNET_NAME(flowtable_fin_wait_expire), 0,
268 "seconds after which to remove a flow in FIN_WAIT.");
269 SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, tcp_expire, CTLFLAG_RW,
270 &VNET_NAME(flowtable_tcp_expire), 0,
271 "seconds after which to remove flow allocated to a TCP connection.");
275 * Maximum number of flows that can be allocated of a given type.
277 * The table is allocated at boot time (for the pure caching case
278 * there is no reason why this could not be changed at runtime)
279 * and thus (currently) needs to be set with a tunable.
/*
 * Sysctl handler for net.inet.flowtable.nmbflows: adjusts the cap on
 * allocated flow entries.  Only an increase is honored -- a value not
 * larger than the current cap is silently ignored.
 */
282 sysctl_nmbflows(SYSCTL_HANDLER_ARGS)
284 int error, newnmbflows;
286 newnmbflows = V_flowtable_nmbflows;
287 error = sysctl_handle_int(oidp, &newnmbflows, 0, req);
288 if (error == 0 && req->newptr) {
289 if (newnmbflows > V_flowtable_nmbflows) {
290 V_flowtable_nmbflows = newnmbflows;
/* Propagate the raised cap to both UMA zones. */
291 uma_zone_set_max(V_flow_ipv4_zone,
292 V_flowtable_nmbflows);
293 uma_zone_set_max(V_flow_ipv6_zone,
294 V_flowtable_nmbflows);
300 SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
301 CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_nmbflows, "IU",
302 "Maximum number of flows allowed");
/*
 * Emit one "name: value" line per statistics counter into the sbuf.
 * NOTE(review): the counters are uint64_t but are formatted with %jd
 * (signed intmax_t) without a cast; %ju with a (uintmax_t) cast would
 * be the correct pairing.
 */
306 #define FS_PRINT(sb, field) sbuf_printf((sb), "\t%s: %jd\n", #field, fs->ft_##field)
309 fs_print(struct sbuf *sb, struct flowtable_stats *fs)
312 FS_PRINT(sb, collisions);
313 FS_PRINT(sb, allocated);
314 FS_PRINT(sb, misses);
315 FS_PRINT(sb, max_depth);
316 FS_PRINT(sb, free_checks);
319 FS_PRINT(sb, lookups);
/*
 * Render one table's statistics into sb.  For per-CPU tables the
 * counters are summed across all CPUs into the local 'fs' first;
 * ft_max_depth takes the maximum across CPUs rather than the sum.
 * Global tables report slot 0 directly.
 */
323 flowtable_show_stats(struct sbuf *sb, struct flowtable *ft)
326 struct flowtable_stats fs, *pfs;
328 if (ft->ft_flags & FL_PCPU) {
329 bzero(&fs, sizeof(fs));
331 for (i = 0; i <= mp_maxid; i++) {
334 pfs->ft_collisions += ft->ft_stats[i].ft_collisions;
335 pfs->ft_allocated += ft->ft_stats[i].ft_allocated;
336 pfs->ft_misses += ft->ft_stats[i].ft_misses;
337 pfs->ft_free_checks += ft->ft_stats[i].ft_free_checks;
338 pfs->ft_frees += ft->ft_stats[i].ft_frees;
339 pfs->ft_hits += ft->ft_stats[i].ft_hits;
340 pfs->ft_lookups += ft->ft_stats[i].ft_lookups;
341 if (ft->ft_stats[i].ft_max_depth > pfs->ft_max_depth)
342 pfs->ft_max_depth = ft->ft_stats[i].ft_max_depth;
/* Global table: report the single shared counter slot. */
345 pfs = &ft->ft_stats[0];
/*
 * Sysctl handler for net.inet.flowtable.stats: walks the per-vnet table
 * list and prints each table's statistics into a fixed 64KB sbuf, which
 * is then copied out to userland (including the NUL terminator).
 */
351 sysctl_flowtable_stats(SYSCTL_HANDLER_ARGS)
353 struct flowtable *ft;
357 sb = sbuf_new(NULL, NULL, 64*1024, SBUF_FIXEDLEN);
359 ft = V_flow_list_head;
361 sbuf_printf(sb, "\ntable name: %s\n", ft->ft_name);
362 flowtable_show_stats(sb, ft);
366 error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
371 SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
372 NULL, 0, sysctl_flowtable_stats, "A", "flowtable statistics");
/*
 * Adapter matching fl_rtalloc_t: routes via rtalloc_ign_fib() with no
 * ignore flags.  The hash argument is unused here (present only to
 * satisfy the fl_rtalloc_t signature).
 */
377 in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
380 rtalloc_ign_fib(ro, 0, fibnum);
/*
 * Lock the bucket mutex for 'hash' in a global (shared) table.  The
 * mask works because ft_lock_count is kept a power of two.
 */
385 flowtable_global_lock(struct flowtable *table, uint32_t hash)
387 int lock_index = (hash)&(table->ft_lock_count - 1);
389 mtx_lock(&table->ft_locks[lock_index]);
/* Unlock counterpart of flowtable_global_lock(). */
393 flowtable_global_unlock(struct flowtable *table, uint32_t hash)
395 int lock_index = (hash)&(table->ft_lock_count - 1);
397 mtx_unlock(&table->ft_locks[lock_index]);
/*
 * Per-CPU table lock/unlock hooks.  Bodies are not visible in this
 * excerpt -- presumably no-ops or critical sections, since each CPU
 * owns its own hash array; TODO confirm against full source.
 */
401 flowtable_pcpu_lock(struct flowtable *table, uint32_t hash)
408 flowtable_pcpu_unlock(struct flowtable *table, uint32_t hash)
/*
 * Bucket accessors: map a flow hash to its slot index, dereference the
 * slot, and take/drop the table's bucket lock through the per-table
 * function pointers.
 */
414 #define FL_ENTRY_INDEX(table, hash)((hash) % (table)->ft_size)
415 #define FL_ENTRY(table, hash) *flowtable_entry((table), (hash))
416 #define FL_ENTRY_LOCK(table, hash) (table)->ft_lock((table), (hash))
417 #define FL_ENTRY_UNLOCK(table, hash) (table)->ft_unlock((table), (hash))
/*
 * Extra f_flags bits kept above the low byte: entry marked stale,
 * entry is IPv6, and "overwrite an existing matching flow" on insert.
 */
419 #define FL_STALE (1<<8)
420 #define FL_IPV6 (1<<9)
421 #define FL_OVERWRITE (1<<10)
/*
 * Mark a flow entry stale so flow_stale() reports it for reclamation
 * on the next cleaning pass.
 */
424 flow_invalidate(struct flentry *fle)
427 fle->f_flags |= FL_STALE;
/*
 * proto_to_flags(): map an IP protocol number to its FL_* flag bit
 * (body not visible in this excerpt).
 */
431 proto_to_flags(uint8_t proto)
/*
 * flags_to_proto(): inverse mapping -- recover the IPPROTO_* value from
 * the FL_TCP/FL_SCTP/FL_UDP bits carried in the flow flags.
 */
454 flags_to_proto(int flags)
456 int proto, protoflags;
458 protoflags = flags & (FL_TCP|FL_SCTP|FL_UDP);
459 switch (protoflags) {
464 proto = IPPROTO_SCTP;
477 #ifdef FLOWTABLE_DEBUG
/*
 * Debug helper (FLOWTABLE_DEBUG only): print the 4-tuple for a
 * full-hash flow, or just the destination address for a destination-
 * only (transmit cache) flow.
 */
479 ipv4_flow_print_tuple(int flags, int proto, struct sockaddr_in *ssin,
480 struct sockaddr_in *dsin)
482 char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
484 if (flags & FL_HASH_ALL) {
485 inet_ntoa_r(ssin->sin_addr, saddr);
486 inet_ntoa_r(dsin->sin_addr, daddr);
487 printf("proto=%d %s:%d->%s:%d\n",
488 proto, saddr, ntohs(ssin->sin_port), daddr,
489 ntohs(dsin->sin_port));
491 inet_ntoa_r(*(struct in_addr *) &dsin->sin_addr, daddr);
492 printf("proto=%d %s\n", proto, daddr);
/*
 * Extract the IPv4 addresses (and, for FL_HASH_ALL tables, the
 * TCP/UDP/SCTP ports) from the packet in 'm' into ssin/dsin, and OR the
 * protocol's FL_* bits plus any TCP RST/FIN state into *flags.
 * Non-zero return (paths not visible here) means "not a flow we track".
 */
499 ipv4_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
500 struct sockaddr_in *ssin, struct sockaddr_in *dsin, uint16_t *flags)
508 uint16_t sport, dport;
510 proto = sport = dport = 0;
511 ip = mtod(m, struct ip *);
512 dsin->sin_family = AF_INET;
513 dsin->sin_len = sizeof(*dsin);
514 dsin->sin_addr = ip->ip_dst;
515 ssin->sin_family = AF_INET;
516 ssin->sin_len = sizeof(*ssin);
517 ssin->sin_addr = ip->ip_src;
/* Destination-only tables skip the transport header entirely. */
520 if ((*flags & FL_HASH_ALL) == 0) {
521 FLDPRINTF(ft, FL_DEBUG_ALL, "skip port check flags=0x%x ",
526 iphlen = ip->ip_hl << 2; /* XXX options? */
530 th = (struct tcphdr *)((caddr_t)ip + iphlen);
531 sport = th->th_sport;
532 dport = th->th_dport;
/* RST/FIN presence is recorded so the entry ages out faster. */
533 if ((*flags & FL_HASH_ALL) &&
534 (th->th_flags & (TH_RST|TH_FIN)))
538 uh = (struct udphdr *)((caddr_t)ip + iphlen);
539 sport = uh->uh_sport;
540 dport = uh->uh_dport;
543 sh = (struct sctphdr *)((caddr_t)ip + iphlen);
544 sport = sh->src_port;
545 dport = sh->dest_port;
548 FLDPRINTF(ft, FL_DEBUG_ALL, "proto=0x%x not supported\n", proto);
550 /* no port - hence not a protocol we care about */
/* Ports stay in network byte order; they only feed the hash key. */
556 *flags |= proto_to_flags(proto);
557 ssin->sin_port = sport;
558 dsin->sin_port = dport;
/*
 * Build the 3-word IPv4 flow key (word 0: sport|dport for FL_HASH_ALL
 * tables, word 1: source address, word 2: destination address) and
 * return its Jenkins hash, seeded with the per-vnet jitter plus the
 * protocol number.  Returns 0 when the flowtable is disabled/not ready.
 */
563 ipv4_flow_lookup_hash_internal(
564 struct sockaddr_in *ssin, struct sockaddr_in *dsin,
565 uint32_t *key, uint16_t flags)
567 uint16_t sport, dport;
571 if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
573 proto = flags_to_proto(flags);
574 sport = dport = key[2] = key[1] = key[0] = 0;
/* Source address/port participate only in full-tuple hashing. */
575 if ((ssin != NULL) && (flags & FL_HASH_ALL)) {
576 key[1] = ssin->sin_addr.s_addr;
577 sport = ssin->sin_port;
580 key[2] = dsin->sin_addr.s_addr;
581 dport = dsin->sin_port;
583 if (flags & FL_HASH_ALL) {
584 ((uint16_t *)key)[0] = sport;
585 ((uint16_t *)key)[1] = dport;
587 offset = V_flow_hashjitter + proto;
589 return (jenkins_hashword(key, 3, offset));
/*
 * IPv4 front end for flowtable_lookup(): demarshal the packet into
 * zeroed sockaddr_storage buffers, then look up (or create) the flow
 * in the mbuf's FIB.  Returns NULL if demarshaling fails.
 */
592 static struct flentry *
593 flowtable_lookup_mbuf4(struct flowtable *ft, struct mbuf *m)
595 struct sockaddr_storage ssa, dsa;
597 struct sockaddr_in *dsin, *ssin;
599 dsin = (struct sockaddr_in *)&dsa;
600 ssin = (struct sockaddr_in *)&ssa;
601 bzero(dsin, sizeof(*dsin));
602 bzero(ssin, sizeof(*ssin));
603 flags = ft->ft_flags;
604 if (ipv4_mbuf_demarshal(ft, m, ssin, dsin, &flags) != 0)
607 return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
/*
 * Populate a struct route from a cached IPv4 flow entry: rebuild the
 * destination sockaddr from key word 2 (the destination address) and
 * strip the volatile qualifiers from the cached rtentry/llentry.
 */
611 flow_to_route(struct flentry *fle, struct route *ro)
613 uint32_t *hashkey = NULL;
614 struct sockaddr_in *sin;
616 sin = (struct sockaddr_in *)&ro->ro_dst;
617 sin->sin_family = AF_INET;
618 sin->sin_len = sizeof(*sin);
619 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
620 sin->sin_addr.s_addr = hashkey[2];
621 ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
622 ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
628 * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
629 * then it sets p to point at the offset "len" in the mbuf. WARNING: the
630 * pointer might become stale after other pullups (but we never use it
/*
 * NOTE(review): despite the comment above, the visible expansion only
 * checks m_len -- no m_pullup() call is visible in this excerpt; on a
 * short mbuf it bails to the receive_failed label in the caller.
 */
633 #define PULLUP_TO(_len, p, T) \
635 int x = (_len) + sizeof(T); \
636 if ((m)->m_len < x) { \
637 goto receive_failed; \
639 p = (mtod(m, char *) + (_len)); \
/* Typed views of the upper-layer protocol pointer. */
642 #define TCP(p) ((struct tcphdr *)(p))
643 #define SCTP(p) ((struct sctphdr *)(p))
644 #define UDP(p) ((struct udphdr *)(p))
/*
 * IPv6 analogue of ipv4_mbuf_demarshal(): walk the extension-header
 * chain (hop-by-hop, routing, fragment, dstopts, AH) until an upper-
 * layer protocol is found, extract ports for TCP/UDP/SCTP, then fill
 * ssin6/dsin6 from the IPv6 header and OR protocol bits into *flags.
 */
647 ipv6_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
648 struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, uint16_t *flags)
653 uint16_t src_port, dst_port;
657 offset = hlen = src_port = dst_port = 0;
659 ip6 = mtod(m, struct ip6_hdr *);
660 hlen = sizeof(struct ip6_hdr);
661 proto = ip6->ip6_nxt;
/* Destination-only tables do not need the transport header. */
663 if ((*flags & FL_HASH_ALL) == 0)
/* Loop until PULLUP_TO lands us on a recognized upper-layer header. */
666 while (ulp == NULL) {
669 case IPPROTO_OSPFIGP:
677 PULLUP_TO(hlen, ulp, struct tcphdr);
678 dst_port = TCP(ulp)->th_dport;
679 src_port = TCP(ulp)->th_sport;
680 if ((*flags & FL_HASH_ALL) &&
681 (TCP(ulp)->th_flags & (TH_RST|TH_FIN)))
685 PULLUP_TO(hlen, ulp, struct sctphdr);
686 src_port = SCTP(ulp)->src_port;
687 dst_port = SCTP(ulp)->dest_port;
690 PULLUP_TO(hlen, ulp, struct udphdr);
691 dst_port = UDP(ulp)->uh_dport;
692 src_port = UDP(ulp)->uh_sport;
/* Extension headers: advance hlen/proto and keep walking (ulp reset
 * to NULL on lines not visible here). */
694 case IPPROTO_HOPOPTS: /* RFC 2460 */
695 PULLUP_TO(hlen, ulp, struct ip6_hbh);
696 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
697 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
700 case IPPROTO_ROUTING: /* RFC 2460 */
701 PULLUP_TO(hlen, ulp, struct ip6_rthdr);
702 hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
703 proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
706 case IPPROTO_FRAGMENT: /* RFC 2460 */
707 PULLUP_TO(hlen, ulp, struct ip6_frag);
708 hlen += sizeof (struct ip6_frag);
709 proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
710 offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
714 case IPPROTO_DSTOPTS: /* RFC 2460 */
715 PULLUP_TO(hlen, ulp, struct ip6_hbh);
716 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
717 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
720 case IPPROTO_AH: /* RFC 2402 */
721 PULLUP_TO(hlen, ulp, struct ip6_ext);
722 hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
723 proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
727 PULLUP_TO(hlen, ulp, struct ip6_ext);
738 dsin6->sin6_family = AF_INET6;
739 dsin6->sin6_len = sizeof(*dsin6);
740 dsin6->sin6_port = dst_port;
741 memcpy(&dsin6->sin6_addr, &ip6->ip6_dst, sizeof(struct in6_addr));
743 ssin6->sin6_family = AF_INET6;
744 ssin6->sin6_len = sizeof(*ssin6);
745 ssin6->sin6_port = src_port;
746 memcpy(&ssin6->sin6_addr, &ip6->ip6_src, sizeof(struct in6_addr));
747 *flags |= proto_to_flags(proto);
/* zero_key(): clears all 9 key words (expansion not visible here). */
752 #define zero_key(key) \
/*
 * Build the 9-word IPv6 flow key (word 0: sport|dport for FL_HASH_ALL
 * tables, words 1-4: destination address, words 5-8: source address)
 * and return its Jenkins hash seeded with jitter + protocol.
 * Returns 0 when the flowtable is disabled/not ready.
 */
766 ipv6_flow_lookup_hash_internal(
767 struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6,
768 uint32_t *key, uint16_t flags)
770 uint16_t sport, dport;
774 if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
777 proto = flags_to_proto(flags);
781 memcpy(&key[1], &dsin6->sin6_addr, sizeof(struct in6_addr));
782 dport = dsin6->sin6_port;
/* Source address/port participate only in full-tuple hashing. */
784 if ((ssin6 != NULL) && (flags & FL_HASH_ALL)) {
785 memcpy(&key[5], &ssin6->sin6_addr, sizeof(struct in6_addr));
786 sport = ssin6->sin6_port;
788 if (flags & FL_HASH_ALL) {
789 ((uint16_t *)key)[0] = sport;
790 ((uint16_t *)key)[1] = dport;
792 offset = V_flow_hashjitter + proto;
794 return (jenkins_hashword(key, 9, offset));
/*
 * IPv6 front end for flowtable_lookup(): demarshal the packet into
 * zeroed sockaddr_storage buffers, then look up (or create) the flow
 * in the mbuf's FIB.  Returns NULL if demarshaling fails.
 */
797 static struct flentry *
798 flowtable_lookup_mbuf6(struct flowtable *ft, struct mbuf *m)
800 struct sockaddr_storage ssa, dsa;
801 struct sockaddr_in6 *dsin6, *ssin6;
804 dsin6 = (struct sockaddr_in6 *)&dsa;
805 ssin6 = (struct sockaddr_in6 *)&ssa;
806 bzero(dsin6, sizeof(*dsin6));
807 bzero(ssin6, sizeof(*ssin6));
808 flags = ft->ft_flags;
810 if (ipv6_mbuf_demarshal(ft, m, ssin6, dsin6, &flags) != 0)
813 return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
/*
 * Populate a struct route_in6 from a cached IPv6 flow entry: rebuild
 * the destination sockaddr from key words 5-8 (the destination address
 * slot in the v6 key) and strip the volatile qualifiers from the
 * cached rtentry/llentry.
 */
817 flow_to_route_in6(struct flentry *fle, struct route_in6 *ro)
819 uint32_t *hashkey = NULL;
820 struct sockaddr_in6 *sin6;
822 sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
824 sin6->sin6_family = AF_INET6;
825 sin6->sin6_len = sizeof(*sin6);
826 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
827 memcpy(&sin6->sin6_addr, &hashkey[5], sizeof (struct in6_addr));
828 ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
829 ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
/*
 * Return the occupancy bitmask for the current CPU's hash array
 * (per-CPU tables) or the single shared bitmask (global tables).
 */
835 flowtable_mask(struct flowtable *ft)
839 if (ft->ft_flags & FL_PCPU)
840 mask = ft->ft_masks[curcpu];
842 mask = ft->ft_masks[0];
/*
 * Return the address of the hash bucket slot for 'hash', selecting
 * either the current CPU's private array or the shared global array.
 * NOTE(review): the KASSERTs test the address of element 0, which is
 * never NULL -- they cannot fire as written.
 */
847 static struct flentry **
848 flowtable_entry(struct flowtable *ft, uint32_t hash)
850 struct flentry **fle;
851 int index = (hash % ft->ft_size);
853 if (ft->ft_flags & FL_PCPU) {
854 KASSERT(&ft->ft_table.pcpu[curcpu][0] != NULL, ("pcpu not set"));
855 fle = &ft->ft_table.pcpu[curcpu][index];
857 KASSERT(&ft->ft_table.global[0] != NULL, ("global not set"));
858 fle = &ft->ft_table.global[index];
/*
 * Decide whether a flow entry should be reclaimed: never-initialized
 * hash, dead/down route, explicit FL_STALE mark, or idle longer than
 * the timeout for its TCP state (no SYN/ACK/FIN bits => non-TCP/UDP
 * timeout; FIN seen => fin_wait; SYN only => half-open; SYN|ACK =>
 * established TCP).
 */
865 flow_stale(struct flowtable *ft, struct flentry *fle)
869 if ((fle->f_fhash == 0)
870 || ((fle->f_rt->rt_flags & RTF_HOST) &&
871 ((fle->f_rt->rt_flags & (RTF_UP))
873 || (fle->f_rt->rt_ifp == NULL))
876 idle_time = time_uptime - fle->f_uptime;
878 if ((fle->f_flags & FL_STALE) ||
879 ((fle->f_flags & (TH_SYN|TH_ACK|TH_FIN)) == 0
880 && (idle_time > ft->ft_udp_idle)) ||
881 ((fle->f_flags & TH_FIN)
882 && (idle_time > ft->ft_fin_wait_idle)) ||
883 ((fle->f_flags & (TH_SYN|TH_ACK)) == TH_SYN
884 && (idle_time > ft->ft_syn_idle)) ||
885 ((fle->f_flags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)
886 && (idle_time > ft->ft_tcp_idle)) ||
887 ((fle->f_rt->rt_flags & RTF_UP) == 0 ||
888 (fle->f_rt->rt_ifp == NULL)))
/*
 * Copy the computed lookup key into the entry's embedded key storage
 * (3 words for IPv4, 9 for IPv6; nwords is assigned on lines not
 * visible in this excerpt).
 *
 * Fix: the casts in the two branches were swapped -- the FL_IPV6 branch
 * cast to struct flentry_v4 and the IPv4 branch to struct flentry_v6
 * (compare flow_to_route()/flow_to_route_in6(), which use the correct
 * casts).  Only benign today because fl_flow sits at the same offset in
 * both layouts, but wrong and fragile.
 */
895 flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
900 if (fle->f_flags & FL_IPV6) {
902 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
905 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
908 for (i = 0; i < nwords; i++)
/*
 * Allocate a zeroed flow entry from the zone matching the table's
 * address family (M_NOWAIT: may return NULL) and bump the table's
 * entry count.
 */
912 static struct flentry *
913 flow_alloc(struct flowtable *ft)
915 struct flentry *newfle;
919 zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
921 newfle = uma_zalloc(zone, M_NOWAIT | M_ZERO);
923 atomic_add_int(&ft->ft_count, 1);
/*
 * Return a flow entry to its address-family zone and drop the table's
 * entry count.
 */
928 flow_free(struct flentry *fle, struct flowtable *ft)
932 zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
933 atomic_add_int(&ft->ft_count, -1);
934 uma_zfree(zone, fle);
/*
 * Hysteresis check on table occupancy against V_flowtable_nmbflows:
 * leave the "full" state below 7/8 of the cap, enter it above 31/32.
 * On the transition to full the cleaner is sped up (4*hz) and, for
 * transmit-cache tables, idle timeouts are cut to 5s; on the way back
 * the cleaner is relaxed (20*hz) and timeouts restored to 30s.
 */
938 flow_full(struct flowtable *ft)
944 count = ft->ft_count;
946 if (full && (count < (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 3))))
948 else if (!full && (count > (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 5))))
951 if (full && !ft->ft_full) {
952 flowclean_freq = 4*hz;
953 if ((ft->ft_flags & FL_HASH_ALL) == 0)
954 ft->ft_udp_idle = ft->ft_fin_wait_idle =
955 ft->ft_syn_idle = ft->ft_tcp_idle = 5;
/* Kick the cleaner kthread immediately. */
956 cv_broadcast(&flowclean_cv);
957 } else if (!full && ft->ft_full) {
958 flowclean_freq = 20*hz;
959 if ((ft->ft_flags & FL_HASH_ALL) == 0)
960 ft->ft_udp_idle = ft->ft_fin_wait_idle =
961 ft->ft_syn_idle = ft->ft_tcp_idle = 30;
964 return (ft->ft_full);
/*
 * Insert a new flow entry for (hash, key) under the bucket lock.  An
 * empty bucket sets the occupancy bit and installs the entry directly;
 * otherwise the chain is walked to detect a lost insertion race or a
 * hash collision (free the new entry and return, unless FL_OVERWRITE
 * permits reuse of the matching entry) and the new entry is appended
 * at the tail.  Finally the key, route, llentry and metadata are
 * stored into the chosen entry.
 */
968 flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
969 uint32_t fibnum, struct route *ro, uint16_t flags)
971 struct flentry *fle, *fletail, *newfle, **flep;
972 struct flowtable_stats *fs = &ft->ft_stats[curcpu];
977 newfle = flow_alloc(ft);
981 newfle->f_flags |= (flags & FL_IPV6);
982 proto = flags_to_proto(flags);
984 FL_ENTRY_LOCK(ft, hash);
985 mask = flowtable_mask(ft);
986 flep = flowtable_entry(ft, hash);
987 fletail = fle = *flep;
/* Empty bucket: mark it occupied and install the new entry. */
990 bit_set(mask, FL_ENTRY_INDEX(ft, hash));
991 *flep = fle = newfle;
998 * find end of list and make sure that we were not
999 * preempted by another thread handling this flow
1001 while (fle != NULL) {
1002 if (fle->f_fhash == hash && !flow_stale(ft, fle)) {
1004 * there was either a hash collision
1005 * or we lost a race to insert
1007 FL_ENTRY_UNLOCK(ft, hash);
1008 flow_free(newfle, ft);
1010 if (flags & FL_OVERWRITE)
1015 * re-visit this double condition XXX
1017 if (fletail->f_next != NULL)
1018 fletail = fle->f_next;
/* Track the longest chain seen for the stats report. */
1024 if (depth > fs->ft_max_depth)
1025 fs->ft_max_depth = depth;
1026 fletail->f_next = newfle;
1029 flowtable_set_hashkey(fle, key);
1031 fle->f_proto = proto;
1032 fle->f_rt = ro->ro_rt;
1033 fle->f_lle = ro->ro_lle;
1034 fle->f_fhash = hash;
1035 fle->f_fibnum = fibnum;
1036 fle->f_uptime = time_uptime;
1037 FL_ENTRY_UNLOCK(ft, hash);
/*
 * Kernel API: explicitly install a flow (e.g. an imported one) with an
 * already-resolved route and llentry.  FL_OVERWRITE forces replacement
 * of a matching existing entry; the caller must supply both ro_rt and
 * ro_lle.
 */
1042 kern_flowtable_insert(struct flowtable *ft,
1043 struct sockaddr_storage *ssa, struct sockaddr_storage *dsa,
1044 struct route *ro, uint32_t fibnum, int flags)
1046 uint32_t key[9], hash;
1048 flags = (ft->ft_flags | flags | FL_OVERWRITE);
1052 if (ssa->ss_family == AF_INET)
1053 hash = ipv4_flow_lookup_hash_internal((struct sockaddr_in *)ssa,
1054 (struct sockaddr_in *)dsa, key, flags);
1057 if (ssa->ss_family == AF_INET6)
1058 hash = ipv6_flow_lookup_hash_internal((struct sockaddr_in6 *)ssa,
1059 (struct sockaddr_in6 *)dsa, key, flags);
1061 if (ro->ro_rt == NULL || ro->ro_lle == NULL)
1064 FLDPRINTF(ft, FL_DEBUG,
1065 "kern_flowtable_insert: key=%x:%x:%x hash=%x fibnum=%d flags=%x\n",
1066 key[0], key[1], key[2], hash, fibnum, flags);
1067 return (flowtable_insert(ft, hash, key, fibnum, ro, flags));
/*
 * Compare an entry's stored key against a freshly computed one, word
 * by word (3 words for IPv4, 9 for IPv6; nwords is assigned on lines
 * not visible in this excerpt).
 *
 * Fix: the casts in the two branches were swapped -- the FL_IPV6 branch
 * cast to struct flentry_v4 and vice versa (compare flow_to_route()/
 * flow_to_route_in6() and the corrected flowtable_set_hashkey()).
 */
1071 flowtable_key_equal(struct flentry *fle, uint32_t *key)
1076 if (fle->f_flags & FL_IPV6) {
1078 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
1081 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
1084 for (i = 0; i < nwords; i++)
1085 if (hashkey[i] != key[i])
/*
 * Address-family dispatch wrapper: route the lookup to the v4 or v6
 * helper, and on success stamp the mbuf with the flow hash (M_FLOWID)
 * if it does not already carry one.
 */
1092 flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af)
1094 struct flentry *fle = NULL;
1098 fle = flowtable_lookup_mbuf4(ft, m);
1102 fle = flowtable_lookup_mbuf6(ft, m);
1104 if (fle != NULL && m != NULL && (m->m_flags & M_FLOWID) == 0) {
1105 m->m_flags |= M_FLOWID;
1106 m->m_pkthdr.flowid = fle->f_fhash;
/*
 * Core lookup: hash the (ssa, dsa, fibnum, flags) tuple, probe the
 * table for a matching live entry (fast path: refresh f_uptime and
 * return it), and on a miss resolve a route and llentry and insert a
 * new entry.  Loopback, self-addressed and untracked-protocol traffic
 * is rejected early.  Returns the entry or NULL.
 */
1112 flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
1113 struct sockaddr_storage *dsa, uint32_t fibnum, int flags)
1115 uint32_t key[9], hash;
1116 struct flentry *fle;
1117 struct flowtable_stats *fs = &ft->ft_stats[curcpu];
1121 struct llentry *lle;
1122 struct route sro, *ro;
1123 struct route_in6 sro6;
1125 sro.ro_rt = sro6.ro_rt = NULL;
1126 sro.ro_lle = sro6.ro_lle = NULL;
1129 flags |= ft->ft_flags;
1130 proto = flags_to_proto(flags);
1132 if (ssa->ss_family == AF_INET) {
1133 struct sockaddr_in *ssin, *dsin;
1136 memcpy(&ro->ro_dst, dsa, sizeof(struct sockaddr_in));
1138 * The harvested source and destination addresses
1139 * may contain port information if the packet is
1140 * from a transport protocol (e.g. TCP/UDP). The
1141 * port field must be cleared before performing
1144 ((struct sockaddr_in *)&ro->ro_dst)->sin_port = 0;
1145 dsin = (struct sockaddr_in *)dsa;
1146 ssin = (struct sockaddr_in *)ssa;
/* Refuse self-addressed and 127/8 loopback traffic. */
1147 if ((dsin->sin_addr.s_addr == ssin->sin_addr.s_addr) ||
1148 (ntohl(dsin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1149 (ntohl(ssin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
1152 hash = ipv4_flow_lookup_hash_internal(ssin, dsin, key, flags);
1156 if (ssa->ss_family == AF_INET6) {
1157 struct sockaddr_in6 *ssin6, *dsin6;
1159 ro = (struct route *)&sro6;
1160 memcpy(&sro6.ro_dst, dsa,
1161 sizeof(struct sockaddr_in6));
1162 ((struct sockaddr_in6 *)&ro->ro_dst)->sin6_port = 0;
1163 dsin6 = (struct sockaddr_in6 *)dsa;
1164 ssin6 = (struct sockaddr_in6 *)ssa;
1167 hash = ipv6_flow_lookup_hash_internal(ssin6, dsin6, key, flags);
1171 * Ports are zero and this isn't a transmit cache
1172 * - thus not a protocol for which we need to keep
1174 * FL_HASH_ALL => key[0] != 0 for TCP || UDP || SCTP
1176 if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_ALL)))
1180 FL_ENTRY_LOCK(ft, hash);
1181 if ((fle = FL_ENTRY(ft, hash)) == NULL) {
1182 FL_ENTRY_UNLOCK(ft, hash);
/* Fast path: full match on hash, key, proto, fib and a live route. */
1186 rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
1187 lle = __DEVOLATILE(struct llentry *, fle->f_lle);
1189 && fle->f_fhash == hash
1190 && flowtable_key_equal(fle, key)
1191 && (proto == fle->f_proto)
1192 && (fibnum == fle->f_fibnum)
1193 && (rt->rt_flags & RTF_UP)
1194 && (rt->rt_ifp != NULL)) {
1196 fle->f_uptime = time_uptime;
1197 fle->f_flags |= flags;
1198 FL_ENTRY_UNLOCK(ft, hash);
1200 } else if (fle->f_next != NULL) {
1204 FL_ENTRY_UNLOCK(ft, hash);
/* Miss: do not auto-create when forbidden or when the table is full. */
1206 if (flags & FL_NOAUTO || flow_full(ft))
1211 * This bit of code ends up locking the
1212 * same route 3 times (just like ip_output + ether_output)
1214 * - in rt_check when called by arpresolve
1215 * - dropping the refcount for the rtentry
1217 * This could be consolidated to one if we wrote a variant
1218 * of arpresolve with an rt_check variant that expected to
1219 * receive the route locked
1223 if ((ro->ro_dst.sa_family != AF_INET) &&
1224 (ro->ro_dst.sa_family != AF_INET6))
1225 panic("sa_family == %d\n", ro->ro_dst.sa_family);
1228 ft->ft_rtalloc(ro, hash, fibnum);
1229 if (ro->ro_rt == NULL)
1230 error = ENETUNREACH;
1232 struct llentry *lle = NULL;
1233 struct sockaddr_storage *l3addr;
1234 struct rtentry *rt = ro->ro_rt;
1235 struct ifnet *ifp = rt->rt_ifp;
/* Point-to-point/loopback interfaces carry no L2 entry to cache. */
1237 if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
1243 if (ssa->ss_family == AF_INET6) {
1244 struct sockaddr_in6 *dsin6;
1246 dsin6 = (struct sockaddr_in6 *)dsa;
1247 if (in6_localaddr(&dsin6->sin6_addr)) {
/* Resolve the L2 neighbor: next-hop gateway, else the destination. */
1253 if (rt->rt_flags & RTF_GATEWAY)
1254 l3addr = (struct sockaddr_storage *)rt->rt_gateway;
1257 l3addr = (struct sockaddr_storage *)&ro->ro_dst;
1258 llentry_update(&lle, LLTABLE6(ifp), l3addr, ifp);
1262 if (ssa->ss_family == AF_INET) {
1263 if (rt->rt_flags & RTF_GATEWAY)
1264 l3addr = (struct sockaddr_storage *)rt->rt_gateway;
1266 l3addr = (struct sockaddr_storage *)&ro->ro_dst;
1267 llentry_update(&lle, LLTABLE(ifp), l3addr, ifp);
1278 error = flowtable_insert(ft, hash, key, fibnum, ro, flags);
1288 return ((error) ? NULL : fle);
1292 * used by the bit_alloc macro
/*
 * bit_alloc() expands to calloc(); shim it onto the kernel allocator.
 * NOTE(review): this charges bitmaps to M_DEVBUF while the tables
 * themselves use M_RTABLE -- inconsistent accounting, though harmless.
 */
1294 #define calloc(count, size) malloc((count)*(size), M_DEVBUF, M_WAITOK|M_ZERO)
/*
 * Allocate and initialize a flowtable with 'nentry' buckets, seed the
 * per-vnet hash jitter on first use, select the rtalloc back end and
 * the per-CPU vs. global locking/storage scheme, set the idle timeouts
 * from the sysctl knobs, and append the table to the per-vnet cleaner
 * list.
 *
 * Fix: ft_tcp_idle was initialized from V_flowtable_fin_wait_expire;
 * V_flowtable_tcp_expire exists precisely for this (its sysctl reads
 * "seconds after which to remove flow allocated to a TCP connection"),
 * so use it.
 */
1297 flowtable_alloc(char *name, int nentry, int flags)
1299 struct flowtable *ft, *fttail;
1302 if (V_flow_hashjitter == 0)
1303 V_flow_hashjitter = arc4random();
1305 KASSERT(nentry > 0, ("nentry must be > 0, is %d\n", nentry));
1307 ft = malloc(sizeof(struct flowtable),
1308 M_RTABLE, M_WAITOK | M_ZERO);
1311 ft->ft_flags = flags;
1312 ft->ft_size = nentry;
1314 ft->ft_rtalloc = rtalloc_mpath_fib;
1316 ft->ft_rtalloc = in_rtalloc_ign_wrapper;
1318 if (flags & FL_PCPU) {
1319 ft->ft_lock = flowtable_pcpu_lock;
1320 ft->ft_unlock = flowtable_pcpu_unlock;
/* One private hash array and occupancy bitmap per CPU. */
1322 for (i = 0; i <= mp_maxid; i++) {
1323 ft->ft_table.pcpu[i] =
1324 malloc(nentry*sizeof(struct flentry *),
1325 M_RTABLE, M_WAITOK | M_ZERO);
1326 ft->ft_masks[i] = bit_alloc(nentry);
/* Global table: a power-of-two pool of bucket mutexes. */
1329 ft->ft_lock_count = 2*(powerof2(mp_maxid + 1) ? (mp_maxid + 1):
1330 (fls(mp_maxid + 1) << 1));
1332 ft->ft_lock = flowtable_global_lock;
1333 ft->ft_unlock = flowtable_global_unlock;
1334 ft->ft_table.global =
1335 malloc(nentry*sizeof(struct flentry *),
1336 M_RTABLE, M_WAITOK | M_ZERO);
1337 ft->ft_locks = malloc(ft->ft_lock_count*sizeof(struct mtx),
1338 M_RTABLE, M_WAITOK | M_ZERO);
1339 for (i = 0; i < ft->ft_lock_count; i++)
1340 mtx_init(&ft->ft_locks[i], "flow", NULL, MTX_DEF|MTX_DUPOK);
1342 ft->ft_masks[0] = bit_alloc(nentry);
1344 ft->ft_tmpmask = bit_alloc(nentry);
1347 * In the local transmit case the table truly is
1348 * just a cache - so everything is eligible for
1349 * replacement after 5s of non-use
/* NOTE(review): the comment above says 5s, the code uses 30s. */
1351 if (flags & FL_HASH_ALL) {
1352 ft->ft_udp_idle = V_flowtable_udp_expire;
1353 ft->ft_syn_idle = V_flowtable_syn_expire;
1354 ft->ft_fin_wait_idle = V_flowtable_fin_wait_expire;
1355 ft->ft_tcp_idle = V_flowtable_tcp_expire;
1357 ft->ft_udp_idle = ft->ft_fin_wait_idle =
1358 ft->ft_syn_idle = ft->ft_tcp_idle = 30;
1363 * hook in to the cleaner list
1365 if (V_flow_list_head == NULL)
1366 V_flow_list_head = ft;
1368 fttail = V_flow_list_head;
1369 while (fttail->ft_next != NULL)
1370 fttail = fttail->ft_next;
1371 fttail->ft_next = ft;
1378 * The rest of the code is devoted to garbage collection of expired entries.
1379 * It is a new addition made necessary by the switch to dynamically allocating
/*
 * Release a reclaimed entry: strip the volatile qualifiers from the
 * cached route/llentry (reference drops happen on lines not visible in
 * this excerpt) before returning the entry to its zone.
 */
1384 fle_free(struct flentry *fle, struct flowtable *ft)
1387 struct llentry *lle;
1389 rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
1390 lle = __DEVOLATILE(struct llentry *, fle->f_lle);
/*
 * Sweep the table (the current CPU's view, for per-CPU tables) and
 * unlink stale entries -- or, when 'rt' is non-NULL, only entries
 * referencing that route.  Iteration walks a scratch copy of the
 * occupancy bitmap so bits can be cleared as buckets are finished;
 * unlinked entries are collected on a private list and freed after
 * all bucket locks are dropped.
 */
1397 flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
1399 int curbit = 0, count;
1400 struct flentry *fle, **flehead, *fleprev;
1401 struct flentry *flefreehead, *flefreetail, *fletmp;
1402 bitstr_t *mask, *tmpmask;
1403 struct flowtable_stats *fs = &ft->ft_stats[curcpu];
1405 flefreehead = flefreetail = NULL;
1406 mask = flowtable_mask(ft);
1407 tmpmask = ft->ft_tmpmask;
1408 memcpy(tmpmask, mask, ft->ft_size/8);
1410 * XXX Note to self, bit_ffs operates at the byte level
1411 * and thus adds gratuitous overhead
1413 bit_ffs(tmpmask, ft->ft_size, &curbit);
1414 while (curbit != -1) {
1415 if (curbit >= ft->ft_size || curbit < -1) {
1417 "warning: bad curbit value %d \n",
1422 FL_ENTRY_LOCK(ft, curbit);
1423 flehead = flowtable_entry(ft, curbit);
1424 fle = fleprev = *flehead;
1426 fs->ft_free_checks++;
1428 if (fle == NULL && curbit > 0) {
1430 "warning bit=%d set, but no fle found\n",
1434 while (fle != NULL) {
/* Route-targeted flush: skip entries on other routes. */
1436 if (__DEVOLATILE(struct rtentry *, fle->f_rt) != rt) {
1441 } else if (!flow_stale(ft, fle)) {
1447 * delete head of the list
1449 if (fleprev == *flehead) {
1451 if (fle == fleprev) {
1452 fleprev = *flehead = fle->f_next;
1454 fleprev = *flehead = fle;
1458 * don't advance fleprev
1461 fleprev->f_next = fle->f_next;
1462 fle = fleprev->f_next;
/* Append the unlinked entry to the deferred-free list. */
1465 if (flefreehead == NULL)
1466 flefreehead = flefreetail = fletmp;
1468 flefreetail->f_next = fletmp;
1469 flefreetail = fletmp;
1471 fletmp->f_next = NULL;
/* Bucket drained: clear its occupancy bit. */
1473 if (*flehead == NULL)
1474 bit_clear(mask, curbit);
1475 FL_ENTRY_UNLOCK(ft, curbit);
1476 bit_clear(tmpmask, curbit);
1477 bit_ffs(tmpmask, ft->ft_size, &curbit);
/* Free the collected entries outside all bucket locks. */
1480 while ((fle = flefreehead) != NULL) {
1481 flefreehead = fle->f_next;
1486 if (V_flowtable_debug && count)
1487 log(LOG_DEBUG, "freed %d flow entries\n", count);
/*
 * flowtable_route_flush() - reap all flow entries referencing route 'rt'.
 *
 * For a per-CPU table (FL_PCPU) each CPU owns a private table slice, so
 * the current thread is temporarily pinned to each CPU in turn (when SMP
 * is up) so that flowtable_free_stale() runs against that CPU's slice.
 * A global table needs only a single pass.
 * NOTE(review): loop-closing braces and some lines are elided in this extract.
 */
1491 flowtable_route_flush(struct flowtable *ft, struct rtentry *rt)
1495 if (ft->ft_flags & FL_PCPU) {
1496 for (i = 0; i <= mp_maxid; i++) {
/* bind to CPU i so curcpu-indexed state refers to that CPU's slice */
1500 if (smp_started == 1) {
1501 thread_lock(curthread);
1502 sched_bind(curthread, i);
1503 thread_unlock(curthread);
1506 flowtable_free_stale(ft, rt);
/* release the CPU binding before moving on */
1508 if (smp_started == 1) {
1509 thread_lock(curthread);
1510 sched_unbind(curthread);
1511 thread_unlock(curthread);
/* global table: one pass suffices */
1515 flowtable_free_stale(ft, rt);
/*
 * flowtable_clean_vnet() - reap stale entries from every flowtable in the
 * current vnet (walks the V_flow_list_head list).  Passes rt == NULL to
 * flowtable_free_stale(), i.e. no specific route is targeted.  Uses the
 * same bind-to-each-CPU pattern as flowtable_route_flush() for per-CPU
 * tables.
 * NOTE(review): loop-closing braces and the ft = ft->ft_next advance are
 * elided in this extract.
 */
1520 flowtable_clean_vnet(void)
1522 struct flowtable *ft;
1525 ft = V_flow_list_head;
1526 while (ft != NULL) {
1527 if (ft->ft_flags & FL_PCPU) {
1528 for (i = 0; i <= mp_maxid; i++) {
/* pin to CPU i so the per-CPU table slice is reaped in place */
1532 if (smp_started == 1) {
1533 thread_lock(curthread);
1534 sched_bind(curthread, i);
1535 thread_unlock(curthread);
1538 flowtable_free_stale(ft, NULL);
1540 if (smp_started == 1) {
1541 thread_lock(curthread);
1542 sched_unbind(curthread);
1543 thread_unlock(curthread);
/* global (non-per-CPU) table: single pass */
1547 flowtable_free_stale(ft, NULL);
/*
 * flowtable_cleaner() - main loop of the flowtable cleaner kernel thread.
 *
 * Repeatedly walks every vnet (under the VNET list read lock) cleaning
 * its flowtables, then wakes any waiters on flowclean_cv (so
 * flowtable_flush() can detect cycle completion) and sleeps for
 * flowclean_freq ticks before the next sweep.
 * NOTE(review): the enclosing for(;;), CURVNET_RESTORE and the
 * flowclean_cycles increment are elided in this extract.
 */
1554 flowtable_cleaner(void)
1556 VNET_ITERATOR_DECL(vnet_iter);
1559 log(LOG_INFO, "flowtable cleaner started\n");
1562 VNET_FOREACH(vnet_iter) {
1563 CURVNET_SET(vnet_iter);
1564 flowtable_clean_vnet();
1567 VNET_LIST_RUNLOCK();
1571 * The 10 second interval between cleaning checks
/* signal sweep completion, then sleep until the next interval (or a kick) */
1574 mtx_lock(&flowclean_lock);
1575 cv_broadcast(&flowclean_cv);
1576 cv_timedwait(&flowclean_cv, &flowclean_lock, flowclean_freq);
1577 mtx_unlock(&flowclean_lock);
/*
 * flowtable_flush() - synchronously wait for one full cleaner sweep.
 *
 * Kicks the cleaner thread via cv_broadcast() and blocks until the
 * flowclean_cycles counter advances, which indicates the cleaner has
 * completed a pass since we started waiting.  Registered (see
 * flowtable_init()) as an ifnet_departure_event handler so stale flows
 * are purged when an interface goes away.
 */
1582 flowtable_flush(void *unused __unused)
1586 mtx_lock(&flowclean_lock);
/* snapshot the cycle counter; presumably the cleaner bumps it per sweep — elided here */
1587 start = flowclean_cycles;
1588 while (start == flowclean_cycles) {
1589 cv_broadcast(&flowclean_cv);
1590 cv_wait(&flowclean_cv, &flowclean_lock);
1592 mtx_unlock(&flowclean_lock);
/*
 * Kernel process descriptor for the flowtable cleaner thread (field
 * initializers elided in this extract) plus the SYSINIT hook that starts
 * it via kproc_start() at kernel-idle-thread setup time.
 */
1595 static struct kproc_desc flow_kp = {
1600 SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp);
/*
 * flowtable_init_vnet() - per-vnet flowtable initialization.
 *
 * Sizes the flow-entry limit from maxusers and the CPU count, creates
 * the UMA zones backing IPv4 and IPv6 flow entries (64-byte alignment),
 * caps both zones at that limit, and marks the subsystem ready.
 * Hooked in as a VNET_SYSINIT at SI_SUB_SMP.
 */
1603 flowtable_init_vnet(const void *unused __unused)
/* scale the flow cap with system size: base 1024 plus 64 per user per CPU */
1606 V_flowtable_nmbflows = 1024 + maxusers * 64 * mp_ncpus;
1607 V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
1608 NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
1609 V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
1610 NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
1611 uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
1612 uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
1613 V_flowtable_ready = 1;
1615 VNET_SYSINIT(flowtable_init_vnet, SI_SUB_SMP, SI_ORDER_ANY,
1616 flowtable_init_vnet, NULL);
/*
 * flowtable_init() - one-time (non-vnet) initialization.
 *
 * Sets up the condvar/mutex pair used to coordinate with the cleaner
 * thread, registers flowtable_flush() to run on interface departure so
 * flows referencing a dying ifnet get purged, and sets the cleaning
 * interval to 20 seconds worth of ticks.
 */
1619 flowtable_init(const void *unused __unused)
1622 cv_init(&flowclean_cv, "flowcleanwait");
1623 mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
1624 EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
1625 EVENTHANDLER_PRI_ANY);
/* sweep interval in ticks (NB: a nearby comment says 10 s; this is 20 s) */
1626 flowclean_freq = 20*hz;
1628 SYSINIT(flowtable_init, SI_SUB_SMP, SI_ORDER_MIDDLE,
1629 flowtable_init, NULL);
/*
 * flowtable_uninit() - per-vnet teardown: mark the subsystem unavailable
 * before destroying the flow-entry zones so no new flows are installed
 * while the zones go away.  Hooked in as a VNET_SYSUNINIT.
 */
1634 flowtable_uninit(const void *unused __unused)
1637 V_flowtable_ready = 0;
1638 uma_zdestroy(V_flow_ipv4_zone);
1639 uma_zdestroy(V_flow_ipv6_zone);
1642 VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
1643 flowtable_uninit, NULL);
1648 flowtable_get_hashkey(struct flentry *fle)
1652 if (fle->f_flags & FL_IPV6)
1653 hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
1655 hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
/*
 * flowtable_mask_pcpu() - return the occupancy bitmask for the given CPU's
 * slice of a per-CPU table, or the single shared mask (ft_masks[0]) for a
 * global table.
 * NOTE(review): return type, braces and the return statement are elided
 * in this extract.
 */
1661 flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
1665 if (ft->ft_flags & FL_PCPU)
1666 mask = ft->ft_masks[cpuid];
1668 mask = ft->ft_masks[0];
/*
 * flowtable_entry_pcpu() - map a flow hash to the address of its bucket
 * head pointer, selecting the given CPU's table slice for per-CPU tables
 * and the shared global array otherwise.
 * NOTE(review): the opening/closing braces and return statement are
 * elided in this extract.
 */
1673 static struct flentry **
1674 flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
1676 struct flentry **fle;
/* bucket index is a simple modulo of the table size */
1677 int index = (hash % ft->ft_size);
1679 if (ft->ft_flags & FL_PCPU) {
1680 fle = &ft->ft_table.pcpu[cpuid][index];
1682 fle = &ft->ft_table.global[index];
/*
 * flow_show() - DDB helper: pretty-print a single flow entry.
 *
 * Prints the flow's addresses/ports (decoded from the hash key words),
 * its flag bits, selected route and interface flags, the raw key words
 * (9 for IPv6, 3 for IPv4) and bookkeeping fields.
 * NOTE(review): this extract is missing numerous lines (the rt/ifp
 * loads, several closing braces and db_printf continuations); comments
 * cover only the visible code.  Debugger-only code, not a hot path.
 */
1689 flow_show(struct flowtable *ft, struct flentry *fle)
1692 int rt_valid, ifp_valid;
1693 uint16_t sport, dport;
/* room for dotted-quad strings: 4 x sizeof "123" covers "255.255.255.255\0" */
1695 char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
1696 volatile struct rtentry *rt;
1697 struct ifnet *ifp = NULL;
1699 idle_time = (int)(time_uptime - fle->f_uptime);
1701 rt_valid = rt != NULL;
1704 ifp_valid = ifp != NULL;
1705 hashkey = flowtable_get_hashkey(fle);
/* presumably IPv6 entries skip the inet_ntoa decoding below — the branch body is elided */
1706 if (fle->f_flags & FL_IPV6)
/* key layout (visible use): word 2 = dst addr; word 1 = src addr; first two u16 = ports */
1709 inet_ntoa_r(*(struct in_addr *) &hashkey[2], daddr);
1710 if (ft->ft_flags & FL_HASH_ALL) {
1711 inet_ntoa_r(*(struct in_addr *) &hashkey[1], saddr);
1712 sport = ntohs(((uint16_t *)hashkey)[0]);
1713 dport = ntohs(((uint16_t *)hashkey)[1]);
1714 db_printf("%s:%d->%s:%d",
1715 saddr, sport, daddr,
/* destination-only hashing: just print the destination address */
1718 db_printf("%s ", daddr);
/* decode per-flow flag bits */
1721 if (fle->f_flags & FL_STALE)
1722 db_printf(" FL_STALE ");
1723 if (fle->f_flags & FL_TCP)
1724 db_printf(" FL_TCP ");
1725 if (fle->f_flags & FL_UDP)
1726 db_printf(" FL_UDP ");
/* route / interface status (guarded by rt_valid/ifp_valid checks elided here) */
1728 if (rt->rt_flags & RTF_UP)
1729 db_printf(" RTF_UP ");
1732 if (ifp->if_flags & IFF_LOOPBACK)
1733 db_printf(" IFF_LOOPBACK ");
1734 if (ifp->if_flags & IFF_UP)
1735 db_printf(" IFF_UP ");
1736 if (ifp->if_flags & IFF_POINTOPOINT)
1737 db_printf(" IFF_POINTOPOINT ");
/* raw key dump: 9 x 32-bit words for IPv6, 3 for IPv4 */
1739 if (fle->f_flags & FL_IPV6)
1740 db_printf("\n\tkey=%08x:%08x:%08x%08x:%08x:%08x%08x:%08x:%08x",
1741 hashkey[0], hashkey[1], hashkey[2],
1742 hashkey[3], hashkey[4], hashkey[5],
1743 hashkey[6], hashkey[7], hashkey[8]);
1745 db_printf("\n\tkey=%08x:%08x:%08x ",
1746 hashkey[0], hashkey[1], hashkey[2]);
1747 db_printf("hash=%08x idle_time=%03d"
1748 "\n\tfibnum=%02d rt=%p",
1749 fle->f_fhash, idle_time, fle->f_fibnum, fle->f_rt);
/*
 * flowtable_show() - DDB helper: dump every live flow entry in one CPU's
 * slice of a flowtable (cpuid of -1 is passed by flowtable_show_vnet()
 * for global tables).  Iterates set bits of the occupancy mask, same
 * pattern as flowtable_free_stale() but read-only and lockless (DDB
 * context).
 * NOTE(review): the inner loop body (flow_show call, fle advance) and
 * several braces are elided in this extract.
 */
1754 flowtable_show(struct flowtable *ft, int cpuid)
1757 struct flentry *fle, **flehead;
1758 bitstr_t *mask, *tmpmask;
1761 db_printf("cpu: %d\n", cpuid);
1762 mask = flowtable_mask_pcpu(ft, cpuid);
1763 tmpmask = ft->ft_tmpmask;
/* work on a scratch copy so the live mask is left untouched */
1764 memcpy(tmpmask, mask, ft->ft_size/8);
1766 * XXX Note to self, bit_ffs operates at the byte level
1767 * and thus adds gratuitous overhead
1769 bit_ffs(tmpmask, ft->ft_size, &curbit);
1770 while (curbit != -1) {
/* guard against a corrupt mask before indexing the table */
1771 if (curbit >= ft->ft_size || curbit < -1) {
1772 db_printf("warning: bad curbit value %d \n",
1777 flehead = flowtable_entry_pcpu(ft, curbit, cpuid);
/* walk the bucket chain, printing each entry (body elided here) */
1780 while (fle != NULL) {
1785 bit_clear(tmpmask, curbit);
1786 bit_ffs(tmpmask, ft->ft_size, &curbit);
/*
 * flowtable_show_vnet() - DDB helper: dump every flowtable registered in
 * the current vnet, per CPU for FL_PCPU tables and with cpuid -1 for
 * global ones.
 * NOTE(review): loop-closing braces and the ft = ft->ft_next advance are
 * elided in this extract.
 */
1791 flowtable_show_vnet(void)
1793 struct flowtable *ft;
1796 ft = V_flow_list_head;
1797 while (ft != NULL) {
1798 printf("name: %s\n", ft->ft_name);
1799 if (ft->ft_flags & FL_PCPU) {
1800 for (i = 0; i <= mp_maxid; i++) {
1803 flowtable_show(ft, i);
/* global table: -1 makes flowtable_show() use the shared mask/array */
1806 flowtable_show(ft, -1);
1812 DB_SHOW_COMMAND(flowtables, db_show_flowtables)
1814 VNET_ITERATOR_DECL(vnet_iter);
1816 VNET_FOREACH(vnet_iter) {
1817 CURVNET_SET(vnet_iter);
1818 flowtable_show_vnet();