2 * Copyright (c) 2014 Yandex LLC
3 * Copyright (c) 2014 Alexander V. Chernikov
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD: projects/ipfw/sys/netpfil/ipfw/ip_fw_table.c 267384 2014-06-12 09:59:11Z melifaro $");
31 * Lookup table algorithms.
38 #error IPFIREWALL requires INET.
40 #include "opt_inet6.h"
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/malloc.h>
45 #include <sys/kernel.h>
47 #include <sys/rwlock.h>
48 #include <sys/socket.h>
49 #include <sys/queue.h>
50 #include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
51 #include <net/radix.h>
52 #include <net/route.h>
54 #include <netinet/in.h>
55 #include <netinet/ip_var.h> /* struct ipfw_rule_ref */
56 #include <netinet/ip_fw.h>
58 #include <netpfil/ipfw/ip_fw_private.h>
59 #include <netpfil/ipfw/ip_fw_table.h>
63 * IPFW table lookup algorithms.
65 * What is needed to add another table algo?
68 * * struct table_algo has to be filled with:
69 * name: "type:algoname" format, e.g. "addr:radix". Currently
70 * there are the following types: "addr", "iface", "number" and "flow".
71 * type: one of IPFW_TABLE_* types
72 * flags: one or more TA_FLAGS_*
73 * ta_buf_size: size of structure used to store add/del item state.
74 * Needs to be less than TA_BUF_SZ.
75 * callbacks: see below for description.
76 * * ipfw_add_table_algo / ipfw_del_table_algo has to be called
78 * Callbacks description:
80 * -init: request to initialize new table instance.
81 * typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state,
82 * struct table_info *ti, char *data, uint8_t tflags);
83 * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success.
85 * Allocate all structures needed for normal operations.
86 * * Caller may want to parse @data for some algo-specific
87 * options provided by userland.
88 * * Caller may want to save configuration state pointer to @ta_state
89 * * Caller needs to save desired runtime structure pointer(s)
90 * inside @ti fields. Note that it is not correct to save
91 * @ti pointer at this moment. Use -change_ti hook for that.
92 * * Caller has to fill in ti->lookup to appropriate function
97 * -destroy: request to destroy table instance.
98 * typedef void (ta_destroy)(void *ta_state, struct table_info *ti);
99 * MANDATORY, may be locked (UH+WLOCK). (M_NOWAIT).
101 * Frees all table entries and all tables structures allocated by -init.
105 * -prepare_add: request to allocate state for adding new entry.
106 * typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei,
108 * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success.
110 * Allocates state and fills it in with all necessary data (EXCEPT value)
111 * from @tei to minimize operations needed to be done under WLOCK.
112 * "value" field has to be copied to new entry in @add callback.
113 * Buffer ta_buf of size ta->ta_buf_sz may be used to store
118 * -prepare_del: request to set state for deleting existing entry.
119 * typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei,
121 * MANDATORY, locked, UH. (M_NOWAIT). Returns 0 on success.
123 * Buffer ta_buf of size ta->ta_buf_sz may be used to store
124 * allocated state. Caller should use on-stack ta_buf allocation
125 * instead of doing malloc().
129 * -add: request to insert new entry into runtime/config structures.
130 * typedef int (ta_add)(void *ta_state, struct table_info *ti,
131 * struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
132 * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success.
134 * Insert new entry using previously-allocated state in @ta_buf.
135 * * @tei may have the following flags:
136 * TEI_FLAGS_UPDATE: request to add or update entry.
137 * TEI_FLAGS_DONTADD: request to update (but not add) entry.
138 * * Caller is required to do the following:
139 * copy real entry value from @tei
140 * entry added: return 0, set 1 to @pnum
141 * entry updated: return 0, store 0 to @pnum, store old value in @tei,
142 * add TEI_FLAGS_UPDATED flag to @tei.
143 * entry exists: return EEXIST
144 * entry not found: return ENOENT
145 * other error: return non-zero error code.
149 * -del: request to delete existing entry from runtime/config structures.
150 * typedef int (ta_del)(void *ta_state, struct table_info *ti,
151 * struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
152 * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success.
154 * Delete entry using previously set up in @ta_buf.
155 * * Caller is required to do the following:
156 * entry deleted: return 0, set 1 to @pnum, store old value in @tei.
157 * entry not found: return ENOENT
158 * other error: return non-zero error code.
162 * -flush_entry: flush entry state created by -prepare_add / -del / others
163 * typedef void (ta_flush_entry)(struct ip_fw_chain *ch,
164 * struct tentry_info *tei, void *ta_buf);
165 * MANDATORY, may be locked. (M_NOWAIT).
167 * Delete state allocated by:
168 * -prepare_add (-add returned EEXIST|UPDATED)
169 * -prepare_del (if any)
171 * * Caller is required to handle empty @ta_buf correctly.
174 * -find_tentry: finds entry specified by key @tei
175 * typedef int ta_find_tentry(void *ta_state, struct table_info *ti,
176 * ipfw_obj_tentry *tent);
177 * OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 on success.
179 * Finds entry specified by given key.
180 * * Caller is requred to do the following:
181 * entry found: returns 0, export entry to @tent
182 * entry not found: returns ENOENT
185 * -need_modify: checks if @ti has enough space to hold another @count items.
186 * typedef int (ta_need_modify)(void *ta_state, struct table_info *ti,
187 * uint32_t count, uint64_t *pflags);
188 * OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 if has.
190 * Checks if given table has enough space to add @count items without
191 * resize. Caller may use @pflags to store desired modification data.
195 * -prepare_mod: allocate structures for table modification.
196 * typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags);
197 * OPTIONAL(need_modify), unlocked. (M_WAITOK). Returns 0 on success.
199 * Allocate all needed state for table modification. Caller
200 * should use `struct mod_item` to store new state in @ta_buf.
201 * Up to TA_BUF_SZ (128 bytes) can be stored in @ta_buf.
205 * -fill_mod: copy some data to new state/
206 * typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti,
207 * void *ta_buf, uint64_t *pflags);
208 * OPTIONAL(need_modify), locked (UH). (M_NOWAIT). Returns 0 on success.
210 * Copy as much data as we can to minimize changes under WLOCK.
211 * For example, array can be merged inside this callback.
215 * -modify: perform final modification.
216 * typedef void (ta_modify)(void *ta_state, struct table_info *ti,
217 * void *ta_buf, uint64_t pflags);
218 * OPTIONAL(need_modify), locked (UH+WLOCK). (M_NOWAIT).
220 * Performs all changes necessary to switch to new structures.
221 * * Caller should save old pointers to @ta_buf storage.
225 * -flush_mod: flush table modification state.
226 * typedef void (ta_flush_mod)(void *ta_buf);
227 * OPTIONAL(need_modify), unlocked. (M_WAITOK).
229 * Performs flush for the following:
230 * - prepare_mod (modification was not necessary)
231 * - modify (for the old state)
235 * -change_gi: monitor table info pointer changes
236 * typedef void (ta_change_ti)(void *ta_state, struct table_info *ti);
237 * OPTIONAL, locked (UH). (M_NOWAIT).
239 * Called on @ti pointer changed. Called immediately after -init
240 * to set initial state.
244 * -foreach: calls @f for each table entry
245 * typedef void ta_foreach(void *ta_state, struct table_info *ti,
246 * ta_foreach_f *f, void *arg);
247 * MANDATORY, locked(UH). (M_NOWAIT).
249 * Runs callback with specified argument for each table entry,
250 * Typically used for dumping table entries.
254 * -dump_tentry: dump table entry in current @tentry format.
255 * typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e,
256 * ipfw_obj_tentry *tent);
257 * MANDATORY, locked(UH). (M_NOWAIT). Returns 0 on success.
259 * Dumps entry @e to @tent.
262 * -print_config: prints custom algoritm options into buffer.
263 * typedef void (ta_print_config)(void *ta_state, struct table_info *ti,
264 * char *buf, size_t bufsize);
265 * OPTIONAL. locked(UH). (M_NOWAIT).
267 * Prints custom algorithm options in the format suitable to pass
268 * back to -init callback.
272 * -dump_tinfo: dumps algo-specific info.
273 * typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti,
274 * ipfw_ta_tinfo *tinfo);
275 * OPTIONAL. locked(UH). (M_NOWAIT).
277 * Dumps options like items size/hash size, etc.
280 MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
283 * Utility structures/functions common to more than one algo
293 static int badd(const void *key, void *item, void *base, size_t nmemb,
294 size_t size, int (*compar) (const void *, const void *));
295 static int bdel(const void *key, void *base, size_t nmemb, size_t size,
296 int (*compar) (const void *, const void *));
300 * ADDR implementation using radix
305 * The radix code expects addr and mask to be array of bytes,
306 * with the first byte being the length of the array. rn_inithead
307 * is called with the offset in bits of the lookup key within the
308 * array. If we use a sockaddr_in as the underlying type,
309 * sin_len is conveniently located at offset 0, sin_addr is at
310 * offset 4 and normally aligned.
311 * But for portability, let's avoid assumption and make the code explicit
313 #define KEY_LEN(v) *((uint8_t *)&(v))
315 * Do not require radix to compare more than actual IPv4/IPv6 address
317 #define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
318 #define KEY_LEN_INET6 (offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr))
320 #define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr))
321 #define OFF_LEN_INET6 (8 * offsetof(struct sa_in6, sin6_addr))
323 struct radix_addr_entry {
324 struct radix_node rn[2];
325 struct sockaddr_in addr;
334 struct in6_addr sin6_addr;
337 struct radix_addr_xentry {
338 struct radix_node rn[2];
345 struct radix_node_head *head4;
346 struct radix_node_head *head6;
354 struct sockaddr *addr_ptr;
355 struct sockaddr *mask_ptr;
358 struct sockaddr_in sa;
359 struct sockaddr_in ma;
369 ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen,
372 struct radix_node_head *rnh;
374 if (keylen == sizeof(in_addr_t)) {
375 struct radix_addr_entry *ent;
376 struct sockaddr_in sa;
377 KEY_LEN(sa) = KEY_LEN_INET;
378 sa.sin_addr.s_addr = *((in_addr_t *)key);
379 rnh = (struct radix_node_head *)ti->state;
380 ent = (struct radix_addr_entry *)(rnh->rnh_matchaddr(&sa, rnh));
386 struct radix_addr_xentry *xent;
388 KEY_LEN(sa6) = KEY_LEN_INET6;
389 memcpy(&sa6.sin6_addr, key, sizeof(struct in6_addr));
390 rnh = (struct radix_node_head *)ti->xstate;
391 xent = (struct radix_addr_xentry *)(rnh->rnh_matchaddr(&sa6, rnh));
405 ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
406 char *data, uint8_t tflags)
408 struct radix_cfg *cfg;
410 if (!rn_inithead(&ti->state, OFF_LEN_INET))
412 if (!rn_inithead(&ti->xstate, OFF_LEN_INET6)) {
413 rn_detachhead(&ti->state);
417 cfg = malloc(sizeof(struct radix_cfg), M_IPFW, M_WAITOK | M_ZERO);
420 ti->lookup = ta_lookup_radix;
426 flush_radix_entry(struct radix_node *rn, void *arg)
428 struct radix_node_head * const rnh = arg;
429 struct radix_addr_entry *ent;
431 ent = (struct radix_addr_entry *)
432 rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
434 free(ent, M_IPFW_TBL);
439 ta_destroy_radix(void *ta_state, struct table_info *ti)
441 struct radix_cfg *cfg;
442 struct radix_node_head *rnh;
444 cfg = (struct radix_cfg *)ta_state;
446 rnh = (struct radix_node_head *)(ti->state);
447 rnh->rnh_walktree(rnh, flush_radix_entry, rnh);
448 rn_detachhead(&ti->state);
450 rnh = (struct radix_node_head *)(ti->xstate);
451 rnh->rnh_walktree(rnh, flush_radix_entry, rnh);
452 rn_detachhead(&ti->xstate);
458 * Provide algo-specific table info
461 ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
463 struct radix_cfg *cfg;
465 cfg = (struct radix_cfg *)ta_state;
467 tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM;
468 tinfo->taclass4 = IPFW_TACLASS_RADIX;
469 tinfo->count4 = cfg->count4;
470 tinfo->itemsize4 = sizeof(struct radix_addr_entry);
471 tinfo->taclass6 = IPFW_TACLASS_RADIX;
472 tinfo->count6 = cfg->count6;
473 tinfo->itemsize6 = sizeof(struct radix_addr_xentry);
477 ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e,
478 ipfw_obj_tentry *tent)
480 struct radix_addr_entry *n;
481 struct radix_addr_xentry *xn;
483 n = (struct radix_addr_entry *)e;
485 /* Guess IPv4/IPv6 radix by sockaddr family */
486 if (n->addr.sin_family == AF_INET) {
487 tent->k.addr.s_addr = n->addr.sin_addr.s_addr;
488 tent->masklen = n->masklen;
489 tent->subtype = AF_INET;
490 tent->v.kidx = n->value;
493 xn = (struct radix_addr_xentry *)e;
494 memcpy(&tent->k, &xn->addr6.sin6_addr, sizeof(struct in6_addr));
495 tent->masklen = xn->masklen;
496 tent->subtype = AF_INET6;
497 tent->v.kidx = xn->value;
505 ta_find_radix_tentry(void *ta_state, struct table_info *ti,
506 ipfw_obj_tentry *tent)
508 struct radix_node_head *rnh;
512 if (tent->subtype == AF_INET) {
513 struct sockaddr_in sa;
514 KEY_LEN(sa) = KEY_LEN_INET;
515 sa.sin_addr.s_addr = tent->k.addr.s_addr;
516 rnh = (struct radix_node_head *)ti->state;
517 e = rnh->rnh_matchaddr(&sa, rnh);
520 KEY_LEN(sa6) = KEY_LEN_INET6;
521 memcpy(&sa6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr));
522 rnh = (struct radix_node_head *)ti->xstate;
523 e = rnh->rnh_matchaddr(&sa6, rnh);
527 ta_dump_radix_tentry(ta_state, ti, e, tent);
535 ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f,
538 struct radix_node_head *rnh;
540 rnh = (struct radix_node_head *)(ti->state);
541 rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg);
543 rnh = (struct radix_node_head *)(ti->xstate);
544 rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg);
550 ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
554 for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32)
556 *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0);
561 tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa,
562 struct sockaddr *ma, int *set_mask)
565 struct sockaddr_in *addr, *mask;
566 struct sa_in6 *addr6, *mask6;
571 if (tei->subtype == AF_INET) {
573 addr = (struct sockaddr_in *)sa;
574 mask = (struct sockaddr_in *)ma;
575 /* Set 'total' structure length */
576 KEY_LEN(*addr) = KEY_LEN_INET;
577 KEY_LEN(*mask) = KEY_LEN_INET;
578 addr->sin_family = AF_INET;
579 mask->sin_addr.s_addr =
580 htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
581 a4 = *((in_addr_t *)tei->paddr);
582 addr->sin_addr.s_addr = a4 & mask->sin_addr.s_addr;
589 } else if (tei->subtype == AF_INET6) {
591 addr6 = (struct sa_in6 *)sa;
592 mask6 = (struct sa_in6 *)ma;
593 /* Set 'total' structure length */
594 KEY_LEN(*addr6) = KEY_LEN_INET6;
595 KEY_LEN(*mask6) = KEY_LEN_INET6;
596 addr6->sin6_family = AF_INET6;
597 ipv6_writemask(&mask6->sin6_addr, mlen);
598 memcpy(&addr6->sin6_addr, tei->paddr, sizeof(struct in6_addr));
599 APPLY_MASK(&addr6->sin6_addr, &mask6->sin6_addr);
609 ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
612 struct ta_buf_radix *tb;
613 struct radix_addr_entry *ent;
614 struct radix_addr_xentry *xent;
615 struct sockaddr *addr, *mask;
618 tb = (struct ta_buf_radix *)ta_buf;
623 if (tei->subtype == AF_INET) {
627 ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
630 addr = (struct sockaddr *)&ent->addr;
631 mask = (struct sockaddr *)&tb->addr.a4.ma;
635 } else if (tei->subtype == AF_INET6) {
639 xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
640 xent->masklen = mlen;
642 addr = (struct sockaddr *)&xent->addr6;
643 mask = (struct sockaddr *)&tb->addr.a6.ma;
647 /* Unknown CIDR type */
651 tei_to_sockaddr_ent(tei, addr, mask, &set_mask);
661 ta_add_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei,
662 void *ta_buf, uint32_t *pnum)
664 struct radix_cfg *cfg;
665 struct radix_node_head *rnh;
666 struct radix_node *rn;
667 struct ta_buf_radix *tb;
668 uint32_t *old_value, value;
670 cfg = (struct radix_cfg *)ta_state;
671 tb = (struct ta_buf_radix *)ta_buf;
673 /* Save current entry value from @tei */
674 if (tei->subtype == AF_INET) {
676 ((struct radix_addr_entry *)tb->ent_ptr)->value = tei->value;
679 ((struct radix_addr_xentry *)tb->ent_ptr)->value = tei->value;
682 /* Search for an entry first */
683 rn = rnh->rnh_lookup(tb->addr_ptr, tb->mask_ptr, rnh);
685 if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
687 /* Record already exists. Update value if we're asked to */
688 if (tei->subtype == AF_INET)
689 old_value = &((struct radix_addr_entry *)rn)->value;
691 old_value = &((struct radix_addr_xentry *)rn)->value;
694 *old_value = tei->value;
697 /* Indicate that update has happened instead of addition */
698 tei->flags |= TEI_FLAGS_UPDATED;
704 if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
707 rn = rnh->rnh_addaddr(tb->addr_ptr, tb->mask_ptr, rnh, tb->ent_ptr);
713 if (tei->subtype == AF_INET)
724 ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
727 struct ta_buf_radix *tb;
728 struct sockaddr *addr, *mask;
731 tb = (struct ta_buf_radix *)ta_buf;
736 if (tei->subtype == AF_INET) {
740 addr = (struct sockaddr *)&tb->addr.a4.sa;
741 mask = (struct sockaddr *)&tb->addr.a4.ma;
743 } else if (tei->subtype == AF_INET6) {
747 addr = (struct sockaddr *)&tb->addr.a6.sa;
748 mask = (struct sockaddr *)&tb->addr.a6.ma;
753 tei_to_sockaddr_ent(tei, addr, mask, &set_mask);
762 ta_del_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei,
763 void *ta_buf, uint32_t *pnum)
765 struct radix_cfg *cfg;
766 struct radix_node_head *rnh;
767 struct radix_node *rn;
768 struct ta_buf_radix *tb;
770 cfg = (struct radix_cfg *)ta_state;
771 tb = (struct ta_buf_radix *)ta_buf;
773 if (tei->subtype == AF_INET)
778 rn = rnh->rnh_deladdr(tb->addr_ptr, tb->mask_ptr, rnh);
783 /* Save entry value to @tei */
784 if (tei->subtype == AF_INET)
785 tei->value = ((struct radix_addr_entry *)rn)->value;
787 tei->value = ((struct radix_addr_xentry *)rn)->value;
791 if (tei->subtype == AF_INET)
801 ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
804 struct ta_buf_radix *tb;
806 tb = (struct ta_buf_radix *)ta_buf;
808 if (tb->ent_ptr != NULL)
809 free(tb->ent_ptr, M_IPFW_TBL);
813 ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count,
818 * radix does not require additional memory allocations
819 * other than nodes itself. Adding new masks to the tree do
820 * but we don't have any API to call (and we don't known which
826 struct table_algo addr_radix = {
827 .name = "addr:radix",
828 .type = IPFW_TABLE_ADDR,
829 .flags = TA_FLAG_DEFAULT,
830 .ta_buf_size = sizeof(struct ta_buf_radix),
831 .init = ta_init_radix,
832 .destroy = ta_destroy_radix,
833 .prepare_add = ta_prepare_add_radix,
834 .prepare_del = ta_prepare_del_radix,
837 .flush_entry = ta_flush_radix_entry,
838 .foreach = ta_foreach_radix,
839 .dump_tentry = ta_dump_radix_tentry,
840 .find_tentry = ta_find_radix_tentry,
841 .dump_tinfo = ta_dump_radix_tinfo,
842 .need_modify = ta_need_modify_radix,
851 * [inv.mask4][inv.mask6][log2hsize4][log2hsize6]
854 * inv.mask4: 32 - mask
856 * 1) _slow lookup: mask
857 * 2) _aligned: (128 - mask) / 8
868 SLIST_HEAD(chashbhead, chashentry);
871 struct chashbhead *head4;
872 struct chashbhead *head6;
882 SLIST_ENTRY(chashentry) next;
886 uint32_t a4; /* Host format */
887 struct in6_addr a6; /* Network format */
894 struct chashentry ent;
898 static __inline uint32_t
899 hash_ip(uint32_t addr, int hsize)
902 return (addr % (hsize - 1));
905 static __inline uint32_t
906 hash_ip6(struct in6_addr *addr6, int hsize)
910 i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1] ^
911 addr6->s6_addr32[2] ^ addr6->s6_addr32[3];
913 return (i % (hsize - 1));
917 static __inline uint16_t
918 hash_ip64(struct in6_addr *addr6, int hsize)
922 i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1];
924 return (i % (hsize - 1));
928 static __inline uint32_t
929 hash_ip6_slow(struct in6_addr *addr6, void *key, int mask, int hsize)
931 struct in6_addr mask6;
933 ipv6_writemask(&mask6, mask);
934 memcpy(addr6, key, sizeof(struct in6_addr));
935 APPLY_MASK(addr6, &mask6);
936 return (hash_ip6(addr6, hsize));
939 static __inline uint32_t
940 hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize)
944 paddr = (uint64_t *)addr6;
947 memcpy(addr6, key, mask);
948 return (hash_ip6(addr6, hsize));
952 ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen,
955 struct chashbhead *head;
956 struct chashentry *ent;
957 uint16_t hash, hsize;
960 if (keylen == sizeof(in_addr_t)) {
961 head = (struct chashbhead *)ti->state;
962 imask = ti->data >> 24;
963 hsize = 1 << ((ti->data & 0xFFFF) >> 8);
965 a = ntohl(*((in_addr_t *)key));
967 hash = hash_ip(a, hsize);
968 SLIST_FOREACH(ent, &head[hash], next) {
969 if (ent->a.a4 == a) {
975 /* IPv6: worst scenario: non-round mask */
976 struct in6_addr addr6;
977 head = (struct chashbhead *)ti->xstate;
978 imask = (ti->data & 0xFF0000) >> 16;
979 hsize = 1 << (ti->data & 0xFF);
980 hash = hash_ip6_slow(&addr6, key, imask, hsize);
981 SLIST_FOREACH(ent, &head[hash], next) {
982 if (memcmp(&ent->a.a6, &addr6, 16) == 0) {
993 ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen,
996 struct chashbhead *head;
997 struct chashentry *ent;
998 uint16_t hash, hsize;
1001 if (keylen == sizeof(in_addr_t)) {
1002 head = (struct chashbhead *)ti->state;
1003 imask = ti->data >> 24;
1004 hsize = 1 << ((ti->data & 0xFFFF) >> 8);
1006 a = ntohl(*((in_addr_t *)key));
1008 hash = hash_ip(a, hsize);
1009 SLIST_FOREACH(ent, &head[hash], next) {
1010 if (ent->a.a4 == a) {
1016 /* IPv6: aligned to 8bit mask */
1017 struct in6_addr addr6;
1018 uint64_t *paddr, *ptmp;
1019 head = (struct chashbhead *)ti->xstate;
1020 imask = (ti->data & 0xFF0000) >> 16;
1021 hsize = 1 << (ti->data & 0xFF);
1023 hash = hash_ip6_al(&addr6, key, imask, hsize);
1024 paddr = (uint64_t *)&addr6;
1025 SLIST_FOREACH(ent, &head[hash], next) {
1026 ptmp = (uint64_t *)&ent->a.a6;
1027 if (paddr[0] == ptmp[0] && paddr[1] == ptmp[1]) {
1038 ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen,
1041 struct chashbhead *head;
1042 struct chashentry *ent;
1043 uint16_t hash, hsize;
1046 if (keylen == sizeof(in_addr_t)) {
1047 head = (struct chashbhead *)ti->state;
1048 imask = ti->data >> 24;
1049 hsize = 1 << ((ti->data & 0xFFFF) >> 8);
1051 a = ntohl(*((in_addr_t *)key));
1053 hash = hash_ip(a, hsize);
1054 SLIST_FOREACH(ent, &head[hash], next) {
1055 if (ent->a.a4 == a) {
1062 uint64_t a6, *paddr;
1063 head = (struct chashbhead *)ti->xstate;
1064 paddr = (uint64_t *)key;
1065 hsize = 1 << (ti->data & 0xFF);
1067 hash = hash_ip64((struct in6_addr *)key, hsize);
1068 SLIST_FOREACH(ent, &head[hash], next) {
1069 paddr = (uint64_t *)&ent->a.a6;
1081 chash_parse_opts(struct chash_cfg *cfg, char *data)
1083 char *pdel, *pend, *s;
1091 if ((pdel = strchr(data, ' ')) == NULL)
1093 while (*pdel == ' ')
1095 if (strncmp(pdel, "masks=", 6) != 0)
1097 if ((s = strchr(pdel, ' ')) != NULL)
1101 /* Need /XX[,/YY] */
1104 mask4 = strtol(pdel, &pend, 10);
1110 mask6 = strtol(pdel, &pend, 10);
1113 } else if (*pend != '\0')
1116 if (mask4 < 0 || mask4 > 32 || mask6 < 0 || mask6 > 128)
1126 ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf,
1129 struct chash_cfg *cfg;
1131 cfg = (struct chash_cfg *)ta_state;
1133 if (cfg->mask4 != 32 || cfg->mask6 != 128)
1134 snprintf(buf, bufsize, "%s masks=/%d,/%d", "addr:hash",
1135 cfg->mask4, cfg->mask6);
1137 snprintf(buf, bufsize, "%s", "addr:hash");
1154 * We assume 'data' to be either NULL or the following format:
1155 * 'addr:hash [masks=/32[,/128]]'
1158 ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
1159 char *data, uint8_t tflags)
1163 struct chash_cfg *cfg;
1165 cfg = malloc(sizeof(struct chash_cfg), M_IPFW, M_WAITOK | M_ZERO);
1170 if ((error = chash_parse_opts(cfg, data)) != 0) {
1178 cfg->head4 = malloc(sizeof(struct chashbhead) * cfg->size4, M_IPFW,
1180 cfg->head6 = malloc(sizeof(struct chashbhead) * cfg->size6, M_IPFW,
1182 for (i = 0; i < cfg->size4; i++)
1183 SLIST_INIT(&cfg->head4[i]);
1184 for (i = 0; i < cfg->size6; i++)
1185 SLIST_INIT(&cfg->head6[i]);
1189 ti->state = cfg->head4;
1190 ti->xstate = cfg->head6;
1192 /* Store data depending on v6 mask length */
1193 hsize = log2(cfg->size4) << 8 | log2(cfg->size6);
1194 if (cfg->mask6 == 64) {
1195 ti->data = (32 - cfg->mask4) << 24 | (128 - cfg->mask6) << 16|
1197 ti->lookup = ta_lookup_chash_64;
1198 } else if ((cfg->mask6 % 8) == 0) {
1199 ti->data = (32 - cfg->mask4) << 24 |
1200 cfg->mask6 << 13 | hsize;
1201 ti->lookup = ta_lookup_chash_aligned;
1203 /* don't do that! */
1204 ti->data = (32 - cfg->mask4) << 24 |
1205 cfg->mask6 << 16 | hsize;
1206 ti->lookup = ta_lookup_chash_slow;
1213 ta_destroy_chash(void *ta_state, struct table_info *ti)
1215 struct chash_cfg *cfg;
1216 struct chashentry *ent, *ent_next;
1219 cfg = (struct chash_cfg *)ta_state;
1221 for (i = 0; i < cfg->size4; i++)
1222 SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next)
1223 free(ent, M_IPFW_TBL);
1225 for (i = 0; i < cfg->size6; i++)
1226 SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next)
1227 free(ent, M_IPFW_TBL);
1229 free(cfg->head4, M_IPFW);
1230 free(cfg->head6, M_IPFW);
1236 ta_dump_chash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
1238 struct chash_cfg *cfg;
1240 cfg = (struct chash_cfg *)ta_state;
1242 tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM;
1243 tinfo->taclass4 = IPFW_TACLASS_HASH;
1244 tinfo->size4 = cfg->size4;
1245 tinfo->count4 = cfg->items4;
1246 tinfo->itemsize4 = sizeof(struct chashentry);
1247 tinfo->taclass6 = IPFW_TACLASS_HASH;
1248 tinfo->size6 = cfg->size6;
1249 tinfo->count6 = cfg->items6;
1250 tinfo->itemsize6 = sizeof(struct chashentry);
1254 ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e,
1255 ipfw_obj_tentry *tent)
1257 struct chash_cfg *cfg;
1258 struct chashentry *ent;
1260 cfg = (struct chash_cfg *)ta_state;
1261 ent = (struct chashentry *)e;
1263 if (ent->type == AF_INET) {
1264 tent->k.addr.s_addr = htonl(ent->a.a4 << (32 - cfg->mask4));
1265 tent->masklen = cfg->mask4;
1266 tent->subtype = AF_INET;
1267 tent->v.kidx = ent->value;
1270 memcpy(&tent->k, &ent->a.a6, sizeof(struct in6_addr));
1271 tent->masklen = cfg->mask6;
1272 tent->subtype = AF_INET6;
1273 tent->v.kidx = ent->value;
1281 hash_ent(struct chashentry *ent, int af, int mlen, uint32_t size)
1285 if (af == AF_INET) {
1286 hash = hash_ip(ent->a.a4, size);
1289 hash = hash_ip64(&ent->a.a6, size);
1291 hash = hash_ip6(&ent->a.a6, size);
1298 tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent)
1300 struct in6_addr mask6;
1304 mlen = tei->masklen;
1306 if (tei->subtype == AF_INET) {
1310 ent->type = AF_INET;
1312 /* Calculate masked address */
1313 ent->a.a4 = ntohl(*((in_addr_t *)tei->paddr)) >> (32 - mlen);
1316 } else if (tei->subtype == AF_INET6) {
1320 ent->type = AF_INET6;
1322 ipv6_writemask(&mask6, mlen);
1323 memcpy(&ent->a.a6, tei->paddr, sizeof(struct in6_addr));
1324 APPLY_MASK(&ent->a.a6, &mask6);
1327 /* Unknown CIDR type */
1335 ta_find_chash_tentry(void *ta_state, struct table_info *ti,
1336 ipfw_obj_tentry *tent)
1338 struct chash_cfg *cfg;
1339 struct chashbhead *head;
1340 struct chashentry ent, *tmp;
1341 struct tentry_info tei;
1345 cfg = (struct chash_cfg *)ta_state;
1347 memset(&ent, 0, sizeof(ent));
1348 memset(&tei, 0, sizeof(tei));
1350 if (tent->subtype == AF_INET) {
1351 tei.paddr = &tent->k.addr;
1352 tei.masklen = cfg->mask4;
1353 tei.subtype = AF_INET;
1355 if ((error = tei_to_chash_ent(&tei, &ent)) != 0)
1359 hash = hash_ent(&ent, AF_INET, cfg->mask4, cfg->size4);
1360 /* Check for existence */
1361 SLIST_FOREACH(tmp, &head[hash], next) {
1362 if (tmp->a.a4 != ent.a.a4)
1365 ta_dump_chash_tentry(ta_state, ti, tmp, tent);
1369 tei.paddr = &tent->k.addr6;
1370 tei.masklen = cfg->mask6;
1371 tei.subtype = AF_INET6;
1373 if ((error = tei_to_chash_ent(&tei, &ent)) != 0)
1377 hash = hash_ent(&ent, AF_INET6, cfg->mask6, cfg->size6);
1378 /* Check for existence */
1379 SLIST_FOREACH(tmp, &head[hash], next) {
1380 if (memcmp(&tmp->a.a6, &ent.a.a6, 16) != 0)
1382 ta_dump_chash_tentry(ta_state, ti, tmp, tent);
1391 ta_foreach_chash(void *ta_state, struct table_info *ti, ta_foreach_f *f,
1394 struct chash_cfg *cfg;
1395 struct chashentry *ent, *ent_next;
1398 cfg = (struct chash_cfg *)ta_state;
1400 for (i = 0; i < cfg->size4; i++)
1401 SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next)
1404 for (i = 0; i < cfg->size6; i++)
1405 SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next)
1410 ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
1413 struct ta_buf_chash *tb;
1414 struct chashentry *ent;
1417 tb = (struct ta_buf_chash *)ta_buf;
1419 ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
1421 error = tei_to_chash_ent(tei, ent);
1423 free(ent, M_IPFW_TBL);
1432 ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
1433 void *ta_buf, uint32_t *pnum)
1435 struct chash_cfg *cfg;
1436 struct chashbhead *head;
1437 struct chashentry *ent, *tmp;
1438 struct ta_buf_chash *tb;
1440 uint32_t hash, value;
1442 cfg = (struct chash_cfg *)ta_state;
1443 tb = (struct ta_buf_chash *)ta_buf;
1444 ent = (struct chashentry *)tb->ent_ptr;
1448 /* Read current value from @tei */
1449 ent->value = tei->value;
1451 /* Read cuurrent value */
1452 if (tei->subtype == AF_INET) {
1453 if (tei->masklen != cfg->mask4)
1456 hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4);
1458 /* Check for existence */
1459 SLIST_FOREACH(tmp, &head[hash], next) {
1460 if (tmp->a.a4 == ent->a.a4) {
1466 if (tei->masklen != cfg->mask6)
1469 hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6);
1470 /* Check for existence */
1471 SLIST_FOREACH(tmp, &head[hash], next) {
1472 if (memcmp(&tmp->a.a6, &ent->a.a6, 16) == 0) {
1480 if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
1482 /* Record already exists. Update value if we're asked to */
1484 tmp->value = tei->value;
1486 /* Indicate that update has happened instead of addition */
1487 tei->flags |= TEI_FLAGS_UPDATED;
1490 if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
1492 SLIST_INSERT_HEAD(&head[hash], ent, next);
1496 /* Update counters */
1497 if (tei->subtype == AF_INET)
1507 ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
1510 struct ta_buf_chash *tb;
1512 tb = (struct ta_buf_chash *)ta_buf;
1514 return (tei_to_chash_ent(tei, &tb->ent));
1518 ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
1519 void *ta_buf, uint32_t *pnum)
1521 struct chash_cfg *cfg;
1522 struct chashbhead *head;
1523 struct chashentry *tmp, *tmp_next, *ent;
1524 struct ta_buf_chash *tb;
1527 cfg = (struct chash_cfg *)ta_state;
1528 tb = (struct ta_buf_chash *)ta_buf;
1531 if (tei->subtype == AF_INET) {
1532 if (tei->masklen != cfg->mask4)
1535 hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4);
1537 SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) {
1538 if (tmp->a.a4 != ent->a.a4)
1541 SLIST_REMOVE(&head[hash], tmp, chashentry, next);
1544 tei->value = tmp->value;
1549 if (tei->masklen != cfg->mask6)
1552 hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6);
1553 SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) {
1554 if (memcmp(&tmp->a.a6, &ent->a.a6, 16) != 0)
1557 SLIST_REMOVE(&head[hash], tmp, chashentry, next);
1560 tei->value = tmp->value;
1570 ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
1573 struct ta_buf_chash *tb;
1575 tb = (struct ta_buf_chash *)ta_buf;
1577 if (tb->ent_ptr != NULL)
1578 free(tb->ent_ptr, M_IPFW_TBL);
1582 * Hash growing callbacks.
1586 ta_need_modify_chash(void *ta_state, struct table_info *ti, uint32_t count,
1589 struct chash_cfg *cfg;
1593 * Since we don't know exact number of IPv4/IPv6 records in @count,
1594 * ignore non-zero @count value at all. Check current hash sizes
1595 * and return appropriate data.
1598 cfg = (struct chash_cfg *)ta_state;
1601 if (cfg->items4 > cfg->size4 && cfg->size4 < 65536)
1602 data |= (cfg->size4 * 2) << 16;
1603 if (cfg->items6 > cfg->size6 && cfg->size6 < 65536)
1604 data |= cfg->size6 * 2;
1615 * Allocate new, larger chash.
1618 ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags)
1620 struct mod_item *mi;
1621 struct chashbhead *head;
1624 mi = (struct mod_item *)ta_buf;
1626 memset(mi, 0, sizeof(struct mod_item));
1627 mi->size = (*pflags >> 16) & 0xFFFF;
1628 mi->size6 = *pflags & 0xFFFF;
1630 head = malloc(sizeof(struct chashbhead) * mi->size,
1631 M_IPFW, M_WAITOK | M_ZERO);
1632 for (i = 0; i < mi->size; i++)
1633 SLIST_INIT(&head[i]);
1634 mi->main_ptr = head;
1637 if (mi->size6 > 0) {
1638 head = malloc(sizeof(struct chashbhead) * mi->size6,
1639 M_IPFW, M_WAITOK | M_ZERO);
1640 for (i = 0; i < mi->size6; i++)
1641 SLIST_INIT(&head[i]);
1642 mi->main_ptr6 = head;
1649 * Copy data from old runtime array to new one.
1652 ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf,
1656 /* In is not possible to do rehash if we're not holidng WLOCK. */
1661 * Switch old & new arrays.
1664 ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf,
1667 struct mod_item *mi;
1668 struct chash_cfg *cfg;
1669 struct chashbhead *old_head, *new_head;
1670 struct chashentry *ent, *ent_next;
1673 size_t old_size, new_size;
1675 mi = (struct mod_item *)ta_buf;
1676 cfg = (struct chash_cfg *)ta_state;
1678 /* Check which hash we need to grow and do we still need that */
1679 if (mi->size > 0 && cfg->size4 < mi->size) {
1680 new_head = (struct chashbhead *)mi->main_ptr;
1681 new_size = mi->size;
1682 old_size = cfg->size4;
1683 old_head = ti->state;
1687 for (i = 0; i < old_size; i++) {
1688 SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
1689 nhash = hash_ent(ent, af, mlen, new_size);
1690 SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
1694 ti->state = new_head;
1695 cfg->head4 = new_head;
1696 cfg->size4 = mi->size;
1697 mi->main_ptr = old_head;
1700 if (mi->size6 > 0 && cfg->size6 < mi->size6) {
1701 new_head = (struct chashbhead *)mi->main_ptr6;
1702 new_size = mi->size6;
1703 old_size = cfg->size6;
1704 old_head = ti->xstate;
1708 for (i = 0; i < old_size; i++) {
1709 SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
1710 nhash = hash_ent(ent, af, mlen, new_size);
1711 SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
1715 ti->xstate = new_head;
1716 cfg->head6 = new_head;
1717 cfg->size6 = mi->size6;
1718 mi->main_ptr6 = old_head;
1721 /* Update lower 32 bits with new values */
1722 ti->data &= 0xFFFFFFFF00000000;
1723 ti->data |= log2(cfg->size4) << 8 | log2(cfg->size6);
1727 * Free unneded array.
1730 ta_flush_mod_chash(void *ta_buf)
1732 struct mod_item *mi;
1734 mi = (struct mod_item *)ta_buf;
1735 if (mi->main_ptr != NULL)
1736 free(mi->main_ptr, M_IPFW);
1737 if (mi->main_ptr6 != NULL)
1738 free(mi->main_ptr6, M_IPFW);
1741 struct table_algo addr_hash = {
1742 .name = "addr:hash",
1743 .type = IPFW_TABLE_ADDR,
1744 .ta_buf_size = sizeof(struct ta_buf_chash),
1745 .init = ta_init_chash,
1746 .destroy = ta_destroy_chash,
1747 .prepare_add = ta_prepare_add_chash,
1748 .prepare_del = ta_prepare_del_chash,
1749 .add = ta_add_chash,
1750 .del = ta_del_chash,
1751 .flush_entry = ta_flush_chash_entry,
1752 .foreach = ta_foreach_chash,
1753 .dump_tentry = ta_dump_chash_tentry,
1754 .find_tentry = ta_find_chash_tentry,
1755 .print_config = ta_print_chash_config,
1756 .dump_tinfo = ta_dump_chash_tinfo,
1757 .need_modify = ta_need_modify_chash,
1758 .prepare_mod = ta_prepare_mod_chash,
1759 .fill_mod = ta_fill_mod_chash,
1760 .modify = ta_modify_chash,
1761 .flush_mod = ta_flush_mod_chash,
1771 * - sorted array of "struct ifidx" pointed by ti->state.
1772 * Array is allocated with rounding up to IFIDX_CHUNK. Only existing
1773 * interfaces are stored in array, however its allocated size is
1774 * sufficient to hold all table records if needed.
1775 * - current array size is stored in ti->data
1778 * - "struct iftable_cfg" is allocated to store table state (ta_state).
1779 * - All table records are stored inside namedobj instance.
1788 #define DEFAULT_IFIDX_SIZE 64
1793 struct named_object no;
1795 struct iftable_cfg *icfg;
1800 struct iftable_cfg {
1801 struct namedobj_instance *ii;
1802 struct ip_fw_chain *ch;
1803 struct table_info *ti;
1805 size_t size; /* Number of items allocated in array */
1806 size_t count; /* Number of all items */
1807 size_t used; /* Number of items _active_ now */
1812 struct ifentry *ife;
1816 int compare_ifidx(const void *k, const void *v);
1817 static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex);
1820 compare_ifidx(const void *k, const void *v)
1822 struct ifidx *ifidx;
1825 key = *((uint16_t *)k);
1826 ifidx = (struct ifidx *)v;
1828 if (key < ifidx->kidx)
1830 else if (key > ifidx->kidx)
1837 * Adds item @item with key @key into ascending-sorted array @base.
1838 * Assumes @base has enough additional storage.
1840 * Returns 1 on success, 0 on duplicate key.
1843 badd(const void *key, void *item, void *base, size_t nmemb,
1844 size_t size, int (*compar) (const void *, const void *))
1846 int min, max, mid, shift, res;
1850 memcpy(base, item, size);
1858 while (min <= max) {
1859 mid = (min + max) / 2;
1860 res = compar(key, (const void *)((caddr_t)base + mid * size));
1870 /* Item not found. */
1871 res = compar(key, (const void *)((caddr_t)base + mid * size));
1877 paddr = (caddr_t)base + shift * size;
1879 memmove(paddr + size, paddr, (nmemb - shift) * size);
1881 memcpy(paddr, item, size);
1887 * Deletes item with key @key from ascending-sorted array @base.
1889 * Returns 1 on success, 0 for non-existent key.
1892 bdel(const void *key, void *base, size_t nmemb, size_t size,
1893 int (*compar) (const void *, const void *))
1898 item = (caddr_t)bsearch(key, base, nmemb, size, compar);
1903 sz = (caddr_t)base + nmemb * size - item;
1906 memmove(item, item + size, sz);
1911 static struct ifidx *
1912 ifidx_find(struct table_info *ti, void *key)
1916 ifi = bsearch(key, ti->state, ti->data, sizeof(struct ifidx),
1923 ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen,
1928 ifi = ifidx_find(ti, key);
1939 ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
1940 char *data, uint8_t tflags)
1942 struct iftable_cfg *icfg;
1944 icfg = malloc(sizeof(struct iftable_cfg), M_IPFW, M_WAITOK | M_ZERO);
1946 icfg->ii = ipfw_objhash_create(DEFAULT_IFIDX_SIZE);
1947 icfg->size = DEFAULT_IFIDX_SIZE;
1948 icfg->main_ptr = malloc(sizeof(struct ifidx) * icfg->size, M_IPFW,
1953 ti->state = icfg->main_ptr;
1954 ti->lookup = ta_lookup_ifidx;
1960 * Handle tableinfo @ti pointer change (on table array resize).
1963 ta_change_ti_ifidx(void *ta_state, struct table_info *ti)
1965 struct iftable_cfg *icfg;
1967 icfg = (struct iftable_cfg *)ta_state;
1972 destroy_ifidx_locked(struct namedobj_instance *ii, struct named_object *no,
1975 struct ifentry *ife;
1976 struct ip_fw_chain *ch;
1978 ch = (struct ip_fw_chain *)arg;
1979 ife = (struct ifentry *)no;
1981 ipfw_iface_del_notify(ch, &ife->ic);
1982 free(ife, M_IPFW_TBL);
1987 * Destroys table @ti
1990 ta_destroy_ifidx(void *ta_state, struct table_info *ti)
1992 struct iftable_cfg *icfg;
1993 struct ip_fw_chain *ch;
1995 icfg = (struct iftable_cfg *)ta_state;
1998 if (icfg->main_ptr != NULL)
1999 free(icfg->main_ptr, M_IPFW);
2001 ipfw_objhash_foreach(icfg->ii, destroy_ifidx_locked, ch);
2003 ipfw_objhash_destroy(icfg->ii);
2009 * Provide algo-specific table info
2012 ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
2014 struct iftable_cfg *cfg;
2016 cfg = (struct iftable_cfg *)ta_state;
2018 tinfo->taclass4 = IPFW_TACLASS_ARRAY;
2019 tinfo->size4 = cfg->size;
2020 tinfo->count4 = cfg->used;
2021 tinfo->itemsize4 = sizeof(struct ifidx);
2025 * Prepare state to add to the table:
2026 * allocate ifentry and reference needed interface.
2029 ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
2032 struct ta_buf_ifidx *tb;
2034 struct ifentry *ife;
2036 tb = (struct ta_buf_ifidx *)ta_buf;
2038 /* Check if string is terminated */
2039 ifname = (char *)tei->paddr;
2040 if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE)
2043 ife = malloc(sizeof(struct ifentry), M_IPFW_TBL, M_WAITOK | M_ZERO);
2044 ife->ic.cb = if_notifier;
2045 ife->ic.cbdata = ife;
2047 if (ipfw_iface_ref(ch, ifname, &ife->ic) != 0)
2050 /* Use ipfw_iface 'ifname' field as stable storage */
2051 ife->no.name = ife->ic.iface->ifname;
2059 ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei,
2060 void *ta_buf, uint32_t *pnum)
2062 struct iftable_cfg *icfg;
2063 struct ifentry *ife, *tmp;
2064 struct ta_buf_ifidx *tb;
2065 struct ipfw_iface *iif;
2070 tb = (struct ta_buf_ifidx *)ta_buf;
2071 ifname = (char *)tei->paddr;
2072 icfg = (struct iftable_cfg *)ta_state;
2076 ife->value = tei->value;
2078 tmp = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname);
2081 if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
2084 /* Exchange values in @tmp and @tei */
2086 tmp->value = tei->value;
2089 iif = tmp->ic.iface;
2090 if (iif->resolved != 0) {
2091 /* We have to update runtime value, too */
2092 ifi = ifidx_find(ti, &iif->ifindex);
2093 ifi->value = ife->value;
2096 /* Indicate that update has happened instead of addition */
2097 tei->flags |= TEI_FLAGS_UPDATED;
2102 if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
2105 /* Link to internal list */
2106 ipfw_objhash_add(icfg->ii, &ife->no);
2108 /* Link notifier (possible running its callback) */
2109 ipfw_iface_add_notify(icfg->ch, &ife->ic);
2119 * Prepare to delete key from table.
2120 * Do basic interface name checks.
2123 ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
2126 struct ta_buf_ifidx *tb;
2129 tb = (struct ta_buf_ifidx *)ta_buf;
2131 /* Check if string is terminated */
2132 ifname = (char *)tei->paddr;
2133 if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE)
2140 * Remove key from both configuration list and
2141 * runtime array. Removed interface notification.
2144 ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei,
2145 void *ta_buf, uint32_t *pnum)
2147 struct iftable_cfg *icfg;
2148 struct ifentry *ife;
2149 struct ta_buf_ifidx *tb;
2154 tb = (struct ta_buf_ifidx *)ta_buf;
2155 ifname = (char *)tei->paddr;
2156 icfg = (struct iftable_cfg *)ta_state;
2159 ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname);
2164 if (ife->linked != 0) {
2165 /* We have to remove item from runtime */
2166 ifindex = ife->ic.iface->ifindex;
2168 res = bdel(&ifindex, icfg->main_ptr, icfg->used,
2169 sizeof(struct ifidx), compare_ifidx);
2171 KASSERT(res == 1, ("index %d does not exist", ifindex));
2173 ti->data = icfg->used;
2177 /* Unlink from local list */
2178 ipfw_objhash_del(icfg->ii, &ife->no);
2179 /* Unlink notifier */
2180 ipfw_iface_del_notify(icfg->ch, &ife->ic);
2183 tei->value = ife->value;
2192 * Flush deleted entry.
2193 * Drops interface reference and frees entry.
2196 ta_flush_ifidx_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
2199 struct ta_buf_ifidx *tb;
2201 tb = (struct ta_buf_ifidx *)ta_buf;
2203 if (tb->ife != NULL) {
2205 ipfw_iface_unref(ch, &tb->ife->ic);
2206 free(tb->ife, M_IPFW_TBL);
2212 * Handle interface announce/withdrawal for particular table.
2213 * Every real runtime array modification happens here.
2216 if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex)
2218 struct ifentry *ife;
2220 struct iftable_cfg *icfg;
2221 struct table_info *ti;
2224 ife = (struct ifentry *)cbdata;
2228 KASSERT(ti != NULL, ("ti=NULL, check change_ti handler"));
2230 if (ife->linked == 0 && ifindex != 0) {
2231 /* Interface announce */
2234 ifi.value = ife->value;
2235 res = badd(&ifindex, &ifi, icfg->main_ptr, icfg->used,
2236 sizeof(struct ifidx), compare_ifidx);
2237 KASSERT(res == 1, ("index %d already exists", ifindex));
2239 ti->data = icfg->used;
2241 } else if (ife->linked != 0 && ifindex == 0) {
2242 /* Interface withdrawal */
2243 ifindex = ife->ic.iface->ifindex;
2245 res = bdel(&ifindex, icfg->main_ptr, icfg->used,
2246 sizeof(struct ifidx), compare_ifidx);
2248 KASSERT(res == 1, ("index %d does not exist", ifindex));
2250 ti->data = icfg->used;
2257 * Table growing callbacks.
2261 ta_need_modify_ifidx(void *ta_state, struct table_info *ti, uint32_t count,
2264 struct iftable_cfg *cfg;
2267 cfg = (struct iftable_cfg *)ta_state;
2270 while (size < cfg->count + count)
2273 if (size != cfg->size) {
2282 * Allocate ned, larger runtime ifidx array.
2285 ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags)
2287 struct mod_item *mi;
2289 mi = (struct mod_item *)ta_buf;
2291 memset(mi, 0, sizeof(struct mod_item));
2293 mi->main_ptr = malloc(sizeof(struct ifidx) * mi->size, M_IPFW,
2300 * Copy data from old runtime array to new one.
2303 ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, void *ta_buf,
2306 struct mod_item *mi;
2307 struct iftable_cfg *icfg;
2309 mi = (struct mod_item *)ta_buf;
2310 icfg = (struct iftable_cfg *)ta_state;
2312 /* Check if we still need to grow array */
2313 if (icfg->size >= mi->size) {
2318 memcpy(mi->main_ptr, icfg->main_ptr, icfg->used * sizeof(struct ifidx));
2324 * Switch old & new arrays.
2327 ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf,
2330 struct mod_item *mi;
2331 struct iftable_cfg *icfg;
2334 mi = (struct mod_item *)ta_buf;
2335 icfg = (struct iftable_cfg *)ta_state;
2337 old_ptr = icfg->main_ptr;
2338 icfg->main_ptr = mi->main_ptr;
2339 icfg->size = mi->size;
2340 ti->state = icfg->main_ptr;
2342 mi->main_ptr = old_ptr;
2346 * Free unneded array.
2349 ta_flush_mod_ifidx(void *ta_buf)
2351 struct mod_item *mi;
2353 mi = (struct mod_item *)ta_buf;
2354 if (mi->main_ptr != NULL)
2355 free(mi->main_ptr, M_IPFW);
2359 ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e,
2360 ipfw_obj_tentry *tent)
2362 struct ifentry *ife;
2364 ife = (struct ifentry *)e;
2366 tent->masklen = 8 * IF_NAMESIZE;
2367 memcpy(&tent->k, ife->no.name, IF_NAMESIZE);
2368 tent->v.kidx = ife->value;
2374 ta_find_ifidx_tentry(void *ta_state, struct table_info *ti,
2375 ipfw_obj_tentry *tent)
2377 struct iftable_cfg *icfg;
2378 struct ifentry *ife;
2381 icfg = (struct iftable_cfg *)ta_state;
2382 ifname = tent->k.iface;
2384 if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE)
2387 ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname);
2390 ta_dump_ifidx_tentry(ta_state, ti, ife, tent);
2403 foreach_ifidx(struct namedobj_instance *ii, struct named_object *no,
2406 struct ifentry *ife;
2407 struct wa_ifidx *wa;
2409 ife = (struct ifentry *)no;
2410 wa = (struct wa_ifidx *)arg;
2412 wa->f(ife, wa->arg);
2416 ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f,
2419 struct iftable_cfg *icfg;
2422 icfg = (struct iftable_cfg *)ta_state;
2427 ipfw_objhash_foreach(icfg->ii, foreach_ifidx, &wa);
2430 struct table_algo iface_idx = {
2431 .name = "iface:array",
2432 .type = IPFW_TABLE_INTERFACE,
2433 .flags = TA_FLAG_DEFAULT,
2434 .ta_buf_size = sizeof(struct ta_buf_ifidx),
2435 .init = ta_init_ifidx,
2436 .destroy = ta_destroy_ifidx,
2437 .prepare_add = ta_prepare_add_ifidx,
2438 .prepare_del = ta_prepare_del_ifidx,
2439 .add = ta_add_ifidx,
2440 .del = ta_del_ifidx,
2441 .flush_entry = ta_flush_ifidx_entry,
2442 .foreach = ta_foreach_ifidx,
2443 .dump_tentry = ta_dump_ifidx_tentry,
2444 .find_tentry = ta_find_ifidx_tentry,
2445 .dump_tinfo = ta_dump_ifidx_tinfo,
2446 .need_modify = ta_need_modify_ifidx,
2447 .prepare_mod = ta_prepare_mod_ifidx,
2448 .fill_mod = ta_fill_mod_ifidx,
2449 .modify = ta_modify_ifidx,
2450 .flush_mod = ta_flush_mod_ifidx,
2451 .change_ti = ta_change_ti_ifidx,
2455 * Number array cmds.
2460 * - sorted array of "struct numarray" pointed by ti->state.
2461 * Array is allocated with rounding up to NUMARRAY_CHUNK.
2462 * - current array size is stored in ti->data
2471 struct numarray_cfg {
2473 size_t size; /* Number of items allocated in array */
2474 size_t used; /* Number of items _active_ now */
2477 struct ta_buf_numarray
2482 int compare_numarray(const void *k, const void *v);
2485 compare_numarray(const void *k, const void *v)
2487 struct numarray *na;
2490 key = *((uint32_t *)k);
2491 na = (struct numarray *)v;
2493 if (key < na->number)
2495 else if (key > na->number)
2501 static struct numarray *
2502 numarray_find(struct table_info *ti, void *key)
2504 struct numarray *ri;
2506 ri = bsearch(key, ti->state, ti->data, sizeof(struct numarray),
2513 ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen,
2516 struct numarray *ri;
2518 ri = numarray_find(ti, key);
2529 ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
2530 char *data, uint8_t tflags)
2532 struct numarray_cfg *cfg;
2534 cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO);
2537 cfg->main_ptr = malloc(sizeof(struct numarray) * cfg->size, M_IPFW,
2541 ti->state = cfg->main_ptr;
2542 ti->lookup = ta_lookup_numarray;
2548 * Destroys table @ti
2551 ta_destroy_numarray(void *ta_state, struct table_info *ti)
2553 struct numarray_cfg *cfg;
2555 cfg = (struct numarray_cfg *)ta_state;
2557 if (cfg->main_ptr != NULL)
2558 free(cfg->main_ptr, M_IPFW);
2564 * Provide algo-specific table info
2567 ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
2569 struct numarray_cfg *cfg;
2571 cfg = (struct numarray_cfg *)ta_state;
2573 tinfo->taclass4 = IPFW_TACLASS_ARRAY;
2574 tinfo->size4 = cfg->size;
2575 tinfo->count4 = cfg->used;
2576 tinfo->itemsize4 = sizeof(struct numarray);
2580 * Prepare for addition/deletion to an array.
2583 ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei,
2586 struct ta_buf_numarray *tb;
2588 tb = (struct ta_buf_numarray *)ta_buf;
2590 tb->na.number = *((uint32_t *)tei->paddr);
2596 ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei,
2597 void *ta_buf, uint32_t *pnum)
2599 struct numarray_cfg *cfg;
2600 struct ta_buf_numarray *tb;
2601 struct numarray *ri;
2605 tb = (struct ta_buf_numarray *)ta_buf;
2606 cfg = (struct numarray_cfg *)ta_state;
2608 /* Read current value from @tei */
2609 tb->na.value = tei->value;
2611 ri = numarray_find(ti, &tb->na.number);
2614 if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
2617 /* Exchange values between ri and @tei */
2619 ri->value = tei->value;
2621 /* Indicate that update has happened instead of addition */
2622 tei->flags |= TEI_FLAGS_UPDATED;
2627 if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
2630 res = badd(&tb->na.number, &tb->na, cfg->main_ptr, cfg->used,
2631 sizeof(struct numarray), compare_numarray);
2633 KASSERT(res == 1, ("number %d already exists", tb->na.number));
2635 ti->data = cfg->used;
2642 * Remove key from both configuration list and
2643 * runtime array. Removed interface notification.
2646 ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei,
2647 void *ta_buf, uint32_t *pnum)
2649 struct numarray_cfg *cfg;
2650 struct ta_buf_numarray *tb;
2651 struct numarray *ri;
2654 tb = (struct ta_buf_numarray *)ta_buf;
2655 cfg = (struct numarray_cfg *)ta_state;
2657 ri = numarray_find(ti, &tb->na.number);
2661 tei->value = ri->value;
2663 res = bdel(&tb->na.number, cfg->main_ptr, cfg->used,
2664 sizeof(struct numarray), compare_numarray);
2666 KASSERT(res == 1, ("number %u does not exist", tb->na.number));
2668 ti->data = cfg->used;
2675 ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
2679 /* We don't have any state, do nothing */
2684 * Table growing callbacks.
2688 ta_need_modify_numarray(void *ta_state, struct table_info *ti, uint32_t count,
2691 struct numarray_cfg *cfg;
2694 cfg = (struct numarray_cfg *)ta_state;
2697 while (size < cfg->used + count)
2700 if (size != cfg->size) {
2709 * Allocate new, larger runtime array.
2712 ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags)
2714 struct mod_item *mi;
2716 mi = (struct mod_item *)ta_buf;
2718 memset(mi, 0, sizeof(struct mod_item));
2720 mi->main_ptr = malloc(sizeof(struct numarray) * mi->size, M_IPFW,
2727 * Copy data from old runtime array to new one.
2730 ta_fill_mod_numarray(void *ta_state, struct table_info *ti, void *ta_buf,
2733 struct mod_item *mi;
2734 struct numarray_cfg *cfg;
2736 mi = (struct mod_item *)ta_buf;
2737 cfg = (struct numarray_cfg *)ta_state;
2739 /* Check if we still need to grow array */
2740 if (cfg->size >= mi->size) {
2745 memcpy(mi->main_ptr, cfg->main_ptr, cfg->used * sizeof(struct numarray));
2751 * Switch old & new arrays.
2754 ta_modify_numarray(void *ta_state, struct table_info *ti, void *ta_buf,
2757 struct mod_item *mi;
2758 struct numarray_cfg *cfg;
2761 mi = (struct mod_item *)ta_buf;
2762 cfg = (struct numarray_cfg *)ta_state;
2764 old_ptr = cfg->main_ptr;
2765 cfg->main_ptr = mi->main_ptr;
2766 cfg->size = mi->size;
2767 ti->state = cfg->main_ptr;
2769 mi->main_ptr = old_ptr;
2773 * Free unneded array.
2776 ta_flush_mod_numarray(void *ta_buf)
2778 struct mod_item *mi;
2780 mi = (struct mod_item *)ta_buf;
2781 if (mi->main_ptr != NULL)
2782 free(mi->main_ptr, M_IPFW);
2786 ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e,
2787 ipfw_obj_tentry *tent)
2789 struct numarray *na;
2791 na = (struct numarray *)e;
2793 tent->k.key = na->number;
2794 tent->v.kidx = na->value;
2800 ta_find_numarray_tentry(void *ta_state, struct table_info *ti,
2801 ipfw_obj_tentry *tent)
2803 struct numarray_cfg *cfg;
2804 struct numarray *ri;
2806 cfg = (struct numarray_cfg *)ta_state;
2808 ri = numarray_find(ti, &tent->k.key);
2811 ta_dump_numarray_tentry(ta_state, ti, ri, tent);
2819 ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f,
2822 struct numarray_cfg *cfg;
2823 struct numarray *array;
2826 cfg = (struct numarray_cfg *)ta_state;
2827 array = cfg->main_ptr;
2829 for (i = 0; i < cfg->used; i++)
2833 struct table_algo number_array = {
2834 .name = "number:array",
2835 .type = IPFW_TABLE_NUMBER,
2836 .ta_buf_size = sizeof(struct ta_buf_numarray),
2837 .init = ta_init_numarray,
2838 .destroy = ta_destroy_numarray,
2839 .prepare_add = ta_prepare_add_numarray,
2840 .prepare_del = ta_prepare_add_numarray,
2841 .add = ta_add_numarray,
2842 .del = ta_del_numarray,
2843 .flush_entry = ta_flush_numarray_entry,
2844 .foreach = ta_foreach_numarray,
2845 .dump_tentry = ta_dump_numarray_tentry,
2846 .find_tentry = ta_find_numarray_tentry,
2847 .dump_tinfo = ta_dump_numarray_tinfo,
2848 .need_modify = ta_need_modify_numarray,
2849 .prepare_mod = ta_prepare_mod_numarray,
2850 .fill_mod = ta_fill_mod_numarray,
2851 .modify = ta_modify_numarray,
2852 .flush_mod = ta_flush_mod_numarray,
2860 * [inv.mask4][inv.mask6][log2hsize4][log2hsize6]
2863 * inv.mask4: 32 - mask
2865 * 1) _slow lookup: mask
2866 * 2) _aligned: (128 - mask) / 8
2877 SLIST_HEAD(fhashbhead, fhashentry);
2880 SLIST_ENTRY(fhashentry) next;
2890 struct fhashentry4 {
2891 struct fhashentry e;
2896 struct fhashentry6 {
2897 struct fhashentry e;
2898 struct in6_addr dip6;
2899 struct in6_addr sip6;
2903 struct fhashbhead *head;
2906 struct fhashentry4 fe4;
2907 struct fhashentry6 fe6;
2910 struct ta_buf_fhash {
2912 struct fhashentry6 fe6;
2916 cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz)
2920 ka = (uint64_t *)(&a->next + 1);
2921 kb = (uint64_t *)(&b->next + 1);
2923 if (*ka == *kb && (memcmp(a + 1, b + 1, sz) == 0))
2929 static __inline uint32_t
2930 hash_flow4(struct fhashentry4 *f, int hsize)
2934 i = (f->dip.s_addr) ^ (f->sip.s_addr) ^ (f->e.dport) ^ (f->e.sport);
2936 return (i % (hsize - 1));
2939 static __inline uint32_t
2940 hash_flow6(struct fhashentry6 *f, int hsize)
2944 i = (f->dip6.__u6_addr.__u6_addr32[2]) ^
2945 (f->dip6.__u6_addr.__u6_addr32[3]) ^
2946 (f->sip6.__u6_addr.__u6_addr32[2]) ^
2947 (f->sip6.__u6_addr.__u6_addr32[3]) ^
2948 (f->e.dport) ^ (f->e.sport);
2950 return (i % (hsize - 1));
2954 hash_flow_ent(struct fhashentry *ent, uint32_t size)
2958 if (ent->af == AF_INET) {
2959 hash = hash_flow4((struct fhashentry4 *)ent, size);
2961 hash = hash_flow6((struct fhashentry6 *)ent, size);
2968 ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen,
2971 struct fhashbhead *head;
2972 struct fhashentry *ent;
2973 struct fhashentry4 *m4;
2974 struct ipfw_flow_id *id;
2975 uint16_t hash, hsize;
2977 id = (struct ipfw_flow_id *)key;
2978 head = (struct fhashbhead *)ti->state;
2980 m4 = (struct fhashentry4 *)ti->xstate;
2982 if (id->addr_type == 4) {
2983 struct fhashentry4 f;
2985 /* Copy hash mask */
2988 f.dip.s_addr &= id->dst_ip;
2989 f.sip.s_addr &= id->src_ip;
2990 f.e.dport &= id->dst_port;
2991 f.e.sport &= id->src_port;
2992 f.e.proto &= id->proto;
2993 hash = hash_flow4(&f, hsize);
2994 SLIST_FOREACH(ent, &head[hash], next) {
2995 if (cmp_flow_ent(ent, &f.e, 2 * 4) != 0) {
3000 } else if (id->addr_type == 6) {
3001 struct fhashentry6 f;
3004 /* Copy hash mask */
3005 f = *((struct fhashentry6 *)(m4 + 1));
3007 /* Handle lack of __u6_addr.__u6_addr64 */
3008 fp = (uint64_t *)&f.dip6;
3009 idp = (uint64_t *)&id->dst_ip6;
3010 /* src IPv6 is stored after dst IPv6 */
3015 f.e.dport &= id->dst_port;
3016 f.e.sport &= id->src_port;
3017 f.e.proto &= id->proto;
3018 hash = hash_flow6(&f, hsize);
3019 SLIST_FOREACH(ent, &head[hash], next) {
3020 if (cmp_flow_ent(ent, &f.e, 2 * 16) != 0) {
3034 ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
3035 char *data, uint8_t tflags)
3038 struct fhash_cfg *cfg;
3039 struct fhashentry4 *fe4;
3040 struct fhashentry6 *fe6;
3042 cfg = malloc(sizeof(struct fhash_cfg), M_IPFW, M_WAITOK | M_ZERO);
3046 cfg->head = malloc(sizeof(struct fhashbhead) * cfg->size, M_IPFW,
3048 for (i = 0; i < cfg->size; i++)
3049 SLIST_INIT(&cfg->head[i]);
3051 /* Fill in fe masks based on @tflags */
3054 if (tflags & IPFW_TFFLAG_SRCIP) {
3055 memset(&fe4->sip, 0xFF, sizeof(fe4->sip));
3056 memset(&fe6->sip6, 0xFF, sizeof(fe6->sip6));
3058 if (tflags & IPFW_TFFLAG_DSTIP) {
3059 memset(&fe4->dip, 0xFF, sizeof(fe4->dip));
3060 memset(&fe6->dip6, 0xFF, sizeof(fe6->dip6));
3062 if (tflags & IPFW_TFFLAG_SRCPORT) {
3063 memset(&fe4->e.sport, 0xFF, sizeof(fe4->e.sport));
3064 memset(&fe6->e.sport, 0xFF, sizeof(fe6->e.sport));
3066 if (tflags & IPFW_TFFLAG_DSTPORT) {
3067 memset(&fe4->e.dport, 0xFF, sizeof(fe4->e.dport));
3068 memset(&fe6->e.dport, 0xFF, sizeof(fe6->e.dport));
3070 if (tflags & IPFW_TFFLAG_PROTO) {
3071 memset(&fe4->e.proto, 0xFF, sizeof(fe4->e.proto));
3072 memset(&fe6->e.proto, 0xFF, sizeof(fe6->e.proto));
3075 fe4->e.af = AF_INET;
3076 fe6->e.af = AF_INET6;
3079 ti->state = cfg->head;
3080 ti->xstate = &cfg->fe4;
3081 ti->data = cfg->size;
3082 ti->lookup = ta_lookup_fhash;
3088 ta_destroy_fhash(void *ta_state, struct table_info *ti)
3090 struct fhash_cfg *cfg;
3091 struct fhashentry *ent, *ent_next;
3094 cfg = (struct fhash_cfg *)ta_state;
3096 for (i = 0; i < cfg->size; i++)
3097 SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next)
3098 free(ent, M_IPFW_TBL);
3100 free(cfg->head, M_IPFW);
3105 * Provide algo-specific table info
3108 ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
3110 struct fhash_cfg *cfg;
3112 cfg = (struct fhash_cfg *)ta_state;
3114 tinfo->flags = IPFW_TATFLAGS_AFITEM;
3115 tinfo->taclass4 = IPFW_TACLASS_HASH;
3116 tinfo->size4 = cfg->size;
3117 tinfo->count4 = cfg->items;
3118 tinfo->itemsize4 = sizeof(struct fhashentry4);
3119 tinfo->itemsize6 = sizeof(struct fhashentry6);
3123 ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e,
3124 ipfw_obj_tentry *tent)
3126 struct fhash_cfg *cfg;
3127 struct fhashentry *ent;
3128 struct fhashentry4 *fe4;
3129 struct fhashentry6 *fe6;
3130 struct tflow_entry *tfe;
3132 cfg = (struct fhash_cfg *)ta_state;
3133 ent = (struct fhashentry *)e;
3134 tfe = &tent->k.flow;
3137 tfe->proto = ent->proto;
3138 tfe->dport = htons(ent->dport);
3139 tfe->sport = htons(ent->sport);
3140 tent->v.kidx = ent->value;
3141 tent->subtype = ent->af;
3143 if (ent->af == AF_INET) {
3144 fe4 = (struct fhashentry4 *)ent;
3145 tfe->a.a4.sip.s_addr = htonl(fe4->sip.s_addr);
3146 tfe->a.a4.dip.s_addr = htonl(fe4->dip.s_addr);
3150 fe6 = (struct fhashentry6 *)ent;
3151 tfe->a.a6.sip6 = fe6->sip6;
3152 tfe->a.a6.dip6 = fe6->dip6;
3153 tent->masklen = 128;
3161 tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent)
3163 struct fhashentry4 *fe4;
3164 struct fhashentry6 *fe6;
3165 struct tflow_entry *tfe;
3167 tfe = (struct tflow_entry *)tei->paddr;
3169 ent->af = tei->subtype;
3170 ent->proto = tfe->proto;
3171 ent->dport = ntohs(tfe->dport);
3172 ent->sport = ntohs(tfe->sport);
3174 if (tei->subtype == AF_INET) {
3176 fe4 = (struct fhashentry4 *)ent;
3177 fe4->sip.s_addr = ntohl(tfe->a.a4.sip.s_addr);
3178 fe4->dip.s_addr = ntohl(tfe->a.a4.dip.s_addr);
3181 } else if (tei->subtype == AF_INET6) {
3182 fe6 = (struct fhashentry6 *)ent;
3183 fe6->sip6 = tfe->a.a6.sip6;
3184 fe6->dip6 = tfe->a.a6.dip6;
3187 /* Unknown CIDR type */
3196 ta_find_fhash_tentry(void *ta_state, struct table_info *ti,
3197 ipfw_obj_tentry *tent)
3199 struct fhash_cfg *cfg;
3200 struct fhashbhead *head;
3201 struct fhashentry *ent, *tmp;
3202 struct fhashentry6 fe6;
3203 struct tentry_info tei;
3208 cfg = (struct fhash_cfg *)ta_state;
3212 memset(&fe6, 0, sizeof(fe6));
3213 memset(&tei, 0, sizeof(tei));
3215 tei.paddr = &tent->k.flow;
3216 tei.subtype = tent->subtype;
3218 if ((error = tei_to_fhash_ent(&tei, ent)) != 0)
3222 hash = hash_flow_ent(ent, cfg->size);
3224 if (tei.subtype == AF_INET)
3225 sz = 2 * sizeof(struct in_addr);
3227 sz = 2 * sizeof(struct in6_addr);
3229 /* Check for existence */
3230 SLIST_FOREACH(tmp, &head[hash], next) {
3231 if (cmp_flow_ent(tmp, ent, sz) != 0) {
3232 ta_dump_fhash_tentry(ta_state, ti, tmp, tent);
3241 ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f,
3244 struct fhash_cfg *cfg;
3245 struct fhashentry *ent, *ent_next;
3248 cfg = (struct fhash_cfg *)ta_state;
3250 for (i = 0; i < cfg->size; i++)
3251 SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next)
3256 ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
3259 struct ta_buf_fhash *tb;
3260 struct fhashentry *ent;
3264 tb = (struct ta_buf_fhash *)ta_buf;
3266 if (tei->subtype == AF_INET)
3267 sz = sizeof(struct fhashentry4);
3268 else if (tei->subtype == AF_INET6)
3269 sz = sizeof(struct fhashentry6);
3273 ent = malloc(sz, M_IPFW_TBL, M_WAITOK | M_ZERO);
3275 error = tei_to_fhash_ent(tei, ent);
3277 free(ent, M_IPFW_TBL);
3286 ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
3287 void *ta_buf, uint32_t *pnum)
3289 struct fhash_cfg *cfg;
3290 struct fhashbhead *head;
3291 struct fhashentry *ent, *tmp;
3292 struct ta_buf_fhash *tb;
3294 uint32_t hash, value;
3297 cfg = (struct fhash_cfg *)ta_state;
3298 tb = (struct ta_buf_fhash *)ta_buf;
3299 ent = (struct fhashentry *)tb->ent_ptr;
3302 /* Read current value from @tei */
3303 ent->value = tei->value;
3306 hash = hash_flow_ent(ent, cfg->size);
3308 if (tei->subtype == AF_INET)
3309 sz = 2 * sizeof(struct in_addr);
3311 sz = 2 * sizeof(struct in6_addr);
3313 /* Check for existence */
3314 SLIST_FOREACH(tmp, &head[hash], next) {
3315 if (cmp_flow_ent(tmp, ent, sz) != 0) {
3322 if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
3324 /* Record already exists. Update value if we're asked to */
3325 /* Exchange values between tmp and @tei */
3327 tmp->value = tei->value;
3329 /* Indicate that update has happened instead of addition */
3330 tei->flags |= TEI_FLAGS_UPDATED;
3333 if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
3336 SLIST_INSERT_HEAD(&head[hash], ent, next);
3340 /* Update counters and check if we need to grow hash */
3348 ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
3351 struct ta_buf_fhash *tb;
3353 tb = (struct ta_buf_fhash *)ta_buf;
3355 return (tei_to_fhash_ent(tei, &tb->fe6.e));
3359 ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
3360 void *ta_buf, uint32_t *pnum)
3362 struct fhash_cfg *cfg;
3363 struct fhashbhead *head;
3364 struct fhashentry *ent, *tmp;
3365 struct ta_buf_fhash *tb;
3369 cfg = (struct fhash_cfg *)ta_state;
3370 tb = (struct ta_buf_fhash *)ta_buf;
3374 hash = hash_flow_ent(ent, cfg->size);
3376 if (tei->subtype == AF_INET)
3377 sz = 2 * sizeof(struct in_addr);
3379 sz = 2 * sizeof(struct in6_addr);
3381 /* Check for existence */
3382 SLIST_FOREACH(tmp, &head[hash], next) {
3383 if (cmp_flow_ent(tmp, ent, sz) == 0)
3386 SLIST_REMOVE(&head[hash], tmp, fhashentry, next);
3387 tei->value = tmp->value;
3398 ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
3401 struct ta_buf_fhash *tb;
3403 tb = (struct ta_buf_fhash *)ta_buf;
3405 if (tb->ent_ptr != NULL)
3406 free(tb->ent_ptr, M_IPFW_TBL);
3410 * Hash growing callbacks.
3414 ta_need_modify_fhash(void *ta_state, struct table_info *ti, uint32_t count,
3417 struct fhash_cfg *cfg;
3419 cfg = (struct fhash_cfg *)ta_state;
3421 if (cfg->items > cfg->size && cfg->size < 65536) {
3422 *pflags = cfg->size * 2;
3430 * Allocate new, larger fhash.
3433 ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags)
3435 struct mod_item *mi;
3436 struct fhashbhead *head;
3439 mi = (struct mod_item *)ta_buf;
3441 memset(mi, 0, sizeof(struct mod_item));
3443 head = malloc(sizeof(struct fhashbhead) * mi->size, M_IPFW,
3445 for (i = 0; i < mi->size; i++)
3446 SLIST_INIT(&head[i]);
3448 mi->main_ptr = head;
3454 * Copy data from old runtime array to new one.
3457 ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
3461 /* In is not possible to do rehash if we're not holidng WLOCK. */
3466 * Switch old & new arrays.
3469 ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
3472 struct mod_item *mi;
3473 struct fhash_cfg *cfg;
3474 struct fhashbhead *old_head, *new_head;
3475 struct fhashentry *ent, *ent_next;
3480 mi = (struct mod_item *)ta_buf;
3481 cfg = (struct fhash_cfg *)ta_state;
3483 old_size = cfg->size;
3484 old_head = ti->state;
3486 new_head = (struct fhashbhead *)mi->main_ptr;
3487 for (i = 0; i < old_size; i++) {
3488 SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
3489 nhash = hash_flow_ent(ent, mi->size);
3490 SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
3494 ti->state = new_head;
3495 ti->data = mi->size;
3496 cfg->head = new_head;
3497 cfg->size = mi->size;
3499 mi->main_ptr = old_head;
3503 * Free unneded array.
3506 ta_flush_mod_fhash(void *ta_buf)
3508 struct mod_item *mi;
3510 mi = (struct mod_item *)ta_buf;
3511 if (mi->main_ptr != NULL)
3512 free(mi->main_ptr, M_IPFW);
3515 struct table_algo flow_hash = {
3516 .name = "flow:hash",
3517 .type = IPFW_TABLE_FLOW,
3518 .flags = TA_FLAG_DEFAULT,
3519 .ta_buf_size = sizeof(struct ta_buf_fhash),
3520 .init = ta_init_fhash,
3521 .destroy = ta_destroy_fhash,
3522 .prepare_add = ta_prepare_add_fhash,
3523 .prepare_del = ta_prepare_del_fhash,
3524 .add = ta_add_fhash,
3525 .del = ta_del_fhash,
3526 .flush_entry = ta_flush_fhash_entry,
3527 .foreach = ta_foreach_fhash,
3528 .dump_tentry = ta_dump_fhash_tentry,
3529 .find_tentry = ta_find_fhash_tentry,
3530 .dump_tinfo = ta_dump_fhash_tinfo,
3531 .need_modify = ta_need_modify_fhash,
3532 .prepare_mod = ta_prepare_mod_fhash,
3533 .fill_mod = ta_fill_mod_fhash,
3534 .modify = ta_modify_fhash,
3535 .flush_mod = ta_flush_mod_fhash,
3539 * Kernel fibs bindings.
3544 * - fully relies on route API
3545 * - fib number is stored in ti->data
3549 static struct rtentry *
3550 lookup_kfib(void *key, int keylen, int fib)
3555 struct sockaddr_in sin;
3556 bzero(&sin, sizeof(sin));
3557 sin.sin_len = sizeof(struct sockaddr_in);
3558 sin.sin_family = AF_INET;
3559 sin.sin_addr.s_addr = *(in_addr_t *)key;
3560 s = (struct sockaddr *)&sin;
3562 struct sockaddr_in6 sin6;
3563 bzero(&sin6, sizeof(sin6));
3564 sin6.sin6_len = sizeof(struct sockaddr_in6);
3565 sin6.sin6_family = AF_INET6;
3566 sin6.sin6_addr = *(struct in6_addr *)key;
3567 s = (struct sockaddr *)&sin6;
3570 return (rtalloc1_fib(s, 0, 0, fib));
3574 ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen,
3577 struct rtentry *rte;
3579 if ((rte = lookup_kfib(key, keylen, ti->data)) == NULL)
3588 /* Parse 'fib=%d' */
3590 kfib_parse_opts(int *pfib, char *data)
3592 char *pdel, *pend, *s;
3597 if ((pdel = strchr(data, ' ')) == NULL)
3599 while (*pdel == ' ')
3601 if (strncmp(pdel, "fib=", 4) != 0)
3603 if ((s = strchr(pdel, ' ')) != NULL)
3608 fibnum = strtol(pdel, &pend, 10);
3618 ta_print_kfib_config(void *ta_state, struct table_info *ti, char *buf,
3623 snprintf(buf, bufsize, "%s fib=%lu", "addr:kfib", ti->data);
3625 snprintf(buf, bufsize, "%s", "addr:kfib");
3629 ta_init_kfib(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
3630 char *data, uint8_t tflags)
3635 if ((error = kfib_parse_opts(&fibnum, data)) != 0)
3638 if (fibnum >= rt_numfibs)
3642 ti->lookup = ta_lookup_kfib;
3648 * Destroys table @ti
3651 ta_destroy_kfib(void *ta_state, struct table_info *ti)
3657 * Provide algo-specific table info
3660 ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
3663 tinfo->flags = IPFW_TATFLAGS_AFDATA;
3664 tinfo->taclass4 = IPFW_TACLASS_RADIX;
3666 tinfo->itemsize4 = sizeof(struct rtentry);
3667 tinfo->taclass6 = IPFW_TACLASS_RADIX;
3669 tinfo->itemsize6 = sizeof(struct rtentry);
3673 contigmask(uint8_t *p, int len)
3677 for (i = 0; i < len ; i++)
3678 if ( (p[i/8] & (1 << (7 - (i%8)))) == 0) /* first bit unset */
3680 for (n= i + 1; n < len; n++)
3681 if ( (p[n/8] & (1 << (7 - (n % 8)))) != 0)
3682 return (-1); /* mask not contiguous */
3688 ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e,
3689 ipfw_obj_tentry *tent)
3691 struct rtentry *rte;
3692 struct sockaddr_in *addr, *mask;
3693 struct sockaddr_in6 *addr6, *mask6;
3696 rte = (struct rtentry *)e;
3697 addr = (struct sockaddr_in *)rt_key(rte);
3698 mask = (struct sockaddr_in *)rt_mask(rte);
3701 /* Guess IPv4/IPv6 radix by sockaddr family */
3702 if (addr->sin_family == AF_INET) {
3703 tent->k.addr.s_addr = addr->sin_addr.s_addr;
3706 len = contigmask((uint8_t *)&mask->sin_addr, 32);
3709 tent->masklen = len;
3710 tent->subtype = AF_INET;
3711 tent->v.kidx = 0; /* Do we need to put GW here? */
3713 } else if (addr->sin_family == AF_INET6) {
3714 addr6 = (struct sockaddr_in6 *)addr;
3715 mask6 = (struct sockaddr_in6 *)mask;
3716 memcpy(&tent->k, &addr6->sin6_addr, sizeof(struct in6_addr));
3719 len = contigmask((uint8_t *)&mask6->sin6_addr, 128);
3722 tent->masklen = len;
3723 tent->subtype = AF_INET6;
3732 ta_find_kfib_tentry(void *ta_state, struct table_info *ti,
3733 ipfw_obj_tentry *tent)
3735 struct rtentry *rte;
3739 if (tent->subtype == AF_INET) {
3740 key = &tent->k.addr;
3741 keylen = sizeof(struct in_addr);
3743 key = &tent->k.addr6;
3744 keylen = sizeof(struct in6_addr);
3747 if ((rte = lookup_kfib(key, keylen, ti->data)) == NULL)
3751 ta_dump_kfib_tentry(ta_state, ti, rte, tent);
3760 ta_foreach_kfib(void *ta_state, struct table_info *ti, ta_foreach_f *f,
3763 struct radix_node_head *rnh;
3766 rnh = rt_tables_get_rnh(ti->data, AF_INET);
3768 RADIX_NODE_HEAD_RLOCK(rnh);
3769 error = rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg);
3770 RADIX_NODE_HEAD_RUNLOCK(rnh);
3773 rnh = rt_tables_get_rnh(ti->data, AF_INET6);
3775 RADIX_NODE_HEAD_RLOCK(rnh);
3776 error = rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg);
3777 RADIX_NODE_HEAD_RUNLOCK(rnh);
3781 struct table_algo addr_kfib = {
3782 .name = "addr:kfib",
3783 .type = IPFW_TABLE_ADDR,
3784 .flags = TA_FLAG_READONLY,
3786 .init = ta_init_kfib,
3787 .destroy = ta_destroy_kfib,
3788 .foreach = ta_foreach_kfib,
3789 .dump_tentry = ta_dump_kfib_tentry,
3790 .find_tentry = ta_find_kfib_tentry,
3791 .dump_tinfo = ta_dump_kfib_tinfo,
3792 .print_config = ta_print_kfib_config,
3796 ipfw_table_algo_init(struct ip_fw_chain *ch)
3801 * Register all algorithms presented here.
3803 sz = sizeof(struct table_algo);
3804 ipfw_add_table_algo(ch, &addr_radix, sz, &addr_radix.idx);
3805 ipfw_add_table_algo(ch, &addr_hash, sz, &addr_hash.idx);
3806 ipfw_add_table_algo(ch, &iface_idx, sz, &iface_idx.idx);
3807 ipfw_add_table_algo(ch, &number_array, sz, &number_array.idx);
3808 ipfw_add_table_algo(ch, &flow_hash, sz, &flow_hash.idx);
3809 ipfw_add_table_algo(ch, &addr_kfib, sz, &addr_kfib.idx);
3810 ipfw_add_table_algo(ch, &addr_dxr, sz, &addr_dxr.idx);
3814 ipfw_table_algo_destroy(struct ip_fw_chain *ch)
3817 ipfw_del_table_algo(ch, addr_radix.idx);
3818 ipfw_del_table_algo(ch, addr_hash.idx);
3819 ipfw_del_table_algo(ch, iface_idx.idx);
3820 ipfw_del_table_algo(ch, number_array.idx);
3821 ipfw_del_table_algo(ch, flow_hash.idx);
3822 ipfw_del_table_algo(ch, addr_kfib.idx);
3823 ipfw_del_table_algo(ch, addr_dxr.idx);