2 * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 #include <sys/cdefs.h>
27 __FBSDID("$FreeBSD$");
30 * Lookup table support for ipfw
32 * Lookup tables are implemented (at the moment) using the radix
33 * tree used for routing tables. Tables store key-value entries, where
34 * keys are network prefixes (addr/masklen), and values are integers.
35 * As a degenerate case we can interpret keys as 32-bit integers
38 * The table is protected by the IPFW lock even for manipulation coming
39 * from userland, because operations are typically fast.
45 #error IPFIREWALL requires INET.
47 #include "opt_inet6.h"
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/malloc.h>
52 #include <sys/kernel.h>
54 #include <sys/rwlock.h>
55 #include <sys/socket.h>
56 #include <sys/queue.h>
57 #include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
58 #include <net/radix.h>
59 #include <net/route.h>
62 #include <netinet/in.h>
63 #include <netinet/ip_var.h> /* struct ipfw_rule_ref */
64 #include <netinet/ip_fw.h>
66 #include <netpfil/ipfw/ip_fw_private.h>
69 #include <security/mac/mac_framework.h>
72 MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
/*
 * Fields of the IPv4 table entry (used below as struct table_entry).
 * The radix nodes come first so a struct radix_node pointer handed back
 * by the tree can be cast to the entry; addr and mask are the radix key
 * and its netmask.
 */
75 struct radix_node rn[2];
76 struct sockaddr_in addr, mask;
/*
 * Radix key for interface-name tables: a leading length byte (the
 * position the radix code reads the key length from), padding, then the
 * interface name itself.
 */
81 uint8_t if_len; /* length of this struct */
82 uint8_t pad[7]; /* Align name */
83 char ifname[IF_NAMESIZE]; /* Interface name */
/*
 * Fields of the extended table entry (used below as struct table_xentry).
 * The key is reached as xent->a.addr6 or xent->a.iface and the mask as
 * xent->m.mask6 or xent->m.ifmask, depending on the table type.
 */
87 struct radix_node rn[2];
90 struct sockaddr_in6 addr6;
92 struct xaddr_iface iface;
96 struct sockaddr_in6 mask6;
98 struct xaddr_iface ifmask;
104 * The radix code expects addr and mask to be array of bytes,
105 * with the first byte being the length of the array. rn_inithead
106 * is called with the offset in bits of the lookup key within the
107 * array. If we use a sockaddr_in as the underlying type,
108 * sin_len is conveniently located at offset 0, sin_addr is at
109 * offset 4 and normally aligned.
110 * But for portability, let's avoid assumptions and make the code explicit
/*
 * KEY_LEN(v): the first byte of object v, as an assignable uint8_t.
 * KEY_OFS: offset of sin_addr inside struct sockaddr_in, in bits.
 */
112 #define KEY_LEN(v) *((uint8_t *)&(v))
113 #define KEY_OFS (8*offsetof(struct sockaddr_in, sin_addr))
115 * Do not require radix to compare more than actual IPv4/IPv6 address
/*
 * KEY_LEN_*: key length in bytes, counted from the start of the key
 * structure through the end of the address.  KEY_LEN_IFACE covers only
 * the header before ifname; callers add the name length to it.
 * OFF_LEN_*: offset of the compared data within the key, in bits.
 */
117 #define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
118 #define KEY_LEN_INET6 (offsetof(struct sockaddr_in6, sin6_addr) + sizeof(struct in6_addr))
119 #define KEY_LEN_IFACE (offsetof(struct xaddr_iface, ifname))
121 #define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr))
122 #define OFF_LEN_INET6 (8 * offsetof(struct sockaddr_in6, sin6_addr))
123 #define OFF_LEN_IFACE (8 * offsetof(struct xaddr_iface, ifname))
/*
 * Write an IPv6 netmask with 'mask' leading one-bits into *addr6.
 *
 * Every 32-bit word fully covered by the prefix is set to all-ones; a
 * remaining partial word (1..31 bits) is stored in network byte order.
 * Words beyond the prefix are left untouched, so the caller must pass a
 * zero-filled address.  Prefix lengths above 128 are clamped to 128 so
 * the function can never write outside *addr6.
 */
static void
ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
{
	uint32_t *cp = (uint32_t *)addr6;

	if (mask > 128)
		mask = 128;

	for (; mask >= 32; mask -= 32)
		*cp++ = 0xFFFFFFFF;

	/*
	 * Store the partial word only when one exists.  For a prefix that
	 * is a multiple of 32 there is nothing left to write, and for /128
	 * cp already points one word past the end of the address.
	 */
	if (mask > 0)
		*cp = htonl(~((1U << (32 - mask)) - 1));
}
/*
 * Add one key to table 'tbl' of chain 'ch'.
 *
 * 'type' selects how 'paddr' is interpreted:
 *  - IPFW_TABLE_CIDR with plen == sizeof(in_addr_t): an IPv4 prefix of
 *    'mlen' bits, stored as a struct table_entry in ch->tables[tbl];
 *  - IPFW_TABLE_CIDR with plen == sizeof(struct in6_addr): an IPv6
 *    prefix, stored as a struct table_xentry in ch->xtables[tbl];
 *  - IPFW_TABLE_INTERFACE: an interface name in a buffer of IF_NAMESIZE
 *    bytes, stored in ch->xtables[tbl] with an all-ones mask.  The last
 *    byte of the caller's buffer is overwritten with NUL.
 * Addresses are masked before insertion.  A table already marked with a
 * different type is refused, the radix head is created on first use, and
 * the new entry is freed on the failure paths visible here.
 * NOTE(review): return values, locking and the 'value' assignment are
 * not visible in this excerpt -- confirm against the full function.
 */
139 ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
140 uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value)
142 struct radix_node_head *rnh, **rnh_ptr;
143 struct table_entry *ent;
144 struct table_xentry *xent;
145 struct radix_node *rn;
149 struct sockaddr *addr_ptr, *mask_ptr;
152 if (tbl >= V_fw_tables_max)
156 case IPFW_TABLE_CIDR:
157 if (plen == sizeof(in_addr_t)) {
162 ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
164 /* Set 'total' structure length */
165 KEY_LEN(ent->addr) = KEY_LEN_INET;
166 KEY_LEN(ent->mask) = KEY_LEN_INET;
167 /* Set offset of IPv4 address in bits */
168 offset = OFF_LEN_INET;
/* NOTE(review): the shift is only defined for mlen <= 32; a range check is presumably on a line not shown -- confirm */
169 ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
170 addr = *((in_addr_t *)paddr);
171 ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr;
173 rnh_ptr = &ch->tables[tbl];
175 addr_ptr = (struct sockaddr *)&ent->addr;
176 mask_ptr = (struct sockaddr *)&ent->mask;
179 } else if (plen == sizeof(struct in6_addr)) {
183 xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
185 /* Set 'total' structure length */
186 KEY_LEN(xent->a.addr6) = KEY_LEN_INET6;
187 KEY_LEN(xent->m.mask6) = KEY_LEN_INET6;
188 /* Set offset of IPv6 address in bits */
189 offset = OFF_LEN_INET6;
190 ipv6_writemask(&xent->m.mask6.sin6_addr, mlen);
191 memcpy(&xent->a.addr6.sin6_addr, paddr, sizeof(struct in6_addr));
192 APPLY_MASK(&xent->a.addr6.sin6_addr, &xent->m.mask6.sin6_addr);
194 rnh_ptr = &ch->xtables[tbl];
196 addr_ptr = (struct sockaddr *)&xent->a.addr6;
197 mask_ptr = (struct sockaddr *)&xent->m.mask6;
200 /* Unknown CIDR type */
205 case IPFW_TABLE_INTERFACE:
206 /* Check if string is terminated */
207 c = ((char *)paddr)[IF_NAMESIZE - 1];
208 ((char *)paddr)[IF_NAMESIZE - 1] = '\0';
/* A name filling the whole buffer whose last byte was not NUL was unterminated; mlen is reused to hold the name length */
209 if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0'))
212 /* Include last \0 into comparison */
215 xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
217 /* Set 'total' structure length */
218 KEY_LEN(xent->a.iface) = KEY_LEN_IFACE + mlen;
219 KEY_LEN(xent->m.ifmask) = KEY_LEN_IFACE + mlen;
220 /* Set offset of interface name in bits */
221 offset = OFF_LEN_IFACE;
222 memcpy(xent->a.iface.ifname, paddr, mlen);
223 /* Assume direct match */
224 /* TODO: Add interface pattern matching */
226 memset(xent->m.ifmask.ifname, 0xFF, IF_NAMESIZE);
227 mask_ptr = (struct sockaddr *)&xent->m.ifmask;
230 rnh_ptr = &ch->xtables[tbl];
232 addr_ptr = (struct sockaddr *)&xent->a.iface;
242 /* Check if tabletype is valid */
243 if ((ch->tabletype[tbl] != 0) && (ch->tabletype[tbl] != type)) {
245 free(ent_ptr, M_IPFW_TBL);
249 /* Check if radix tree exists */
250 if ((rnh = *rnh_ptr) == NULL) {
252 /* Create radix for a new table */
253 if (!rn_inithead((void **)&rnh, offset)) {
254 free(ent_ptr, M_IPFW_TBL);
259 if (*rnh_ptr != NULL) {
260 /* Tree is already attached by other thread */
261 rn_detachhead((void **)&rnh);
263 /* Check table type another time */
264 if (ch->tabletype[tbl] != type) {
266 free(ent_ptr, M_IPFW_TBL);
272 * Set table type. It can be set already
273 * (if we have IPv6-only table) but setting
274 * it another time does not hurt
276 ch->tabletype[tbl] = type;
280 rn = rnh->rnh_addaddr(addr_ptr, mask_ptr, rnh, ent_ptr);
284 free(ent_ptr, M_IPFW_TBL);
/*
 * Remove one key from table 'tbl' of chain 'ch'.
 *
 * The lookup key and mask are built in local variables the same way the
 * add path builds them: an IPv4 prefix (plen == sizeof(in_addr_t)) goes
 * to ch->tables[tbl]; an IPv6 prefix or an interface name goes to
 * ch->xtables[tbl].  The radix head must exist and the table type must
 * match 'type'; the entry returned by rnh_deladdr is then freed.
 * For IPFW_TABLE_INTERFACE the last byte of the caller's buffer is
 * overwritten with NUL.
 * NOTE(review): return values and locking are not visible in this
 * excerpt -- confirm against the full function.
 */
291 ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
292 uint8_t plen, uint8_t mlen, uint8_t type)
294 struct radix_node_head *rnh, **rnh_ptr;
295 struct table_entry *ent;
297 struct sockaddr_in sa, mask;
298 struct sockaddr *sa_ptr, *mask_ptr;
301 if (tbl >= V_fw_tables_max)
305 case IPFW_TABLE_CIDR:
306 if (plen == sizeof(in_addr_t)) {
307 /* Set 'total' structure length */
308 KEY_LEN(sa) = KEY_LEN_INET;
309 KEY_LEN(mask) = KEY_LEN_INET;
/* NOTE(review): the shift is only defined for mlen <= 32 -- confirm the caller or a hidden line validates it */
310 mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
311 addr = *((in_addr_t *)paddr);
312 sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr;
313 rnh_ptr = &ch->tables[tbl];
314 sa_ptr = (struct sockaddr *)&sa;
315 mask_ptr = (struct sockaddr *)&mask;
317 } else if (plen == sizeof(struct in6_addr)) {
321 struct sockaddr_in6 sa6, mask6;
322 memset(&sa6, 0, sizeof(struct sockaddr_in6));
323 memset(&mask6, 0, sizeof(struct sockaddr_in6));
324 /* Set 'total' structure length */
325 KEY_LEN(sa6) = KEY_LEN_INET6;
326 KEY_LEN(mask6) = KEY_LEN_INET6;
327 ipv6_writemask(&mask6.sin6_addr, mlen);
328 memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
329 APPLY_MASK(&sa6.sin6_addr, &mask6.sin6_addr);
330 rnh_ptr = &ch->xtables[tbl];
331 sa_ptr = (struct sockaddr *)&sa6;
332 mask_ptr = (struct sockaddr *)&mask6;
335 /* Unknown CIDR type */
340 case IPFW_TABLE_INTERFACE:
341 /* Check if string is terminated */
342 c = ((char *)paddr)[IF_NAMESIZE - 1];
343 ((char *)paddr)[IF_NAMESIZE - 1] = '\0';
/* A name filling the whole buffer whose last byte was not NUL was unterminated; mlen is reused to hold the name length */
344 if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0'))
347 struct xaddr_iface ifname, ifmask;
348 memset(&ifname, 0, sizeof(ifname));
350 /* Include last \0 into comparison */
353 /* Set 'total' structure length */
354 KEY_LEN(ifname) = KEY_LEN_IFACE + mlen;
355 KEY_LEN(ifmask) = KEY_LEN_IFACE + mlen;
356 /* Assume direct match */
357 /* FIXME: Add interface pattern matching */
359 memset(ifmask.ifname, 0xFF, IF_NAMESIZE);
360 mask_ptr = (struct sockaddr *)&ifmask;
363 memcpy(ifname.ifname, paddr, mlen);
365 rnh_ptr = &ch->xtables[tbl];
366 sa_ptr = (struct sockaddr *)&ifname;
375 if ((rnh = *rnh_ptr) == NULL) {
380 if (ch->tabletype[tbl] != type) {
/* The cast to struct table_entry is used only to obtain a pointer to free; extended entries take this path too */
385 ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh);
391 free(ent, M_IPFW_TBL);
/*
 * Tree-walk callback: delete node 'rn' (by its own key and mask) from
 * the radix head passed in 'arg' and free the entry that comes back.
 */
396 flush_table_entry(struct radix_node *rn, void *arg)
398 struct radix_node_head * const rnh = arg;
399 struct table_entry *ent;
401 ent = (struct table_entry *)
402 rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
404 free(ent, M_IPFW_TBL);
/*
 * Empty table 'tbl' of chain 'ch'.
 *
 * Both radix heads (IPv4 and extended) are first unhooked from the chain
 * and the table type is cleared; each saved tree is then walked with
 * flush_table_entry to free its entries and detached.
 * NOTE(review): the NULL guards around the walks, the locking and the
 * return value are not visible in this excerpt -- confirm.
 */
409 ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl)
411 struct radix_node_head *rnh, *xrnh;
413 if (tbl >= V_fw_tables_max)
417 * We free both (IPv4 and extended) radix trees and
418 * clear table type here to permit table to be reused
419 * for different type without module reload
423 /* Set IPv4 table pointer to zero */
424 if ((rnh = ch->tables[tbl]) != NULL)
425 ch->tables[tbl] = NULL;
426 /* Set extended table pointer to zero */
427 if ((xrnh = ch->xtables[tbl]) != NULL)
428 ch->xtables[tbl] = NULL;
429 /* Zero table type */
430 ch->tabletype[tbl] = 0;
434 rnh->rnh_walktree(rnh, flush_table_entry, rnh);
435 rn_detachhead((void **)&rnh);
439 xrnh->rnh_walktree(xrnh, flush_table_entry, xrnh);
440 rn_detachhead((void **)&xrnh);
/*
 * Tear down all table state of chain 'ch': flush every table index
 * below V_fw_tables_max, then free the three per-chain arrays.
 */
447 ipfw_destroy_tables(struct ip_fw_chain *ch)
451 /* Flush all tables */
452 for (tbl = 0; tbl < V_fw_tables_max; tbl++)
453 ipfw_flush_table(ch, tbl);
455 /* Free the pointer arrays themselves */
456 free(ch->tables, M_IPFW);
457 free(ch->xtables, M_IPFW);
458 free(ch->tabletype, M_IPFW);
/*
 * Allocate the per-chain table arrays for chain 'ch', each zero-filled
 * and sized for V_fw_tables_max tables: IPv4 radix heads, extended
 * radix heads, and one type byte per table.
 */
462 ipfw_init_tables(struct ip_fw_chain *ch)
464 /* Allocate pointers */
465 ch->tables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
466 ch->xtables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
467 ch->tabletype = malloc(V_fw_tables_max * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
/*
 * Change the number of tables of chain 'ch' to 'ntables' (clamped to
 * IPFW_TABLES_MAX).
 *
 * New zero-filled arrays are allocated, the first min(old, new) slots
 * are copied over, the chain is switched to the new arrays and
 * V_fw_tables_max is updated.  When shrinking, the radix trees of the
 * dropped slots are flushed and detached.  The old arrays are freed last.
 * NOTE(review): locking, the return value and the ch->tables assignment
 * are not visible in this excerpt -- confirm against the full function.
 */
472 ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
474 struct radix_node_head **tables, **xtables, *rnh;
475 struct radix_node_head **tables_old, **xtables_old;
476 uint8_t *tabletype, *tabletype_old;
477 unsigned int ntables_old, tbl;
479 /* Check new value for validity */
480 if (ntables > IPFW_TABLES_MAX)
481 ntables = IPFW_TABLES_MAX;
483 /* Allocate new pointers */
484 tables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
485 xtables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
486 tabletype = malloc(ntables * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
/* Number of slots to carry over: the smaller of the old and new sizes */
490 tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
492 /* Copy old table pointers */
493 memcpy(tables, ch->tables, sizeof(void *) * tbl);
494 memcpy(xtables, ch->xtables, sizeof(void *) * tbl);
495 memcpy(tabletype, ch->tabletype, sizeof(uint8_t) * tbl);
497 /* Change pointers and number of tables */
498 tables_old = ch->tables;
499 xtables_old = ch->xtables;
500 tabletype_old = ch->tabletype;
502 ch->xtables = xtables;
503 ch->tabletype = tabletype;
505 ntables_old = V_fw_tables_max;
506 V_fw_tables_max = ntables;
510 /* Check if we need to destroy radix trees */
511 if (ntables < ntables_old) {
512 for (tbl = ntables; tbl < ntables_old; tbl++) {
513 if ((rnh = tables_old[tbl]) != NULL) {
514 rnh->rnh_walktree(rnh, flush_table_entry, rnh);
515 rn_detachhead((void **)&rnh);
518 if ((rnh = xtables_old[tbl]) != NULL) {
519 rnh->rnh_walktree(rnh, flush_table_entry, rnh);
520 rn_detachhead((void **)&rnh);
525 /* Free old pointers */
526 free(tables_old, M_IPFW);
527 free(xtables_old, M_IPFW);
528 free(tabletype_old, M_IPFW);
/*
 * Look up IPv4 address 'addr' in the IPv4 radix tree of table 'tbl'.
 * Only the key length and sin_addr of the local sockaddr_in are set
 * before it is handed to rnh_matchaddr.
 * NOTE(review): the remaining parameters and the handling of the match
 * result are not visible in this excerpt.
 */
534 ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
537 struct radix_node_head *rnh;
538 struct table_entry *ent;
539 struct sockaddr_in sa;
541 if (tbl >= V_fw_tables_max)
543 if ((rnh = ch->tables[tbl]) == NULL)
545 KEY_LEN(sa) = KEY_LEN_INET;
546 sa.sin_addr.s_addr = addr;
547 ent = (struct table_entry *)(rnh->rnh_matchaddr(&sa, rnh));
/*
 * Look up 'paddr' in the extended radix tree of table 'tbl'.
 * For IPFW_TABLE_CIDR 'paddr' is read as a struct in6_addr; for
 * IPFW_TABLE_INTERFACE it is read as an interface-name string.
 * NOTE(review): how 'val' is filled and what is returned are not
 * visible in this excerpt.
 */
556 ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
557 uint32_t *val, int type)
559 struct radix_node_head *rnh;
560 struct table_xentry *xent;
561 struct sockaddr_in6 sa6;
562 struct xaddr_iface iface;
564 if (tbl >= V_fw_tables_max)
566 if ((rnh = ch->xtables[tbl]) == NULL)
570 case IPFW_TABLE_CIDR:
571 KEY_LEN(sa6) = KEY_LEN_INET6;
572 memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
573 xent = (struct table_xentry *)(rnh->rnh_matchaddr(&sa6, rnh));
576 case IPFW_TABLE_INTERFACE:
/* Key length = header + name length as reported by strlcpy + 1 for the terminating NUL, matching the add path */
577 KEY_LEN(iface) = KEY_LEN_IFACE +
578 strlcpy(iface.ifname, (char *)paddr, IF_NAMESIZE) + 1;
579 /* Assume direct match */
580 /* FIXME: Add interface pattern matching */
581 xent = (struct table_xentry *)(rnh->rnh_matchaddr(&iface, rnh));
/*
 * Tree-walk callback used by ipfw_count_table; 'arg' points to the
 * 32-bit counter.
 * NOTE(review): the statement that updates the counter is not visible
 * in this excerpt.
 */
596 count_table_entry(struct radix_node *rn, void *arg)
598 u_int32_t * const cnt = arg;
/*
 * Walk the IPv4 radix tree of table 'tbl' with count_table_entry,
 * passing 'cnt' as the accumulator.  The extended tree is not visited.
 * NOTE(review): initialisation of *cnt and the return value are not
 * visible in this excerpt.
 */
605 ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
607 struct radix_node_head *rnh;
609 if (tbl >= V_fw_tables_max)
612 if ((rnh = ch->tables[tbl]) == NULL)
614 rnh->rnh_walktree(rnh, count_table_entry, cnt);
/*
 * Tree-walk callback: export one IPv4 entry into the next free slot of
 * the ipfw_table passed in 'arg' (slot index tbl->cnt, capacity
 * tbl->size).
 * NOTE(review): the action taken when the buffer is full, the zero-mask
 * branch and the tbl->cnt update are not visible in this excerpt.
 */
619 dump_table_entry(struct radix_node *rn, void *arg)
621 struct table_entry * const n = (struct table_entry *)rn;
622 ipfw_table * const tbl = arg;
623 ipfw_table_entry *ent;
625 if (tbl->cnt == tbl->size)
627 ent = &tbl->ent[tbl->cnt];
629 if (in_nullhost(n->mask.sin_addr))
/* ffs() gives the 1-based position of the lowest set bit, so 33 - ffs() is the prefix length of a contiguous mask */
632 ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
633 ent->addr = n->addr.sin_addr.s_addr;
634 ent->value = n->value;
/*
 * Dump the IPv4 entries of table tbl->tbl into 'tbl' by walking the
 * radix tree with dump_table_entry.
 * NOTE(review): initialisation of tbl->cnt and the return value are not
 * visible in this excerpt.
 */
640 ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl)
642 struct radix_node_head *rnh;
644 if (tbl->tbl >= V_fw_tables_max)
647 if ((rnh = ch->tables[tbl->tbl]) == NULL)
649 rnh->rnh_walktree(rnh, dump_table_entry, tbl);
/*
 * Tree-walk callback: add the size of one ipfw_table_xentry to the
 * byte counter that 'arg' points to.
 */
654 count_table_xentry(struct radix_node *rn, void *arg)
656 uint32_t * const cnt = arg;
658 (*cnt) += sizeof(ipfw_table_xentry);
/*
 * Accumulate into *cnt the number of bytes needed to dump table 'tbl'
 * in the extended format: one ipfw_table_xentry per node of both the
 * IPv4 and the extended tree, plus the ipfw_xtable header.
 * NOTE(review): the condition guarding the header addition and the
 * initialisation of *cnt are not visible in this excerpt.
 */
663 ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
665 struct radix_node_head *rnh;
667 if (tbl >= V_fw_tables_max)
670 if ((rnh = ch->tables[tbl]) != NULL)
671 rnh->rnh_walktree(rnh, count_table_xentry, cnt);
672 if ((rnh = ch->xtables[tbl]) != NULL)
673 rnh->rnh_walktree(rnh, count_table_xentry, cnt);
674 /* Return zero if table is empty */
676 (*cnt) += sizeof(ipfw_xtable);
/*
 * Tree-walk callback: export one IPv4 entry in the extended record
 * format into slot tbl->cnt of the ipfw_xtable passed in 'arg'.
 * NOTE(review): the zero-mask branch and the tbl->cnt update are not
 * visible in this excerpt.
 */
682 dump_table_xentry_base(struct radix_node *rn, void *arg)
684 struct table_entry * const n = (struct table_entry *)rn;
685 ipfw_xtable * const tbl = arg;
686 ipfw_table_xentry *xent;
688 /* Out of memory, returning */
689 if (tbl->cnt == tbl->size)
691 xent = &tbl->xent[tbl->cnt];
692 xent->len = sizeof(ipfw_table_xentry);
693 xent->tbl = tbl->tbl;
694 if (in_nullhost(n->mask.sin_addr))
/* ffs() gives the 1-based position of the lowest set bit, so 33 - ffs() is the prefix length of a contiguous mask */
697 xent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
698 /* Save IPv4 address as deprecated IPv6 compatible */
699 xent->k.addr6.s6_addr32[3] = n->addr.sin_addr.s_addr;
700 xent->value = n->value;
/*
 * Tree-walk callback: export one extended entry (IPv6 prefix or
 * interface name) into slot tbl->cnt of the ipfw_xtable passed in 'arg'.
 * NOTE(review): the switch expression and the tbl->cnt update are not
 * visible in this excerpt.
 */
706 dump_table_xentry_extended(struct radix_node *rn, void *arg)
708 struct table_xentry * const n = (struct table_xentry *)rn;
709 ipfw_xtable * const tbl = arg;
710 ipfw_table_xentry *xent;
715 /* Out of memory, returning */
716 if (tbl->cnt == tbl->size)
718 xent = &tbl->xent[tbl->cnt];
719 xent->len = sizeof(ipfw_table_xentry);
720 xent->tbl = tbl->tbl;
724 case IPFW_TABLE_CIDR:
725 /* Count IPv6 mask */
726 v = (uint32_t *)&n->m.mask6.sin6_addr;
/* masklen is accumulated with +=, so the slot's masklen presumably starts at zero -- TODO confirm the output buffer is zeroed */
727 for (i = 0; i < sizeof(struct in6_addr) / 4; i++, v++)
728 xent->masklen += bitcount32(*v);
729 memcpy(&xent->k, &n->a.addr6.sin6_addr, sizeof(struct in6_addr));
732 case IPFW_TABLE_INTERFACE:
733 /* Assume exact mask */
734 xent->masklen = 8 * IF_NAMESIZE;
735 memcpy(&xent->k, &n->a.iface.ifname, IF_NAMESIZE);
739 /* unknown, skip entry */
743 xent->value = n->value;
/*
 * Dump table tbl->tbl in the extended format: record the table type in
 * tbl->type, then walk the IPv4 tree with dump_table_xentry_base and
 * the extended tree with dump_table_xentry_extended.
 * NOTE(review): initialisation of tbl->cnt and the return value are not
 * visible in this excerpt.
 */
749 ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl)
751 struct radix_node_head *rnh;
753 if (tbl->tbl >= V_fw_tables_max)
756 tbl->type = ch->tabletype[tbl->tbl];
757 if ((rnh = ch->tables[tbl->tbl]) != NULL)
758 rnh->rnh_walktree(rnh, dump_table_xentry_base, tbl);
759 if ((rnh = ch->xtables[tbl->tbl]) != NULL)
760 rnh->rnh_walktree(rnh, dump_table_xentry_extended, tbl);