2 * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 #include <sys/cdefs.h>
27 __FBSDID("$FreeBSD$");
30 * Lookup table support for ipfw
32 * Lookup tables are implemented (at the moment) using the radix
33 * tree used for routing tables. Tables store key-value entries, where
34 * keys are network prefixes (addr/masklen), and values are integers.
35 * As a degenerate case we can interpret keys as 32-bit integers
38 * The table is protected by the IPFW lock even for manipulation coming
39 * from userland, because operations are typically fast.
45 #error IPFIREWALL requires INET.
47 #include "opt_inet6.h"
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/malloc.h>
52 #include <sys/kernel.h>
54 #include <sys/rwlock.h>
55 #include <sys/socket.h>
56 #include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
57 #include <net/radix.h>
58 #include <net/route.h>
61 #include <netinet/in.h>
62 #include <netinet/ip_var.h> /* struct ipfw_rule_ref */
63 #include <netinet/ip_fw.h>
64 #include <sys/queue.h> /* LIST_HEAD */
65 #include <netinet/ipfw/ip_fw_private.h>
68 #include <security/mac/mac_framework.h>
/* Malloc type used in this file for table entries (see the malloc/free calls with M_IPFW_TBL). */
71 MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
/*
 * Visible fields of the IPv4 table entry (used below as struct table_entry).
 * The tree-walk callbacks cast a struct radix_node pointer to this type.
 * NOTE(review): a 'value' member is read elsewhere (n->value) but its
 * declaration is not visible in this listing.
 */
74 struct radix_node rn[2];
/* Key and netmask; only the sin_addr part carries the address/mask bits */
75 struct sockaddr_in addr, mask;
/*
 * Visible fields of struct xaddr_iface, the key used for interface-name
 * tables. KEY_LEN() stores the key length in the first byte of a key object.
 */
80 uint8_t if_len; /* length of this struct */
81 uint8_t pad[7]; /* Align name */
82 char ifname[IF_NAMESIZE]; /* Interface name */
/*
 * Visible fields of the extended table entry (struct table_xentry).
 * The code below reaches the key through xent->a.* and the mask through
 * xent->m.*; the enclosing declarations are not visible in this listing.
 */
86 struct radix_node rn[2];
/* Key: an IPv6 address or an interface name */
89 struct sockaddr_in6 addr6;
91 struct xaddr_iface iface;
/* Mask matching the key above */
95 struct sockaddr_in6 mask6;
97 struct xaddr_iface ifmask;
103 * The radix code expects addr and mask to be array of bytes,
104 * with the first byte being the length of the array. rn_inithead
105 * is called with the offset in bits of the lookup key within the
106 * array. If we use a sockaddr_in as the underlying type,
107 * sin_len is conveniently located at offset 0, sin_addr is at
108 * offset 4 and normally aligned.
109 * But for portability, let's avoid relying on that assumption and make the code explicit
/* KEY_LEN(v): the first byte of object v, used as an lvalue to store the key length. */
111 #define KEY_LEN(v) *((uint8_t *)&(v))
/* Offset, in bits, of sin_addr within struct sockaddr_in. */
112 #define KEY_OFS (8*offsetof(struct sockaddr_in, sin_addr))
114 * Do not require radix to compare more than the actual IPv4/IPv6 address
/* Key lengths in bytes: everything up to and including the address field. */
116 #define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
117 #define KEY_LEN_INET6 (offsetof(struct sockaddr_in6, sin6_addr) + sizeof(struct in6_addr))
/* Interface keys: only the bytes before ifname; users add the name length themselves. */
118 #define KEY_LEN_IFACE (offsetof(struct xaddr_iface, ifname))
/* Offsets, in bits, of the key data inside each key structure. */
120 #define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr))
121 #define OFF_LEN_INET6 (8 * offsetof(struct sockaddr_in6, sin6_addr))
122 #define OFF_LEN_IFACE (8 * offsetof(struct xaddr_iface, ifname))
/*
 * Fills addr6 with a netmask derived from the bit count 'mask', working on
 * the address as 32-bit words.
 *
 * NOTE(review): the body of the for loop is not visible in this listing.
 * If it advances cp once per full 32-bit word, then mask == 128 leaves cp
 * one word past the end of addr6 when the final store below runs - confirm.
 */
126 ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
/* Consume the mask 32 bits at a time */
130 for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32)
/* Remaining (fewer than 32) bits: set the top 'mask' bits, network byte order */
132 *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0);
/*
 * Adds one entry to table 'tbl' of chain 'ch'.
 *
 * ch    - chain owning the tables/xtables/tabletype arrays.
 * tbl   - table index; compared against V_fw_tables_max.
 * paddr - key: an in_addr_t, a struct in6_addr, or an interface name string.
 * plen  - key size in bytes; selects IPv4 vs IPv6 for IPFW_TABLE_CIDR.
 * mlen  - prefix length in bits for CIDR keys; overwritten with the name
 *         length for interface keys.
 * type  - IPFW_TABLE_CIDR or IPFW_TABLE_INTERFACE.
 * value - presumably stored in the new entry; the assignment is not visible
 *         in this listing.
 *
 * NOTE(review): the numbering embedded in this listing skips lines, so the
 * return statements, locking and several assignments (e.g. ent_ptr) are not
 * visible here.
 */
136 ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
137 uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value)
139 struct radix_node_head *rnh, **rnh_ptr;
140 struct table_entry *ent;
141 struct table_xentry *xent;
142 struct radix_node *rn;
146 struct sockaddr *addr_ptr, *mask_ptr;
149 if (tbl >= V_fw_tables_max)
153 case IPFW_TABLE_CIDR:
/* IPv4 key: entry goes into ch->tables[] */
154 if (plen == sizeof(in_addr_t)) {
159 ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
161 /* Set 'total' structure length */
162 KEY_LEN(ent->addr) = KEY_LEN_INET;
163 KEY_LEN(ent->mask) = KEY_LEN_INET;
164 /* Set offset of IPv4 address in bits */
165 offset = OFF_LEN_INET;
/*
 * NOTE(review): 1 << (32 - mlen) is only well defined for mlen in 1..32;
 * no range check on mlen is visible in this listing - confirm one exists.
 */
166 ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
167 addr = *((in_addr_t *)paddr);
/* Store the address with the bits outside the mask cleared */
168 ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr;
170 rnh_ptr = &ch->tables[tbl];
172 addr_ptr = (struct sockaddr *)&ent->addr;
173 mask_ptr = (struct sockaddr *)&ent->mask;
/* IPv6 key: entry goes into ch->xtables[] */
176 } else if (plen == sizeof(struct in6_addr)) {
180 xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
182 /* Set 'total' structure length */
183 KEY_LEN(xent->a.addr6) = KEY_LEN_INET6;
184 KEY_LEN(xent->m.mask6) = KEY_LEN_INET6;
185 /* Set offset of IPv6 address in bits */
186 offset = OFF_LEN_INET6;
187 ipv6_writemask(&xent->m.mask6.sin6_addr, mlen);
188 memcpy(&xent->a.addr6.sin6_addr, paddr, sizeof(struct in6_addr));
189 APPLY_MASK(&xent->a.addr6.sin6_addr, &xent->m.mask6.sin6_addr);
191 rnh_ptr = &ch->xtables[tbl];
193 addr_ptr = (struct sockaddr *)&xent->a.addr6;
194 mask_ptr = (struct sockaddr *)&xent->m.mask6;
197 /* Unknown CIDR type */
202 case IPFW_TABLE_INTERFACE:
203 /* Check if string is terminated */
/*
 * Saves the last byte of the caller's buffer, forces a NUL there (this
 * writes to the caller's memory), then tests for a name that filled the
 * buffer without its own terminator. The action taken on that condition is
 * not visible in this listing.
 */
204 c = ((char *)paddr)[IF_NAMESIZE - 1];
205 ((char *)paddr)[IF_NAMESIZE - 1] = '\0';
206 if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0'))
209 /* Include last \0 into comparison */
212 xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
214 /* Set 'total' structure length */
215 KEY_LEN(xent->a.iface) = KEY_LEN_IFACE + mlen;
216 KEY_LEN(xent->m.ifmask) = KEY_LEN_IFACE + mlen;
217 /* Set offset of interface name in bits */
218 offset = OFF_LEN_IFACE;
219 memcpy(xent->a.iface.ifname, paddr, mlen);
220 /* Assume direct match */
221 /* TODO: Add interface pattern matching */
223 memset(xent->m.ifmask.ifname, 0xFF, IF_NAMESIZE);
224 mask_ptr = (struct sockaddr *)&xent->m.ifmask;
227 rnh_ptr = &ch->xtables[tbl];
229 addr_ptr = (struct sockaddr *)&xent->a.iface;
239 /* Check if tabletype is valid */
/* A table already typed differently rejects the new entry; the entry is freed */
240 if ((ch->tabletype[tbl] != 0) && (ch->tabletype[tbl] != type)) {
242 free(ent_ptr, M_IPFW_TBL);
246 /* Check if radix tree exists */
247 if ((rnh = *rnh_ptr) == NULL) {
249 /* Create radix for a new table */
250 if (!rn_inithead((void **)&rnh, offset)) {
251 free(ent_ptr, M_IPFW_TBL);
/* Re-check the slot after creating the head: another head may have been attached meanwhile */
256 if (*rnh_ptr != NULL) {
257 /* Tree is already attached by other thread */
258 rn_detachhead((void **)&rnh);
260 /* Check table type another time */
261 if (ch->tabletype[tbl] != type) {
263 free(ent_ptr, M_IPFW_TBL);
269 * Set table type. It can be set already
270 * (if we have IPv6-only table) but setting
271 * it another time does not hurt
273 ch->tabletype[tbl] = type;
277 rn = rnh->rnh_addaddr(addr_ptr, mask_ptr, rnh, ent_ptr);
/* NOTE(review): the condition guarding this free is not visible in this listing */
281 free(ent_ptr, M_IPFW_TBL);
/*
 * Removes one entry from table 'tbl' of chain 'ch'.
 *
 * The key and mask are built on the stack the same way the add path builds
 * them (masked IPv4 address, masked IPv6 address, or interface name), then
 * handed to the radix head's rnh_deladdr; the entry it returns is freed.
 *
 * ch    - chain owning the tables/xtables/tabletype arrays.
 * tbl   - table index; compared against V_fw_tables_max.
 * paddr - key: an in_addr_t, a struct in6_addr, or an interface name string.
 * plen  - key size in bytes; selects IPv4 vs IPv6 for IPFW_TABLE_CIDR.
 * mlen  - prefix length in bits for CIDR keys; overwritten with the name
 *         length for interface keys.
 * type  - IPFW_TABLE_CIDR or IPFW_TABLE_INTERFACE; compared against
 *         ch->tabletype[tbl].
 *
 * NOTE(review): the numbering embedded in this listing skips lines, so the
 * return statements, locking and any preprocessor guards are not visible.
 */
288 ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
289 uint8_t plen, uint8_t mlen, uint8_t type)
291 struct radix_node_head *rnh, **rnh_ptr;
292 struct table_entry *ent;
294 struct sockaddr_in sa, mask;
295 struct sockaddr *sa_ptr, *mask_ptr;
298 if (tbl >= V_fw_tables_max)
302 case IPFW_TABLE_CIDR:
/* IPv4 key: looked up in ch->tables[] */
303 if (plen == sizeof(in_addr_t)) {
304 /* Set 'total' structure length */
305 KEY_LEN(sa) = KEY_LEN_INET;
306 KEY_LEN(mask) = KEY_LEN_INET;
/*
 * NOTE(review): 1 << (32 - mlen) is only well defined for mlen in 1..32;
 * no range check on mlen is visible in this listing - confirm one exists.
 */
307 mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
308 addr = *((in_addr_t *)paddr);
309 sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr;
310 rnh_ptr = &ch->tables[tbl];
311 sa_ptr = (struct sockaddr *)&sa;
312 mask_ptr = (struct sockaddr *)&mask;
/* IPv6 key: looked up in ch->xtables[] */
314 } else if (plen == sizeof(struct in6_addr)) {
318 struct sockaddr_in6 sa6, mask6;
319 memset(&sa6, 0, sizeof(struct sockaddr_in6));
320 memset(&mask6, 0, sizeof(struct sockaddr_in6));
321 /* Set 'total' structure length */
322 KEY_LEN(sa6) = KEY_LEN_INET6;
323 KEY_LEN(mask6) = KEY_LEN_INET6;
324 ipv6_writemask(&mask6.sin6_addr, mlen);
325 memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
326 APPLY_MASK(&sa6.sin6_addr, &mask6.sin6_addr);
327 rnh_ptr = &ch->xtables[tbl];
328 sa_ptr = (struct sockaddr *)&sa6;
329 mask_ptr = (struct sockaddr *)&mask6;
332 /* Unknown CIDR type */
337 case IPFW_TABLE_INTERFACE:
338 /* Check if string is terminated */
/*
 * Saves the last byte of the caller's buffer, forces a NUL there (this
 * writes to the caller's memory), then tests for a name that filled the
 * buffer without its own terminator. The action taken on that condition is
 * not visible in this listing.
 */
339 c = ((char *)paddr)[IF_NAMESIZE - 1];
340 ((char *)paddr)[IF_NAMESIZE - 1] = '\0';
341 if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0'))
344 struct xaddr_iface ifname, ifmask;
345 memset(&ifname, 0, sizeof(ifname));
347 /* Include last \0 into comparison */
350 /* Set 'total' structure length */
351 KEY_LEN(ifname) = KEY_LEN_IFACE + mlen;
352 KEY_LEN(ifmask) = KEY_LEN_IFACE + mlen;
353 /* Assume direct match */
354 /* FIXME: Add interface pattern matching */
356 memset(ifmask.ifname, 0xFF, IF_NAMESIZE);
357 mask_ptr = (struct sockaddr *)&ifmask;
360 memcpy(ifname.ifname, paddr, mlen);
362 rnh_ptr = &ch->xtables[tbl];
363 sa_ptr = (struct sockaddr *)&ifname;
/* No radix head means the table holds nothing to delete */
372 if ((rnh = *rnh_ptr) == NULL) {
/* The table must already have the requested type */
377 if (ch->tabletype[tbl] != type) {
382 ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh);
388 free(ent, M_IPFW_TBL);
/*
 * Tree-walk callback: removes node 'rn' from the radix head passed in 'arg'
 * (using the node's own key and mask) and frees the entry returned by
 * rnh_deladdr. Any guard on the free is not visible in this listing.
 */
393 flush_table_entry(struct radix_node *rn, void *arg)
395 struct radix_node_head * const rnh = arg;
396 struct table_entry *ent;
398 ent = (struct table_entry *)
399 rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
401 free(ent, M_IPFW_TBL);
/*
 * Empties table 'tbl' of chain 'ch': takes both radix heads out of the
 * chain's arrays, resets the table type to 0, then walks each detached tree
 * with flush_table_entry and releases the head with rn_detachhead.
 *
 * NOTE(review): locking, the NULL guards around the two walks and the return
 * statements are not visible in this listing.
 */
406 ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl)
408 struct radix_node_head *rnh, *xrnh;
410 if (tbl >= V_fw_tables_max)
414 * We free both (IPv4 and extended) radix trees and
415 * clear table type here to permit table to be reused
416 * for different type without module reload
420 /* Set IPv4 table pointer to zero */
421 if ((rnh = ch->tables[tbl]) != NULL)
422 ch->tables[tbl] = NULL;
423 /* Set extended table pointer to zero */
424 if ((xrnh = ch->xtables[tbl]) != NULL)
425 ch->xtables[tbl] = NULL;
426 /* Zero table type */
427 ch->tabletype[tbl] = 0;
/* Free the entries of the IPv4 tree, then the head itself */
431 rnh->rnh_walktree(rnh, flush_table_entry, rnh);
432 rn_detachhead((void **)&rnh);
/* Same for the extended tree */
436 xrnh->rnh_walktree(xrnh, flush_table_entry, xrnh);
437 rn_detachhead((void **)&xrnh);
/*
 * Tears down all tables of chain 'ch': flushes every table index below
 * V_fw_tables_max, then frees the three per-chain arrays.
 */
444 ipfw_destroy_tables(struct ip_fw_chain *ch)
448 /* Flush all tables */
449 for (tbl = 0; tbl < V_fw_tables_max; tbl++)
450 ipfw_flush_table(ch, tbl);
452 /* Free the pointer arrays themselves */
453 free(ch->tables, M_IPFW);
454 free(ch->xtables, M_IPFW);
455 free(ch->tabletype, M_IPFW);
/*
 * Allocates the per-chain arrays for 'ch', each with V_fw_tables_max
 * elements and zero-filled (M_ZERO): IPv4 radix heads, extended radix heads
 * and one type byte per table.
 */
459 ipfw_init_tables(struct ip_fw_chain *ch)
461 /* Allocate pointers */
462 ch->tables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
463 ch->xtables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
464 ch->tabletype = malloc(V_fw_tables_max * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
/*
 * Changes the number of tables of chain 'ch' to 'ntables' (clamped to
 * IPFW_TABLES_MAX).
 *
 * New zero-filled arrays are allocated, the overlapping prefix of the old
 * arrays is copied in, the chain is switched to the new arrays and
 * V_fw_tables_max is updated. When shrinking, the radix trees of the tables
 * that no longer fit are flushed and released. Finally the old arrays are
 * freed.
 *
 * NOTE(review): locking and some of the pointer assignments are not visible
 * in this listing.
 */
469 ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
471 struct radix_node_head **tables, **xtables, *rnh;
472 struct radix_node_head **tables_old, **xtables_old;
473 uint8_t *tabletype, *tabletype_old;
474 unsigned int ntables_old, tbl;
476 /* Check new value for validity */
477 if (ntables > IPFW_TABLES_MAX)
478 ntables = IPFW_TABLES_MAX;
480 /* Allocate new pointers */
481 tables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
482 xtables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
483 tabletype = malloc(ntables * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
/* Number of elements to carry over: the smaller of the old and new sizes */
487 tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
489 /* Copy old table pointers */
490 memcpy(tables, ch->tables, sizeof(void *) * tbl);
491 memcpy(xtables, ch->xtables, sizeof(void *) * tbl);
492 memcpy(tabletype, ch->tabletype, sizeof(uint8_t) * tbl);
494 /* Change pointers and number of tables */
495 tables_old = ch->tables;
496 xtables_old = ch->xtables;
497 tabletype_old = ch->tabletype;
499 ch->xtables = xtables;
500 ch->tabletype = tabletype;
502 ntables_old = V_fw_tables_max;
503 V_fw_tables_max = ntables;
507 /* Check if we need to destroy radix trees */
508 if (ntables < ntables_old) {
/* Tables at index ntables and above were not copied; release their trees */
509 for (tbl = ntables; tbl < ntables_old; tbl++) {
510 if ((rnh = tables_old[tbl]) != NULL) {
511 rnh->rnh_walktree(rnh, flush_table_entry, rnh);
512 rn_detachhead((void **)&rnh);
515 if ((rnh = xtables_old[tbl]) != NULL) {
516 rnh->rnh_walktree(rnh, flush_table_entry, rnh);
517 rn_detachhead((void **)&rnh);
522 /* Free old pointers */
523 free(tables_old, M_IPFW);
524 free(xtables_old, M_IPFW);
525 free(tabletype_old, M_IPFW);
/*
 * Looks up IPv4 address 'addr' in table 'tbl' of chain 'ch' by building a
 * sockaddr_in key and calling the radix head's rnh_lookup.
 *
 * NOTE(review): the rest of the parameter list, the handling of the lookup
 * result and the return statements are not visible in this listing.
 */
531 ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
534 struct radix_node_head *rnh;
535 struct table_entry *ent;
536 struct sockaddr_in sa;
538 if (tbl >= V_fw_tables_max)
/* No radix head: nothing was ever added to this table's IPv4 tree */
540 if ((rnh = ch->tables[tbl]) == NULL)
/* Only the length byte and the address are set in the key */
542 KEY_LEN(sa) = KEY_LEN_INET;
543 sa.sin_addr.s_addr = addr;
544 ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh));
/*
 * Looks up an extended key in ch->xtables[tbl].
 *
 * ch    - chain owning the xtables array.
 * tbl   - table index; compared against V_fw_tables_max.
 * paddr - a struct in6_addr for IPFW_TABLE_CIDR, or an interface name string
 *         for IPFW_TABLE_INTERFACE.
 * val   - presumably receives the matched entry's value; the store is not
 *         visible in this listing.
 * type  - selects how paddr is interpreted.
 *
 * NOTE(review): the handling of the lookup result and the return statements
 * are not visible in this listing.
 */
553 ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
554 uint32_t *val, int type)
556 struct radix_node_head *rnh;
557 struct table_xentry *xent;
558 struct sockaddr_in6 sa6;
559 struct xaddr_iface iface;
561 if (tbl >= V_fw_tables_max)
563 if ((rnh = ch->xtables[tbl]) == NULL)
567 case IPFW_TABLE_CIDR:
568 KEY_LEN(sa6) = KEY_LEN_INET6;
569 memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
570 xent = (struct table_xentry *)(rnh->rnh_lookup(&sa6, NULL, rnh));
573 case IPFW_TABLE_INTERFACE:
/*
 * Key length covers the header bytes, the name and its terminating NUL.
 * NOTE(review): strlcpy returns the length of the source string, so a name
 * of IF_NAMESIZE characters or more would give a length larger than what
 * was copied - confirm callers always pass shorter names.
 */
574 KEY_LEN(iface) = KEY_LEN_IFACE +
575 strlcpy(iface.ifname, (char *)paddr, IF_NAMESIZE) + 1;
576 /* Assume direct match */
577 /* FIXME: Add interface pattern matching */
578 xent = (struct table_xentry *)(rnh->rnh_lookup(&iface, NULL, rnh));
/*
 * Tree-walk callback used by ipfw_count_table; 'arg' points to a u_int32_t
 * counter. The statement that updates the counter is not visible in this
 * listing.
 */
593 count_table_entry(struct radix_node *rn, void *arg)
595 u_int32_t * const cnt = arg;
/*
 * Counts the entries of the IPv4 tree of table 'tbl' by walking it with
 * count_table_entry, which receives 'cnt' as its argument.
 * NOTE(review): any initialisation of *cnt and the return statements are not
 * visible in this listing.
 */
602 ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
604 struct radix_node_head *rnh;
606 if (tbl >= V_fw_tables_max)
609 if ((rnh = ch->tables[tbl]) == NULL)
611 rnh->rnh_walktree(rnh, count_table_entry, cnt);
/*
 * Tree-walk callback: copies one IPv4 entry into the next free slot of the
 * ipfw_table passed in 'arg' (tbl->ent[tbl->cnt]).
 * NOTE(review): the action taken when the buffer is full, the masklen used
 * for an all-zero mask and the increment of tbl->cnt are not visible in this
 * listing.
 */
616 dump_table_entry(struct radix_node *rn, void *arg)
618 struct table_entry * const n = (struct table_entry *)rn;
619 ipfw_table * const tbl = arg;
620 ipfw_table_entry *ent;
/* Output buffer already holds tbl->size entries */
622 if (tbl->cnt == tbl->size)
624 ent = &tbl->ent[tbl->cnt];
626 if (in_nullhost(n->mask.sin_addr))
/* ffs() gives the 1-based position of the lowest set bit, so this is the prefix length */
629 ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
630 ent->addr = n->addr.sin_addr.s_addr;
631 ent->value = n->value;
/*
 * Dumps the IPv4 tree of table tbl->tbl into 'tbl' by walking it with
 * dump_table_entry.
 * NOTE(review): any reset of tbl->cnt and the return statements are not
 * visible in this listing.
 */
637 ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl)
639 struct radix_node_head *rnh;
641 if (tbl->tbl >= V_fw_tables_max)
644 if ((rnh = ch->tables[tbl->tbl]) == NULL)
646 rnh->rnh_walktree(rnh, dump_table_entry, tbl);
/*
 * Tree-walk callback: adds the size of one ipfw_table_xentry to the uint32_t
 * that 'arg' points to, so the total is a byte count rather than an entry
 * count.
 */
651 count_table_xentry(struct radix_node *rn, void *arg)
653 uint32_t * const cnt = arg;
655 (*cnt) += sizeof(ipfw_table_xentry);
/*
 * Accumulates into *cnt the bytes needed to dump table 'tbl' in extended
 * format: one ipfw_table_xentry per entry of both the IPv4 and the extended
 * tree, plus the size of the ipfw_xtable header.
 * NOTE(review): any initialisation of *cnt and the condition guarding the
 * header addition are not visible in this listing.
 */
660 ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
662 struct radix_node_head *rnh;
664 if (tbl >= V_fw_tables_max)
667 if ((rnh = ch->tables[tbl]) != NULL)
668 rnh->rnh_walktree(rnh, count_table_xentry, cnt);
669 if ((rnh = ch->xtables[tbl]) != NULL)
670 rnh->rnh_walktree(rnh, count_table_xentry, cnt);
671 /* Return zero if table is empty */
673 (*cnt) += sizeof(ipfw_xtable);
/*
 * Tree-walk callback: writes one IPv4 entry, in extended format, into the
 * next free slot of the ipfw_xtable passed in 'arg' (tbl->xent[tbl->cnt]).
 * NOTE(review): the action taken when the buffer is full, the masklen used
 * for an all-zero mask and the increment of tbl->cnt are not visible in this
 * listing.
 */
679 dump_table_xentry_base(struct radix_node *rn, void *arg)
681 struct table_entry * const n = (struct table_entry *)rn;
682 ipfw_xtable * const tbl = arg;
683 ipfw_table_xentry *xent;
685 /* Out of memory, returning */
686 if (tbl->cnt == tbl->size)
688 xent = &tbl->xent[tbl->cnt];
689 xent->len = sizeof(ipfw_table_xentry);
690 xent->tbl = tbl->tbl;
691 if (in_nullhost(n->mask.sin_addr))
/* ffs() gives the 1-based position of the lowest set bit, so this is the prefix length */
694 xent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
695 /* Save IPv4 address as deprecated IPv6 compatible */
696 xent->k.addr6.s6_addr32[3] = n->addr.sin_addr.s_addr;
697 xent->value = n->value;
/*
 * Tree-walk callback: writes one extended entry (IPv6 prefix or interface
 * name) into the next free slot of the ipfw_xtable passed in 'arg'.
 * NOTE(review): the switch expression, the action taken when the buffer is
 * full and the increment of tbl->cnt are not visible in this listing.
 */
703 dump_table_xentry_extended(struct radix_node *rn, void *arg)
705 struct table_xentry * const n = (struct table_xentry *)rn;
706 ipfw_xtable * const tbl = arg;
707 ipfw_table_xentry *xent;
712 /* Out of memory, returning */
713 if (tbl->cnt == tbl->size)
715 xent = &tbl->xent[tbl->cnt];
716 xent->len = sizeof(ipfw_table_xentry);
717 xent->tbl = tbl->tbl;
721 case IPFW_TABLE_CIDR:
722 /* Count IPv6 mask */
/* Prefix length is the number of set bits across the four 32-bit mask words */
723 v = (uint32_t *)&n->m.mask6.sin6_addr;
724 for (i = 0; i < sizeof(struct in6_addr) / 4; i++, v++)
725 xent->masklen += bitcount32(*v);
726 memcpy(&xent->k, &n->a.addr6.sin6_addr, sizeof(struct in6_addr));
729 case IPFW_TABLE_INTERFACE:
730 /* Assume exact mask */
731 xent->masklen = 8 * IF_NAMESIZE;
732 memcpy(&xent->k, &n->a.iface.ifname, IF_NAMESIZE);
736 /* unknown, skip entry */
740 xent->value = n->value;
/*
 * Dumps table tbl->tbl in extended format: records the table type in
 * tbl->type, then walks the IPv4 tree with dump_table_xentry_base and the
 * extended tree with dump_table_xentry_extended, both writing into 'tbl'.
 * NOTE(review): any reset of tbl->cnt and the return statements are not
 * visible in this listing.
 */
746 ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl)
748 struct radix_node_head *rnh;
750 if (tbl->tbl >= V_fw_tables_max)
753 tbl->type = ch->tabletype[tbl->tbl];
754 if ((rnh = ch->tables[tbl->tbl]) != NULL)
755 rnh->rnh_walktree(rnh, dump_table_xentry_base, tbl);
756 if ((rnh = ch->xtables[tbl->tbl]) != NULL)
757 rnh->rnh_walktree(rnh, dump_table_xentry_extended, tbl);