2 * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 #include <sys/cdefs.h>
27 __FBSDID("$FreeBSD$");
30 * Lookup table support for ipfw
32 * Lookup tables are implemented (at the moment) using the radix
33 * tree used for routing tables. Tables store key-value entries, where
34 * keys are network prefixes (addr/masklen), and values are integers.
35 * As a degenerate case we can interpret keys as 32-bit integers
38 * The table is protected by the IPFW lock even for manipulation coming
39 * from userland, because operations are typically fast.
45 #error IPFIREWALL requires INET.
47 #include "opt_inet6.h"
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/malloc.h>
52 #include <sys/kernel.h>
54 #include <sys/rwlock.h>
55 #include <sys/socket.h>
56 #include <sys/queue.h>
57 #include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
58 #include <net/radix.h>
59 #include <net/route.h>
62 #include <netinet/in.h>
63 #include <netinet/ip_var.h> /* struct ipfw_rule_ref */
64 #include <netinet/ip_fw.h>
66 #include <netpfil/ipfw/ip_fw_private.h>
69 #include <security/mac/mac_framework.h>
/* malloc type tag passed to every malloc()/free() of a table entry in this file. */
72 static MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
/*
 * Members of the IPv4 table entry (struct table_entry, judging by the
 * ent->addr / ent->mask accesses elsewhere in this file).
 * NOTE(review): the struct's opening/closing lines and any further members
 * are not visible in this listing.
 */
/* Radix-tree linkage; walk callbacks cast struct radix_node * back to the entry. */
75 struct radix_node rn[2];
/*
 * Key and netmask. sin_addr holds the masked address / mask bits and the
 * first byte is written through KEY_LEN() as the radix key length.
 */
76 struct sockaddr_in addr, mask;
/*
 * Members of struct xaddr_iface, the radix key used for interface-name
 * tables. NOTE(review): the struct header line is not visible in this listing.
 */
/* First byte; set through KEY_LEN() to header size plus the name bytes to compare. */
81 uint8_t if_len; /* length of this struct */
/* With if_len this makes an 8-byte header, so ifname starts at offset 8. */
82 uint8_t pad[7]; /* Align name */
83 char ifname[IF_NAMESIZE]; /* Interface name */
/*
 * Members of the extended table entry (struct table_xentry, per the
 * xent->a.* and xent->m.* accesses in this file).
 * NOTE(review): the key alternatives (addr6 / iface) and the mask
 * alternatives (mask6 / ifmask) are presumably wrapped in unions named
 * `a` and `m`; those lines, and any value member, are not visible here.
 */
/* Radix-tree linkage, same role as in the IPv4 entry. */
87 struct radix_node rn[2];
/* Key when the table holds IPv6 prefixes. */
90 struct sockaddr_in6 addr6;
/* Key when the table holds interface names. */
92 struct xaddr_iface iface;
/* Netmask matching addr6. */
96 struct sockaddr_in6 mask6;
/* Mask matching iface; the code fills it with 0xFF for exact matching. */
98 struct xaddr_iface ifmask;
104 * The radix code expects addr and mask to be arrays of bytes,
105 * with the first byte being the length of the array. rn_inithead
106 * is called with the offset in bits of the lookup key within the
107 * array. If we use a sockaddr_in as the underlying type,
108 * sin_len is conveniently located at offset 0, sin_addr is at
109 * offset 4 and normally aligned.
110 * But for portability, let's avoid these assumptions and make the code explicit
/*
 * Lvalue for the first byte of v, which the radix code treats as the key
 * length. NOTE(review): the expansion has no outer parentheses.
 */
112 #define KEY_LEN(v) *((uint8_t *)&(v))
/* Bit offset of sin_addr inside struct sockaddr_in. */
113 #define KEY_OFS (8*offsetof(struct sockaddr_in, sin_addr))
115 * Do not require radix to compare more than actual IPv4/IPv6 address
/* Key lengths in bytes: everything up to the address field plus the address itself. */
117 #define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
118 #define KEY_LEN_INET6 (offsetof(struct sockaddr_in6, sin6_addr) + sizeof(struct in6_addr))
/* Interface keys: header only; users add the number of name bytes to compare. */
119 #define KEY_LEN_IFACE (offsetof(struct xaddr_iface, ifname))
/* Bit offsets of the key data in each key type; used as the rn_inithead() offset. */
121 #define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr))
122 #define OFF_LEN_INET6 (8 * offsetof(struct sockaddr_in6, sin6_addr))
123 #define OFF_LEN_IFACE (8 * offsetof(struct xaddr_iface, ifname))
/*
 * ipv6_writemask: write a netmask for a prefix of `mask` bits into *addr6,
 * treating the address as a sequence of 32-bit words.
 *
 * NOTE(review): this listing has gaps in its embedded numbering (128-130 and
 * 132 are absent), so the return type, the declaration of cp and the loop
 * body are not visible. Presumably the loop body stores an all-ones word and
 * advances cp - confirm against the full source.
 */
127 ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
/* One iteration per whole 32-bit word still covered by the prefix. */
131 for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32)
/*
 * Remaining partial word: top `mask` bits set (built in host order, then
 * converted with htonl), or 0 when no bits remain.
 * NOTE(review): if the hidden loop body advances cp, a 128-bit prefix makes
 * this store land just past the 16-byte address - verify.
 */
133 *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0);
/*
 * ipfw_add_table_entry: build a radix key/mask pair from the caller's key
 * and insert a newly allocated entry into table `tbl` of chain `ch`.
 *
 *   ch    - chain owning the tables[], xtables[] and tabletype[] arrays
 *   tbl   - table index, checked against V_fw_tables_max
 *   paddr - key: an in_addr_t, a struct in6_addr or an interface name,
 *           selected by `type` and `plen`
 *   plen  - key size in bytes; tells IPv4 from IPv6 for CIDR tables
 *   mlen  - prefix length in bits for CIDR keys; overwritten with the name
 *           length for interface keys
 *   type  - IPFW_TABLE_CIDR or IPFW_TABLE_INTERFACE
 *   value - NOTE(review): not referenced in the visible lines; presumably
 *           stored in the new entry - confirm
 *
 * NOTE(review): this listing omits lines (its embedded numbering has gaps),
 * so return statements, locking, the switch header and some checks are not
 * visible. The comments below describe only what can be seen.
 */
137 ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
138 uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value)
140 struct radix_node_head *rnh, **rnh_ptr;
141 struct table_entry *ent;
142 struct table_xentry *xent;
143 struct radix_node *rn;
147 struct sockaddr *addr_ptr, *mask_ptr;
/* Reject table indexes outside the configured range (action not visible). */
150 if (tbl >= V_fw_tables_max)
154 case IPFW_TABLE_CIDR:
/* A 4-byte key is an IPv4 prefix and goes into ch->tables[]. */
155 if (plen == sizeof(in_addr_t)) {
/* M_WAITOK | M_ZERO: the allocation is zero-filled. */
160 ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
162 /* Set 'total' structure length */
163 KEY_LEN(ent->addr) = KEY_LEN_INET;
164 KEY_LEN(ent->mask) = KEY_LEN_INET;
165 /* Set offset of IPv4 address in bits */
166 offset = OFF_LEN_INET;
/* Netmask with the top mlen bits set; mlen == 0 yields an all-zero mask. */
167 ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
168 addr = *((in_addr_t *)paddr);
/* Store the key with the host bits cleared. */
169 ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr;
171 rnh_ptr = &ch->tables[tbl];
173 addr_ptr = (struct sockaddr *)&ent->addr;
174 mask_ptr = (struct sockaddr *)&ent->mask;
/* A 16-byte key is an IPv6 prefix and goes into ch->xtables[]. */
177 } else if (plen == sizeof(struct in6_addr)) {
181 xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
183 /* Set 'total' structure length */
184 KEY_LEN(xent->a.addr6) = KEY_LEN_INET6;
185 KEY_LEN(xent->m.mask6) = KEY_LEN_INET6;
186 /* Set offset of IPv6 address in bits */
187 offset = OFF_LEN_INET6;
/* Build the mask, copy the address, then clear the bits outside the mask. */
188 ipv6_writemask(&xent->m.mask6.sin6_addr, mlen);
189 memcpy(&xent->a.addr6.sin6_addr, paddr, sizeof(struct in6_addr));
190 APPLY_MASK(&xent->a.addr6.sin6_addr, &xent->m.mask6.sin6_addr);
192 rnh_ptr = &ch->xtables[tbl];
194 addr_ptr = (struct sockaddr *)&xent->a.addr6;
195 mask_ptr = (struct sockaddr *)&xent->m.mask6;
198 /* Unknown CIDR type */
203 case IPFW_TABLE_INTERFACE:
204 /* Check if string is terminated */
/*
 * Save the last byte of the caller's buffer and force a NUL there so that
 * strlen() stays inside IF_NAMESIZE bytes. This writes to *paddr.
 */
205 c = ((char *)paddr)[IF_NAMESIZE - 1];
206 ((char *)paddr)[IF_NAMESIZE - 1] = '\0';
/*
 * mlen is reused as the name length. Maximum length plus a non-NUL saved
 * byte means the name was not terminated (handling not visible).
 */
207 if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0'))
210 /* Include last \0 into comparison */
213 xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
215 /* Set 'total' structure length */
216 KEY_LEN(xent->a.iface) = KEY_LEN_IFACE + mlen;
217 KEY_LEN(xent->m.ifmask) = KEY_LEN_IFACE + mlen;
218 /* Set offset of interface name in bits */
219 offset = OFF_LEN_IFACE;
220 memcpy(xent->a.iface.ifname, paddr, mlen);
221 /* Assume direct match */
222 /* TODO: Add interface pattern matching */
/* All-ones mask over the whole name field. */
224 memset(xent->m.ifmask.ifname, 0xFF, IF_NAMESIZE);
225 mask_ptr = (struct sockaddr *)&xent->m.ifmask;
228 rnh_ptr = &ch->xtables[tbl];
230 addr_ptr = (struct sockaddr *)&xent->a.iface;
240 /* Check if tabletype is valid */
/* A table that already has a different non-zero type rejects the entry. */
241 if ((ch->tabletype[tbl] != 0) && (ch->tabletype[tbl] != type)) {
243 free(ent_ptr, M_IPFW_TBL);
247 /* Check if radix tree exists */
248 if ((rnh = *rnh_ptr) == NULL) {
250 /* Create radix for a new table */
/* `offset` is the bit offset of the key data chosen above. */
251 if (!rn_inithead((void **)&rnh, offset)) {
252 free(ent_ptr, M_IPFW_TBL);
/* Re-check the slot: a head may have been attached since the first test. */
257 if (*rnh_ptr != NULL) {
258 /* Tree is already attached by other thread */
259 rn_detachhead((void **)&rnh);
261 /* Check table type another time */
262 if (ch->tabletype[tbl] != type) {
264 free(ent_ptr, M_IPFW_TBL);
270 * Set table type. It can be set already
271 * (if we have IPv6-only table) but setting
272 * it another time does not hurt
274 ch->tabletype[tbl] = type;
/* Insert the entry; ent_ptr is the memory holding the radix nodes. */
278 rn = rnh->rnh_addaddr(addr_ptr, mask_ptr, rnh, ent_ptr);
/* NOTE(review): presumably the rnh_addaddr failure path; condition not visible. */
282 free(ent_ptr, M_IPFW_TBL);
/*
 * ipfw_del_table_entry: rebuild the radix key/mask for the given key on the
 * stack, remove the matching entry from table `tbl` and free it.
 *
 *   ch    - chain owning the tables[], xtables[] and tabletype[] arrays
 *   tbl   - table index, checked against V_fw_tables_max
 *   paddr - key: an in_addr_t, a struct in6_addr or an interface name
 *   plen  - key size in bytes; tells IPv4 from IPv6 for CIDR tables
 *   mlen  - prefix length in bits for CIDR keys; overwritten with the name
 *           length for interface keys
 *   type  - IPFW_TABLE_CIDR or IPFW_TABLE_INTERFACE; compared with the
 *           table's recorded type before deleting
 *
 * NOTE(review): this listing omits lines (its embedded numbering has gaps),
 * so return statements, locking and the switch header are not visible.
 */
289 ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
290 uint8_t plen, uint8_t mlen, uint8_t type)
292 struct radix_node_head *rnh, **rnh_ptr;
293 struct table_entry *ent;
295 struct sockaddr_in sa, mask;
296 struct sockaddr *sa_ptr, *mask_ptr;
299 if (tbl >= V_fw_tables_max)
303 case IPFW_TABLE_CIDR:
/* A 4-byte key is an IPv4 prefix and is looked up in ch->tables[]. */
304 if (plen == sizeof(in_addr_t)) {
305 /* Set 'total' structure length */
306 KEY_LEN(sa) = KEY_LEN_INET;
307 KEY_LEN(mask) = KEY_LEN_INET;
/*
 * NOTE(review): lines 304-313 are contiguous in this listing and contain no
 * range check on mlen; mlen > 32 would make the shift count negative here.
 */
308 mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
309 addr = *((in_addr_t *)paddr);
/* Clear the host bits so the key is built the same way as on insertion. */
310 sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr;
311 rnh_ptr = &ch->tables[tbl];
312 sa_ptr = (struct sockaddr *)&sa;
313 mask_ptr = (struct sockaddr *)&mask;
/* A 16-byte key is an IPv6 prefix and is looked up in ch->xtables[]. */
315 } else if (plen == sizeof(struct in6_addr)) {
319 struct sockaddr_in6 sa6, mask6;
320 memset(&sa6, 0, sizeof(struct sockaddr_in6));
321 memset(&mask6, 0, sizeof(struct sockaddr_in6));
322 /* Set 'total' structure length */
323 KEY_LEN(sa6) = KEY_LEN_INET6;
324 KEY_LEN(mask6) = KEY_LEN_INET6;
325 ipv6_writemask(&mask6.sin6_addr, mlen);
326 memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
327 APPLY_MASK(&sa6.sin6_addr, &mask6.sin6_addr);
328 rnh_ptr = &ch->xtables[tbl];
329 sa_ptr = (struct sockaddr *)&sa6;
330 mask_ptr = (struct sockaddr *)&mask6;
333 /* Unknown CIDR type */
338 case IPFW_TABLE_INTERFACE:
339 /* Check if string is terminated */
/*
 * Save the last byte of the caller's buffer and force a NUL there so that
 * strlen() stays inside IF_NAMESIZE bytes. This writes to *paddr.
 */
340 c = ((char *)paddr)[IF_NAMESIZE - 1];
341 ((char *)paddr)[IF_NAMESIZE - 1] = '\0';
/* Maximum length plus a non-NUL saved byte: the name was not terminated. */
342 if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0'))
345 struct xaddr_iface ifname, ifmask;
346 memset(&ifname, 0, sizeof(ifname));
348 /* Include last \0 into comparison */
351 /* Set 'total' structure length */
352 KEY_LEN(ifname) = KEY_LEN_IFACE + mlen;
353 KEY_LEN(ifmask) = KEY_LEN_IFACE + mlen;
354 /* Assume direct match */
355 /* FIXME: Add interface pattern matching */
/* All-ones mask over the whole name field. */
357 memset(ifmask.ifname, 0xFF, IF_NAMESIZE);
358 mask_ptr = (struct sockaddr *)&ifmask;
361 memcpy(ifname.ifname, paddr, mlen);
363 rnh_ptr = &ch->xtables[tbl];
364 sa_ptr = (struct sockaddr *)&ifname;
/* No radix head in the selected slot (handling not visible). */
373 if ((rnh = *rnh_ptr) == NULL) {
/* Caller's type disagrees with the table's recorded type (handling not visible). */
378 if (ch->tabletype[tbl] != type) {
/* Remove the node; the result is treated as the start of the entry's memory. */
383 ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh);
389 free(ent, M_IPFW_TBL);
/*
 * flush_table_entry: rnh_walktree() callback that removes one node from its
 * tree and frees the entry.
 *
 *   rn  - node being visited
 *   arg - the radix_node_head that owns rn
 *
 * NOTE(review): the return type, return statement and any NULL check on
 * `ent` are not visible in this listing.
 */
394 flush_table_entry(struct radix_node *rn, void *arg)
396 struct radix_node_head * const rnh = arg;
397 struct table_entry *ent;
/* Delete using the node's own key and mask. */
399 ent = (struct table_entry *)
400 rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
402 free(ent, M_IPFW_TBL);
/*
 * ipfw_flush_table: empty table `tbl` of chain `ch` and reset its type.
 * Both radix heads are first unhooked from the chain arrays, then each
 * tree's entries are freed and the head is detached.
 *
 * NOTE(review): the return type, locking, return statements and the NULL
 * guards around the two teardown sequences are not visible in this listing.
 */
407 ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl)
409 struct radix_node_head *rnh, *xrnh;
411 if (tbl >= V_fw_tables_max)
415 * We free both (IPv4 and extended) radix trees and
416 * clear table type here to permit table to be reused
417 * for different type without module reload
421 /* Set IPv4 table pointer to zero */
422 if ((rnh = ch->tables[tbl]) != NULL)
423 ch->tables[tbl] = NULL;
424 /* Set extended table pointer to zero */
425 if ((xrnh = ch->xtables[tbl]) != NULL)
426 ch->xtables[tbl] = NULL;
427 /* Zero table type */
428 ch->tabletype[tbl] = 0;
/* Free every IPv4 entry, then release the head saved above. */
432 rnh->rnh_walktree(rnh, flush_table_entry, rnh);
433 rn_detachhead((void **)&rnh);
/* Same for the extended tree. */
437 xrnh->rnh_walktree(xrnh, flush_table_entry, xrnh);
438 rn_detachhead((void **)&xrnh);
/*
 * ipfw_destroy_tables: flush every table of chain `ch`, then free the three
 * per-chain arrays (tables, xtables, tabletype).
 *
 * NOTE(review): the return type, the declaration of `tbl` and any locking
 * are not visible in this listing.
 */
445 ipfw_destroy_tables(struct ip_fw_chain *ch)
449 /* Flush all tables */
450 for (tbl = 0; tbl < V_fw_tables_max; tbl++)
451 ipfw_flush_table(ch, tbl);
453 /* Free the pointer arrays themselves */
454 free(ch->tables, M_IPFW);
455 free(ch->xtables, M_IPFW);
456 free(ch->tabletype, M_IPFW);
/*
 * ipfw_init_tables: allocate the per-chain arrays, V_fw_tables_max elements
 * each and zero-filled (M_ZERO), so every table starts with no radix head
 * and type 0.
 *
 * NOTE(review): the return type and return statement are not visible in
 * this listing.
 */
460 ipfw_init_tables(struct ip_fw_chain *ch)
462 /* Allocate pointers */
463 ch->tables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
464 ch->xtables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
465 ch->tabletype = malloc(V_fw_tables_max * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
/*
 * ipfw_resize_tables: change the number of tables of chain `ch` to
 * `ntables` (clamped to IPFW_TABLES_MAX). New arrays are allocated, the
 * surviving slots are copied over, the chain and V_fw_tables_max are
 * switched to the new arrays, and trees in dropped slots are destroyed.
 *
 * NOTE(review): the return type, locking, return statement and the
 * assignment of ch->tables (original line 499) are not visible in this
 * listing.
 */
470 ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
472 struct radix_node_head **tables, **xtables, *rnh;
473 struct radix_node_head **tables_old, **xtables_old;
474 uint8_t *tabletype, *tabletype_old;
475 unsigned int ntables_old, tbl;
477 /* Check new value for validity */
478 if (ntables > IPFW_TABLES_MAX)
479 ntables = IPFW_TABLES_MAX;
481 /* Allocate new pointers */
482 tables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
483 xtables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
484 tabletype = malloc(ntables * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
/* Number of slots to carry over: the smaller of the old and new sizes. */
488 tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
490 /* Copy old table pointers */
491 memcpy(tables, ch->tables, sizeof(void *) * tbl);
492 memcpy(xtables, ch->xtables, sizeof(void *) * tbl);
493 memcpy(tabletype, ch->tabletype, sizeof(uint8_t) * tbl);
495 /* Change pointers and number of tables */
496 tables_old = ch->tables;
497 xtables_old = ch->xtables;
498 tabletype_old = ch->tabletype;
500 ch->xtables = xtables;
501 ch->tabletype = tabletype;
503 ntables_old = V_fw_tables_max;
504 V_fw_tables_max = ntables;
508 /* Check if we need to destroy radix trees */
/* Shrinking: slots ntables..ntables_old-1 exist only in the old arrays. */
509 if (ntables < ntables_old) {
510 for (tbl = ntables; tbl < ntables_old; tbl++) {
511 if ((rnh = tables_old[tbl]) != NULL) {
512 rnh->rnh_walktree(rnh, flush_table_entry, rnh);
513 rn_detachhead((void **)&rnh);
516 if ((rnh = xtables_old[tbl]) != NULL) {
517 rnh->rnh_walktree(rnh, flush_table_entry, rnh);
518 rn_detachhead((void **)&rnh);
523 /* Free old pointers */
524 free(tables_old, M_IPFW);
525 free(xtables_old, M_IPFW);
526 free(tabletype_old, M_IPFW);
/*
 * ipfw_lookup_table: look up IPv4 address `addr` in the IPv4 radix tree of
 * table `tbl`.
 *
 * NOTE(review): the rest of the parameter list, the return type and
 * everything after the lookup (result handling, return values) are not
 * visible in this listing.
 */
532 ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
535 struct radix_node_head *rnh;
536 struct table_entry *ent;
537 struct sockaddr_in sa;
539 if (tbl >= V_fw_tables_max)
/* No IPv4 tree attached to this table (handling not visible). */
541 if ((rnh = ch->tables[tbl]) == NULL)
/* Only the length byte and the address are filled in before the lookup. */
543 KEY_LEN(sa) = KEY_LEN_INET;
544 sa.sin_addr.s_addr = addr;
/* NULL mask argument. */
545 ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh));
/*
 * ipfw_lookup_table_extended: look up an IPv6 address or an interface name
 * in the extended radix tree of table `tbl`.
 *
 *   paddr - struct in6_addr for IPFW_TABLE_CIDR, NUL-terminated interface
 *           name for IPFW_TABLE_INTERFACE
 *   val   - NOTE(review): not referenced in the visible lines; presumably
 *           receives the matched entry's value - confirm
 *   type  - selects how paddr is interpreted
 *
 * NOTE(review): the return type, the switch header and the result handling
 * are not visible in this listing.
 */
554 ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
555 uint32_t *val, int type)
557 struct radix_node_head *rnh;
558 struct table_xentry *xent;
559 struct sockaddr_in6 sa6;
560 struct xaddr_iface iface;
562 if (tbl >= V_fw_tables_max)
564 if ((rnh = ch->xtables[tbl]) == NULL)
568 case IPFW_TABLE_CIDR:
569 KEY_LEN(sa6) = KEY_LEN_INET6;
570 memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
571 xent = (struct table_xentry *)(rnh->rnh_lookup(&sa6, NULL, rnh));
574 case IPFW_TABLE_INTERFACE:
/*
 * Key length = header + name length + 1, so the terminating NUL takes part
 * in the comparison, in line with the "Include last \0" comments in the
 * add/delete paths. strlcpy() returns the length of the source string.
 */
575 KEY_LEN(iface) = KEY_LEN_IFACE +
576 strlcpy(iface.ifname, (char *)paddr, IF_NAMESIZE) + 1;
577 /* Assume direct match */
578 /* FIXME: Add interface pattern matching */
579 xent = (struct table_xentry *)(rnh->rnh_lookup(&iface, NULL, rnh));
/*
 * count_table_entry: rnh_walktree() callback used by ipfw_count_table();
 * `arg` is the caller's u_int32_t counter.
 *
 * NOTE(review): the statement that updates the counter and the return are
 * not visible in this listing.
 */
594 count_table_entry(struct radix_node *rn, void *arg)
596 u_int32_t * const cnt = arg;
/*
 * ipfw_count_table: walk the IPv4 radix tree of table `tbl`, handing `cnt`
 * to count_table_entry() for every node.
 *
 * NOTE(review): the return type, any initialisation of *cnt and the return
 * statements are not visible in this listing.
 */
603 ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
605 struct radix_node_head *rnh;
607 if (tbl >= V_fw_tables_max)
/* No IPv4 tree attached to this table (handling not visible). */
610 if ((rnh = ch->tables[tbl]) == NULL)
612 rnh->rnh_walktree(rnh, count_table_entry, cnt);
/*
 * dump_table_entry: rnh_walktree() callback that copies one IPv4 entry into
 * the next free slot of the ipfw_table passed in `arg`.
 *
 * NOTE(review): the return type, the branch bodies for a full buffer and
 * for an all-zero mask, the increment of tbl->cnt and the return are not
 * visible in this listing.
 */
617 dump_table_entry(struct radix_node *rn, void *arg)
619 struct table_entry * const n = (struct table_entry *)rn;
620 ipfw_table * const tbl = arg;
621 ipfw_table_entry *ent;
/* Output buffer already holds tbl->size entries. */
623 if (tbl->cnt == tbl->size)
625 ent = &tbl->ent[tbl->cnt];
/* An all-zero mask is tested first, before the ffs() computation below. */
627 if (in_nullhost(n->mask.sin_addr))
/*
 * ffs() gives the 1-based position of the lowest set bit; for a contiguous
 * netmask with m leading ones that is 33 - m, so this yields m.
 */
630 ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
631 ent->addr = n->addr.sin_addr.s_addr;
632 ent->value = n->value;
/*
 * ipfw_dump_table: fill `tbl` with the IPv4 entries of table tbl->tbl by
 * walking its radix tree with dump_table_entry().
 *
 * NOTE(review): the return type, any reset of tbl->cnt and the return
 * statements are not visible in this listing.
 */
638 ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl)
640 struct radix_node_head *rnh;
642 if (tbl->tbl >= V_fw_tables_max)
645 if ((rnh = ch->tables[tbl->tbl]) == NULL)
647 rnh->rnh_walktree(rnh, dump_table_entry, tbl);
/*
 * count_table_xentry: rnh_walktree() callback that adds the size of one
 * ipfw_table_xentry to the byte counter passed in `arg`.
 *
 * NOTE(review): the return type and return statement are not visible in
 * this listing.
 */
652 count_table_xentry(struct radix_node *rn, void *arg)
654 uint32_t * const cnt = arg;
/* *cnt is a size in bytes, not a number of entries. */
656 (*cnt) += sizeof(ipfw_table_xentry);
/*
 * ipfw_count_xtable: accumulate into *cnt the number of bytes taken by the
 * entries of both trees of table `tbl` (one ipfw_table_xentry per node),
 * plus the size of an ipfw_xtable header.
 *
 * NOTE(review): the return type, any initialisation of *cnt, the return
 * statements and the condition guarding the header addition (original line
 * 673) are not visible in this listing.
 */
661 ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
663 struct radix_node_head *rnh;
665 if (tbl >= V_fw_tables_max)
/* Entries of the IPv4 tree are counted with the extended entry size too. */
668 if ((rnh = ch->tables[tbl]) != NULL)
669 rnh->rnh_walktree(rnh, count_table_xentry, cnt);
670 if ((rnh = ch->xtables[tbl]) != NULL)
671 rnh->rnh_walktree(rnh, count_table_xentry, cnt);
672 /* Return zero if table is empty */
674 (*cnt) += sizeof(ipfw_xtable);
/*
 * dump_table_xentry_base: rnh_walktree() callback that exports one IPv4
 * entry in the extended (ipfw_table_xentry) format into the ipfw_xtable
 * passed in `arg`.
 *
 * NOTE(review): the return type, the branch bodies for a full buffer and
 * for an all-zero mask, the increment of tbl->cnt and the return are not
 * visible in this listing.
 */
680 dump_table_xentry_base(struct radix_node *rn, void *arg)
682 struct table_entry * const n = (struct table_entry *)rn;
683 ipfw_xtable * const tbl = arg;
684 ipfw_table_xentry *xent;
686 /* Out of memory, returning */
/* "Out of memory" here means the output buffer already holds tbl->size entries. */
687 if (tbl->cnt == tbl->size)
689 xent = &tbl->xent[tbl->cnt];
690 xent->len = sizeof(ipfw_table_xentry);
691 xent->tbl = tbl->tbl;
692 if (in_nullhost(n->mask.sin_addr))
/* Lowest set bit of a contiguous mask with m leading ones is at 33 - m. */
695 xent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
696 /* Save IPv4 address as deprecated IPv6 compatible */
/* Only the last 32-bit word of the IPv6 key field is written here. */
697 xent->k.addr6.s6_addr32[3] = n->addr.sin_addr.s_addr;
698 xent->value = n->value;
/*
 * dump_table_xentry_extended: rnh_walktree() callback that exports one
 * extended entry (IPv6 prefix or interface name, chosen by tbl->type) into
 * the ipfw_xtable passed in `arg`.
 *
 * NOTE(review): the return type, the declarations of `i` and `v`, the
 * switch header, the increment of tbl->cnt and the returns are not visible
 * in this listing.
 */
704 dump_table_xentry_extended(struct radix_node *rn, void *arg)
706 struct table_xentry * const n = (struct table_xentry *)rn;
707 ipfw_xtable * const tbl = arg;
708 ipfw_table_xentry *xent;
713 /* Out of memory, returning */
/* "Out of memory" here means the output buffer already holds tbl->size entries. */
714 if (tbl->cnt == tbl->size)
716 xent = &tbl->xent[tbl->cnt];
717 xent->len = sizeof(ipfw_table_xentry);
718 xent->tbl = tbl->tbl;
722 case IPFW_TABLE_CIDR:
723 /* Count IPv6 mask */
724 v = (uint32_t *)&n->m.mask6.sin6_addr;
/*
 * Prefix length = number of set bits over the four mask words.
 * NOTE(review): this accumulates with +=, so it relies on xent->masklen
 * being zero beforehand - confirm the output buffer is pre-zeroed.
 */
725 for (i = 0; i < sizeof(struct in6_addr) / 4; i++, v++)
726 xent->masklen += bitcount32(*v);
727 memcpy(&xent->k, &n->a.addr6.sin6_addr, sizeof(struct in6_addr));
730 case IPFW_TABLE_INTERFACE:
731 /* Assume exact mask */
732 xent->masklen = 8 * IF_NAMESIZE;
733 memcpy(&xent->k, &n->a.iface.ifname, IF_NAMESIZE);
737 /* unknown, skip entry */
741 xent->value = n->value;
/*
 * ipfw_dump_xtable: export table tbl->tbl in the extended format. The
 * table's type is recorded in tbl->type, then IPv4 entries are dumped with
 * dump_table_xentry_base() and extended entries with
 * dump_table_xentry_extended().
 *
 * NOTE(review): the return type, any reset of tbl->cnt and the return
 * statements are not visible in this listing.
 */
747 ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl)
749 struct radix_node_head *rnh;
751 if (tbl->tbl >= V_fw_tables_max)
/* Set before the walks: the extended callback switches on tbl->type. */
754 tbl->type = ch->tabletype[tbl->tbl];
755 if ((rnh = ch->tables[tbl->tbl]) != NULL)
756 rnh->rnh_walktree(rnh, dump_table_xentry_base, tbl);
757 if ((rnh = ch->xtables[tbl->tbl]) != NULL)
758 rnh->rnh_walktree(rnh, dump_table_xentry_extended, tbl);