2 * Copyright (c) 2008 Paolo Pisati
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/eventhandler.h>
33 #include <sys/malloc.h>
34 #include <sys/kernel.h>
36 #include <sys/module.h>
37 #include <sys/rwlock.h>
39 #define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */
41 #include <netinet/libalias/alias.h>
42 #include <netinet/libalias/alias_local.h>
45 #include <netinet/in.h>
46 #include <netinet/ip.h>
47 #include <netinet/ip_var.h>
48 #include <netinet/ip_fw.h>
49 #include <netinet/tcp.h>
50 #include <netinet/udp.h>
52 #include <netpfil/ipfw/ip_fw_private.h>
54 #include <machine/in_cksum.h> /* XXX for in_cksum */
/*
 * Tag returned by EVENTHANDLER_REGISTER() for the ifaddr_event hook.
 * It is kept so the same hook can be handed to EVENTHANDLER_DEREGISTER()
 * when the module is torn down.
 */
56 static eventhandler_tag ifaddr_event_tag;
/*
 * ifaddr_change - handler for ifaddr_event notifications (this file
 * registers it with EVENTHANDLER_REGISTER).
 *
 * Walks every nat instance on V_layer3_chain. For an instance whose
 * if_name is compared against the name of ifp, the address list of ifp
 * is scanned for an AF_INET entry; that address is stored in ptr->ip and
 * passed to LibAliasSetAddress() as the new alias address.
 *
 * arg: unused.
 * ifp: interface that triggered the event.
 *
 * NOTE(review): the local declarations of ptr and ifa, the statements
 * taken when a test matches (presumably continue), any locking and the
 * closing braces are not visible in this view - confirm against the
 * full file.
 */
59 ifaddr_change(void *arg __unused, struct ifnet *ifp)
63 struct ip_fw_chain *chain;
/* The handler is expected to run in the vnet that owns the interface. */
65 KASSERT(curvnet == ifp->if_vnet,
66 ("curvnet(%p) differs from iface vnet(%p)", curvnet, ifp->if_vnet));
67 chain = &V_layer3_chain;
69 /* Check every nat entry... */
70 LIST_FOREACH(ptr, &chain->nat, _next) {
71 /* ...using nic 'ifp->if_xname' as dynamic alias address. */
72 if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0)
/* Scan the address list of the interface for an IPv4 entry. */
75 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
76 if (ifa->ifa_addr == NULL)
78 if (ifa->ifa_addr->sa_family != AF_INET)
/* Remember the IPv4 address and install it as the alias address. */
80 ptr->ip = ((struct sockaddr_in *)
81 (ifa->ifa_addr))->sin_addr;
82 LibAliasSetAddress(ptr->lib, ptr->ip);
90 * delete the pointers for nat entry ix, or all of them if ix < 0
/*
 * flush_nat_ptrs - visit every rule in chain->map and select those whose
 * action is an O_NAT instruction with a non-NULL cached nat pointer,
 * either for nat id ix or for any id when ix < 0.
 *
 * chain: rule chain; the write lock must be held (asserted below).
 * ix:    nat instance id to match, or a negative value to match all.
 *
 * NOTE(review): the statement applied to the selected rules is not
 * visible in this view; presumably the cached pointer is reset to NULL -
 * TODO confirm.
 */
93 flush_nat_ptrs(struct ip_fw_chain *chain, const int ix)
98 IPFW_WLOCK_ASSERT(chain);
99 for (i = 0; i < chain->n_rules; i++) {
/* The action part of the rule is viewed as a nat instruction. */
100 cmd = (ipfw_insn_nat *)ACTION_PTR(chain->map[i]);
101 /* XXX skip log and the like ? */
102 if (cmd->o.opcode == O_NAT && cmd->nat != NULL &&
103 (ix < 0 || cmd->nat->id == ix))
/*
 * del_redir_spool_cfg - tear down the redirect entries linked on head
 * for nat instance n.
 *
 * For each redirect entry the visible code deletes num libalias links
 * with LibAliasRedirectDelete(), unlinks every spool entry from the
 * spool chain of the entry, frees the alink array and unlinks the entry
 * itself. An unrecognised r->mode is reported with printf().
 *
 * n:    nat instance owning the libalias handle (n->lib).
 * head: list of struct cfg_redir entries to remove.
 *
 * NOTE(review): the code that adjusts num per redirect mode and the
 * calls that free the spool and redirect structures are not visible in
 * this view - confirm against the full file.
 */
109 del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head)
111 struct cfg_redir *r, *tmp_r;
112 struct cfg_spool *s, *tmp_s;
/* The SAFE iterator is needed because entries are unlinked in the loop. */
115 LIST_FOREACH_SAFE(r, head, _next, tmp_r) {
116 num = 1; /* Number of alias_link to delete. */
123 /* Delete all libalias redirect entry. */
124 for (i = 0; i < num; i++)
125 LibAliasRedirectDelete(n->lib, r->alink[i]);
126 /* Del spool cfg if any. */
127 LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) {
128 LIST_REMOVE(s, _next);
131 free(r->alink, M_IPFW);
132 LIST_REMOVE(r, _next);
136 printf("unknown redirect mode: %u\n", r->mode);
137 /* XXX - panic?!?!? */
/*
 * add_redir_spool_cfg - build the redirect and LSNAT (spool) entries of
 * nat instance ptr from a serialized buffer.
 *
 * buf: serialized configuration; redirect records are read from
 *      &buf[off] as struct cfg_redir and spool records as
 *      struct cfg_spool.
 * ptr: nat instance; ptr->redir_cnt gives the number of redirect
 *      records and ptr->lib is the libalias handle.
 *
 * Each redirect record is copied into a fresh allocation, its alink
 * array is sized by r->pport_cnt, and one of LibAliasRedirectAddr(),
 * LibAliasRedirectPort() or LibAliasRedirectProto() is called. The
 * function panics if r->alink[0] ends up NULL. Each spool record is
 * registered with LibAliasAddServer() and linked on r->spool_chain; the
 * redirect entry is finally linked on ptr->redir_chain.
 *
 * NOTE(review): the mode switch and the statements advancing off are
 * not visible in this view.
 * NOTE(review): r->pport_cnt and r->spool_cnt come straight from the
 * copied record; no validation is visible here.
 */
144 add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
146 struct cfg_redir *r, *ser_r;
147 struct cfg_spool *s, *ser_s;
150 for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) {
/* Copy the serialized record into a private, zeroed allocation. */
151 ser_r = (struct cfg_redir *)&buf[off];
152 r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
153 memcpy(r, ser_r, SOF_REDIR);
154 LIST_INIT(&r->spool_chain);
/* One alias_link slot per public port. */
156 r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt,
157 M_IPFW, M_WAITOK | M_ZERO);
160 r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr,
/* Port redirect: one libalias link per port in the range. */
164 for (i = 0 ; i < r->pport_cnt; i++) {
165 /* If remotePort is all ports, set it to 0. */
166 u_short remotePortCopy = r->rport + i;
167 if (r->rport_cnt == 1 && r->rport == 0)
169 r->alink[i] = LibAliasRedirectPort(ptr->lib,
170 r->laddr, htons(r->lport + i), r->raddr,
171 htons(remotePortCopy), r->paddr,
172 htons(r->pport + i), r->proto);
173 if (r->alink[i] == NULL) {
180 r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr,
181 r->raddr, r->paddr, r->proto);
184 printf("unknown redirect mode: %u\n", r->mode);
187 /* XXX perhaps return an error instead of panic ? */
188 if (r->alink[0] == NULL)
189 panic("LibAliasRedirect* returned NULL");
190 /* LSNAT handling. */
191 for (i = 0; i < r->spool_cnt; i++) {
192 ser_s = (struct cfg_spool *)&buf[off];
/*
 * NOTE(review): the spool entry is allocated with SOF_REDIR while only
 * SOF_SPOOL bytes are copied into it on the next line; the size mismatch
 * looks unintended - confirm whether SOF_SPOOL was meant.
 */
193 s = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
194 memcpy(s, ser_s, SOF_SPOOL);
195 LibAliasAddServer(ptr->lib, r->alink[0],
196 s->addr, htons(s->port));
198 /* Hook spool entry. */
199 LIST_INSERT_HEAD(&r->spool_chain, s, _next);
201 /* And finally hook this redir entry. */
202 LIST_INSERT_HEAD(&ptr->redir_chain, r, _next);
207 * ipfw_nat - perform mbuf header translation.
209 * Note V_layer3_chain has to be locked while calling ipfw_nat() in
210 * 'global' operation mode (t == NULL).
/*
 * ipfw_nat - run one packet through libalias.
 *
 * args: firewall arguments; args->oif == NULL selects the inbound path
 *       (LibAliasIn), the other visible call is LibAliasOut.
 * t:    nat instance to use. In the global mode described in the
 *       comment preceding this function (t == NULL), every instance on
 *       V_layer3_chain that is not flagged PKT_ALIAS_SKIP_GLOBAL is
 *       probed with LibAliasOutTry() until one returns PKT_ALIAS_OK;
 *       the chain read lock is asserted on that path.
 * m:    packet; it is passed to m_megapullup() and the returned mbuf
 *       mcl is used for all further work.
 *
 * Visible processing steps, in order:
 *   1. Pull the packet up and locate the IP header in mcl.
 *   2. Test for a locally generated packet (rcvif == NULL) that still
 *      carries CSUM_DELAY_DATA, as explained in the long checksum
 *      offload note inside the function.
 *   3. Call libalias with a buffer length of m_len plus the trailing
 *      space of mcl.
 *   4. Evaluate the drop condition: PKT_ALIAS_ERROR, or, for inbound
 *      packets, an unresolved fragment or PKT_ALIAS_IGNORED while
 *      PKT_ALIAS_DENY_INCOMING is set in t->mode.
 *   5. On PKT_ALIAS_RESPOND set M_SKIP_FIREWALL on the mbuf.
 *   6. Reset m_len and m_pkthdr.len from ip->ip_len.
 *   7. Checksum fix-up: compute a pseudo-header sum with in_pseudo(),
 *      point csum_data at the TCP or UDP checksum field, and call
 *      in_delayed_cksum() under the CSUM_DELAY_DATA test near the end.
 *
 * NOTE(review): local declarations, the else branches, the protocol
 * switch in the checksum code and all return statements are not visible
 * in this view, so return values are not documented here.
 */
214 ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
218 /* XXX - libalias duct tape */
219 int ldt, retval, found;
220 struct ip_fw_chain *chain;
/* From here on the function works on the pulled-up mbuf mcl. */
225 mcl = m_megapullup(m, m->m_pkthdr.len);
230 ip = mtod(mcl, struct ip *);
233 * XXX - Libalias checksum offload 'duct tape':
235 * locally generated packets have only pseudo-header checksum
236 * calculated and libalias will break it[1], so mark them for
237 * later fix. Moreover there are cases when libalias modifies
238 * tcp packet data[2], mark them for later fix too.
240 * [1] libalias was never meant to run in kernel, so it does
241 * not have any knowledge about checksum offloading, and
242 * expects a packet with a full internet checksum.
243 * Unfortunately, packets generated locally will have just the
244 * pseudo header calculated, and when libalias tries to adjust
245 * the checksum it will actually compute a wrong value.
247 * [2] when libalias modifies tcp's data content, full TCP
248 * checksum has to be recomputed: the problem is that
249 * libalias does not have any idea about checksum offloading.
250 * To work around this, we do not do checksumming in LibAlias,
251 * but only mark the packets in th_x2 field. If we receive a
252 * marked packet, we calculate correct checksum for it
253 * aware of offloading. Why such a terrible hack instead of
254 * recalculating checksum for each packet?
255 * Because the previous checksum was not checked!
256 * Recalculating checksums for EVERY packet will hide ALL
257 * transmission errors. Yes, marked packets still suffer from
258 * this problem. But, sigh, natd(8) has this problem, too.
260 * TODO: -make libalias mbuf aware (so
261 * it can handle delayed checksum and tso)
264 if (mcl->m_pkthdr.rcvif == NULL &&
265 mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
268 c = mtod(mcl, char *);
270 /* Check if this is 'global' instance */
272 if (args->oif == NULL) {
273 /* Wrong direction, skip processing */
279 chain = &V_layer3_chain;
280 IPFW_RLOCK_ASSERT(chain);
281 /* Check every nat entry... */
282 LIST_FOREACH(t, &chain->nat, _next) {
283 if ((t->mode & PKT_ALIAS_SKIP_GLOBAL) != 0)
285 retval = LibAliasOutTry(t->lib, c,
286 mcl->m_len + M_TRAILINGSPACE(mcl), 0);
287 if (retval == PKT_ALIAS_OK) {
288 /* Nat instance recognises state */
294 /* No instance found, return ignore */
299 if (args->oif == NULL)
300 retval = LibAliasIn(t->lib, c,
301 mcl->m_len + M_TRAILINGSPACE(mcl));
303 retval = LibAliasOut(t->lib, c,
304 mcl->m_len + M_TRAILINGSPACE(mcl));
308 * We drop packet when:
309 * 1. libalias returns PKT_ALIAS_ERROR;
310 * 2. For incoming packets:
311 * a) for unresolved fragments;
312 * b) libalias returns PKT_ALIAS_IGNORED and
313 * PKT_ALIAS_DENY_INCOMING flag is set.
315 if (retval == PKT_ALIAS_ERROR ||
316 (args->oif == NULL && (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT ||
317 (retval == PKT_ALIAS_IGNORED &&
318 (t->mode & PKT_ALIAS_DENY_INCOMING) != 0)))) {
319 /* XXX - should i add some logging? */
325 if (retval == PKT_ALIAS_RESPOND)
326 mcl->m_flags |= M_SKIP_FIREWALL;
327 mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len);
330 * XXX - libalias checksum offload
331 * 'duct tape' (see above)
334 if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
335 ip->ip_p == IPPROTO_TCP) {
338 th = (struct tcphdr *)(ip + 1);
346 uint16_t ip_len, cksum;
348 ip_len = ntohs(ip->ip_len);
349 cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
350 htons(ip->ip_p + ip_len - (ip->ip_hl << 2)));
354 th = (struct tcphdr *)(ip + 1);
356 * Maybe it was set in
361 mcl->m_pkthdr.csum_data =
362 offsetof(struct tcphdr, th_sum);
365 uh = (struct udphdr *)(ip + 1);
367 mcl->m_pkthdr.csum_data =
368 offsetof(struct udphdr, uh_sum);
371 /* No hw checksum offloading: do it ourselves */
372 if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) {
373 in_delayed_cksum(mcl);
374 mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
/*
 * lookup_nat - search list l for the nat instance whose id equals
 * nat_id.
 *
 * l:      list of struct cfg_nat linked through _next.
 * nat_id: identifier to look for.
 *
 * The declared return type is a struct cfg_nat pointer. Callers in this
 * file use the result as the matching instance; the return statements
 * themselves are not visible in this view.
 */
381 static struct cfg_nat *
382 lookup_nat(struct nat_list *l, int nat_id)
386 LIST_FOREACH(res, l, _next) {
387 if (res->id == nat_id)
/*
 * ipfw_nat_cfg - create or reconfigure a nat instance from a sockopt
 * request.
 *
 * sopt: socket option carrying a struct cfg_nat followed by serialized
 *       redirect data; sopt_valsize bytes are copied in and at least
 *       sizeof(struct cfg_nat) is required by the sooptcopyin() call.
 *
 * Visible processing steps, in order:
 *   1. Copy the request into a temporary M_TEMP buffer.
 *   2. Sample chain->gencnt and look up the instance with cfg->id.
 *   3. Either allocate a new instance and call LibAliasInit(), or
 *      unlink the existing one and flush rule pointers that refer to it.
 *   4. Apply redir_cnt, mode, alias address and interface name.
 *   5. Drop the old redirect and spool configuration and rebuild it
 *      from the bytes following the struct cfg_nat header.
 *   6. If gencnt changed meanwhile and an instance with the same id is
 *      on the list again, unlink that one, then insert ptr at the head
 *      of chain->nat.
 *
 * NOTE(review): locking calls, the assignments of ptr->id and ptr->ip,
 * cleanup of buf and the return statements are not visible in this
 * view.
 * NOTE(review): no upper bound on len is visible, and the redirect data
 * is parsed without a visible check against len - confirm the full file
 * validates the user-supplied sizes.
 */
394 ipfw_nat_cfg(struct sockopt *sopt)
396 struct cfg_nat *cfg, *ptr;
398 struct ip_fw_chain *chain = &V_layer3_chain;
400 int gencnt, error = 0;
/* Temporary kernel copy of the user request. */
402 len = sopt->sopt_valsize;
403 buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
404 if ((error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat))) != 0)
407 cfg = (struct cfg_nat *)buf;
414 * Find/create nat rule.
/* gencnt is remembered so a concurrent change can be detected later. */
417 gencnt = chain->gencnt;
418 ptr = lookup_nat(&chain->nat, cfg->id);
421 /* New rule: allocate and init new instance. */
422 ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_WAITOK | M_ZERO);
423 ptr->lib = LibAliasInit(NULL);
424 LIST_INIT(&ptr->redir_chain);
426 /* Entry already present: temporarily unhook it. */
427 LIST_REMOVE(ptr, _next);
428 flush_nat_ptrs(chain, cfg->id);
433 * Basic nat configuration.
437 * XXX - what if this rule doesn't nat any ip and just
439 * do we set aliasaddress to 0.0.0.0?
442 ptr->redir_cnt = cfg->redir_cnt;
443 ptr->mode = cfg->mode;
444 LibAliasSetMode(ptr->lib, cfg->mode, ~0);
445 LibAliasSetAddress(ptr->lib, ptr->ip);
446 memcpy(ptr->if_name, cfg->if_name, IF_NAMESIZE);
449 * Redir and LSNAT configuration.
451 /* Delete old cfgs. */
452 del_redir_spool_cfg(ptr, &ptr->redir_chain);
453 /* Add new entries. */
454 add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr);
457 /* Extra check to avoid race with another ipfw_nat_cfg() */
458 if (gencnt != chain->gencnt &&
459 ((cfg = lookup_nat(&chain->nat, ptr->id)) != NULL))
460 LIST_REMOVE(cfg, _next);
461 LIST_INSERT_HEAD(&chain->nat, ptr, _next);
/*
 * ipfw_nat_del - delete the nat instance whose id is supplied through
 * sopt.
 *
 * An int id is copied in from sopt and looked up on chain->nat. The
 * instance is unlinked, rule pointers referring to it are flushed, its
 * redirect and spool configuration is deleted and the libalias handle
 * is released with LibAliasUninit().
 *
 * NOTE(review): the result of sooptcopyin() is discarded on the visible
 * line, so a failed copy is not detected before i is used.
 * NOTE(review): the not-found path, locking, the free of ptr and the
 * return statements are not visible in this view.
 */
471 ipfw_nat_del(struct sockopt *sopt)
474 struct ip_fw_chain *chain = &V_layer3_chain;
/* Fetch the nat id from userland. */
477 sooptcopyin(sopt, &i, sizeof i, sizeof i);
480 ptr = lookup_nat(&chain->nat, i);
/* Detach the instance first, then drop rule references to it. */
485 LIST_REMOVE(ptr, _next);
486 flush_nat_ptrs(chain, i);
488 del_redir_spool_cfg(ptr, &ptr->redir_chain);
489 LibAliasUninit(ptr->lib);
/*
 * ipfw_nat_get_cfg - serialize the configuration of every nat instance
 * and copy it out through sopt.
 *
 * Output layout as written by the visible code: nat_cnt first, then for
 * each instance its struct cfg_nat, followed by each of its
 * struct cfg_redir records, each of which is followed by its
 * struct cfg_spool records.
 *
 * The function makes two passes over chain->nat: the first sums the
 * sizes, the second copies the data into a zeroed M_TEMP buffer.
 * chain->gencnt is sampled before sizing and compared before copying to
 * notice a list change in between.
 *
 * NOTE(review): the action taken when gencnt differs (presumably free
 * and retry), the counting of nat_cnt, locking, the free of data and
 * the return statement are not visible in this view.
 * NOTE(review): the structures are copied verbatim, so pointer members
 * used elsewhere in this file (for example lib, alink and the list
 * linkage) are included in the bytes sent to userland - confirm this is
 * intended.
 */
495 ipfw_nat_get_cfg(struct sockopt *sopt)
497 struct ip_fw_chain *chain = &V_layer3_chain;
502 int gencnt, nat_cnt, len, error;
/* The buffer always starts with the instance count. */
505 len = sizeof(nat_cnt);
509 gencnt = chain->gencnt;
510 /* Estimate memory amount */
511 LIST_FOREACH(n, &chain->nat, _next) {
513 len += sizeof(struct cfg_nat);
514 LIST_FOREACH(r, &n->redir_chain, _next) {
515 len += sizeof(struct cfg_redir);
516 LIST_FOREACH(s, &r->spool_chain, _next)
517 len += sizeof(struct cfg_spool);
522 data = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
523 bcopy(&nat_cnt, data, sizeof(nat_cnt));
/* len is reused as the write offset for the copy pass. */
526 len = sizeof(nat_cnt);
/* A changed generation count means the list was modified after sizing. */
529 if (gencnt != chain->gencnt) {
533 /* Serialize all the data. */
534 LIST_FOREACH(n, &chain->nat, _next) {
535 bcopy(n, &data[len], sizeof(struct cfg_nat));
536 len += sizeof(struct cfg_nat);
537 LIST_FOREACH(r, &n->redir_chain, _next) {
538 bcopy(r, &data[len], sizeof(struct cfg_redir));
539 len += sizeof(struct cfg_redir);
540 LIST_FOREACH(s, &r->spool_chain, _next) {
541 bcopy(s, &data[len], sizeof(struct cfg_spool));
542 len += sizeof(struct cfg_spool);
548 error = sooptcopyout(sopt, data, len);
/*
 * ipfw_nat_get_log - copy the libalias log buffers of the nat instances
 * out through sopt.
 *
 * Instances whose lib->logDesc is NULL are tested for in both passes
 * (presumably skipped; the continue statements are not visible). The
 * buffer is sized as i * (LIBALIAS_BUF_SIZE + sizeof(int)) and each
 * record consists of the instance id (an int) followed by
 * LIBALIAS_BUF_SIZE bytes taken from logDesc.
 *
 * NOTE(review): the allocation uses M_NOWAIT, so it can fail; the NULL
 * check is not visible in this view - confirm it exists.
 * NOTE(review): the result of sooptcopyout() is not used on the visible
 * line; locking, the counter updates, the free of data and the return
 * statement are not visible either.
 */
555 ipfw_nat_get_log(struct sockopt *sopt)
560 struct ip_fw_chain *chain;
562 chain = &V_layer3_chain;
565 /* one pass to count, one to copy the data */
567 LIST_FOREACH(ptr, &chain->nat, _next) {
568 if (ptr->lib->logDesc == NULL)
/* One id plus one log buffer per counted instance. */
572 size = i * (LIBALIAS_BUF_SIZE + sizeof(int));
573 data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO);
579 LIST_FOREACH(ptr, &chain->nat, _next) {
580 if (ptr->lib->logDesc == NULL)
/* Record header: the nat instance id. */
582 bcopy(&ptr->id, &data[i], sizeof(int));
/* Record body: the raw log buffer. */
584 bcopy(ptr->lib->logDesc, &data[i], LIBALIAS_BUF_SIZE);
585 i += LIBALIAS_BUF_SIZE;
588 sooptcopyout(sopt, data, size);
/*
 * vnet_ipfw_nat_init - per-vnet initialisation hook (registered with
 * VNET_SYSINIT in this file). The only visible action is setting
 * V_ipfw_nat_ready to 1.
 *
 * arg: unused.
 */
594 vnet_ipfw_nat_init(const void *arg __unused)
597 V_ipfw_nat_ready = 1;
/*
 * vnet_ipfw_nat_uninit - per-vnet teardown hook (registered with
 * VNET_SYSUNINIT in this file).
 *
 * Every nat instance on V_layer3_chain is unlinked, its redirect and
 * spool configuration is deleted and its libalias handle is released.
 * Afterwards all cached nat pointers in the rules are flushed and
 * V_ipfw_nat_ready is cleared.
 *
 * arg: unused.
 *
 * NOTE(review): locking and the free of each instance are not visible
 * in this view.
 */
602 vnet_ipfw_nat_uninit(const void *arg __unused)
604 struct cfg_nat *ptr, *ptr_temp;
605 struct ip_fw_chain *chain;
607 chain = &V_layer3_chain;
/* The SAFE iterator is needed because each entry is unlinked in the loop. */
609 LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
610 LIST_REMOVE(ptr, _next);
611 del_redir_spool_cfg(ptr, &ptr->redir_chain);
612 LibAliasUninit(ptr->lib);
615 flush_nat_ptrs(chain, -1 /* flush all */);
616 V_ipfw_nat_ready = 0;
/*
 * Module-wide initialisation body. The line naming the function is not
 * visible in this view; presumably this is ipfw_nat_init, which the
 * SYSINIT entry in this file refers to - TODO confirm.
 *
 * It publishes the nat entry points through the hook pointers and
 * registers ifaddr_change as an ifaddr_event handler, saving the tag in
 * ifaddr_event_tag.
 */
625 /* init ipfw hooks */
626 ipfw_nat_ptr = ipfw_nat;
627 lookup_nat_ptr = lookup_nat;
628 ipfw_nat_cfg_ptr = ipfw_nat_cfg;
629 ipfw_nat_del_ptr = ipfw_nat_del;
630 ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
631 ipfw_nat_get_log_ptr = ipfw_nat_get_log;
633 ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change,
634 NULL, EVENTHANDLER_PRI_ANY);
/*
 * ipfw_nat_destroy - module-wide teardown (registered with SYSUNINIT in
 * this file).
 *
 * Deregisters the ifaddr_event handler using the saved tag and resets
 * the hook pointers to NULL.
 *
 * NOTE(review): no visible line clears ipfw_nat_ptr, which the init
 * code sets; presumably that happens on a line missing from this view -
 * TODO confirm.
 */
638 ipfw_nat_destroy(void)
641 EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag);
642 /* deregister ipfw_nat */
644 lookup_nat_ptr = NULL;
645 ipfw_nat_cfg_ptr = NULL;
646 ipfw_nat_del_ptr = NULL;
647 ipfw_nat_get_cfg_ptr = NULL;
648 ipfw_nat_get_log_ptr = NULL;
/*
 * ipfw_nat_modevent - module event handler taking the module, the event
 * type and an unused argument.
 *
 * NOTE(review): the body is not visible in this view, so the handled
 * event types and return values are not documented here.
 */
652 ipfw_nat_modevent(module_t mod, int type, void *unused)
/*
 * Module descriptor handed to DECLARE_MODULE() further down in this
 * file. NOTE(review): the initializer fields are not visible in this
 * view.
 */
670 static moduledata_t ipfw_nat_mod = {
/*
 * Module registration and startup ordering.
 *
 * All hooks are attached to the subsystem IPFW_NAT_SI_SUB_FIREWALL. The
 * module-wide init and destroy routines use IPFW_NAT_MODULE_ORDER and
 * the per-vnet routines use IPFW_NAT_VNET_ORDER, which is one step
 * later than the module order.
 *
 * NOTE(review): DECLARE_MODULE() is given SI_ORDER_ANY directly, while
 * IPFW_NAT_MODEVENT_ORDER is only used to derive the other two order
 * values - confirm this is intended.
 */
676 /* Define startup order. */
677 #define IPFW_NAT_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
678 #define IPFW_NAT_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */
679 #define IPFW_NAT_MODULE_ORDER (IPFW_NAT_MODEVENT_ORDER + 1)
680 #define IPFW_NAT_VNET_ORDER (IPFW_NAT_MODEVENT_ORDER + 2)
/* The module depends on libalias (version 1) and ipfw (version 2). */
682 DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, IPFW_NAT_SI_SUB_FIREWALL, SI_ORDER_ANY);
683 MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
684 MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2);
685 MODULE_VERSION(ipfw_nat, 1);
/* Init hooks: module-wide first, then per-vnet. */
687 SYSINIT(ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
688 ipfw_nat_init, NULL);
689 VNET_SYSINIT(vnet_ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_VNET_ORDER,
690 vnet_ipfw_nat_init, NULL);
/* Matching teardown hooks registered with the same order values. */
692 SYSUNINIT(ipfw_nat_destroy, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
693 ipfw_nat_destroy, NULL);
694 VNET_SYSUNINIT(vnet_ipfw_nat_uninit, IPFW_NAT_SI_SUB_FIREWALL,
695 IPFW_NAT_VNET_ORDER, vnet_ipfw_nat_uninit, NULL);