2 * Copyright (c) 2008 Paolo Pisati
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/condvar.h>
33 #include <sys/eventhandler.h>
34 #include <sys/malloc.h>
36 #include <sys/kernel.h>
39 #include <sys/module.h>
42 #include <sys/rwlock.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/sysctl.h>
46 #include <sys/syslog.h>
47 #include <sys/ucred.h>
48 #include <sys/vimage.h>
50 #include <netinet/libalias/alias.h>
51 #include <netinet/libalias/alias_local.h>
53 #define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */
56 #include <netinet/in.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip_var.h>
59 #include <netinet/ip_icmp.h>
60 #include <netinet/ip_fw.h>
61 #include <netinet/tcp.h>
62 #include <netinet/tcp_timer.h>
63 #include <netinet/tcp_var.h>
64 #include <netinet/tcpip.h>
65 #include <netinet/udp.h>
66 #include <netinet/udp_var.h>
68 #include <machine/in_cksum.h> /* XXX for in_cksum */
/* Malloc type passed to the malloc, realloc and free calls visible in this file. */
70 MALLOC_DECLARE(M_IPFW);
/* Firewall chain defined elsewhere; the code below reads its nat list and its rules. */
72 extern struct ip_fw_chain layer3_chain;
/* Tag returned when the ifaddr_event handler is registered; used again to deregister it. */
74 static eventhandler_tag ifaddr_event_tag;
/*
 * Hook pointers defined elsewhere.  The module initialisation code further
 * down points them at the functions implemented in this file.
 */
76 extern ipfw_nat_t *ipfw_nat_ptr;
77 extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
78 extern ipfw_nat_cfg_t *ipfw_nat_del_ptr;
79 extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
80 extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
/*
 * ifaddr_event handler: refresh the alias address of the nat instances
 * that are bound to interface ifp.
 *
 * arg - unused.
 * ifp - interface the event refers to.
 *
 * Every nat instance on the chain is compared by name against
 * ifp->if_xname; for a match, the interface address list is scanned for
 * AF_INET entries and the address found is stored in ptr->ip and pushed
 * to libalias.
 * NOTE(review): the declarations, braces and the statements guarded by the
 * two address checks are not visible in this excerpt, so whether the scan
 * stops at the first IPv4 address cannot be confirmed from here.
 */
83 ifaddr_change(void *arg __unused, struct ifnet *ifp)
85 INIT_VNET_IPFW(curvnet);
/* The nat list is walked with the chain write lock held. */
89 IPFW_WLOCK(&V_layer3_chain);
90 /* Check every nat entry... */
91 LIST_FOREACH(ptr, &V_layer3_chain.nat, _next) {
92 /* ...using nic 'ifp->if_xname' as dynamic alias address. */
93 if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) == 0) {
/* The interface address list is scanned under the interface address mutex. */
94 mtx_lock(&ifp->if_addr_mtx);
95 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
/* Entries with no address, or with a family other than AF_INET, are tested for here. */
96 if (ifa->ifa_addr == NULL)
98 if (ifa->ifa_addr->sa_family != AF_INET)
/* Record the IPv4 address and hand it to libalias as the alias address. */
100 ptr->ip = ((struct sockaddr_in *)
101 (ifa->ifa_addr))->sin_addr;
102 LibAliasSetAddress(ptr->lib, ptr->ip);
104 mtx_unlock(&ifp->if_addr_mtx);
107 IPFW_WUNLOCK(&V_layer3_chain);
/*
 * Visit every rule of the chain whose action is O_NAT and that holds a
 * non-NULL nat pointer to the instance with id i.
 *
 * i - id of the nat instance being looked for.
 *
 * The chain write lock must already be held; this is asserted below.
 * NOTE(review): the statement executed for a matching rule is not visible
 * in this excerpt; presumably it resets cmd->nat - confirm against the
 * full file.
 */
111 flush_nat_ptrs(const int i)
113 INIT_VNET_IPFW(curvnet);
116 IPFW_WLOCK_ASSERT(&V_layer3_chain);
117 for (rule = V_layer3_chain.rules; rule; rule = rule->next) {
/* The action part of the rule is viewed as a nat instruction. */
118 ipfw_insn_nat *cmd = (ipfw_insn_nat *)ACTION_PTR(rule);
/* Rules whose action is not O_NAT are tested for here. */
119 if (cmd->o.opcode != O_NAT)
121 if (cmd->nat != NULL && cmd->nat->id == i)
/*
 * List-linking helpers.
 *
 * HOOK_NAT(b, p)   - insert nat instance p at the head of list b.
 * UNHOOK_NAT(p)    - unlink nat instance p from the list it is on.
 * HOOK_REDIR(b, p) - insert redirect entry p at the head of list b.
 * HOOK_SPOOL(b, p) - insert spool entry p at the head of list b.
 *
 * The two NAT variants assert that the chain write lock is held; the
 * REDIR and SPOOL variants contain no lock assertion.
 * NOTE(review): the line that closes each do-block is not visible in this
 * excerpt.
 */
126 #define HOOK_NAT(b, p) do { \
127 IPFW_WLOCK_ASSERT(&V_layer3_chain); \
128 LIST_INSERT_HEAD(b, p, _next); \
131 #define UNHOOK_NAT(p) do { \
132 IPFW_WLOCK_ASSERT(&V_layer3_chain); \
133 LIST_REMOVE(p, _next); \
136 #define HOOK_REDIR(b, p) do { \
137 LIST_INSERT_HEAD(b, p, _next); \
140 #define HOOK_SPOOL(b, p) do { \
141 LIST_INSERT_HEAD(b, p, _next); \
/*
 * Tear down every redirect entry on head together with its spool entries.
 *
 * n    - nat instance; its libalias handle (n->lib) is used to delete the
 *        alias links.
 * head - list of cfg_redir entries to dispose of.
 *
 * For each entry the visible code deletes num libalias links taken from
 * r->alink, unlinks every spool entry, frees the alink array and unlinks
 * the entry itself.  The safe list iterators are used because entries are
 * removed while the lists are being walked.
 * NOTE(review): the code that adjusts num according to r->mode, and the
 * release of s and r themselves, are not visible in this excerpt; the
 * printf at the bottom suggests a fallback branch for an unknown mode -
 * confirm against the full file.
 */
145 del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head)
147 struct cfg_redir *r, *tmp_r;
148 struct cfg_spool *s, *tmp_s;
151 LIST_FOREACH_SAFE(r, head, _next, tmp_r) {
152 num = 1; /* Number of alias_link to delete. */
159 /* Delete all libalias redirect entry. */
160 for (i = 0; i < num; i++)
161 LibAliasRedirectDelete(n->lib, r->alink[i]);
162 /* Del spool cfg if any. */
163 LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) {
164 LIST_REMOVE(s, _next);
/* The array of link pointers is released before the entry is unlinked. */
167 free(r->alink, M_IPFW);
168 LIST_REMOVE(r, _next);
172 printf("unknown redirect mode: %u\n", r->mode);
173 /* XXX - panic?!?!? */
/*
 * Build the redirect and LSNAT (spool) configuration of nat instance ptr
 * from the serialized records found in buf.
 *
 * buf - serialized configuration; cfg_redir and cfg_spool records are read
 *       from it at offset off.
 * ptr - nat instance; ptr->redir_cnt redirect records are consumed, the
 *       matching libalias links are created through ptr->lib and each new
 *       entry is hooked on ptr->redir_chain.
 *
 * The allocations for r and r->alink use M_WAITOK | M_ZERO.  When the first
 * link of an entry is NULL, panic_err is set; the function ends in a
 * panic that prints panic_err.
 * NOTE(review): the switch on r->mode, the updates of off and the jump to
 * the panic are not visible in this excerpt - confirm against the full
 * file.
 */
180 add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
182 struct cfg_redir *r, *ser_r;
183 struct cfg_spool *s, *ser_s;
187 for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) {
/* Copy the serialized redirect record into a freshly allocated entry. */
188 ser_r = (struct cfg_redir *)&buf[off];
189 r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
190 memcpy(r, ser_r, SOF_REDIR);
191 LIST_INIT(&r->spool_chain);
/* One alias_link pointer slot per public port. */
193 r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt,
194 M_IPFW, M_WAITOK | M_ZERO);
/* Address redirect: a single link, stored in slot 0. */
197 r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr,
/* Port redirect: one link per port; local, remote and public ports advance together. */
201 for (i = 0 ; i < r->pport_cnt; i++) {
202 /* If remotePort is all ports, set it to 0. */
203 u_short remotePortCopy = r->rport + i;
204 if (r->rport_cnt == 1 && r->rport == 0)
206 r->alink[i] = LibAliasRedirectPort(ptr->lib,
207 r->laddr, htons(r->lport + i), r->raddr,
208 htons(remotePortCopy), r->paddr,
209 htons(r->pport + i), r->proto);
210 if (r->alink[i] == NULL) {
/* Protocol redirect: a single link, stored in slot 0. */
217 r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr,
218 r->raddr, r->paddr, r->proto);
221 printf("unknown redirect mode: %u\n", r->mode);
224 if (r->alink[0] == NULL) {
225 panic_err = "LibAliasRedirect* returned NULL";
227 } else /* LSNAT handling. */
228 for (i = 0; i < r->spool_cnt; i++) {
229 ser_s = (struct cfg_spool *)&buf[off];
/*
 * NOTE(review): the spool entry is allocated with SOF_REDIR bytes while
 * SOF_SPOOL bytes are copied into it below; this looks like the wrong size
 * constant (harmless only if SOF_REDIR is not smaller than SOF_SPOOL) -
 * confirm.
 */
230 s = malloc(SOF_REDIR, M_IPFW,
232 memcpy(s, ser_s, SOF_SPOOL);
/* Each spool entry becomes a server attached to the first link of the entry. */
233 LibAliasAddServer(ptr->lib, r->alink[0],
234 s->addr, htons(s->port));
236 /* Hook spool entry. */
237 HOOK_SPOOL(&r->spool_chain, s);
239 /* And finally hook this redir entry. */
240 HOOK_REDIR(&ptr->redir_chain, r);
244 /* something really bad happened: panic! */
245 panic("%s\n", panic_err);
/*
 * Run packet m through the libalias instance t->lib.
 *
 * args - ipfw arguments.  args->eh == NULL selects the byte-order
 *        conversion of ip_len and ip_off around the libalias call, and
 *        args->oif == NULL is the condition placed in front of the
 *        LibAliasIn call (LibAliasOut being the other call).
 * t    - nat instance providing the libalias handle.
 * m    - packet; it is first handed to m_megapullup() and the mbuf that
 *        comes back (mcl) is the one libalias operates on.
 *
 * Libalias is given the data pointer of mcl and a size of m_len plus the
 * trailing space of the mbuf.  After aliasing, TCP and UDP checksum state
 * is fixed up as described by the duct tape notes inside the body.
 * NOTE(review): the return type, braces, else branches, return statements
 * and the opening and closing lines of the long explanatory comment are
 * not visible in this excerpt, so return values and failure paths cannot
 * be documented from here.
 */
249 ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
253 /* XXX - libalias duct tape */
/* Obtain the mbuf that libalias will work on. */
259 if ((mcl = m_megapullup(m, m->m_pkthdr.len)) ==
262 ip = mtod(mcl, struct ip *);
/* Without an ethernet header pointer, ip_len and ip_off are converted with htons before aliasing. */
263 if (args->eh == NULL) {
264 ip->ip_len = htons(ip->ip_len);
265 ip->ip_off = htons(ip->ip_off);
269 * XXX - Libalias checksum offload 'duct tape':
271 * locally generated packets have only
272 * pseudo-header checksum calculated
273 * and libalias will screw it[1], so
274 * mark them for later fix. Moreover
275 * there are cases when libalias
276 * modify tcp packet data[2], mark it
279 * [1] libalias was never meant to run
280 * in kernel, so it doesn't have any
281 * knowledge about checksum
282 * offloading, and it expects a packet
283 * with a full internet
284 * checksum. Unfortunately, packets
285 * generated locally will have just the
286 * pseudo header calculated, and when
287 * libalias tries to adjust the
288 * checksum it will actually screw it.
290 * [2] when libalias modify tcp's data
291 * content, full TCP checksum has to
292 * be recomputed: the problem is that
293 * libalias doesn't have any idea
294 * about checksum offloading To
295 * workaround this, we do not do
296 * checksumming in LibAlias, but only
297 * mark the packets in th_x2 field. If
298 * we receive a marked packet, we
299 * calculate correct checksum for it
300 * aware of offloading. Why such a
301 * terrible hack instead of
302 * recalculating checksum for each
303 * packet? Because the previous
304 * checksum was not checked!
305 * Recalculating checksums for EVERY
306 * packet will hide ALL transmission
307 * errors. Yes, marked packets still
308 * suffer from this problem. But,
309 * sigh, natd(8) has this problem,
312 * TODO: -make libalias mbuf aware (so
313 * it can handle delayed checksum and tso)
316 if (mcl->m_pkthdr.rcvif == NULL &&
317 mcl->m_pkthdr.csum_flags &
321 c = mtod(mcl, char *);
322 if (args->oif == NULL)
323 retval = LibAliasIn(t->lib, c,
324 mcl->m_len + M_TRAILINGSPACE(mcl));
326 retval = LibAliasOut(t->lib, c,
327 mcl->m_len + M_TRAILINGSPACE(mcl));
328 if (retval != PKT_ALIAS_OK &&
329 retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) {
330 /* XXX - should i add some logging? */
336 mcl->m_pkthdr.len = mcl->m_len =
340 * XXX - libalias checksum offload
341 * 'duct tape' (see above)
/* Only TCP packets with a zero fragment offset are inspected here. */
344 if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
345 ip->ip_p == IPPROTO_TCP) {
348 th = (struct tcphdr *)(ip + 1);
/* ip_len is switched to host order for the checksum computation and restored with htons further down. */
358 ip->ip_len = ntohs(ip->ip_len);
362 htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2))
367 th = (struct tcphdr *)(ip + 1);
369 * Maybe it was set in
/* csum_data receives the offset of the checksum field inside the TCP or UDP header. */
374 mcl->m_pkthdr.csum_data =
375 offsetof(struct tcphdr, th_sum);
378 uh = (struct udphdr *)(ip + 1);
380 mcl->m_pkthdr.csum_data =
381 offsetof(struct udphdr, uh_sum);
385 * No hw checksum offloading: do it
388 if ((mcl->m_pkthdr.csum_flags &
389 CSUM_DELAY_DATA) == 0) {
390 in_delayed_cksum(mcl);
391 mcl->m_pkthdr.csum_flags &=
394 ip->ip_len = htons(ip->ip_len);
/* Mirror of the conversion done before aliasing: back to host order with ntohs. */
397 if (args->eh == NULL) {
398 ip->ip_len = ntohs(ip->ip_len);
399 ip->ip_off = ntohs(ip->ip_off);
/*
 * Create or reconfigure a nat instance from a serialized configuration
 * supplied through a socket option.
 *
 * sopt - socket option carrying a cfg_nat record followed by the redirect
 *        and spool records consumed by add_redir_spool_cfg().
 *
 * Sequence visible in the code:
 *   1. copy the request into a zeroed buffer of NAT_BUF_LEN bytes;
 *   2. under the chain write lock, look the instance up by id; a new one
 *      is allocated with M_NOWAIT and gets a fresh libalias handle, an
 *      existing one has the rule pointers to it flushed;
 *   3. with the lock released, apply mode, alias address and interface
 *      name, then replace the redirect and spool configuration;
 *   4. take the write lock again and hook the instance on the nat list.
 * NOTE(review): the return type, return statements, the assignment of
 * ptr->id and ptr->ip and the cleanup on the failure paths are not visible
 * in this excerpt.
 */
407 ipfw_nat_cfg(struct sockopt *sopt)
409 INIT_VNET_IPFW(curvnet);
410 struct cfg_nat *ptr, *ser_n;
413 buf = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO);
/*
 * NOTE(review): the result of sooptcopyin is discarded, so a failed or
 * short copy is not detected here and the zero-filled buffer would be
 * parsed as a configuration - confirm whether that is intended.
 */
414 sooptcopyin(sopt, buf, NAT_BUF_LEN,
415 sizeof(struct cfg_nat));
416 ser_n = (struct cfg_nat *)buf;
419 * Find/create nat rule.
421 IPFW_WLOCK(&V_layer3_chain);
422 LOOKUP_NAT(V_layer3_chain, ser_n->id, ptr);
424 /* New rule: allocate and init new instance. */
425 ptr = malloc(sizeof(struct cfg_nat),
426 M_IPFW, M_NOWAIT | M_ZERO);
428 IPFW_WUNLOCK(&V_layer3_chain);
432 ptr->lib = LibAliasInit(NULL);
433 if (ptr->lib == NULL) {
434 IPFW_WUNLOCK(&V_layer3_chain);
439 LIST_INIT(&ptr->redir_chain);
441 /* Entry already present: temporarily unhook it. */
443 flush_nat_ptrs(ser_n->id);
/* The remaining configuration work runs without the chain lock. */
445 IPFW_WUNLOCK(&V_layer3_chain);
448 * Basic nat configuration.
452 * XXX - what if this rule doesn't nat any ip and just
454 * do we set aliasaddress to 0.0.0.0?
457 ptr->redir_cnt = ser_n->redir_cnt;
458 ptr->mode = ser_n->mode;
459 LibAliasSetMode(ptr->lib, ser_n->mode, ser_n->mode);
460 LibAliasSetAddress(ptr->lib, ptr->ip);
461 memcpy(ptr->if_name, ser_n->if_name, IF_NAMESIZE);
464 * Redir and LSNAT configuration.
466 /* Delete old cfgs. */
467 del_redir_spool_cfg(ptr, &ptr->redir_chain);
468 /* Add new entries. */
469 add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr);
/* Publish the configured instance on the nat list of the chain. */
471 IPFW_WLOCK(&V_layer3_chain);
472 HOOK_NAT(&V_layer3_chain.nat, ptr);
473 IPFW_WUNLOCK(&V_layer3_chain);
/*
 * Delete the nat instance whose id is supplied through a socket option.
 *
 * sopt - socket option carrying the id (copied into i).
 *
 * The instance is looked up under the chain write lock; once the lock has
 * been released its redirect and spool configuration is torn down and the
 * libalias handle is released with LibAliasUninit.
 * NOTE(review): the return type, the not-found path, the unhooking of the
 * instance and the release of ptr itself are not visible in this excerpt.
 */
478 ipfw_nat_del(struct sockopt *sopt)
480 INIT_VNET_IPFW(curvnet);
/*
 * NOTE(review): the result of sooptcopyin is discarded, so i may be used
 * without having been filled in if the copy fails - confirm.
 */
484 sooptcopyin(sopt, &i, sizeof i, sizeof i);
485 IPFW_WLOCK(&V_layer3_chain);
486 LOOKUP_NAT(V_layer3_chain, i, ptr);
488 IPFW_WUNLOCK(&V_layer3_chain);
493 IPFW_WUNLOCK(&V_layer3_chain);
494 del_redir_spool_cfg(ptr, &ptr->redir_chain);
495 LibAliasUninit(ptr->lib);
/*
 * Serialize the configuration of every nat instance and copy it out
 * through a socket option.
 *
 * sopt - socket option that receives the serialized data.
 *
 * Output layout as built by the visible code: nat_cnt at offset 0, then
 * the records starting at offset sizeof(nat_cnt); each nat record
 * (SOF_NAT bytes) is followed by its redirect records and each redirect
 * record by its spool records.  The whole NAT_BUF_LEN buffer is copied
 * out, whatever amount of it was filled.
 * The nat list is walked under the chain read lock.  Before a record is
 * added, the offset plus the record size is checked to be strictly below
 * NAT_BUF_LEN; the tail of the function releases the lock and prints a
 * message asking for a larger NAT_BUF_LEN.
 * NOTE(review): the return type, the copy statements for the redirect and
 * spool records, the updates of off and nat_cnt and the return statements
 * are not visible in this excerpt.
 */
501 ipfw_nat_get_cfg(struct sockopt *sopt)
503 INIT_VNET_IPFW(curvnet);
/* Leave room at the start of the buffer for the instance count. */
511 off = sizeof(nat_cnt);
513 data = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO);
514 IPFW_RLOCK(&V_layer3_chain);
515 /* Serialize all the data. */
516 LIST_FOREACH(n, &V_layer3_chain.nat, _next) {
518 if (off + SOF_NAT < NAT_BUF_LEN) {
519 bcopy(n, &data[off], SOF_NAT);
521 LIST_FOREACH(r, &n->redir_chain, _next) {
522 if (off + SOF_REDIR < NAT_BUF_LEN) {
526 LIST_FOREACH(s, &r->spool_chain,
528 if (off + SOF_SPOOL <
/* Store the instance count in the space reserved at the start of the buffer. */
542 bcopy(&nat_cnt, data, sizeof(nat_cnt));
543 IPFW_RUNLOCK(&V_layer3_chain);
544 sooptcopyout(sopt, data, NAT_BUF_LEN);
/* Buffer-too-small path. */
548 IPFW_RUNLOCK(&V_layer3_chain);
549 printf("serialized data buffer not big enough:"
550 "please increase NAT_BUF_LEN\n");
/*
 * Collect the libalias log buffers of the nat instances and copy them out
 * through a socket option.
 *
 * sopt - socket option that receives the data.
 *
 * Output layout as built by the visible code: for each instance, its id
 * (an int) followed by sof bytes taken from ptr->lib->logDesc, where sof
 * is LIBALIAS_BUF_SIZE.  The output buffer is grown with realloc while
 * the chain read lock is held, using M_NOWAIT.
 * NOTE(review): the return type, the statement guarded by the NULL logDesc
 * test, the updates of cnt and i, the allocation failure handling and the
 * release of data are not visible in this excerpt.
 */
556 ipfw_nat_get_log(struct sockopt *sopt)
558 INIT_VNET_IPFW(curvnet);
561 int i, size, cnt, sof;
564 sof = LIBALIAS_BUF_SIZE;
567 IPFW_RLOCK(&V_layer3_chain);
569 LIST_FOREACH(ptr, &V_layer3_chain.nat, _next) {
/* Instances with a NULL log descriptor are tested for here. */
570 if (ptr->lib->logDesc == NULL)
/* Room for cnt records, each made of an int id plus sof bytes of log. */
573 size = cnt * (sof + sizeof(int));
/*
 * NOTE(review): the realloc result is stored straight back into data; if
 * the M_NOWAIT request fails, the previous buffer pointer is overwritten
 * and presumably leaked - confirm how the failure path handles this.
 */
574 data = realloc(data, size, M_IPFW, M_NOWAIT | M_ZERO);
576 IPFW_RUNLOCK(&V_layer3_chain);
579 bcopy(&ptr->id, &data[i], sizeof(int));
581 bcopy(ptr->lib->logDesc, &data[i], sof);
584 IPFW_RUNLOCK(&V_layer3_chain);
585 sooptcopyout(sopt, data, size);
/*
 * Module initialisation body: point the ipfw nat hook pointers at the
 * functions of this file, then register ifaddr_change as an ifaddr_event
 * handler.
 * The hook pointers are assigned with the chain write lock held; the
 * event handler registration happens after the lock is released and the
 * returned tag is kept in V_ifaddr_event_tag.
 * NOTE(review): the function header is not visible in this excerpt; the
 * name ipfw_nat_init is an assumption - confirm.
 */
593 INIT_VNET_IPFW(curvnet);
595 IPFW_WLOCK(&V_layer3_chain);
596 /* init ipfw hooks */
597 ipfw_nat_ptr = ipfw_nat;
598 ipfw_nat_cfg_ptr = ipfw_nat_cfg;
599 ipfw_nat_del_ptr = ipfw_nat_del;
600 ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
601 ipfw_nat_get_log_ptr = ipfw_nat_get_log;
602 IPFW_WUNLOCK(&V_layer3_chain);
603 V_ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change,
604 NULL, EVENTHANDLER_PRI_ANY);
/*
 * Module teardown: dispose of every nat instance, deregister the
 * ifaddr_event handler and visit every O_NAT rule of the chain.
 *
 * All of the visible work happens between taking and releasing the chain
 * write lock.  Each instance is unlinked from the nat list, its redirect
 * and spool configuration is torn down and its libalias handle is
 * released.
 * NOTE(review): the release of ptr itself, the statement executed for each
 * O_NAT rule and the reset of the hook pointers announced by the last
 * comment are not visible in this excerpt.
 */
608 ipfw_nat_destroy(void)
610 INIT_VNET_IPFW(curvnet);
612 struct cfg_nat *ptr, *ptr_temp;
614 IPFW_WLOCK(&V_layer3_chain);
/* Safe iteration: entries are removed from the list while it is walked. */
615 LIST_FOREACH_SAFE(ptr, &V_layer3_chain.nat, _next, ptr_temp) {
616 LIST_REMOVE(ptr, _next);
617 del_redir_spool_cfg(ptr, &ptr->redir_chain);
618 LibAliasUninit(ptr->lib);
621 EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag);
622 /* flush all nat ptrs */
623 for (rule = V_layer3_chain.rules; rule; rule = rule->next) {
624 ipfw_insn_nat *cmd = (ipfw_insn_nat *)ACTION_PTR(rule);
625 if (cmd->o.opcode == O_NAT)
628 /* deregister ipfw_nat */
630 IPFW_WUNLOCK(&V_layer3_chain);
/*
 * Module event handler.
 * NOTE(review): only the signature is visible in this excerpt; the body
 * presumably dispatches on type to the initialisation and teardown code
 * above - confirm.
 */
634 ipfw_nat_modevent(module_t mod, int type, void *unused)
/* Module description record; its initialiser is not visible in this excerpt. */
654 static moduledata_t ipfw_nat_mod = {
/*
 * Module registration: ipfw_nat is declared at
 * SI_SUB_PROTO_IFATTACHDOMAIN with SI_ORDER_ANY, depends on libalias
 * (version 1) and on ipfw (version 2), and is itself version 1.
 */
660 DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
661 MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
662 MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2);
663 MODULE_VERSION(ipfw_nat, 1);