2 * Copyright (C) 1993-2001 by Darren Reed.
4 * See the IPFILTER.LICENCE file for details on licencing.
6 * I hate legaleese, don't you ?
9 static const char sccsid[] = "%W% %G% (C) 1993-2000 Darren Reed";
10 static const char rcsid[] = "@(#)$Id: ip_sfil.c,v 2.23.2.15 2001/12/26 22:28:51 darrenr Exp $";
13 #include <sys/types.h>
14 #include <sys/errno.h>
15 #include <sys/param.h>
16 #include <sys/cpuvar.h>
18 #include <sys/ioctl.h>
19 #include <sys/filio.h>
20 #include <sys/systm.h>
23 #include <sys/sunddi.h>
24 #include <sys/ksynch.h>
26 #include <sys/mkdev.h>
27 #include <sys/protosw.h>
28 #include <sys/socket.h>
29 #include <sys/dditypes.h>
30 #include <sys/cmn_err.h>
33 #include <net/route.h>
34 #include <netinet/in.h>
35 #include <netinet/in_systm.h>
36 #include <netinet/ip.h>
37 #include <netinet/ip_var.h>
38 #include <netinet/tcp.h>
39 #include <netinet/udp.h>
40 #include <netinet/tcpip.h>
41 #include <netinet/ip_icmp.h>
42 #include "ip_compat.h"
44 # include <netinet/icmp6.h>
52 #include <inet/ip_ire.h>
/* Two-argument minimum; note that each argument may be evaluated twice. */
54 #define MIN(a,b) (((a)<(b))?(a):(b))
/*
 * Declared with an explicit int: an extern declaration with no type relied
 * on the pre-C99 implicit-int rule, which also meant int.
 */
58 extern int fr_flags, fr_active;
/* Initialised to the ICMP "host unreachable" code. */
61 int ipl_unreach = ICMP_UNREACH_HOST;
62 u_long ipl_frouteok[2] = {0, 0};
/* Prototypes for the helpers private to this file. */
63 static int frzerostats __P((caddr_t));
65 static int frrequest __P((minor_t, int, caddr_t, int));
66 static int send_ip __P((fr_info_t *fin, mblk_t *m));
/*
 * Mutexes, reader/writer locks and condition variables used by the driver.
 * They are initialised in iplattach() and destroyed on the detach path.
 */
67 kmutex_t ipl_mutex, ipf_authmx, ipf_rw;
68 KRWLOCK_T ipf_mutex, ipfs_mutex, ipf_solaris;
69 KRWLOCK_T ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth;
70 kcondvar_t iplwait, ipfauthwait;
/*
 * Visible part of the detach path (the function header and several
 * statements are not present in this excerpt).  It calls frflush() for the
 * IPF unit and then destroys the condition variable, mutexes and rwlocks
 * that iplattach() initialises.
 */
78 cmn_err(CE_CONT, "ipldetach()\n");
/* Walk every log unit from IPL_LOGMAX down to 0 (loop body not visible). */
81 for (i = IPL_LOGMAX; i >= 0; i--)
/*
 * NOTE(review): i receives the same flag mask that is passed literally to
 * frflush() on the next line; no later use of i is visible here - confirm
 * whether the assignment is still needed.
 */
84 i = FR_INQUE|FR_OUTQUE;
/* Flush with both the inbound and outbound queue flags; result ignored. */
85 (void) frflush(IPL_LOGIPF, FR_INQUE|FR_OUTQUE);
/* Tear down the synchronisation objects. */
90 cv_destroy(&ipfauthwait);
91 mutex_destroy(&ipf_authmx);
92 mutex_destroy(&ipl_mutex);
93 mutex_destroy(&ipf_rw);
94 RW_DESTROY(&ipf_mutex);
95 RW_DESTROY(&ipf_frag);
96 RW_DESTROY(&ipf_state);
97 RW_DESTROY(&ipf_natfrag);
99 RW_DESTROY(&ipf_auth);
100 RW_DESTROY(&ipfs_mutex);
101 /* NOTE: This lock is acquired in ipf_detach */
/* ipf_solaris is released first and only then destroyed. */
102 RWLOCK_EXIT(&ipf_solaris);
103 RW_DESTROY(&ipf_solaris);
/*
 * iplattach - prepare the driver's shared state.
 *
 * Clears frcache, initialises the three mutexes, the eight reader/writer
 * locks and the two condition variables, then calls nat_init(),
 * fr_stateinit() and appr_init(), checking each for a -1 result.  What is
 * done on failure is not visible in this excerpt.
 */
108 int iplattach __P((void))
111 cmn_err(CE_CONT, "iplattach()\n");
/* Start with an empty rule cache. */
113 bzero((char *)frcache, sizeof(frcache));
/* Mutexes. */
114 mutex_init(&ipf_rw, "ipf rw mutex", MUTEX_DRIVER, NULL);
115 mutex_init(&ipl_mutex, "ipf log mutex", MUTEX_DRIVER, NULL);
116 mutex_init(&ipf_authmx, "ipf auth log mutex", MUTEX_DRIVER, NULL);
/* Reader/writer locks. */
117 RWLOCK_INIT(&ipf_solaris, "ipf filter load/unload mutex", NULL);
118 RWLOCK_INIT(&ipf_mutex, "ipf filter rwlock", NULL);
119 RWLOCK_INIT(&ipfs_mutex, "ipf solaris mutex", NULL);
120 RWLOCK_INIT(&ipf_frag, "ipf fragment rwlock", NULL);
121 RWLOCK_INIT(&ipf_state, "ipf IP state rwlock", NULL);
122 RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock", NULL);
123 RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock", NULL);
124 RWLOCK_INIT(&ipf_auth, "ipf IP User-Auth rwlock", NULL);
/* Condition variables. */
125 cv_init(&iplwait, "ipl condvar", CV_DRIVER, NULL);
126 cv_init(&ipfauthwait, "ipf auth condvar", CV_DRIVER, NULL);
/* Subsystem initialisers; each reports failure with -1. */
130 if (nat_init() == -1)
132 if (fr_stateinit() == -1)
134 if (appr_init() == -1)
/*
 * frzerostats - copy the local fio structure out to the caller's buffer
 * (data) with IWCOPYPTR and zero two entries' worth of frstats.  How fio is
 * filled in, and whether the clearing depends on the copy succeeding, is not
 * visible in this excerpt.
 */
140 static int frzerostats(data)
147 error = IWCOPYPTR((caddr_t)&fio, data, sizeof(fio));
151 bzero((char *)frstats, sizeof(*frstats) * 2);
158 * Filter ioctl interface.
/*
 * iplioctl - ioctl entry point for the filter devices.
 *
 * The minor number of dev selects the unit.  ipf_solaris is taken as a
 * reader for the duration of the call.  Requests for the NAT, state and
 * auth units are passed to nat_ioctl(), fr_state_ioctl() and
 * fr_auth_ioctl(); the remaining requests are handled here.  The case
 * labels are not visible in this excerpt, so the comments below describe
 * each visible group of statements rather than naming the ioctl.
 */
160 int iplioctl(dev, cmd, data, mode, cp, rp)
176 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
177 dev, cmd, data, mode, cp, rp);
/* Minor numbers above IPL_LOGMAX are tested for here. */
179 unit = getminor(dev);
180 if (IPL_LOGMAX < unit)
186 READ_ENTER(&ipf_solaris);
/* Per-unit handlers; each path drops ipf_solaris after its handler runs. */
187 if (unit == IPL_LOGNAT) {
188 error = nat_ioctl((caddr_t)data, cmd, mode);
189 RWLOCK_EXIT(&ipf_solaris);
192 if (unit == IPL_LOGSTATE) {
193 error = fr_state_ioctl((caddr_t)data, cmd, mode);
194 RWLOCK_EXIT(&ipf_solaris);
197 if (unit == IPL_LOGAUTH) {
198 error = fr_auth_ioctl((caddr_t)data, mode, cmd, NULL, NULL);
199 RWLOCK_EXIT(&ipf_solaris);
/* Read an "enable" value from the caller; FWRITE is tested first. */
208 if (!(mode & FWRITE))
211 error = IRCOPY((caddr_t)data, (caddr_t)&enable,
/* Replace fr_flags from the caller's buffer under the ipf_mutex write lock. */
216 if (!(mode & FWRITE))
219 WRITE_ENTER(&ipf_mutex);
220 error = IRCOPY((caddr_t)data, (caddr_t)&fr_flags,
222 RWLOCK_EXIT(&ipf_mutex);
/* Report the current fr_flags to the caller. */
226 error = IWCOPY((caddr_t)&fr_flags, (caddr_t)data,
/* Rule request against the active set (fr_active). */
235 if (!(mode & FWRITE))
238 error = frrequest(unit, cmd, (caddr_t)data, fr_active);
/* Rule request whose set argument is on a line not visible here. */
243 if (!(mode & FWRITE))
246 error = frrequest(unit, cmd, (caddr_t)data,
/*
 * Swap the active set: clear the rule cache, copy the current fr_active out
 * to the caller, then flip it between 0 and 1.
 */
250 if (!(mode & FWRITE))
253 WRITE_ENTER(&ipf_mutex);
254 bzero((char *)frcache, sizeof(frcache[0]) * 2);
255 error = IWCOPY((caddr_t)&fr_active, (caddr_t)data,
259 fr_active = 1 - fr_active;
260 RWLOCK_EXIT(&ipf_mutex);
/* Copy fio out after ipf_mutex (held as reader) has been released. */
267 READ_ENTER(&ipf_mutex);
269 RWLOCK_EXIT(&ipf_mutex);
270 error = IWCOPYPTR((caddr_t)&fio, (caddr_t)data, sizeof(fio));
/* Hand the request to frzerostats(). */
276 if (!(mode & FWRITE))
279 error = frzerostats((caddr_t)data);
/* Flush: read the flags from the caller and copy frflush()'s result back. */
282 if (!(mode & FWRITE))
285 error = IRCOPY((caddr_t)data, (caddr_t)&tmp,
288 tmp = frflush(unit, tmp);
289 error = IWCOPY((caddr_t)&tmp, (caddr_t)data,
297 error = IRCOPY((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
/* Clear the log for this unit and copy ipflog_clear()'s result back. */
308 if (!(mode & FWRITE))
311 tmp = ipflog_clear(unit);
312 error = IWCOPY((caddr_t)&tmp, (caddr_t)data,
318 #endif /* IPFILTER_LOG */
/* Copy out the structure returned by ipfr_fragstats(). */
320 if (!(mode & FWRITE))
326 error = IWCOPYPTR((caddr_t)ipfr_fragstats(), (caddr_t)data,
/*
 * Copy out iplused[IPL_LOGIPF] as an int.  IWCOPY needs the address of the
 * local snapshot, hence &copy.
 */
332 int copy = (int)iplused[IPL_LOGIPF];
334 error = IWCOPY((caddr_t)&copy, (caddr_t)data, sizeof(copy));
/* Common exit: drop the reader hold taken above. */
344 RWLOCK_EXIT(&ipf_solaris);
/*
 * get_unit - find an interface (ill_t) by name.
 *
 * Walks the list that starts at ill_g_head and matches an entry whose
 * ill_name_length equals strlen(name) + 1, whose ill_sap equals sap, and
 * whose name compares equal over that length.  How sap is derived (presumably
 * from v - TODO confirm) and the return statements are not visible in this
 * excerpt.
 */
349 ill_t *get_unit(name, v)
353 size_t len = strlen(name) + 1; /* includes \0 */
363 for (il = ill_g_head; il; il = il->ill_next)
364 if ((len == il->ill_name_length) && (il->ill_sap == sap) &&
365 !strncmp(il->ill_name, name, len))
/*
 * frrequest - service a rule request (req) for the given unit and rule set.
 *
 * Visible steps: copy the caller's rule into fp, choose the list it belongs
 * to, resolve its interface names, compute its checksum, look for an
 * identical rule already on the list, and then act on req (copy out for
 * SIOCZRLST, remove, or add/insert).  Many statements, including all return
 * paths, are not visible in this excerpt.
 */
371 static int frrequest(unit, req, data, set)
376 register frentry_t *fp, *f, **fprev;
377 register frentry_t **ftail;
378 frgroup_t *fg = NULL;
379 int error = 0, in, i;
/* Fetch the rule from the caller's buffer. */
389 error = IRCOPYPTR(data, (caddr_t)fp, sizeof(*fp));
/* fr_sap is set to IP_DL_SAP or, when fr_v is 6, to IP6_DL_SAP. */
395 fp->fr_sap = IP_DL_SAP;
396 else if (fp->fr_v == 6)
397 fp->fr_sap = IP6_DL_SAP;
/* The list work below runs with ipf_mutex held as a writer. */
404 WRITE_ENTER(&ipf_mutex);
406 * Check that the group number does exist and that if a head group
407 * has been specified, doesn't exist.
409 if ((req != SIOCZRLST) && fp->fr_grhead &&
410 fr_findgroup(fp->fr_grhead, fp->fr_flags, unit, set, NULL)) {
414 if ((req != SIOCZRLST) && fp->fr_group &&
415 !fr_findgroup(fp->fr_group, fp->fr_flags, unit, set, NULL)) {
/* in indexes the rule arrays: 0 when FR_INQUE is set, 1 otherwise. */
420 in = (fp->fr_flags & FR_INQUE) ? 0 : 1;
/*
 * Choose the list head: ipauth for the auth unit, otherwise the accounting
 * or filter list that matches the rule's flags and IP version.
 */
422 if (unit == IPL_LOGAUTH)
423 ftail = fprev = &ipauth;
424 else if ((fp->fr_flags & FR_ACCOUNT) && (fp->fr_v == 4))
425 ftail = fprev = &ipacct[in][set];
426 else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) && (fp->fr_v == 4))
427 ftail = fprev = &ipfilter[in][set];
429 else if ((fp->fr_flags & FR_ACCOUNT) && (fp->fr_v == 6))
430 ftail = fprev = &ipacct6[in][set];
431 else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) && (fp->fr_v == 6))
432 ftail = fprev = &ipfilter6[in][set];
/* The group found for fr_group supplies the list start (fg_start) instead. */
439 group = fp->fr_group;
441 fg = fr_findgroup(group, fp->fr_flags, unit, set, NULL);
446 ftail = fprev = fg->fg_start;
/* Clear the rule cache. */
449 bzero((char *)frcache, sizeof(frcache[0]) * 2);
/*
 * Resolve the four interface names.  A name that is exactly "-" or "*" maps
 * to NULL; any other non-empty name is looked up with GETUNIT.
 */
451 for (i = 0; i < 4; i++) {
452 if ((fp->fr_ifnames[i][1] == '\0') &&
453 ((fp->fr_ifnames[i][0] == '-') ||
454 (fp->fr_ifnames[i][0] == '*'))) {
455 fp->fr_ifas[i] = NULL;
456 } else if (*fp->fr_ifnames[i]) {
457 fp->fr_ifas[i] = GETUNIT(fp->fr_ifnames[i], fp->fr_v);
459 fp->fr_ifas[i] = (void *)-1;
/*
 * First fdp destination (its assignment is not visible here): FR_DUP is
 * cleared, the named interface is looked up, an ire is obtained for the
 * interface's local address, FR_DUP is set again on the visible v4 and v6
 * paths, and the ire is stored in fd_ifp.
 */
465 fp->fr_flags &= ~FR_DUP;
466 if (*fdp->fd_ifname) {
467 ill = get_unit(fdp->fd_ifname, (int)fp->fr_v);
470 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 4)) {
472 ire = ire_ctable_lookup(ipif->ipif_local_addr, 0,
473 IRE_LOCAL, NULL, NULL,
476 ire = ire_lookup_myaddr(ipif->ipif_local_addr);
481 fp->fr_flags |= FR_DUP;
484 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 6)) {
485 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, 0,
486 IRE_LOCAL, NULL, NULL,
491 fp->fr_flags |= FR_DUP;
494 fdp->fd_ifp = (struct ifnet *)ire;
/*
 * Second fdp destination: the same interface and ire lookup, with no visible
 * change to FR_DUP.
 */
499 if (*fdp->fd_ifname) {
500 ill = get_unit(fdp->fd_ifname, (int)fp->fr_v);
503 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 4)) {
505 ire = ire_ctable_lookup(ipif->ipif_local_addr, 0,
506 IRE_LOCAL, NULL, NULL,
509 ire = ire_lookup_myaddr(ipif->ipif_local_addr);
515 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 6)) {
516 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, 0,
517 IRE_LOCAL, NULL, NULL,
523 fdp->fd_ifp = (struct ifnet *)ire;
527 * Look for a matching filter rule, but don't include the next or
528 * interface pointer in the comparison (fr_next, fr_ifa).
/* fr_cksum starts at 0 and the walk starts at fr_ip (loop body not visible). */
530 for (fp->fr_cksum = 0, p = (u_int *)&fp->fr_ip, pp = &fp->fr_cksum;
/* A match needs an equal checksum and FR_CMPSIZ identical bytes from fr_ip. */
534 for (; (f = *ftail); ftail = &f->fr_next)
535 if ((fp->fr_cksum == f->fr_cksum) &&
536 !bcmp((char *)&f->fr_ip, (char *)&fp->fr_ip, FR_CMPSIZ))
540 * If zero'ing statistics, copy current to caller and zero.
542 if (req == SIOCZRLST) {
/* The write lock is downgraded before the matched rule is copied out. */
547 MUTEX_DOWNGRADE(&ipf_mutex);
548 error = IWCOPYPTR((caddr_t)f, data, sizeof(*f));
557 if (req != SIOCINAFR && req != SIOCINIFR)
/*
 * fr_hits appears to act as a position count here: the walk continues while
 * the pre-decremented value is non-zero and rules remain - TODO confirm.
 */
563 while (--fp->fr_hits && (f = *ftail))
/* Removal requests. */
570 if (req == SIOCRMAFR || req == SIOCRMIFR) {
575 * Only return EBUSY if there is a group list, else
576 * it's probably just state information referencing
579 if ((f->fr_ref > 1) && f->fr_grp) {
/* Drop one reference on the group head, if the group has one. */
583 if (fg && fg->fg_head)
584 fg->fg_head->fr_ref--;
586 fr_delgroup(f->fr_grhead, fp->fr_flags,
588 fixskip(fprev, f, -1);
/*
 * Add/insert: allocate a new entry, take a reference on the group head if
 * there is one, and copy the prepared rule into the new entry.
 */
599 KMALLOC(f, frentry_t *);
601 if (fg && fg->fg_head)
602 fg->fg_head->fr_ref++;
603 bcopy((char *)fp, (char *)f, sizeof(*f));
/* Insert requests also call fixskip() with +1 for the new rule. */
608 if (req == SIOCINIFR || req == SIOCINAFR)
609 fixskip(fprev, f, 1);
/* fr_addgroup() is called with the new rule's fr_grhead. */
611 group = f->fr_grhead;
613 fg = fr_addgroup(group, f, unit, set);
/* Release ipf_mutex. */
619 RWLOCK_EXIT(&ipf_mutex);
625 * routines below for saving IP headers to buffer
/*
 * iplopen - open entry point.
 *
 * Tests that the filter is running (fr_running > 0) and that otype includes
 * OTYP_CHR, then maps the minor number to a status: ENXIO when it exceeds
 * IPL_LOGMAX, 0 otherwise.  The return statements are not visible in this
 * excerpt.
 */
627 int iplopen(devp, flags, otype, cred)
632 minor_t min = getminor(*devp);
635 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
637 if ((fr_running <= 0) || !(otype & OTYP_CHR))
/* min is reused to hold the status value. */
639 min = (IPL_LOGMAX < min) ? ENXIO : 0;
/*
 * iplclose - close entry point.
 *
 * Maps the minor number of dev to a status: ENXIO when it exceeds
 * IPL_LOGMAX, 0 otherwise.  The return statement is not visible in this
 * excerpt.
 */
644 int iplclose(dev, flags, otype, cred)
649 minor_t min = getminor(dev);
652 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
/* min is reused to hold the status value. */
654 min = (IPL_LOGMAX < min) ? ENXIO : 0;
661 * both of these must operate with at least splnet() lest they be
662 * called during packet processing and cause an inconsistency to appear in
/*
 * iplread - read entry point: returns the result of ipflog_read() for the
 * minor number of dev.  The closing #endif shows that at least part of this
 * is conditional on IPFILTER_LOG.
 */
665 int iplread(dev, uio, cp)
667 register struct uio *uio;
671 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
673 return ipflog_read(getminor(dev), uio);
675 #endif /* IPFILTER_LOG */
679 * send_reset - this could conceivably be a call to tcp_respond(), but that
680 * requires a large amount of setting up and isn't any more efficient.
/*
 * send_reset - build a TCP RST in reply to the packet described by fin and
 * hand it to send_ip().
 *
 * oip is the header of the offending packet; fin->fin_dp is used as its TCP
 * header.  A zeroed message block is allocated holding an IP (v4 or v6)
 * header followed by a TCP header, with ports and addresses swapped relative
 * to the original.
 */
682 int send_reset(oip, fin)
686 tcphdr_t *tcp, *tcp2;
690 ip6_t *ip6, *oip6 = (ip6_t *)oip;
/* The RST flag of the incoming segment is tested first. */
694 tcp = (struct tcphdr *)fin->fin_dp;
695 if (tcp->th_flags & TH_RST)
/* tlen is 1 when the segment carries SYN or FIN, otherwise 0. */
697 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
700 hlen = sizeof(ip6_t);
704 hlen += sizeof(*tcp2);
/* Allocate 16 bytes more than the headers need. */
705 if ((m = (mblk_t *)allocb(hlen + 16, BPRI_HI)) == NULL)
710 m->b_wptr = m->b_rptr + hlen;
711 bzero((char *)m->b_rptr, hlen);
/* The TCP header occupies the last sizeof(*tcp2) bytes of the block. */
712 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
713 tcp2->th_dport = tcp->th_sport;
714 tcp2->th_sport = tcp->th_dport;
/*
 * With an incoming ACK the reply's sequence number is the peer's ack number;
 * the other visible assignments acknowledge the peer's sequence number plus
 * tlen.
 */
715 if (tcp->th_flags & TH_ACK) {
716 tcp2->th_seq = tcp->th_ack;
717 tcp2->th_flags = TH_RST|TH_ACK;
719 tcp2->th_ack = ntohl(tcp->th_seq);
720 tcp2->th_ack += tlen;
721 tcp2->th_ack = htonl(tcp2->th_ack);
722 tcp2->th_flags = TH_RST;
724 tcp2->th_off = sizeof(struct tcphdr) >> 2;
/*
 * NOTE(review): this assignment overwrites the th_flags value set in the
 * statements above, so the earlier TH_RST-only setting has no effect in the
 * visible code - confirm which flag combination is intended.
 */
725 tcp2->th_flags = TH_RST|TH_ACK;
728 * This is to get around a bug in the Solaris 2.4/2.5 TCP checksum
729 * computation that is done by their put routine.
731 tcp2->th_sum = htons(0x14);
/* IPv6 reply header. */
733 if (fin->fin_v == 6) {
734 ip6 = (ip6_t *)m->b_rptr;
735 ip6->ip6_src = oip6->ip6_dst;
736 ip6->ip6_dst = oip6->ip6_src;
737 ip6->ip6_plen = htons(sizeof(*tcp));
738 ip6->ip6_nxt = IPPROTO_TCP;
/* IPv4 reply header; the TOS is copied from the original packet. */
742 ip = (ip_t *)m->b_rptr;
743 ip->ip_src.s_addr = oip->ip_dst.s_addr;
744 ip->ip_dst.s_addr = oip->ip_src.s_addr;
745 ip->ip_hl = sizeof(*ip) >> 2;
746 ip->ip_p = IPPROTO_TCP;
747 ip->ip_len = htons(sizeof(*ip) + sizeof(*tcp));
748 ip->ip_tos = oip->ip_tos;
750 return send_ip(fin, m);
/*
 * send_ip - pass a locally built packet (m) to the IP output routine.
 *
 * ipfs_mutex and ipf_solaris are released before the packet is handed to
 * ip_wput_v6() or ip_wput() on the write queue of the interface recorded in
 * fin->fin_qif, and both are re-taken as readers afterwards (in the reverse
 * order).  Presumably this stops the output path from taking the same locks
 * recursively - TODO confirm.
 *
 * The storage class is written before the type to match the prototype for
 * this function near the top of the file.
 */
754 static int send_ip(fin, m)
758 RWLOCK_EXIT(&ipfs_mutex);
759 RWLOCK_EXIT(&ipf_solaris);
/* IPv6 packets go out through ip_wput_v6(). */
761 if (fin->fin_v == 6) {
762 extern void ip_wput_v6 __P((queue_t *, mblk_t *));
765 ip6 = (ip6_t *)m->b_rptr;
769 ip_wput_v6(((qif_t *)fin->fin_qif)->qf_ill->ill_wq, m);
/* IPv4: the version field is set here, then the packet goes to ip_wput(). */
775 ip = (ip_t *)m->b_rptr;
776 ip->ip_v = IPVERSION;
778 ip_wput(((qif_t *)fin->fin_qif)->qf_ill->ill_wq, m);
/* Re-take the locks released at the top. */
780 READ_ENTER(&ipf_solaris);
781 READ_ENTER(&ipfs_mutex);
/*
 * send_icmp_err - build an ICMP (or ICMPv6) error in reply to the packet
 * whose header is oip and hand it to send_ip().
 *
 * type is the ICMP type and the code comes from fin->fin_icode.  The new
 * message block holds a fresh IP header, the ICMP header and a copy of the
 * start of the offending packet.
 */
786 int send_icmp_err(oip, type, fin, dst)
/* The type must lie in 0..ICMP_MAXTYPE. */
804 if ((type < 0) || (type > ICMP_MAXTYPE))
807 code = fin->fin_icode;
/*
 * NOTE(review): the upper bound uses '>' against the element count of
 * icmptoicmp6unreach, so a code equal to the element count passes this test,
 * and that table is later indexed with code.  Confirm whether '>=' is
 * intended (assumes the table's elements are int-sized, as the sizeof
 * division implies).
 */
809 if ((code < 0) || (code > sizeof(icmptoicmp6unreach)/sizeof(int)))
/*
 * IPv6: include up to 512 bytes of the original message and translate the
 * type, and for destination-unreachable the code, through the lookup tables.
 */
817 if (oip->ip_v == 6) {
820 sz += MIN(m->b_wptr - m->b_rptr, 512);
821 hlen = sizeof(ip6_t);
822 type = icmptoicmp6types[type];
823 if (type == ICMP6_DST_UNREACH)
824 code = icmptoicmp6unreach[code];
/*
 * IPv4 ICMP packets not flagged FI_SHORT are switched on the high byte of
 * fin_data[0] (the cases are not visible in this excerpt).
 */
828 if ((oip->ip_p == IPPROTO_ICMP) &&
829 !(fin->fin_fi.fi_fl & FI_SHORT))
830 switch (ntohs(fin->fin_data[0]) >> 8)
/* IPv4 size: two IP headers (new one plus embedded original) and 8 bytes. */
841 sz = sizeof(ip_t) * 2;
842 sz += 8; /* 64 bits of data */
/* Add the ICMP header bytes that precede icmp_ip; allocate 16 bytes spare. */
846 sz += offsetof(struct icmp, icmp_ip);
847 if ((mb = (mblk_t *)allocb((size_t)sz + 16, BPRI_HI)) == NULL)
851 mb->b_wptr = mb->b_rptr + sz;
852 bzero((char *)mb->b_rptr, (size_t)sz);
/* The ICMP header is placed sizeof(*ip) bytes into the block. */
853 icmp = (struct icmp *)(mb->b_rptr + sizeof(*ip));
854 icmp->icmp_type = type;
855 icmp->icmp_code = code;
856 icmp->icmp_cksum = 0;
/* "Fragmentation needed" replies carry the interface's ill_max_frag. */
858 if (type == ICMP_UNREACH && (il = qif->qf_ill) &&
859 fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
860 icmp->icmp_nextmtu = htons(il->ill_max_frag);
/*
 * IPv6 header.  dst6 is obtained either from fr_ifpaddr() for the receiving
 * interface or from the original destination address; the condition that
 * selects between them is not visible here.
 */
864 if (oip->ip_v == 6) {
865 struct in6_addr dst6;
869 if (fr_ifpaddr(6, ((qif_t *)fin->fin_qif)->qf_ill,
870 (struct in_addr *)&dst6) == -1)
873 dst6 = oip6->ip6_dst;
877 ip6 = (ip6_t *)mb->b_rptr;
881 ip6->ip6_plen = htons(sz);
882 ip6->ip6_nxt = IPPROTO_ICMPV6;
884 ip6->ip6_dst = oip6->ip6_src;
/* Copy the original packet bytes in after the ICMP header. */
885 sz -= offsetof(struct icmp, icmp_ip);
886 bcopy((char *)m->b_rptr, (char *)&icmp->icmp_ip, sz);
887 icmp->icmp_cksum = csz - sizeof(ip6_t);
/* IPv4 header; the id and TOS are taken from the original packet. */
891 ip = (ip_t *)mb->b_rptr;
892 ip->ip_v = IPVERSION;
893 ip->ip_hl = (sizeof(*ip) >> 2);
894 ip->ip_p = IPPROTO_ICMP;
895 ip->ip_id = oip->ip_id;
898 ip->ip_tos = oip->ip_tos;
899 ip->ip_len = (u_short)htons(sz);
901 if (fr_ifpaddr(4, ((qif_t *)fin->fin_qif)->qf_ill,
907 ip->ip_dst = oip->ip_src;
/*
 * Embed the original fixed-size IP header followed by the first 8 bytes
 * found after the original header's full length (ip_hl << 2).
 */
908 bcopy((char *)oip, (char *)&icmp->icmp_ip, sizeof(*oip));
909 bcopy((char *)oip + (oip->ip_hl << 2),
910 (char *)&icmp->icmp_ip + sizeof(*oip), 8);
911 icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
916 * Need to exit out of these so we don't recursively call rw_enter
919 return send_ip(fin, mb);