2 * Copyright (C) 1993-2001 by Darren Reed.
4 * See the IPFILTER.LICENCE file for details on licencing.
6 * I hate legaleese, don't you ?
9 static const char sccsid[] = "%W% %G% (C) 1993-2000 Darren Reed";
10 static const char rcsid[] = "@(#)$Id: ip_sfil.c,v 2.23.2.18 2002/06/06 10:47:26 darrenr Exp $";
13 #include <sys/types.h>
14 #include <sys/errno.h>
15 #include <sys/param.h>
16 #include <sys/cpuvar.h>
18 #include <sys/ioctl.h>
19 #include <sys/filio.h>
20 #include <sys/systm.h>
23 #include <sys/sunddi.h>
24 #include <sys/ksynch.h>
26 #include <sys/mkdev.h>
27 #include <sys/protosw.h>
28 #include <sys/socket.h>
29 #include <sys/dditypes.h>
30 #include <sys/cmn_err.h>
33 #include <net/route.h>
34 #include <netinet/in.h>
35 #include <netinet/in_systm.h>
36 #include <netinet/ip.h>
37 #include <netinet/ip_var.h>
38 #include <netinet/tcp.h>
39 #include <netinet/udp.h>
40 #include <netinet/tcpip.h>
41 #include <netinet/ip_icmp.h>
42 #include "ip_compat.h"
44 # include <netinet/icmp6.h>
52 #include <inet/ip_ire.h>
54 #define MIN(a,b) (((a)<(b))?(a):(b))
/*
 * fr_flags and fr_active are defined outside this file.  fr_active is used
 * below as a rule-set index and is toggled between 0 and 1 by iplioctl().
 * The type is spelled out because implicit int is not valid since C99.
 */
58 extern int fr_flags, fr_active;
61 int ipl_unreach = ICMP_UNREACH_HOST;
62 u_long ipl_frouteok[2] = {0, 0};
63 static int frzerostats __P((caddr_t));
/*
 * Address of the "ip_def_ttl" value inside ip_param_arr, looked up in
 * iplattach().  The two declarations differ only in pointee type and are
 * presumably alternatives under a preprocessor conditional that is not
 * visible here -- TODO confirm.
 */
65 static u_int *ip_ttl_ptr;
67 static u_long *ip_ttl_ptr;
70 static int frrequest __P((minor_t, int, caddr_t, int));
71 static int send_ip __P((fr_info_t *fin, mblk_t *m));
/* Locks and condition variables; every one is initialised in iplattach(). */
72 kmutex_t ipl_mutex, ipf_authmx, ipf_rw;
73 KRWLOCK_T ipf_mutex, ipfs_mutex, ipf_solaris;
74 KRWLOCK_T ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth;
75 kcondvar_t iplwait, ipfauthwait;
/* Trace entry into ipldetach() on the console/log. */
83 cmn_err(CE_CONT, "ipldetach()\n");
/* Walk every log unit from IPL_LOGMAX down to 0 (loop body not visible). */
86 for (i = IPL_LOGMAX; i >= 0; i--)
89 i = FR_INQUE|FR_OUTQUE;
/* frflush() is called for the IPL_LOGIPF unit with both queue flags set. */
90 (void) frflush(IPL_LOGIPF, FR_INQUE|FR_OUTQUE);
/*
 * Tear down the condition variable, mutexes and rwlocks that iplattach()
 * initialises.
 */
95 cv_destroy(&ipfauthwait);
96 mutex_destroy(&ipf_authmx);
97 mutex_destroy(&ipl_mutex);
98 mutex_destroy(&ipf_rw);
99 RW_DESTROY(&ipf_mutex);
100 RW_DESTROY(&ipf_frag);
101 RW_DESTROY(&ipf_state);
102 RW_DESTROY(&ipf_natfrag);
103 RW_DESTROY(&ipf_nat);
104 RW_DESTROY(&ipf_auth);
105 RW_DESTROY(&ipfs_mutex);
106 /* NOTE: This lock is acquired in ipf_detach */
/* ipf_solaris is still held at this point, so release it before destroying it. */
107 RWLOCK_EXIT(&ipf_solaris);
108 RW_DESTROY(&ipf_solaris);
/*
 * iplattach - set up the driver's global state: clears the rule cache,
 * initialises all locks and condition variables, starts the NAT, state and
 * appr subsystems, and locates the system default TTL parameter.
 */
113 int iplattach __P((void))
118 cmn_err(CE_CONT, "iplattach()\n");
/* Start with an empty rule cache. */
120 bzero((char *)frcache, sizeof(frcache));
/* Mutexes, rwlocks and condition variables used throughout this file. */
121 mutex_init(&ipf_rw, "ipf rw mutex", MUTEX_DRIVER, NULL);
122 mutex_init(&ipl_mutex, "ipf log mutex", MUTEX_DRIVER, NULL);
123 mutex_init(&ipf_authmx, "ipf auth log mutex", MUTEX_DRIVER, NULL);
124 RWLOCK_INIT(&ipf_solaris, "ipf filter load/unload mutex", NULL);
125 RWLOCK_INIT(&ipf_mutex, "ipf filter rwlock", NULL);
126 RWLOCK_INIT(&ipfs_mutex, "ipf solaris mutex", NULL);
127 RWLOCK_INIT(&ipf_frag, "ipf fragment rwlock", NULL);
128 RWLOCK_INIT(&ipf_state, "ipf IP state rwlock", NULL);
129 RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock", NULL);
130 RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock", NULL);
131 RWLOCK_INIT(&ipf_auth, "ipf IP User-Auth rwlock", NULL);
132 cv_init(&iplwait, "ipl condvar", CV_DRIVER, NULL);
133 cv_init(&ipfauthwait, "ipf auth condvar", CV_DRIVER, NULL);
/*
 * Each subsystem initialiser is checked for a -1 return; what happens on
 * failure is not visible in this excerpt.
 */
137 if (nat_init() == -1)
139 if (fr_stateinit() == -1)
141 if (appr_init() == -1)
146 * XXX - There is no terminator for this array, so it is not possible
147 * to tell if what we are looking for is missing and go off the end
/*
 * Remember where the "ip_def_ttl" value lives inside ip_param_arr; it is
 * dereferenced later to fill in the TTL of generated packets.
 */
151 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
152 ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
/*
 * frzerostats - copy the contents of the local fio out to the caller's
 * buffer (data), then zero both entries of frstats.  How fio is filled in
 * is not visible in this excerpt.
 */
160 static int frzerostats(data)
167 error = IWCOPYPTR((caddr_t)&fio, data, sizeof(fio));
171 bzero((char *)frstats, sizeof(*frstats) * 2);
178 * Filter ioctl interface.
/*
 * iplioctl - ioctl entry point for the filter devices.
 * The minor number of dev selects the unit.  Requests for the NAT, state
 * and auth units are passed to their own ioctl handlers; everything else is
 * handled here.  Most requests first test mode for FWRITE (the action taken
 * when it is missing is not visible in this excerpt).
 */
180 int iplioctl(dev, cmd, data, mode, cp, rp)
196 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
197 dev, cmd, data, mode, cp, rp);
199 unit = getminor(dev);
200 if (IPL_LOGMAX < unit)
/* While the filter is not running only SIOCFRENB on the IPL_LOGIPF unit passes this test. */
203 if (fr_running == 0 && (cmd != SIOCFRENB || unit != IPL_LOGIPF))
/*
 * ipf_solaris is held as reader for the rest of the request; each exit
 * path visible below releases it.
 */
209 READ_ENTER(&ipf_solaris);
210 if (unit == IPL_LOGNAT) {
211 error = nat_ioctl((caddr_t)data, cmd, mode);
212 RWLOCK_EXIT(&ipf_solaris);
215 if (unit == IPL_LOGSTATE) {
216 error = fr_state_ioctl((caddr_t)data, cmd, mode);
217 RWLOCK_EXIT(&ipf_solaris);
220 if (unit == IPL_LOGAUTH) {
221 error = fr_auth_ioctl((caddr_t)data, mode, cmd, NULL, NULL);
222 RWLOCK_EXIT(&ipf_solaris);
231 if (!(mode & FWRITE))
234 error = IRCOPY((caddr_t)data, (caddr_t)&enable,
239 if (!(mode & FWRITE))
/* fr_flags is replaced from the caller's buffer under the write lock. */
242 WRITE_ENTER(&ipf_mutex);
243 error = IRCOPY((caddr_t)data, (caddr_t)&fr_flags,
245 RWLOCK_EXIT(&ipf_mutex);
249 error = IWCOPY((caddr_t)&fr_flags, (caddr_t)data,
258 if (!(mode & FWRITE))
261 error = frrequest(unit, cmd, (caddr_t)data, fr_active);
266 if (!(mode & FWRITE))
269 error = frrequest(unit, cmd, (caddr_t)data,
273 if (!(mode & FWRITE))
/*
 * Swap rule sets: clear the rule cache, report the current set index to
 * the caller, then flip fr_active between 0 and 1.
 */
276 WRITE_ENTER(&ipf_mutex);
277 bzero((char *)frcache, sizeof(frcache[0]) * 2);
278 error = IWCOPY((caddr_t)&fr_active, (caddr_t)data,
282 fr_active = 1 - fr_active;
283 RWLOCK_EXIT(&ipf_mutex);
/* The copy-out of fio happens after ipf_mutex has been dropped. */
290 READ_ENTER(&ipf_mutex);
292 RWLOCK_EXIT(&ipf_mutex);
293 error = IWCOPYPTR((caddr_t)&fio, (caddr_t)data, sizeof(fio));
299 if (!(mode & FWRITE))
302 error = frzerostats((caddr_t)data);
305 if (!(mode & FWRITE))
/* tmp carries the flush flags in and frflush()'s return value back out. */
308 error = IRCOPY((caddr_t)data, (caddr_t)&tmp,
311 tmp = frflush(unit, tmp);
312 error = IWCOPY((caddr_t)&tmp, (caddr_t)data,
320 error = IRCOPY((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
331 if (!(mode & FWRITE))
334 tmp = ipflog_clear(unit);
335 error = IWCOPY((caddr_t)&tmp, (caddr_t)data,
341 #endif /* IPFILTER_LOG */
343 if (!(mode & FWRITE))
349 error = IWCOPYPTR((caddr_t)ipfr_fragstats(), (caddr_t)data,
/* Report iplused[IPL_LOGIPF] to the caller as an int. */
355 int copy = (int)iplused[IPL_LOGIPF];
357 error = IWCOPY((caddr_t)&copy, (caddr_t)data, sizeof(copy));
367 RWLOCK_EXIT(&ipf_solaris);
/*
 * get_unit - look up an interface (ill_t) by name.
 * Walks the list starting at ill_g_head and matches on name length
 * (including the terminating NUL), SAP and name.  How sap is derived from
 * v, and what is returned, is not visible in this excerpt.
 */
372 ill_t *get_unit(name, v)
376 size_t len = strlen(name) + 1; /* includes \0 */
386 for (il = ill_g_head; il; il = il->ill_next)
/* Cheap checks (length, SAP) come before the string comparison. */
387 if ((len == il->ill_name_length) && (il->ill_sap == sap) &&
388 !strncmp(il->ill_name, name, len))
/*
 * frrequest - apply a rule request to one of the rule lists.
 * unit is the device minor, req the ioctl command, data the caller's rule
 * buffer and set the rule-set index.  The request codes tested below cover
 * insert (SIOCIN*), add (SIOCAD*), remove (SIOCRM*) and zero-statistics
 * (SIOCZRLST).  Parts of the function are not visible in this excerpt.
 */
394 static int frrequest(unit, req, data, set)
399 register frentry_t *fp, *f, **fprev;
400 register frentry_t **ftail;
401 frgroup_t *fg = NULL;
402 int error = 0, in, i;
/* Fetch the rule from the caller into fp. */
412 error = IRCOPYPTR(data, (caddr_t)fp, sizeof(*fp));
/* The data-link SAP recorded in the rule follows its IP version. */
418 fp->fr_sap = IP_DL_SAP;
419 else if (fp->fr_v == 6)
420 fp->fr_sap = IP6_DL_SAP;
/* ipf_mutex is taken as writer from here on. */
427 WRITE_ENTER(&ipf_mutex);
429 * Check that the group number does exist and that if a head group
430 * has been specified, doesn't exist.
432 if ((req != SIOCZRLST) && ((req == SIOCINAFR) || (req == SIOCINIFR) ||
433 (req == SIOCADAFR) || (req == SIOCADIFR)) && fp->fr_grhead &&
434 fr_findgroup(fp->fr_grhead, fp->fr_flags, unit, set, NULL)) {
438 if ((req != SIOCZRLST) && fp->fr_group &&
439 !fr_findgroup(fp->fr_group, fp->fr_flags, unit, set, NULL)) {
/* in is 0 for FR_INQUE rules and 1 otherwise; it is the first table index. */
444 in = (fp->fr_flags & FR_INQUE) ? 0 : 1;
/* Choose the list head by unit, accounting/filter flag and IP version. */
446 if (unit == IPL_LOGAUTH)
447 ftail = fprev = &ipauth;
448 else if ((fp->fr_flags & FR_ACCOUNT) && (fp->fr_v == 4))
449 ftail = fprev = &ipacct[in][set];
450 else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) && (fp->fr_v == 4))
451 ftail = fprev = &ipfilter[in][set];
453 else if ((fp->fr_flags & FR_ACCOUNT) && (fp->fr_v == 6))
454 ftail = fprev = &ipacct6[in][set];
455 else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) && (fp->fr_v == 6))
456 ftail = fprev = &ipfilter6[in][set];
/* A rule that belongs to a group is placed on that group's own list. */
463 group = fp->fr_group;
465 fg = fr_findgroup(group, fp->fr_flags, unit, set, NULL);
470 ftail = fprev = fg->fg_start;
/* Clear the rule cache. */
473 bzero((char *)frcache, sizeof(frcache[0]) * 2);
/*
 * Resolve the four interface-name slots: a lone "-" or "*" maps to NULL,
 * any other non-empty name is looked up with GETUNIT().
 */
475 for (i = 0; i < 4; i++) {
476 if ((fp->fr_ifnames[i][1] == '\0') &&
477 ((fp->fr_ifnames[i][0] == '-') ||
478 (fp->fr_ifnames[i][0] == '*'))) {
479 fp->fr_ifas[i] = NULL;
480 } else if (*fp->fr_ifnames[i]) {
481 fp->fr_ifas[i] = GETUNIT(fp->fr_ifnames[i], fp->fr_v);
483 fp->fr_ifas[i] = (void *)-1;
/*
 * First named destination: FR_DUP is cleared here and set again only on the
 * branches below; the IRE that was looked up is stored in fd_ifp.
 */
489 fp->fr_flags &= ~FR_DUP;
490 if (*fdp->fd_ifname) {
491 ill = get_unit(fdp->fd_ifname, (int)fp->fr_v);
494 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 4)) {
496 ire = ire_ctable_lookup(ipif->ipif_local_addr, 0,
497 IRE_LOCAL, NULL, NULL,
500 ire = ire_lookup_myaddr(ipif->ipif_local_addr);
505 fp->fr_flags |= FR_DUP;
508 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 6)) {
509 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, 0,
510 IRE_LOCAL, NULL, NULL,
515 fp->fr_flags |= FR_DUP;
518 fdp->fd_ifp = (struct ifnet *)ire;
/* Second named destination: same IRE lookup, without touching FR_DUP. */
523 if (*fdp->fd_ifname) {
524 ill = get_unit(fdp->fd_ifname, (int)fp->fr_v);
527 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 4)) {
529 ire = ire_ctable_lookup(ipif->ipif_local_addr, 0,
530 IRE_LOCAL, NULL, NULL,
533 ire = ire_lookup_myaddr(ipif->ipif_local_addr);
539 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 6)) {
540 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, 0,
541 IRE_LOCAL, NULL, NULL,
547 fdp->fd_ifp = (struct ifnet *)ire;
551 * Look for a matching filter rule, but don't include the next or
552 * interface pointer in the comparison (fr_next, fr_ifa).
554 for (fp->fr_cksum = 0, p = (u_int *)&fp->fr_ip, pp = &fp->fr_cksum;
/* The checksum is compared first so bcmp() only runs on likely matches. */
558 for (; (f = *ftail); ftail = &f->fr_next)
559 if ((fp->fr_cksum == f->fr_cksum) &&
560 !bcmp((char *)&f->fr_ip, (char *)&fp->fr_ip, FR_CMPSIZ))
564 * If zero'ing statistics, copy current to caller and zero.
566 if (req == SIOCZRLST) {
/* The lock is downgraded before the matched rule is copied out. */
571 MUTEX_DOWNGRADE(&ipf_mutex);
572 error = IWCOPYPTR((caddr_t)f, data, sizeof(*f));
581 if (req != SIOCINAFR && req != SIOCINIFR)
/* fr_hits is counted down while stepping along the list (loop body not visible). */
587 while (--fp->fr_hits && (f = *ftail))
594 if (req == SIOCRMAFR || req == SIOCRMIFR) {
599 * Only return EBUSY if there is a group list, else
600 * it's probably just state information referencing
603 if ((f->fr_ref > 1) && f->fr_grp) {
/* Removing a rule drops the reference it held on its group head. */
607 if (fg && fg->fg_head)
608 fg->fg_head->fr_ref--;
610 fr_delgroup(f->fr_grhead, fp->fr_flags,
612 fixskip(fprev, f, -1);
/* New rule: allocate an entry, reference the group head, copy fp into it. */
623 KMALLOC(f, frentry_t *);
625 if (fg && fg->fg_head)
626 fg->fg_head->fr_ref++;
627 bcopy((char *)fp, (char *)f, sizeof(*f));
632 if (req == SIOCINIFR || req == SIOCINAFR)
633 fixskip(fprev, f, 1);
635 group = f->fr_grhead;
637 fg = fr_addgroup(group, f, unit, set);
643 RWLOCK_EXIT(&ipf_mutex);
649 * routines below for saving IP headers to buffer
/*
 * iplopen - device open entry point.
 * Tests that the filter is running (fr_running > 0) and that otype has
 * OTYP_CHR set; the action taken when the test fails is not visible here.
 */
651 int iplopen(devp, flags, otype, cred)
656 minor_t min = getminor(*devp);
659 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
661 if ((fr_running <= 0) || !(otype & OTYP_CHR))
/* min is reused as the result: ENXIO for a minor above IPL_LOGMAX, else 0. */
663 min = (IPL_LOGMAX < min) ? ENXIO : 0;
/*
 * iplclose - device close entry point; only validates the minor number.
 */
668 int iplclose(dev, flags, otype, cred)
673 minor_t min = getminor(dev);
676 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
/* min is reused as the result: ENXIO for a minor above IPL_LOGMAX, else 0. */
678 min = (IPL_LOGMAX < min) ? ENXIO : 0;
685 * both of these must operate with at least splnet() lest they be
686 * called during packet processing and cause an inconsistency to appear in
/*
 * iplread - device read entry point; hands the request to ipflog_read()
 * for the unit given by the minor number of dev.  The closing #endif shows
 * the function is compiled only when IPFILTER_LOG is defined.
 */
689 int iplread(dev, uio, cp)
691 register struct uio *uio;
695 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
697 return ipflog_read(getminor(dev), uio);
699 #endif /* IPFILTER_LOG */
703 * send_reset - this could conceivably be a call to tcp_respond(), but that
704 * requires a large amount of setting up and isn't any more efficient.
/*
 * send_reset - build a TCP RST in reply to the segment described by fin
 * (oip is its IP header) and pass it to send_ip().
 */
706 int send_reset(oip, fin)
710 tcphdr_t *tcp, *tcp2;
714 ip6_t *ip6, *oip6 = (ip6_t *)oip;
718 tcp = (struct tcphdr *)fin->fin_dp;
/* Segments that already carry RST are special-cased (action not visible; presumably no reply). */
719 if (tcp->th_flags & TH_RST)
/* tlen is 1 when SYN or FIN is set, else 0; it is added to the acknowledged sequence number below. */
721 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
724 hlen = sizeof(ip6_t);
728 hlen += sizeof(*tcp2);
/* Room for IP header + TCP header, plus 16 spare bytes. */
729 if ((m = (mblk_t *)allocb(hlen + 16, BPRI_HI)) == NULL)
734 m->b_wptr = m->b_rptr + hlen;
735 bzero((char *)m->b_rptr, hlen);
/* The TCP header occupies the last sizeof(*tcp2) bytes of the buffer. */
736 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
/* Reply travels the other way, so the ports are swapped. */
737 tcp2->th_dport = tcp->th_sport;
738 tcp2->th_sport = tcp->th_dport;
739 if (tcp->th_flags & TH_ACK) {
740 tcp2->th_seq = tcp->th_ack;
741 tcp2->th_flags = TH_RST|TH_ACK;
743 tcp2->th_ack = ntohl(tcp->th_seq);
744 tcp2->th_ack += tlen;
745 tcp2->th_ack = htonl(tcp2->th_ack);
746 tcp2->th_flags = TH_RST;
/*
 * NOTE(review): th_flags was already assigned on both paths above.  If the
 * assignment below is unconditional it overrides them, and the flag choice
 * above also looks inverted relative to RFC 793 reset generation -- confirm
 * against the complete file.
 */
748 tcp2->th_off = sizeof(struct tcphdr) >> 2;
749 tcp2->th_flags = TH_RST|TH_ACK;
752 * This is to get around a bug in the Solaris 2.4/2.5 TCP checksum
753 * computation that is done by their put routine.
755 tcp2->th_sum = htons(0x14);
/* Fill in the IP header: addresses are the original's, reversed. */
757 if (fin->fin_v == 6) {
758 ip6 = (ip6_t *)m->b_rptr;
759 ip6->ip6_src = oip6->ip6_dst;
760 ip6->ip6_dst = oip6->ip6_src;
761 ip6->ip6_plen = htons(sizeof(*tcp));
762 ip6->ip6_nxt = IPPROTO_TCP;
766 ip = (ip_t *)m->b_rptr;
767 ip->ip_src.s_addr = oip->ip_dst.s_addr;
768 ip->ip_dst.s_addr = oip->ip_src.s_addr;
769 ip->ip_hl = sizeof(*ip) >> 2;
770 ip->ip_p = IPPROTO_TCP;
771 ip->ip_len = htons(sizeof(*ip) + sizeof(*tcp));
772 ip->ip_tos = oip->ip_tos;
774 return send_ip(fin, m);
/*
 * send_ip - hand a locally built packet (m) to IP's write-side put routine
 * on the queue of the interface recorded in fin->fin_qif.
 * ipfs_mutex and ipf_solaris are released around the call and re-acquired
 * as reader afterwards.
 */
778 static int send_ip(fin, m)
782 RWLOCK_EXIT(&ipfs_mutex);
783 RWLOCK_EXIT(&ipf_solaris);
785 if (fin->fin_v == 6) {
786 extern void ip_wput_v6 __P((queue_t *, mblk_t *));
789 ip6 = (ip6_t *)m->b_rptr;
793 ip_wput_v6(((qif_t *)fin->fin_qif)->qf_ill->ill_wq, m);
799 ip = (ip_t *)m->b_rptr;
800 ip->ip_v = IPVERSION;
/* TTL comes from the system's "ip_def_ttl" parameter located in iplattach(). */
801 ip->ip_ttl = (u_char)(*ip_ttl_ptr);
802 ip_wput(((qif_t *)fin->fin_qif)->qf_ill->ill_wq, m);
/* Re-acquire in the reverse order of release. */
804 READ_ENTER(&ipf_solaris);
805 READ_ENTER(&ipfs_mutex);
/*
 * send_icmp_err - build an ICMP (or ICMPv6) error in response to the packet
 * whose IP header is oip and pass it to send_ip().
 * type is checked against ICMP_MAXTYPE; the code is taken from
 * fin->fin_icode.  Parts of the function are not visible in this excerpt.
 */
810 int send_icmp_err(oip, type, fin, dst)
828 if ((type < 0) || (type > ICMP_MAXTYPE))
831 code = fin->fin_icode;
/*
 * code indexes icmptoicmp6unreach[] below, so it must be strictly less than
 * the element count; equality would read one past the end of the table.
 */
833 if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int)))
841 if (oip->ip_v == 6) {
/* IPv6: quote at most 512 bytes of the offending packet and translate type/code. */
844 sz += MIN(m->b_wptr - m->b_rptr, 512);
845 hlen = sizeof(ip6_t);
846 type = icmptoicmp6types[type];
847 if (type == ICMP6_DST_UNREACH)
848 code = icmptoicmp6unreach[code];
852 if ((oip->ip_p == IPPROTO_ICMP) &&
853 !(fin->fin_fi.fi_fl & FI_SHORT))
854 switch (ntohs(fin->fin_data[0]) >> 8)
/* IPv4: two IP headers (the reply's and the quoted one) plus 8 bytes of payload. */
865 sz = sizeof(ip_t) * 2;
866 sz += 8; /* 64 bits of data */
870 sz += offsetof(struct icmp, icmp_ip);
871 if ((mb = (mblk_t *)allocb((size_t)sz + 16, BPRI_HI)) == NULL)
875 mb->b_wptr = mb->b_rptr + sz;
876 bzero((char *)mb->b_rptr, (size_t)sz);
877 icmp = (struct icmp *)(mb->b_rptr + sizeof(*ip));
878 icmp->icmp_type = type;
879 icmp->icmp_code = code;
880 icmp->icmp_cksum = 0;
/* For "fragmentation needed" errors, report the interface's ill_max_frag as the next-hop MTU. */
882 if (type == ICMP_UNREACH && (il = qif->qf_ill) &&
883 fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
884 icmp->icmp_nextmtu = htons(il->ill_max_frag);
888 if (oip->ip_v == 6) {
889 struct in6_addr dst6;
893 if (fr_ifpaddr(6, ((qif_t *)fin->fin_qif)->qf_ill,
894 (struct in_addr *)&dst6) == -1)
897 dst6 = oip6->ip6_dst;
901 ip6 = (ip6_t *)mb->b_rptr;
905 ip6->ip6_plen = htons(sz);
906 ip6->ip6_nxt = IPPROTO_ICMPV6;
908 ip6->ip6_dst = oip6->ip6_src;
909 sz -= offsetof(struct icmp, icmp_ip);
910 bcopy((char *)m->b_rptr, (char *)&icmp->icmp_ip, sz);
911 icmp->icmp_cksum = csz - sizeof(ip6_t);
915 ip = (ip_t *)mb->b_rptr;
916 ip->ip_v = IPVERSION;
917 ip->ip_hl = (sizeof(*ip) >> 2);
918 ip->ip_p = IPPROTO_ICMP;
919 ip->ip_id = oip->ip_id;
921 ip->ip_ttl = (u_char)(*ip_ttl_ptr);
922 ip->ip_tos = oip->ip_tos;
923 ip->ip_len = (u_short)htons(sz);
925 if (fr_ifpaddr(4, ((qif_t *)fin->fin_qif)->qf_ill,
931 ip->ip_dst = oip->ip_src;
/* Quote the original IP header followed by the first 8 bytes after it. */
932 bcopy((char *)oip, (char *)&icmp->icmp_ip, sizeof(*oip));
933 bcopy((char *)oip + (oip->ip_hl << 2),
934 (char *)&icmp->icmp_ip + sizeof(*oip), 8);
935 icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
940 * Need to exit out of these so we don't recursively call rw_enter
943 return send_ip(fin, mb);