2 * Copyright (C) 1993-2001 by Darren Reed.
4 * See the IPFILTER.LICENCE file for details on licencing.
6 * I hate legaleese, don't you ?
9 static const char sccsid[] = "%W% %G% (C) 1993-2000 Darren Reed";
10 static const char rcsid[] = "@(#)$Id: ip_sfil.c,v 2.23.2.20 2002/08/28 12:42:44 darrenr Exp $";
13 #include <sys/types.h>
14 #include <sys/errno.h>
15 #include <sys/param.h>
16 #include <sys/cpuvar.h>
18 #include <sys/ioctl.h>
19 #include <sys/filio.h>
20 #include <sys/systm.h>
23 #include <sys/sunddi.h>
24 #include <sys/ksynch.h>
26 #include <sys/mkdev.h>
27 #include <sys/protosw.h>
28 #include <sys/socket.h>
29 #include <sys/dditypes.h>
30 #include <sys/cmn_err.h>
33 #include <net/route.h>
34 #include <netinet/in.h>
35 #include <netinet/in_systm.h>
36 #include <netinet/ip.h>
37 #include <netinet/ip_var.h>
38 #include <netinet/tcp.h>
39 #include <netinet/udp.h>
40 #include <netinet/tcpip.h>
41 #include <netinet/ip_icmp.h>
42 #include "ip_compat.h"
44 # include <netinet/icmp6.h>
52 #include <inet/ip_ire.h>
54 #define MIN(a,b) (((a)<(b))?(a):(b))
58 extern fr_flags, fr_active;
61 int ipl_unreach = ICMP_UNREACH_HOST;
62 u_long ipl_frouteok[2] = {0, 0};
63 static int frzerostats __P((caddr_t));
65 static u_int *ip_ttl_ptr;
66 static u_int *ip_mtudisc;
68 static u_long *ip_ttl_ptr;
69 static u_long *ip_mtudisc;
72 static int frrequest __P((minor_t, int, caddr_t, int));
73 static int send_ip __P((fr_info_t *fin, mblk_t *m));
74 kmutex_t ipl_mutex, ipf_authmx, ipf_rw;
75 KRWLOCK_T ipf_mutex, ipfs_mutex, ipf_solaris;
76 KRWLOCK_T ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth;
77 kcondvar_t iplwait, ipfauthwait;
85 cmn_err(CE_CONT, "ipldetach()\n");
88 for (i = IPL_LOGMAX; i >= 0; i--)
91 i = frflush(IPL_LOGIPF, FR_INQUE|FR_OUTQUE|FR_INACTIVE);
92 i += frflush(IPL_LOGIPF, FR_INQUE|FR_OUTQUE);
97 cv_destroy(&ipfauthwait);
98 mutex_destroy(&ipf_authmx);
99 mutex_destroy(&ipl_mutex);
100 mutex_destroy(&ipf_rw);
101 RW_DESTROY(&ipf_mutex);
102 RW_DESTROY(&ipf_frag);
103 RW_DESTROY(&ipf_state);
104 RW_DESTROY(&ipf_natfrag);
105 RW_DESTROY(&ipf_nat);
106 RW_DESTROY(&ipf_auth);
107 RW_DESTROY(&ipfs_mutex);
108 /* NOTE: This lock is acquired in ipf_detach */
109 RWLOCK_EXIT(&ipf_solaris);
110 RW_DESTROY(&ipf_solaris);
115 int iplattach __P((void))
120 cmn_err(CE_CONT, "iplattach()\n");
122 bzero((char *)frcache, sizeof(frcache));
123 mutex_init(&ipf_rw, "ipf rw mutex", MUTEX_DRIVER, NULL);
124 mutex_init(&ipl_mutex, "ipf log mutex", MUTEX_DRIVER, NULL);
125 mutex_init(&ipf_authmx, "ipf auth log mutex", MUTEX_DRIVER, NULL);
126 RWLOCK_INIT(&ipf_solaris, "ipf filter load/unload mutex", NULL);
127 RWLOCK_INIT(&ipf_mutex, "ipf filter rwlock", NULL);
128 RWLOCK_INIT(&ipfs_mutex, "ipf solaris mutex", NULL);
129 RWLOCK_INIT(&ipf_frag, "ipf fragment rwlock", NULL);
130 RWLOCK_INIT(&ipf_state, "ipf IP state rwlock", NULL);
131 RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock", NULL);
132 RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock", NULL);
133 RWLOCK_INIT(&ipf_auth, "ipf IP User-Auth rwlock", NULL);
134 cv_init(&iplwait, "ipl condvar", CV_DRIVER, NULL);
135 cv_init(&ipfauthwait, "ipf auth condvar", CV_DRIVER, NULL);
139 if (nat_init() == -1)
141 if (fr_stateinit() == -1)
143 if (appr_init() == -1)
149 * XXX - There is no terminator for this array, so it is not possible
150 * to tell if what we are looking for is missing and go off the end
154 if (strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl") == 0) {
155 ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
156 } else if (strcmp(ip_param_arr[i].ip_param_name,
157 "ip_path_mtu_discovery") == 0) {
158 ip_mtudisc = &ip_param_arr[i].ip_param_value;
161 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL)
168 static int frzerostats(data)
175 error = IWCOPYPTR((caddr_t)&fio, data, sizeof(fio));
179 bzero((char *)frstats, sizeof(*frstats) * 2);
186 * Filter ioctl interface.
188 int iplioctl(dev, cmd, data, mode, cp, rp)
204 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
205 dev, cmd, data, mode, cp, rp);
207 unit = getminor(dev);
208 if (IPL_LOGMAX < unit)
211 if (fr_running == 0 && (cmd != SIOCFRENB || unit != IPL_LOGIPF))
217 READ_ENTER(&ipf_solaris);
218 if (unit == IPL_LOGNAT) {
219 error = nat_ioctl((caddr_t)data, cmd, mode);
220 RWLOCK_EXIT(&ipf_solaris);
223 if (unit == IPL_LOGSTATE) {
224 error = fr_state_ioctl((caddr_t)data, cmd, mode);
225 RWLOCK_EXIT(&ipf_solaris);
228 if (unit == IPL_LOGAUTH) {
229 error = fr_auth_ioctl((caddr_t)data, mode, cmd, NULL, NULL);
230 RWLOCK_EXIT(&ipf_solaris);
239 if (!(mode & FWRITE))
242 error = IRCOPY((caddr_t)data, (caddr_t)&enable,
247 if (!(mode & FWRITE))
250 WRITE_ENTER(&ipf_mutex);
251 error = IRCOPY((caddr_t)data, (caddr_t)&fr_flags,
253 RWLOCK_EXIT(&ipf_mutex);
257 error = IWCOPY((caddr_t)&fr_flags, (caddr_t)data,
266 if (!(mode & FWRITE))
269 error = frrequest(unit, cmd, (caddr_t)data, fr_active);
274 if (!(mode & FWRITE))
277 error = frrequest(unit, cmd, (caddr_t)data,
281 if (!(mode & FWRITE))
284 WRITE_ENTER(&ipf_mutex);
285 bzero((char *)frcache, sizeof(frcache[0]) * 2);
286 error = IWCOPY((caddr_t)&fr_active, (caddr_t)data,
290 fr_active = 1 - fr_active;
291 RWLOCK_EXIT(&ipf_mutex);
298 READ_ENTER(&ipf_mutex);
300 RWLOCK_EXIT(&ipf_mutex);
301 error = IWCOPYPTR((caddr_t)&fio, (caddr_t)data, sizeof(fio));
307 if (!(mode & FWRITE))
310 error = frzerostats((caddr_t)data);
313 if (!(mode & FWRITE))
316 error = IRCOPY((caddr_t)data, (caddr_t)&tmp,
319 tmp = frflush(unit, tmp);
320 error = IWCOPY((caddr_t)&tmp, (caddr_t)data,
328 error = IRCOPY((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
339 if (!(mode & FWRITE))
342 tmp = ipflog_clear(unit);
343 error = IWCOPY((caddr_t)&tmp, (caddr_t)data,
349 #endif /* IPFILTER_LOG */
351 if (!(mode & FWRITE))
357 error = IWCOPYPTR((caddr_t)ipfr_fragstats(), (caddr_t)data,
363 int copy = (int)iplused[IPL_LOGIPF];
365 error = IWCOPY((caddr_t)&copy, (caddr_t)data, sizeof(copy));
375 RWLOCK_EXIT(&ipf_solaris);
380 ill_t *get_unit(name, v)
384 size_t len = strlen(name) + 1; /* includes \0 */
394 for (il = ill_g_head; il; il = il->ill_next)
395 if ((len == il->ill_name_length) && (il->ill_sap == sap) &&
396 !strncmp(il->ill_name, name, len))
402 static int frrequest(unit, req, data, set)
407 register frentry_t *fp, *f, **fprev;
408 register frentry_t **ftail;
409 frgroup_t *fg = NULL;
410 int error = 0, in, i;
420 error = IRCOPYPTR(data, (caddr_t)fp, sizeof(*fp));
426 fp->fr_sap = IP_DL_SAP;
427 else if (fp->fr_v == 6)
428 fp->fr_sap = IP6_DL_SAP;
435 WRITE_ENTER(&ipf_mutex);
437 * Check that the group number does exist and that, if a head group
438 * has been specified, it doesn't already exist.
440 if ((req != SIOCZRLST) && ((req == SIOCINAFR) || (req == SIOCINIFR) ||
441 (req == SIOCADAFR) || (req == SIOCADIFR)) && fp->fr_grhead &&
442 fr_findgroup(fp->fr_grhead, fp->fr_flags, unit, set, NULL)) {
446 if ((req != SIOCZRLST) && fp->fr_group &&
447 !fr_findgroup(fp->fr_group, fp->fr_flags, unit, set, NULL)) {
452 in = (fp->fr_flags & FR_INQUE) ? 0 : 1;
454 if (unit == IPL_LOGAUTH)
455 ftail = fprev = &ipauth;
456 else if ((fp->fr_flags & FR_ACCOUNT) && (fp->fr_v == 4))
457 ftail = fprev = &ipacct[in][set];
458 else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) && (fp->fr_v == 4))
459 ftail = fprev = &ipfilter[in][set];
461 else if ((fp->fr_flags & FR_ACCOUNT) && (fp->fr_v == 6))
462 ftail = fprev = &ipacct6[in][set];
463 else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) && (fp->fr_v == 6))
464 ftail = fprev = &ipfilter6[in][set];
471 group = fp->fr_group;
473 fg = fr_findgroup(group, fp->fr_flags, unit, set, NULL);
478 ftail = fprev = fg->fg_start;
481 bzero((char *)frcache, sizeof(frcache[0]) * 2);
483 for (i = 0; i < 4; i++) {
484 if ((fp->fr_ifnames[i][1] == '\0') &&
485 ((fp->fr_ifnames[i][0] == '-') ||
486 (fp->fr_ifnames[i][0] == '*'))) {
487 fp->fr_ifas[i] = NULL;
488 } else if (*fp->fr_ifnames[i]) {
489 fp->fr_ifas[i] = GETUNIT(fp->fr_ifnames[i], fp->fr_v);
491 fp->fr_ifas[i] = (void *)-1;
497 fp->fr_flags &= ~FR_DUP;
498 if (*fdp->fd_ifname) {
499 ill = get_unit(fdp->fd_ifname, (int)fp->fr_v);
502 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 4)) {
504 ire = ire_ctable_lookup(ipif->ipif_local_addr, 0,
505 IRE_LOCAL, NULL, NULL,
508 ire = ire_lookup_myaddr(ipif->ipif_local_addr);
513 fp->fr_flags |= FR_DUP;
516 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 6)) {
517 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, 0,
518 IRE_LOCAL, NULL, NULL,
523 fp->fr_flags |= FR_DUP;
526 fdp->fd_ifp = (struct ifnet *)ire;
531 if (*fdp->fd_ifname) {
532 ill = get_unit(fdp->fd_ifname, (int)fp->fr_v);
535 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 4)) {
537 ire = ire_ctable_lookup(ipif->ipif_local_addr, 0,
538 IRE_LOCAL, NULL, NULL,
541 ire = ire_lookup_myaddr(ipif->ipif_local_addr);
547 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 6)) {
548 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, 0,
549 IRE_LOCAL, NULL, NULL,
555 fdp->fd_ifp = (struct ifnet *)ire;
559 * Look for a matching filter rule, but don't include the next or
560 * interface pointer in the comparison (fr_next, fr_ifa).
562 for (fp->fr_cksum = 0, p = (u_int *)&fp->fr_ip, pp = &fp->fr_cksum;
566 for (; (f = *ftail); ftail = &f->fr_next)
567 if ((fp->fr_cksum == f->fr_cksum) &&
568 !bcmp((char *)&f->fr_ip, (char *)&fp->fr_ip, FR_CMPSIZ))
572 * If zero'ing statistics, copy current to caller and zero.
574 if (req == SIOCZRLST) {
579 MUTEX_DOWNGRADE(&ipf_mutex);
580 error = IWCOPYPTR((caddr_t)f, data, sizeof(*f));
589 if (req != SIOCINAFR && req != SIOCINIFR)
595 while (--fp->fr_hits && (f = *ftail))
602 if (req == SIOCRMAFR || req == SIOCRMIFR) {
607 * Only return EBUSY if there is a group list, else
608 * it's probably just state information referencing
611 if ((f->fr_ref > 1) && f->fr_grp) {
615 if (fg && fg->fg_head)
616 fg->fg_head->fr_ref--;
618 fr_delgroup(f->fr_grhead, fp->fr_flags,
620 fixskip(fprev, f, -1);
631 KMALLOC(f, frentry_t *);
633 if (fg && fg->fg_head)
634 fg->fg_head->fr_ref++;
635 bcopy((char *)fp, (char *)f, sizeof(*f));
640 if (req == SIOCINIFR || req == SIOCINAFR)
641 fixskip(fprev, f, 1);
643 group = f->fr_grhead;
645 fg = fr_addgroup(group, f, unit, set);
651 RWLOCK_EXIT(&ipf_mutex);
657 * routines below for saving IP headers to buffer
659 int iplopen(devp, flags, otype, cred)
664 minor_t min = getminor(*devp);
667 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
669 if ((fr_running <= 0) || !(otype & OTYP_CHR))
671 min = (IPL_LOGMAX < min) ? ENXIO : 0;
676 int iplclose(dev, flags, otype, cred)
681 minor_t min = getminor(dev);
684 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
686 min = (IPL_LOGMAX < min) ? ENXIO : 0;
693 * both of these must operate with at least splnet() lest they be
694 * called during packet processing and cause an inconsistency to appear in
697 int iplread(dev, uio, cp)
699 register struct uio *uio;
703 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
705 return ipflog_read(getminor(dev), uio);
707 #endif /* IPFILTER_LOG */
711 * send_reset - this could conceivably be a call to tcp_respond(), but that
712 * requires a large amount of setting up and isn't any more efficient.
714 int send_reset(oip, fin)
718 tcphdr_t *tcp, *tcp2;
722 ip6_t *ip6, *oip6 = (ip6_t *)oip;
726 tcp = (struct tcphdr *)fin->fin_dp;
727 if (tcp->th_flags & TH_RST)
729 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
732 hlen = sizeof(ip6_t);
736 hlen += sizeof(*tcp2);
737 if ((m = (mblk_t *)allocb(hlen + 16, BPRI_HI)) == NULL)
742 m->b_wptr = m->b_rptr + hlen;
743 bzero((char *)m->b_rptr, hlen);
744 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
745 tcp2->th_dport = tcp->th_sport;
746 tcp2->th_sport = tcp->th_dport;
747 if (tcp->th_flags & TH_ACK) {
748 tcp2->th_seq = tcp->th_ack;
749 tcp2->th_flags = TH_RST|TH_ACK;
751 tcp2->th_ack = ntohl(tcp->th_seq);
752 tcp2->th_ack += tlen;
753 tcp2->th_ack = htonl(tcp2->th_ack);
754 tcp2->th_flags = TH_RST;
756 tcp2->th_off = sizeof(struct tcphdr) >> 2;
757 tcp2->th_flags = TH_RST|TH_ACK;
760 * This is to get around a bug in the Solaris 2.4/2.5 TCP checksum
761 * computation that is done by their put routine.
763 tcp2->th_sum = htons(0x14);
765 if (fin->fin_v == 6) {
766 ip6 = (ip6_t *)m->b_rptr;
767 ip6->ip6_src = oip6->ip6_dst;
768 ip6->ip6_dst = oip6->ip6_src;
769 ip6->ip6_plen = htons(sizeof(*tcp));
770 ip6->ip6_nxt = IPPROTO_TCP;
774 ip = (ip_t *)m->b_rptr;
775 ip->ip_src.s_addr = oip->ip_dst.s_addr;
776 ip->ip_dst.s_addr = oip->ip_src.s_addr;
777 ip->ip_hl = sizeof(*ip) >> 2;
778 ip->ip_p = IPPROTO_TCP;
779 ip->ip_len = htons(sizeof(*ip) + sizeof(*tcp));
780 ip->ip_tos = oip->ip_tos;
782 return send_ip(fin, m);
786 int static send_ip(fin, m)
790 RWLOCK_EXIT(&ipfs_mutex);
791 RWLOCK_EXIT(&ipf_solaris);
793 if (fin->fin_v == 6) {
794 extern void ip_wput_v6 __P((queue_t *, mblk_t *));
797 ip6 = (ip6_t *)m->b_rptr;
801 ip_wput_v6(((qif_t *)fin->fin_qif)->qf_ill->ill_wq, m);
807 ip = (ip_t *)m->b_rptr;
808 ip->ip_v = IPVERSION;
809 ip->ip_ttl = (u_char)(*ip_ttl_ptr);
810 ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
811 ip_wput(((qif_t *)fin->fin_qif)->qf_ill->ill_wq, m);
813 READ_ENTER(&ipf_solaris);
814 READ_ENTER(&ipfs_mutex);
819 int send_icmp_err(oip, type, fin, dst)
837 if ((type < 0) || (type > ICMP_MAXTYPE))
840 code = fin->fin_icode;
842 if ((code < 0) || (code > sizeof(icmptoicmp6unreach)/sizeof(int)))
850 if (oip->ip_v == 6) {
853 sz += MIN(m->b_wptr - m->b_rptr, 512);
854 hlen = sizeof(ip6_t);
855 type = icmptoicmp6types[type];
856 if (type == ICMP6_DST_UNREACH)
857 code = icmptoicmp6unreach[code];
861 if ((oip->ip_p == IPPROTO_ICMP) &&
862 !(fin->fin_fi.fi_fl & FI_SHORT))
863 switch (ntohs(fin->fin_data[0]) >> 8)
874 sz = sizeof(ip_t) * 2;
875 sz += 8; /* 64 bits of data */
879 sz += offsetof(struct icmp, icmp_ip);
880 if ((mb = (mblk_t *)allocb((size_t)sz + 16, BPRI_HI)) == NULL)
884 mb->b_wptr = mb->b_rptr + sz;
885 bzero((char *)mb->b_rptr, (size_t)sz);
886 icmp = (struct icmp *)(mb->b_rptr + sizeof(*ip));
887 icmp->icmp_type = type;
888 icmp->icmp_code = code;
889 icmp->icmp_cksum = 0;
891 if (type == ICMP_UNREACH && (il = qif->qf_ill) &&
892 fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
893 icmp->icmp_nextmtu = htons(il->ill_max_frag);
897 if (oip->ip_v == 6) {
898 struct in6_addr dst6;
902 if (fr_ifpaddr(6, ((qif_t *)fin->fin_qif)->qf_ill,
903 (struct in_addr *)&dst6) == -1)
906 dst6 = oip6->ip6_dst;
910 ip6 = (ip6_t *)mb->b_rptr;
914 ip6->ip6_plen = htons(sz);
915 ip6->ip6_nxt = IPPROTO_ICMPV6;
917 ip6->ip6_dst = oip6->ip6_src;
918 sz -= offsetof(struct icmp, icmp_ip);
919 bcopy((char *)m->b_rptr, (char *)&icmp->icmp_ip, sz);
920 icmp->icmp_cksum = csz - sizeof(ip6_t);
924 ip = (ip_t *)mb->b_rptr;
925 ip->ip_v = IPVERSION;
926 ip->ip_hl = (sizeof(*ip) >> 2);
927 ip->ip_p = IPPROTO_ICMP;
928 ip->ip_id = oip->ip_id;
930 ip->ip_ttl = (u_char)(*ip_ttl_ptr);
931 ip->ip_tos = oip->ip_tos;
932 ip->ip_len = (u_short)htons(sz);
934 if (fr_ifpaddr(4, ((qif_t *)fin->fin_qif)->qf_ill,
940 ip->ip_dst = oip->ip_src;
941 bcopy((char *)oip, (char *)&icmp->icmp_ip, sizeof(*oip));
942 bcopy((char *)oip + (oip->ip_hl << 2),
943 (char *)&icmp->icmp_ip + sizeof(*oip), 8);
944 icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
949 * Need to exit out of these so we don't recursively call rw_enter
952 return send_ip(fin, mb);