]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/ip_icmp.c
This commit was generated by cvs2svn to compensate for changes in r160814,
[FreeBSD/FreeBSD.git] / sys / netinet / ip_icmp.c
1 /*-
2  * Copyright (c) 1982, 1986, 1988, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *      @(#)ip_icmp.c   8.2 (Berkeley) 1/4/94
30  * $FreeBSD$
31  */
32
33 #include "opt_ipsec.h"
34 #include "opt_mac.h"
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/mac.h>
39 #include <sys/mbuf.h>
40 #include <sys/protosw.h>
41 #include <sys/socket.h>
42 #include <sys/time.h>
43 #include <sys/kernel.h>
44 #include <sys/sysctl.h>
45
46 #include <net/if.h>
47 #include <net/if_types.h>
48 #include <net/route.h>
49
50 #include <netinet/in.h>
51 #include <netinet/in_pcb.h>
52 #include <netinet/in_systm.h>
53 #include <netinet/in_var.h>
54 #include <netinet/ip.h>
55 #include <netinet/ip_icmp.h>
56 #include <netinet/ip_var.h>
57 #include <netinet/ip_options.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcpip.h>
61 #include <netinet/icmp_var.h>
62
63 #ifdef IPSEC
64 #include <netinet6/ipsec.h>
65 #include <netkey/key.h>
66 #endif
67
68 #ifdef FAST_IPSEC
69 #include <netipsec/ipsec.h>
70 #include <netipsec/key.h>
71 #define IPSEC
72 #endif
73
74 #include <machine/in_cksum.h>
75
76 /*
77  * ICMP routines: error generation, receive packet processing, and
78  * routines to turnaround packets back to the originator, and
79  * host table maintenance routines.
80  */
81
82 struct  icmpstat icmpstat;
83 SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW,
84         &icmpstat, icmpstat, "");
85
86 static int      icmpmaskrepl = 0;
87 SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
88         &icmpmaskrepl, 0, "Reply to ICMP Address Mask Request packets.");
89
90 static u_int    icmpmaskfake = 0;
91 SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW,
92         &icmpmaskfake, 0, "Fake reply to ICMP Address Mask Request packets.");
93
94 static int      drop_redirect = 0;
95 SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
96         &drop_redirect, 0, "Ignore ICMP redirects");
97
98 static int      log_redirect = 0;
99 SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
100         &log_redirect, 0, "Log ICMP redirects to the console");
101
102 static int      icmplim = 200;
103 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
104         &icmplim, 0, "Maximum number of ICMP responses per second");
105
106 static int      icmplim_output = 1;
107 SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
108         &icmplim_output, 0, "Enable rate limiting of ICMP responses");
109
110 static char     reply_src[IFNAMSIZ];
111 SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW,
112         &reply_src, IFNAMSIZ, "icmp reply source for non-local packets.");
113
114 static int      icmp_rfi = 0;
115 SYSCTL_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_RW,
116         &icmp_rfi, 0, "ICMP reply from incoming interface for "
117         "non-local packets");
118
119 static int      icmp_quotelen = 8;
120 SYSCTL_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_RW,
121         &icmp_quotelen, 0, "Number of bytes from original packet to "
122         "quote in ICMP reply");
123
124 /*
125  * ICMP broadcast echo sysctl
126  */
127
128 static int      icmpbmcastecho = 0;
129 SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
130         &icmpbmcastecho, 0, "");
131
132
133 #ifdef ICMPPRINTFS
134 int     icmpprintfs = 0;
135 #endif
136
137 static void     icmp_reflect(struct mbuf *);
138 static void     icmp_send(struct mbuf *, struct mbuf *);
139
140 extern  struct protosw inetsw[];
141
142 /*
143  * Generate an error packet of type error
144  * in response to bad packet ip.
145  */
146 void
147 icmp_error(n, type, code, dest, mtu)
148         struct mbuf *n;
149         int type, code;
150         n_long dest;
151         int mtu;
152 {
153         register struct ip *oip = mtod(n, struct ip *), *nip;
154         register unsigned oiphlen = oip->ip_hl << 2;
155         register struct icmp *icp;
156         register struct mbuf *m;
157         unsigned icmplen, icmpelen, nlen;
158
159         KASSERT((u_int)type <= ICMP_MAXTYPE, ("%s: illegal ICMP type", __func__));
160 #ifdef ICMPPRINTFS
161         if (icmpprintfs)
162                 printf("icmp_error(%p, %x, %d)\n", oip, type, code);
163 #endif
164         if (type != ICMP_REDIRECT)
165                 icmpstat.icps_error++;
166         /*
167          * Don't send error:
168          *  if the original packet was encrypted.
169          *  if not the first fragment of message.
170          *  in response to a multicast or broadcast packet.
171          *  if the old packet protocol was an ICMP error message.
172          */
173         if (n->m_flags & M_DECRYPTED)
174                 goto freeit;
175         if (oip->ip_off & ~(IP_MF|IP_DF))
176                 goto freeit;
177         if (n->m_flags & (M_BCAST|M_MCAST))
178                 goto freeit;
179         if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
180           n->m_len >= oiphlen + ICMP_MINLEN &&
181           !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiphlen))->icmp_type)) {
182                 icmpstat.icps_oldicmp++;
183                 goto freeit;
184         }
185         /* Drop if IP header plus 8 bytes is not contignous in first mbuf. */
186         if (oiphlen + 8 > n->m_len)
187                 goto freeit;
188         /*
189          * Calculate length to quote from original packet and
190          * prevent the ICMP mbuf from overflowing.
191          * Unfortunatly this is non-trivial since ip_forward()
192          * sends us truncated packets.
193          */
194         nlen = m_length(n, NULL);
195         if (oip->ip_p == IPPROTO_TCP) {
196                 struct tcphdr *th;
197                 int tcphlen;
198
199                 if (oiphlen + sizeof(struct tcphdr) > n->m_len &&
200                     n->m_next == NULL)
201                         goto stdreply;
202                 if (n->m_len < oiphlen + sizeof(struct tcphdr) &&
203                     ((n = m_pullup(n, oiphlen + sizeof(struct tcphdr))) == NULL))
204                         goto freeit;
205                 th = (struct tcphdr *)((caddr_t)oip + oiphlen);
206                 tcphlen = th->th_off << 2;
207                 if (tcphlen < sizeof(struct tcphdr))
208                         goto freeit;
209                 if (oip->ip_len < oiphlen + tcphlen)
210                         goto freeit;
211                 if (oiphlen + tcphlen > n->m_len && n->m_next == NULL)
212                         goto stdreply;
213                 if (n->m_len < oiphlen + tcphlen && 
214                     ((n = m_pullup(n, oiphlen + tcphlen)) == NULL))
215                         goto freeit;
216                 icmpelen = max(tcphlen, min(icmp_quotelen, oip->ip_len - oiphlen));
217         } else
218 stdreply:       icmpelen = max(8, min(icmp_quotelen, oip->ip_len - oiphlen));
219
220         icmplen = min(oiphlen + icmpelen, nlen);
221         if (icmplen < sizeof(struct ip))
222                 goto freeit;
223
224         if (MHLEN > sizeof(struct ip) + ICMP_MINLEN + icmplen)
225                 m = m_gethdr(M_DONTWAIT, MT_DATA);
226         else
227                 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
228         if (m == NULL)
229                 goto freeit;
230 #ifdef MAC
231         mac_create_mbuf_netlayer(n, m);
232 #endif
233         icmplen = min(icmplen, M_TRAILINGSPACE(m) - sizeof(struct ip) - ICMP_MINLEN);
234         m_align(m, ICMP_MINLEN + icmplen);
235         m->m_len = ICMP_MINLEN + icmplen;
236
237         icp = mtod(m, struct icmp *);
238         icmpstat.icps_outhist[type]++;
239         icp->icmp_type = type;
240         if (type == ICMP_REDIRECT)
241                 icp->icmp_gwaddr.s_addr = dest;
242         else {
243                 icp->icmp_void = 0;
244                 /*
245                  * The following assignments assume an overlay with the
246                  * just zeroed icmp_void field.
247                  */
248                 if (type == ICMP_PARAMPROB) {
249                         icp->icmp_pptr = code;
250                         code = 0;
251                 } else if (type == ICMP_UNREACH &&
252                         code == ICMP_UNREACH_NEEDFRAG && mtu) {
253                         icp->icmp_nextmtu = htons(mtu);
254                 }
255         }
256         icp->icmp_code = code;
257
258         /*
259          * Copy the quotation into ICMP message and
260          * convert quoted IP header back to network representation.
261          */
262         m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
263         nip = &icp->icmp_ip;
264         nip->ip_len = htons(nip->ip_len);
265         nip->ip_off = htons(nip->ip_off);
266
267         /*
268          * Set up ICMP message mbuf and copy old IP header (without options
269          * in front of ICMP message.
270          * If the original mbuf was meant to bypass the firewall, the error
271          * reply should bypass as well.
272          */
273         m->m_flags |= n->m_flags & M_SKIP_FIREWALL;
274         m->m_data -= sizeof(struct ip);
275         m->m_len += sizeof(struct ip);
276         m->m_pkthdr.len = m->m_len;
277         m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
278         nip = mtod(m, struct ip *);
279         bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
280         nip->ip_len = m->m_len;
281         nip->ip_v = IPVERSION;
282         nip->ip_hl = 5;
283         nip->ip_p = IPPROTO_ICMP;
284         nip->ip_tos = 0;
285         icmp_reflect(m);
286
287 freeit:
288         m_freem(n);
289 }
290
291 /*
292  * Process a received ICMP message.
293  */
294 void
295 icmp_input(m, off)
296         struct mbuf *m;
297         int off;
298 {
299         struct icmp *icp;
300         struct in_ifaddr *ia;
301         struct ip *ip = mtod(m, struct ip *);
302         struct sockaddr_in icmpsrc, icmpdst, icmpgw;
303         int hlen = off;
304         int icmplen = ip->ip_len;
305         int i, code;
306         void (*ctlfunc)(int, struct sockaddr *, void *);
307
308         /*
309          * Locate icmp structure in mbuf, and check
310          * that not corrupted and of at least minimum length.
311          */
312 #ifdef ICMPPRINTFS
313         if (icmpprintfs) {
314                 char buf[4 * sizeof "123"];
315                 strcpy(buf, inet_ntoa(ip->ip_src));
316                 printf("icmp_input from %s to %s, len %d\n",
317                        buf, inet_ntoa(ip->ip_dst), icmplen);
318         }
319 #endif
320         if (icmplen < ICMP_MINLEN) {
321                 icmpstat.icps_tooshort++;
322                 goto freeit;
323         }
324         i = hlen + min(icmplen, ICMP_ADVLENMIN);
325         if (m->m_len < i && (m = m_pullup(m, i)) == 0)  {
326                 icmpstat.icps_tooshort++;
327                 return;
328         }
329         ip = mtod(m, struct ip *);
330         m->m_len -= hlen;
331         m->m_data += hlen;
332         icp = mtod(m, struct icmp *);
333         if (in_cksum(m, icmplen)) {
334                 icmpstat.icps_checksum++;
335                 goto freeit;
336         }
337         m->m_len += hlen;
338         m->m_data -= hlen;
339
340         if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
341                 /*
342                  * Deliver very specific ICMP type only.
343                  */
344                 switch (icp->icmp_type) {
345                 case ICMP_UNREACH:
346                 case ICMP_TIMXCEED:
347                         break;
348                 default:
349                         goto freeit;
350                 }
351         }
352
353 #ifdef ICMPPRINTFS
354         if (icmpprintfs)
355                 printf("icmp_input, type %d code %d\n", icp->icmp_type,
356                     icp->icmp_code);
357 #endif
358
359         /*
360          * Message type specific processing.
361          */
362         if (icp->icmp_type > ICMP_MAXTYPE)
363                 goto raw;
364
365         /* Initialize */
366         bzero(&icmpsrc, sizeof(icmpsrc));
367         icmpsrc.sin_len = sizeof(struct sockaddr_in);
368         icmpsrc.sin_family = AF_INET;
369         bzero(&icmpdst, sizeof(icmpdst));
370         icmpdst.sin_len = sizeof(struct sockaddr_in);
371         icmpdst.sin_family = AF_INET;
372         bzero(&icmpgw, sizeof(icmpgw));
373         icmpgw.sin_len = sizeof(struct sockaddr_in);
374         icmpgw.sin_family = AF_INET;
375
376         icmpstat.icps_inhist[icp->icmp_type]++;
377         code = icp->icmp_code;
378         switch (icp->icmp_type) {
379
380         case ICMP_UNREACH:
381                 switch (code) {
382                         case ICMP_UNREACH_NET:
383                         case ICMP_UNREACH_HOST:
384                         case ICMP_UNREACH_SRCFAIL:
385                         case ICMP_UNREACH_NET_UNKNOWN:
386                         case ICMP_UNREACH_HOST_UNKNOWN:
387                         case ICMP_UNREACH_ISOLATED:
388                         case ICMP_UNREACH_TOSNET:
389                         case ICMP_UNREACH_TOSHOST:
390                         case ICMP_UNREACH_HOST_PRECEDENCE:
391                         case ICMP_UNREACH_PRECEDENCE_CUTOFF:
392                                 code = PRC_UNREACH_NET;
393                                 break;
394
395                         case ICMP_UNREACH_NEEDFRAG:
396                                 code = PRC_MSGSIZE;
397                                 break;
398
399                         /*
400                          * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
401                          * Treat subcodes 2,3 as immediate RST
402                          */
403                         case ICMP_UNREACH_PROTOCOL:
404                         case ICMP_UNREACH_PORT:
405                                 code = PRC_UNREACH_PORT;
406                                 break;
407
408                         case ICMP_UNREACH_NET_PROHIB:
409                         case ICMP_UNREACH_HOST_PROHIB:
410                         case ICMP_UNREACH_FILTER_PROHIB:
411                                 code = PRC_UNREACH_ADMIN_PROHIB;
412                                 break;
413
414                         default:
415                                 goto badcode;
416                 }
417                 goto deliver;
418
419         case ICMP_TIMXCEED:
420                 if (code > 1)
421                         goto badcode;
422                 code += PRC_TIMXCEED_INTRANS;
423                 goto deliver;
424
425         case ICMP_PARAMPROB:
426                 if (code > 1)
427                         goto badcode;
428                 code = PRC_PARAMPROB;
429                 goto deliver;
430
431         case ICMP_SOURCEQUENCH:
432                 if (code)
433                         goto badcode;
434                 code = PRC_QUENCH;
435         deliver:
436                 /*
437                  * Problem with datagram; advise higher level routines.
438                  */
439                 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
440                     icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
441                         icmpstat.icps_badlen++;
442                         goto freeit;
443                 }
444                 icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len);
445                 /* Discard ICMP's in response to multicast packets */
446                 if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
447                         goto badcode;
448 #ifdef ICMPPRINTFS
449                 if (icmpprintfs)
450                         printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
451 #endif
452                 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
453                 /*
454                  * XXX if the packet contains [IPv4 AH TCP], we can't make a
455                  * notification to TCP layer.
456                  */
457                 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
458                 if (ctlfunc)
459                         (*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
460                                    (void *)&icp->icmp_ip);
461                 break;
462
463         badcode:
464                 icmpstat.icps_badcode++;
465                 break;
466
467         case ICMP_ECHO:
468                 if (!icmpbmcastecho
469                     && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
470                         icmpstat.icps_bmcastecho++;
471                         break;
472                 }
473                 icp->icmp_type = ICMP_ECHOREPLY;
474                 if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0)
475                         goto freeit;
476                 else
477                         goto reflect;
478
479         case ICMP_TSTAMP:
480                 if (!icmpbmcastecho
481                     && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
482                         icmpstat.icps_bmcasttstamp++;
483                         break;
484                 }
485                 if (icmplen < ICMP_TSLEN) {
486                         icmpstat.icps_badlen++;
487                         break;
488                 }
489                 icp->icmp_type = ICMP_TSTAMPREPLY;
490                 icp->icmp_rtime = iptime();
491                 icp->icmp_ttime = icp->icmp_rtime;      /* bogus, do later! */
492                 if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
493                         goto freeit;
494                 else
495                         goto reflect;
496
497         case ICMP_MASKREQ:
498                 if (icmpmaskrepl == 0)
499                         break;
500                 /*
501                  * We are not able to respond with all ones broadcast
502                  * unless we receive it over a point-to-point interface.
503                  */
504                 if (icmplen < ICMP_MASKLEN)
505                         break;
506                 switch (ip->ip_dst.s_addr) {
507
508                 case INADDR_BROADCAST:
509                 case INADDR_ANY:
510                         icmpdst.sin_addr = ip->ip_src;
511                         break;
512
513                 default:
514                         icmpdst.sin_addr = ip->ip_dst;
515                 }
516                 ia = (struct in_ifaddr *)ifaof_ifpforaddr(
517                             (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
518                 if (ia == 0)
519                         break;
520                 if (ia->ia_ifp == 0)
521                         break;
522                 icp->icmp_type = ICMP_MASKREPLY;
523                 if (icmpmaskfake == 0)
524                         icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
525                 else
526                         icp->icmp_mask = icmpmaskfake;
527                 if (ip->ip_src.s_addr == 0) {
528                         if (ia->ia_ifp->if_flags & IFF_BROADCAST)
529                             ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
530                         else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
531                             ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
532                 }
533 reflect:
534                 ip->ip_len += hlen;     /* since ip_input deducts this */
535                 icmpstat.icps_reflect++;
536                 icmpstat.icps_outhist[icp->icmp_type]++;
537                 icmp_reflect(m);
538                 return;
539
540         case ICMP_REDIRECT:
541                 if (log_redirect) {
542                         u_long src, dst, gw;
543
544                         src = ntohl(ip->ip_src.s_addr);
545                         dst = ntohl(icp->icmp_ip.ip_dst.s_addr);
546                         gw = ntohl(icp->icmp_gwaddr.s_addr);
547                         printf("icmp redirect from %d.%d.%d.%d: "
548                                "%d.%d.%d.%d => %d.%d.%d.%d\n",
549                                (int)(src >> 24), (int)((src >> 16) & 0xff),
550                                (int)((src >> 8) & 0xff), (int)(src & 0xff),
551                                (int)(dst >> 24), (int)((dst >> 16) & 0xff),
552                                (int)((dst >> 8) & 0xff), (int)(dst & 0xff),
553                                (int)(gw >> 24), (int)((gw >> 16) & 0xff),
554                                (int)((gw >> 8) & 0xff), (int)(gw & 0xff));
555                 }
556                 /*
557                  * RFC1812 says we must ignore ICMP redirects if we
558                  * are acting as router.
559                  */
560                 if (drop_redirect || ipforwarding)
561                         break;
562                 if (code > 3)
563                         goto badcode;
564                 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
565                     icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
566                         icmpstat.icps_badlen++;
567                         break;
568                 }
569                 /*
570                  * Short circuit routing redirects to force
571                  * immediate change in the kernel's routing
572                  * tables.  The message is also handed to anyone
573                  * listening on a raw socket (e.g. the routing
574                  * daemon for use in updating its tables).
575                  */
576                 icmpgw.sin_addr = ip->ip_src;
577                 icmpdst.sin_addr = icp->icmp_gwaddr;
578 #ifdef  ICMPPRINTFS
579                 if (icmpprintfs) {
580                         char buf[4 * sizeof "123"];
581                         strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst));
582
583                         printf("redirect dst %s to %s\n",
584                                buf, inet_ntoa(icp->icmp_gwaddr));
585                 }
586 #endif
587                 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
588                 rtredirect((struct sockaddr *)&icmpsrc,
589                   (struct sockaddr *)&icmpdst,
590                   (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
591                   (struct sockaddr *)&icmpgw);
592                 pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
593 #ifdef IPSEC
594                 key_sa_routechange((struct sockaddr *)&icmpsrc);
595 #endif
596                 break;
597
598         /*
599          * No kernel processing for the following;
600          * just fall through to send to raw listener.
601          */
602         case ICMP_ECHOREPLY:
603         case ICMP_ROUTERADVERT:
604         case ICMP_ROUTERSOLICIT:
605         case ICMP_TSTAMPREPLY:
606         case ICMP_IREQREPLY:
607         case ICMP_MASKREPLY:
608         default:
609                 break;
610         }
611
612 raw:
613         rip_input(m, off);
614         return;
615
616 freeit:
617         m_freem(m);
618 }
619
620 /*
621  * Reflect the ip packet back to the source
622  */
623 static void
624 icmp_reflect(m)
625         struct mbuf *m;
626 {
627         struct ip *ip = mtod(m, struct ip *);
628         struct ifaddr *ifa;
629         struct ifnet *ifn;
630         struct in_ifaddr *ia;
631         struct in_addr t;
632         struct mbuf *opts = 0;
633         int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
634
635         if (!in_canforward(ip->ip_src) &&
636             ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
637              (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
638                 m_freem(m);     /* Bad return address */
639                 icmpstat.icps_badaddr++;
640                 goto done;      /* Ip_output() will check for broadcast */
641         }
642         t = ip->ip_dst;
643         ip->ip_dst = ip->ip_src;
644
645         /*
646          * Source selection for ICMP replies:
647          *
648          * If the incoming packet was addressed directly to one of our
649          * own addresses, use dst as the src for the reply.
650          */
651         LIST_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash)
652                 if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
653                         goto match;
654         /*
655          * If the incoming packet was addressed to one of our broadcast
656          * addresses, use the first non-broadcast address which corresponds
657          * to the incoming interface.
658          */
659         if (m->m_pkthdr.rcvif != NULL &&
660             m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
661                 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
662                         if (ifa->ifa_addr->sa_family != AF_INET)
663                                 continue;
664                         ia = ifatoia(ifa);
665                         if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
666                             t.s_addr)
667                                 goto match;
668                 }
669         }
670         /*
671          * If the packet was transiting through us, use the address of
672          * the interface the packet came through in.  If that interface
673          * doesn't have a suitable IP address, the normal selection
674          * criteria apply.
675          */
676         if (icmp_rfi && m->m_pkthdr.rcvif != NULL) {
677                 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
678                         if (ifa->ifa_addr->sa_family != AF_INET)
679                                 continue;
680                         ia = ifatoia(ifa);
681                         goto match;
682                 }
683         }
684         /*
685          * If the incoming packet was not addressed directly to us, use
686          * designated interface for icmp replies specified by sysctl
687          * net.inet.icmp.reply_src (default not set). Otherwise continue
688          * with normal source selection.
689          */
690         if (reply_src[0] != '\0' && (ifn = ifunit(reply_src))) {
691                 TAILQ_FOREACH(ifa, &ifn->if_addrhead, ifa_link) {
692                         if (ifa->ifa_addr->sa_family != AF_INET)
693                                 continue;
694                         ia = ifatoia(ifa);
695                         goto match;
696                 }
697         }
698         /*
699          * If the packet was transiting through us, use the address of
700          * the interface that is the closest to the packet source.
701          * When we don't have a route back to the packet source, stop here
702          * and drop the packet.
703          */
704         ia = ip_rtaddr(ip->ip_dst);
705         if (ia == NULL) {
706                 m_freem(m);
707                 icmpstat.icps_noroute++;
708                 goto done;
709         }
710 match:
711 #ifdef MAC
712         mac_reflect_mbuf_icmp(m);
713 #endif
714         t = IA_SIN(ia)->sin_addr;
715         ip->ip_src = t;
716         ip->ip_ttl = ip_defttl;
717
718         if (optlen > 0) {
719                 register u_char *cp;
720                 int opt, cnt;
721                 u_int len;
722
723                 /*
724                  * Retrieve any source routing from the incoming packet;
725                  * add on any record-route or timestamp options.
726                  */
727                 cp = (u_char *) (ip + 1);
728                 if ((opts = ip_srcroute(m)) == 0 &&
729                     (opts = m_gethdr(M_DONTWAIT, MT_DATA))) {
730                         opts->m_len = sizeof(struct in_addr);
731                         mtod(opts, struct in_addr *)->s_addr = 0;
732                 }
733                 if (opts) {
734 #ifdef ICMPPRINTFS
735                     if (icmpprintfs)
736                             printf("icmp_reflect optlen %d rt %d => ",
737                                 optlen, opts->m_len);
738 #endif
739                     for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
740                             opt = cp[IPOPT_OPTVAL];
741                             if (opt == IPOPT_EOL)
742                                     break;
743                             if (opt == IPOPT_NOP)
744                                     len = 1;
745                             else {
746                                     if (cnt < IPOPT_OLEN + sizeof(*cp))
747                                             break;
748                                     len = cp[IPOPT_OLEN];
749                                     if (len < IPOPT_OLEN + sizeof(*cp) ||
750                                         len > cnt)
751                                             break;
752                             }
753                             /*
754                              * Should check for overflow, but it "can't happen"
755                              */
756                             if (opt == IPOPT_RR || opt == IPOPT_TS ||
757                                 opt == IPOPT_SECURITY) {
758                                     bcopy((caddr_t)cp,
759                                         mtod(opts, caddr_t) + opts->m_len, len);
760                                     opts->m_len += len;
761                             }
762                     }
763                     /* Terminate & pad, if necessary */
764                     cnt = opts->m_len % 4;
765                     if (cnt) {
766                             for (; cnt < 4; cnt++) {
767                                     *(mtod(opts, caddr_t) + opts->m_len) =
768                                         IPOPT_EOL;
769                                     opts->m_len++;
770                             }
771                     }
772 #ifdef ICMPPRINTFS
773                     if (icmpprintfs)
774                             printf("%d\n", opts->m_len);
775 #endif
776                 }
777                 /*
778                  * Now strip out original options by copying rest of first
779                  * mbuf's data back, and adjust the IP length.
780                  */
781                 ip->ip_len -= optlen;
782                 ip->ip_v = IPVERSION;
783                 ip->ip_hl = 5;
784                 m->m_len -= optlen;
785                 if (m->m_flags & M_PKTHDR)
786                         m->m_pkthdr.len -= optlen;
787                 optlen += sizeof(struct ip);
788                 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
789                          (unsigned)(m->m_len - sizeof(struct ip)));
790         }
791         m_tag_delete_nonpersistent(m);
792         m->m_flags &= ~(M_BCAST|M_MCAST);
793         icmp_send(m, opts);
794 done:
795         if (opts)
796                 (void)m_free(opts);
797 }
798
799 /*
800  * Send an icmp packet back to the ip level,
801  * after supplying a checksum.
802  */
803 static void
804 icmp_send(m, opts)
805         register struct mbuf *m;
806         struct mbuf *opts;
807 {
808         register struct ip *ip = mtod(m, struct ip *);
809         register int hlen;
810         register struct icmp *icp;
811
812         hlen = ip->ip_hl << 2;
813         m->m_data += hlen;
814         m->m_len -= hlen;
815         icp = mtod(m, struct icmp *);
816         icp->icmp_cksum = 0;
817         icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
818         m->m_data -= hlen;
819         m->m_len += hlen;
820         m->m_pkthdr.rcvif = (struct ifnet *)0;
821 #ifdef ICMPPRINTFS
822         if (icmpprintfs) {
823                 char buf[4 * sizeof "123"];
824                 strcpy(buf, inet_ntoa(ip->ip_dst));
825                 printf("icmp_send dst %s src %s\n",
826                        buf, inet_ntoa(ip->ip_src));
827         }
828 #endif
829         (void) ip_output(m, opts, NULL, 0, NULL, NULL);
830 }
831
832 n_time
833 iptime()
834 {
835         struct timeval atv;
836         u_long t;
837
838         getmicrotime(&atv);
839         t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
840         return (htonl(t));
841 }
842
/*
 * Return the neighbouring MTU plateau (table from RFC 1191) for the
 * current value MTU.
 *
 * If DIR is less than zero the search goes upward: the smallest plateau
 * strictly greater than MTU is returned, except that an MTU equal to the
 * top plateau (65535) is returned unchanged.  Otherwise the search goes
 * downward: the largest plateau strictly less than MTU is returned.
 *
 * The table is terminated by 0, so 0 is returned when stepping down from
 * the lowest real plateau (68).  0 is also returned whenever no table
 * entry qualifies (e.g. stepping up from a value above 65535, or
 * stepping down from a value <= 0).
 */
int
ip_next_mtu(int mtu, int dir)
{
        /* Plateaus in descending order; never modified. */
        static const int mtutab[] = {
                65535, 32000, 17914, 8166, 4352, 2002, 1492, 1280, 1006, 508,
                296, 68, 0
        };
        const int size = (int)(sizeof(mtutab) / sizeof(mtutab[0]));
        int i;

        if (dir >= 0) {
                /* Scan from the large end: first smaller entry wins. */
                for (i = 0; i < size; i++) {
                        if (mtu > mtutab[i])
                                return (mtutab[i]);
                }
        } else {
                /* Scan from the small end: first larger entry wins. */
                for (i = size - 1; i >= 0; i--) {
                        if (mtu < mtutab[i])
                                return (mtutab[i]);
                }
                /* Already sitting on the top plateau: stay there. */
                if (mtu == mtutab[0])
                        return (mtutab[0]);
        }
        return (0);
}
873
874
875 /*
876  * badport_bandlim() - check for ICMP bandwidth limit
877  *
878  *      Return 0 if it is ok to send an ICMP error response, -1 if we have
879  *      hit our bandwidth limit and it is not ok.
880  *
881  *      If icmplim is <= 0, the feature is disabled and 0 is returned.
882  *
883  *      For now we separate the TCP and UDP subsystems w/ different 'which'
884  *      values.  We may eventually remove this separation (and simplify the
885  *      code further).
886  *
887  *      Note that the printing of the error message is delayed so we can
888  *      properly print the icmp error rate that the system was trying to do
889  *      (i.e. 22000/100 pps, etc...).  This can cause long delays in printing
890  *      the 'final' error, but it doesn't make sense to solve the printing
891  *      delay with more complex code.
892  */
893
894 int
895 badport_bandlim(int which)
896 {
897 #define N(a)    (sizeof (a) / sizeof (a[0]))
898         static struct rate {
899                 const char      *type;
900                 struct timeval  lasttime;
901                 int             curpps;
902         } rates[BANDLIM_MAX+1] = {
903                 { "icmp unreach response" },
904                 { "icmp ping response" },
905                 { "icmp tstamp response" },
906                 { "closed port RST response" },
907                 { "open port RST response" }
908         };
909
910         /*
911          * Return ok status if feature disabled or argument out of range.
912          */
913         if (icmplim > 0 && (u_int) which < N(rates)) {
914                 struct rate *r = &rates[which];
915                 int opps = r->curpps;
916
917                 if (!ppsratecheck(&r->lasttime, &r->curpps, icmplim))
918                         return -1;      /* discard packet */
919                 /*
920                  * If we've dropped below the threshold after having
921                  * rate-limited traffic print the message.  This preserves
922                  * the previous behaviour at the expense of added complexity.
923                  */
924                 if (icmplim_output && opps > icmplim)
925                         printf("Limiting %s from %d to %d packets/sec\n",
926                                 r->type, opps, icmplim);
927         }
928         return 0;                       /* okay to send packet */
929 #undef N
930 }