CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/ip_output.c
Always compile PFIL_HOOKS into the kernel and remove the associated kernel
[FreeBSD/FreeBSD.git] / sys / netinet / ip_output.c
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
30  * $FreeBSD$
31  */
32
33 #include "opt_ipfw.h"
34 #include "opt_ipsec.h"
35 #include "opt_mac.h"
36 #include "opt_mbuf_stress_test.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/mac.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/protosw.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/sysctl.h>
48
49 #include <net/if.h>
50 #include <net/netisr.h>
51 #include <net/pfil.h>
52 #include <net/route.h>
53
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
57 #include <netinet/in_pcb.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip_var.h>
60
61
62 #include <machine/in_cksum.h>
63
64 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
65
66 #ifdef IPSEC
67 #include <netinet6/ipsec.h>
68 #include <netkey/key.h>
69 #ifdef IPSEC_DEBUG
70 #include <netkey/key_debug.h>
71 #else
72 #define KEYDEBUG(lev,arg)
73 #endif
74 #endif /*IPSEC*/
75
76 #ifdef FAST_IPSEC
77 #include <netipsec/ipsec.h>
78 #include <netipsec/xform.h>
79 #include <netipsec/key.h>
80 #endif /*FAST_IPSEC*/
81
82 #define print_ip(x, a, y)        printf("%s %d.%d.%d.%d%s",\
83                                 (x), (ntohl((a).s_addr)>>24)&0xFF,\
84                                   (ntohl((a).s_addr)>>16)&0xFF,\
85                                   (ntohl((a).s_addr)>>8)&0xFF,\
86                                   (ntohl((a).s_addr))&0xFF, (y)); /* prints string x, then a.s_addr (converted with ntohl) as a dotted quad, then string y; arguments are parenthesized so expressions such as *p expand correctly; a is evaluated four times; the trailing ';' is kept for existing callers */
87
88 u_short ip_id; /* NOTE(review): presumably the IP identification counter behind ip_newid(); not referenced directly in the visible code -- confirm */
89
90 #ifdef MBUF_STRESS_TEST
91 int mbuf_frag_size = 0; /* 0 disables; otherwise ip_output() passes packets longer than this through m_fragment() on its direct-send path before calling if_output */
92 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
93         &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
94 #endif
95
96 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); /* ip_output() calls this as (m, opt, &len): it returns the mbuf to continue with, and a nonzero *len becomes the header length */
97 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
98 static void     ip_mloopback
99         (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); /* ip_output() calls this as (ifp, m, dst, hlen) to loop back a copy of an outgoing multicast packet */
100 static int      ip_getmoptions
101         (struct sockopt *, struct ip_moptions *);
102 static int      ip_pcbopts(int, struct mbuf **, struct mbuf *);
103 static int      ip_setmoptions
104         (struct sockopt *, struct ip_moptions **);
105
106 int     ip_optcopy(struct ip *, struct ip *);
107
108
109 extern  struct protosw inetsw[];
110
111 /*
112  * IP output.  The packet in mbuf chain m contains a skeletal IP
113  * header (with len, off, ttl, proto, tos, src, dst).
114  * The mbuf chain containing the packet will be freed.
115  * The mbuf opt, if present, will not be freed.
116  * In the IP forwarding case, the packet will arrive with options already
117  * inserted, so must have a NULL opt pointer.
118  */
119 int
120 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro,
121         int flags, struct ip_moptions *imo, struct inpcb *inp)
122 {
123         struct ip *ip;
124         struct ifnet *ifp = NULL;       /* keep compiler happy */
125         struct mbuf *m0;
126         int hlen = sizeof (struct ip);
127         int len, error = 0;
128         struct sockaddr_in *dst = NULL; /* keep compiler happy */
129         struct in_ifaddr *ia = NULL;
130         int isbroadcast, sw_csum;
131         struct route iproute;
132         struct in_addr odst;
133 #ifdef IPFIREWALL_FORWARD
134         struct m_tag *fwd_tag = NULL;
135 #endif
136 #ifdef IPSEC
137         struct secpolicy *sp = NULL;
138 #endif
139 #ifdef FAST_IPSEC
140         struct secpolicy *sp = NULL;
141         struct tdb_ident *tdbi;
142         struct m_tag *mtag;
143         int s;
144 #endif /* FAST_IPSEC */
145
146         M_ASSERTPKTHDR(m);
147         
148         if (ro == NULL) {
149                 ro = &iproute;
150                 bzero(ro, sizeof (*ro));
151         }
152
153         if (inp != NULL)
154                 INP_LOCK_ASSERT(inp);
155
156         if (opt) {
157                 len = 0;
158                 m = ip_insertoptions(m, opt, &len);
159                 if (len != 0)
160                         hlen = len;
161         }
162         ip = mtod(m, struct ip *);
163
164         /*
165          * Fill in IP header.  If we are not allowing fragmentation,
166          * then the ip_id field is meaningless, but we don't set it
167          * to zero.  Doing so causes various problems when devices along
168          * the path (routers, load balancers, firewalls, etc.) illegally
169          * disable DF on our packet.  Note that a 16-bit counter
170          * will wrap around in less than 10 seconds at 100 Mbit/s on a
171          * medium with MTU 1500.  See Steven M. Bellovin, "A Technique
172          * for Counting NATted Hosts", Proc. IMW'02, available at
173          * <http://www.research.att.com/~smb/papers/fnat.pdf>.
174          */
175         if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
176                 ip->ip_v = IPVERSION;
177                 ip->ip_hl = hlen >> 2;
178                 ip->ip_id = ip_newid();
179                 ipstat.ips_localout++;
180         } else {
181                 hlen = ip->ip_hl << 2;
182         }
183
184         dst = (struct sockaddr_in *)&ro->ro_dst;
185 again:
186         /*
187          * If there is a cached route,
188          * check that it is to the same destination
189          * and is still up.  If not, free it and try again.
190          * The address family should also be checked in case of sharing the
191          * cache with IPv6.
192          */
193         if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
194                           dst->sin_family != AF_INET ||
195                           dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
196                 RTFREE(ro->ro_rt);
197                 ro->ro_rt = (struct rtentry *)0;
198         }
199 #ifdef IPFIREWALL_FORWARD
200         if (ro->ro_rt == NULL && fwd_tag == NULL) {
201 #else
202         if (ro->ro_rt == NULL) {
203 #endif
204                 bzero(dst, sizeof(*dst));
205                 dst->sin_family = AF_INET;
206                 dst->sin_len = sizeof(*dst);
207                 dst->sin_addr = ip->ip_dst;
208         }
209         /*
210          * If routing to interface only,
211          * short circuit routing lookup.
212          */
213         if (flags & IP_ROUTETOIF) {
214                 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
215                     (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
216                         ipstat.ips_noroute++;
217                         error = ENETUNREACH;
218                         goto bad;
219                 }
220                 ifp = ia->ia_ifp;
221                 ip->ip_ttl = 1;
222                 isbroadcast = in_broadcast(dst->sin_addr, ifp);
223         } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
224             imo != NULL && imo->imo_multicast_ifp != NULL) {
225                 /*
226                  * Bypass the normal routing lookup for multicast
227                  * packets if the interface is specified.
228                  */
229                 ifp = imo->imo_multicast_ifp;
230                 IFP_TO_IA(ifp, ia);
231                 isbroadcast = 0;        /* fool gcc */
232         } else {
233                 /*
234                  * We want to do any cloning requested by the link layer,
235                  * as this is probably required in all cases for correct
236                  * operation (as it is for ARP).
237                  */
238                 if (ro->ro_rt == NULL)
239                         rtalloc_ign(ro, 0);
240                 if (ro->ro_rt == NULL) {
241                         ipstat.ips_noroute++;
242                         error = EHOSTUNREACH;
243                         goto bad;
244                 }
245                 ia = ifatoia(ro->ro_rt->rt_ifa);
246                 ifp = ro->ro_rt->rt_ifp;
247                 ro->ro_rt->rt_rmx.rmx_pksent++;
248                 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
249                         dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
250                 if (ro->ro_rt->rt_flags & RTF_HOST)
251                         isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
252                 else
253                         isbroadcast = in_broadcast(dst->sin_addr, ifp);
254         }
255         if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
256                 struct in_multi *inm;
257
258                 m->m_flags |= M_MCAST;
259                 /*
260                  * IP destination address is multicast.  Make sure "dst"
261                  * still points to the address in "ro".  (It may have been
262                  * changed to point to a gateway address, above.)
263                  */
264                 dst = (struct sockaddr_in *)&ro->ro_dst;
265                 /*
266                  * See if the caller provided any multicast options
267                  */
268                 if (imo != NULL) {
269                         ip->ip_ttl = imo->imo_multicast_ttl;
270                         if (imo->imo_multicast_vif != -1)
271                                 ip->ip_src.s_addr =
272                                     ip_mcast_src ?
273                                     ip_mcast_src(imo->imo_multicast_vif) :
274                                     INADDR_ANY;
275                 } else
276                         ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
277                 /*
278                  * Confirm that the outgoing interface supports multicast.
279                  */
280                 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
281                         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
282                                 ipstat.ips_noroute++;
283                                 error = ENETUNREACH;
284                                 goto bad;
285                         }
286                 }
287                 /*
288                  * If source address not specified yet, use address
289                  * of outgoing interface.
290                  */
291                 if (ip->ip_src.s_addr == INADDR_ANY) {
292                         /* Interface may have no addresses. */
293                         if (ia != NULL)
294                                 ip->ip_src = IA_SIN(ia)->sin_addr;
295                 }
296
297                 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
298                 if (inm != NULL &&
299                    (imo == NULL || imo->imo_multicast_loop)) {
300                         /*
301                          * If we belong to the destination multicast group
302                          * on the outgoing interface, and the caller did not
303                          * forbid loopback, loop back a copy.
304                          */
305                         ip_mloopback(ifp, m, dst, hlen);
306                 }
307                 else {
308                         /*
309                          * If we are acting as a multicast router, perform
310                          * multicast forwarding as if the packet had just
311                          * arrived on the interface to which we are about
312                          * to send.  The multicast forwarding function
313                          * recursively calls this function, using the
314                          * IP_FORWARDING flag to prevent infinite recursion.
315                          *
316                          * Multicasts that are looped back by ip_mloopback(),
317                          * above, will be forwarded by the ip_input() routine,
318                          * if necessary.
319                          */
320                         if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
321                                 /*
322                                  * If rsvp daemon is not running, do not
323                                  * set ip_moptions. This ensures that the packet
324                                  * is multicast and not just sent down one link
325                                  * as prescribed by rsvpd.
326                                  */
327                                 if (!rsvp_on)
328                                         imo = NULL;
329                                 if (ip_mforward &&
330                                     ip_mforward(ip, ifp, m, imo) != 0) {
331                                         m_freem(m);
332                                         goto done;
333                                 }
334                         }
335                 }
336
337                 /*
338                  * Multicasts with a time-to-live of zero may be looped-
339                  * back, above, but must not be transmitted on a network.
340                  * Also, multicasts addressed to the loopback interface
341                  * are not sent -- the above call to ip_mloopback() will
342                  * loop back a copy if this host actually belongs to the
343                  * destination group on the loopback interface.
344                  */
345                 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
346                         m_freem(m);
347                         goto done;
348                 }
349
350                 goto sendit;
351         }
352 #ifndef notdef
353         /*
354          * If the source address is not specified yet, use the address
355          * of the outgoing interface. In that case, keep note we did that, so
356          * if the firewall changes the next-hop causing the output
357          * interface to change, we can fix that.
358          */
359         if (ip->ip_src.s_addr == INADDR_ANY) {
360                 /* Interface may have no addresses. */
361                 if (ia != NULL) {
362                         ip->ip_src = IA_SIN(ia)->sin_addr;
363                 }
364         }
365 #endif /* notdef */
366         /*
367          * Verify that we have any chance at all of being able to queue the
368          * packet or packet fragments, unless ALTQ is enabled on the given
369          * interface in which case packetdrop should be done by queueing.
370          */
371 #ifdef ALTQ
372         if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
373             ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
374             ifp->if_snd.ifq_maxlen))
375 #else
376         if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
377             ifp->if_snd.ifq_maxlen)
378 #endif /* ALTQ */
379         {
380                 error = ENOBUFS;
381                 ipstat.ips_odropped++;
382                 goto bad;
383         }
384
385         /*
386          * Look for broadcast address and
387          * verify user is allowed to send
388          * such a packet.
389          */
390         if (isbroadcast) {
391                 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
392                         error = EADDRNOTAVAIL;
393                         goto bad;
394                 }
395                 if ((flags & IP_ALLOWBROADCAST) == 0) {
396                         error = EACCES;
397                         goto bad;
398                 }
399                 /* don't allow broadcast messages to be fragmented */
400                 if (ip->ip_len > ifp->if_mtu) {
401                         error = EMSGSIZE;
402                         goto bad;
403                 }
404                 if (flags & IP_SENDONES)
405                         ip->ip_dst.s_addr = INADDR_BROADCAST;
406                 m->m_flags |= M_BCAST;
407         } else {
408                 m->m_flags &= ~M_BCAST;
409         }
410
411 sendit:
412 #ifdef IPSEC
413         /* get SP for this packet */
414         if (inp == NULL)
415                 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
416                     flags, &error);
417         else
418                 sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
419
420         if (sp == NULL) {
421                 ipsecstat.out_inval++;
422                 goto bad;
423         }
424
425         error = 0;
426
427         /* check policy */
428         switch (sp->policy) {
429         case IPSEC_POLICY_DISCARD:
430                 /*
431                  * This packet is just discarded.
432                  */
433                 ipsecstat.out_polvio++;
434                 goto bad;
435
436         case IPSEC_POLICY_BYPASS:
437         case IPSEC_POLICY_NONE:
438         case IPSEC_POLICY_TCP:
439                 /* no need to do IPsec. */
440                 goto skip_ipsec;
441         
442         case IPSEC_POLICY_IPSEC:
443                 if (sp->req == NULL) {
444                         /* acquire a policy */
445                         error = key_spdacquire(sp);
446                         goto bad;
447                 }
448                 break;
449
450         case IPSEC_POLICY_ENTRUST:
451         default:
452                 printf("ip_output: Invalid policy found. %d\n", sp->policy);
453         }
454     {
455         struct ipsec_output_state state;
456         bzero(&state, sizeof(state));
457         state.m = m;
458         if (flags & IP_ROUTETOIF) {
459                 state.ro = &iproute;
460                 bzero(&iproute, sizeof(iproute));
461         } else
462                 state.ro = ro;
463         state.dst = (struct sockaddr *)dst;
464
465         ip->ip_sum = 0;
466
467         /*
468          * XXX
469          * delayed checksums are not currently compatible with IPsec
470          */
471         if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
472                 in_delayed_cksum(m);
473                 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
474         }
475
476         ip->ip_len = htons(ip->ip_len);
477         ip->ip_off = htons(ip->ip_off);
478
479         error = ipsec4_output(&state, sp, flags);
480
481         m = state.m;
482         if (flags & IP_ROUTETOIF) {
483                 /*
484                  * if we have tunnel mode SA, we may need to ignore
485                  * IP_ROUTETOIF.
486                  */
487                 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
488                         flags &= ~IP_ROUTETOIF;
489                         ro = state.ro;
490                 }
491         } else
492                 ro = state.ro;
493         dst = (struct sockaddr_in *)state.dst;
494         if (error) {
495                 /* mbuf is already reclaimed in ipsec4_output. */
496                 m = NULL;
497                 switch (error) {
498                 case EHOSTUNREACH:
499                 case ENETUNREACH:
500                 case EMSGSIZE:
501                 case ENOBUFS:
502                 case ENOMEM:
503                         break;
504                 default:
505                         printf("ip4_output (ipsec): error code %d\n", error);
506                         /*fall through*/
507                 case ENOENT:
508                         /* don't show these error codes to the user */
509                         error = 0;
510                         break;
511                 }
512                 goto bad;
513         }
514
515         /* be sure to update variables that are affected by ipsec4_output() */
516         ip = mtod(m, struct ip *);
517         hlen = ip->ip_hl << 2;
518         if (ro->ro_rt == NULL) {
519                 if ((flags & IP_ROUTETOIF) == 0) {
520                         printf("ip_output: "
521                                 "can't update route after IPsec processing\n");
522                         error = EHOSTUNREACH;   /*XXX*/
523                         goto bad;
524                 }
525         } else {
526                 if (state.encap) {
527                         ia = ifatoia(ro->ro_rt->rt_ifa);
528                         ifp = ro->ro_rt->rt_ifp;
529                 }
530         }
531     }
532
533         /* make it flipped, again. */
534         ip->ip_len = ntohs(ip->ip_len);
535         ip->ip_off = ntohs(ip->ip_off);
536 skip_ipsec:
537 #endif /*IPSEC*/
538 #ifdef FAST_IPSEC
539         /*
540          * Check the security policy (SP) for the packet and, if
541          * required, do IPsec-related processing.  There are two
542          * cases here; the first time a packet is sent through
543          * it will be untagged and handled by ipsec4_checkpolicy.
544          * If the packet is resubmitted to ip_output (e.g. after
545          * AH, ESP, etc. processing), there will be a tag to bypass
546          * the lookup and related policy checking.
547          */
548         mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
549         s = splnet();
550         if (mtag != NULL) {
551                 tdbi = (struct tdb_ident *)(mtag + 1);
552                 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
553                 if (sp == NULL)
554                         error = -EINVAL;        /* force silent drop */
555                 m_tag_delete(m, mtag);
556         } else {
557                 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
558                                         &error, inp);
559         }
560         /*
561          * There are four return cases:
562          *    sp != NULL                    apply IPsec policy
563          *    sp == NULL, error == 0        no IPsec handling needed
564          *    sp == NULL, error == -EINVAL  discard packet w/o error
565          *    sp == NULL, error != 0        discard packet, report error
566          */
567         if (sp != NULL) {
568                 /* Loop detection, check if ipsec processing already done */
569                 KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
570                 for (mtag = m_tag_first(m); mtag != NULL;
571                      mtag = m_tag_next(m, mtag)) {
572                         if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
573                                 continue;
574                         if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
575                             mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
576                                 continue;
577                         /*
578                          * Check if policy has an SA associated with it.
579                          * This can happen when an SP has yet to acquire
580                          * an SA; e.g. on first reference.  If it occurs,
581                          * then we let ipsec4_process_packet do its thing.
582                          */
583                         if (sp->req->sav == NULL)
584                                 break;
585                         tdbi = (struct tdb_ident *)(mtag + 1);
586                         if (tdbi->spi == sp->req->sav->spi &&
587                             tdbi->proto == sp->req->sav->sah->saidx.proto &&
588                             bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
589                                  sizeof (union sockaddr_union)) == 0) {
590                                 /*
591                                  * No IPsec processing is needed, free
592                                  * reference to SP.
593                                  *
594                                  * NB: null pointer to avoid free at
595                                  *     done: below.
596                                  */
597                                 KEY_FREESP(&sp), sp = NULL;
598                                 splx(s);
599                                 goto spd_done;
600                         }
601                 }
602
603                 /*
604                  * Do delayed checksums now because we send before
605                  * this is done in the normal processing path.
606                  */
607                 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
608                         in_delayed_cksum(m);
609                         m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
610                 }
611
612                 ip->ip_len = htons(ip->ip_len);
613                 ip->ip_off = htons(ip->ip_off);
614
615                 /* NB: callee frees mbuf */
616                 error = ipsec4_process_packet(m, sp->req, flags, 0);
617                 /*
618                  * Preserve KAME behaviour: ENOENT can be returned
619                  * when an SA acquire is in progress.  Don't propagate
620                  * this to user-level; it confuses applications.
621                  *
622                  * XXX this will go away when the SADB is redone.
623                  */
624                 if (error == ENOENT)
625                         error = 0;
626                 splx(s);
627                 goto done;
628         } else {
629                 splx(s);
630
631                 if (error != 0) {
632                         /*
633                          * Hack: -EINVAL is used to signal that a packet
634                          * should be silently discarded.  This is typically
635                          * because we asked key management for an SA and
636                          * it was delayed (e.g. kicked up to IKE).
637                          */
638                         if (error == -EINVAL)
639                                 error = 0;
640                         goto bad;
641                 } else {
642                         /* No IPsec processing for this packet. */
643                 }
644 #ifdef notyet
645                 /*
646                  * If deferred crypto processing is needed, check that
647                  * the interface supports it.
648                  */ 
649                 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
650                 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
651                         /* notify IPsec to do its own crypto */
652                         ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
653                         error = EHOSTUNREACH;
654                         goto bad;
655                 }
656 #endif
657         }
658 spd_done:
659 #endif /* FAST_IPSEC */
660
661         /* Jump over all PFIL processing if hooks are not active. */
662         if (inet_pfil_hook.ph_busy_count == -1)
663                 goto passout;
664
665         /* Run through list of hooks for output packets. */
666         odst.s_addr = ip->ip_dst.s_addr;
667         error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT);
668         if (error != 0 || m == NULL)
669                 goto done;
670
671         ip = mtod(m, struct ip *);
672
673         /* See if destination IP address was changed by packet filter. */
674         if (odst.s_addr != ip->ip_dst.s_addr) {
675                 m->m_flags |= M_SKIP_FIREWALL;
676                 if (in_localip(ip->ip_dst)) {
677                         m->m_flags |= M_FASTFWD_OURS;
678                         if (m->m_pkthdr.rcvif == NULL)
679                                 m->m_pkthdr.rcvif = ifunit("lo0");
680                         if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
681                                 m->m_pkthdr.csum_flags |=
682                                     CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
683                                 m->m_pkthdr.csum_data = 0xffff;
684                         }
685                         m->m_pkthdr.csum_flags |=
686                             CSUM_IP_CHECKED | CSUM_IP_VALID;
687
688                         error = netisr_queue(NETISR_IP, m);
689                         goto done;
690                 } else
691                         goto again;
692         }
693
694 #ifdef IPFIREWALL_FORWARD
695         /* See if local, if yes, send it to netisr with M_FASTFWD_OURS. */
696         if (m->m_flags & M_FASTFWD_OURS) {
697                 if (m->m_pkthdr.rcvif == NULL)
698                         m->m_pkthdr.rcvif = ifunit("lo0");
699                 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
700                         m->m_pkthdr.csum_flags |=
701                             CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
702                         m->m_pkthdr.csum_data = 0xffff;
703                 }
704                 m->m_pkthdr.csum_flags |=
705                             CSUM_IP_CHECKED | CSUM_IP_VALID;
706
707                 error = netisr_queue(NETISR_IP, m);
708                 goto done;
709         }
710         /* Or forward to some other address? */
711         fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
712         if (fwd_tag) {
713                 if (!in_localip(ip->ip_src) && !in_localaddr(ip->ip_dst)) {
714                         dst = (struct sockaddr_in *)&ro->ro_dst;
715                         bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
716                         m->m_flags |= M_SKIP_FIREWALL;
717                         m_tag_delete(m, fwd_tag);
718                         goto again;
719                 } else {
720                         m_tag_delete(m, fwd_tag);
721                         /* Continue. */
722                 }
723         }
724 #endif
725
726 passout:
727         /* 127/8 must not appear on wire - RFC1122. */
728         if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
729             (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
730                 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
731                         ipstat.ips_badaddr++;
732                         error = EADDRNOTAVAIL;
733                         goto bad;
734                 }
735         }
736
737         m->m_pkthdr.csum_flags |= CSUM_IP;
738         sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
739         if (sw_csum & CSUM_DELAY_DATA) {
740                 in_delayed_cksum(m);
741                 sw_csum &= ~CSUM_DELAY_DATA;
742         }
743         m->m_pkthdr.csum_flags &= ifp->if_hwassist;
744
745         /*
746          * If small enough for interface, or the interface will take
747          * care of the fragmentation for us, can just send directly.
748          */
749         if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT &&
750             ((ip->ip_off & IP_DF) == 0))) {
751                 ip->ip_len = htons(ip->ip_len);
752                 ip->ip_off = htons(ip->ip_off);
753                 ip->ip_sum = 0;
754                 if (sw_csum & CSUM_DELAY_IP)
755                         ip->ip_sum = in_cksum(m, hlen);
756
757                 /* Record statistics for this interface address. */
758                 if (!(flags & IP_FORWARDING) && ia) {
759                         ia->ia_ifa.if_opackets++;
760                         ia->ia_ifa.if_obytes += m->m_pkthdr.len;
761                 }
762
763 #ifdef IPSEC
764                 /* clean ipsec history once it goes out of the node */
765                 ipsec_delaux(m);
766 #endif
767
768 #ifdef MBUF_STRESS_TEST
769                 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
770                         m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
771 #endif
772                 error = (*ifp->if_output)(ifp, m,
773                                 (struct sockaddr *)dst, ro->ro_rt);
774                 goto done;
775         }
776
777         if (ip->ip_off & IP_DF) {
778                 error = EMSGSIZE;
779                 /*
780                  * This case can happen if the user changed the MTU
781                  * of an interface after enabling IP on it.  Because
782                  * most netifs don't keep track of routes pointing to
783                  * them, there is no way for one to update all its
784                  * routes when the MTU is changed.
785                  */
786                 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
787                     (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
788                         ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
789                 }
790                 ipstat.ips_cantfrag++;
791                 goto bad;
792         }
793
794         /*
795          * Too large for interface; fragment if possible. If successful,
796          * on return, m will point to a list of packets to be sent.
797          */
798         error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum);
799         if (error)
800                 goto bad;
801         for (; m; m = m0) {
802                 m0 = m->m_nextpkt;
803                 m->m_nextpkt = 0;
804 #ifdef IPSEC
805                 /* clean ipsec history once it goes out of the node */
806                 ipsec_delaux(m);
807 #endif
808                 if (error == 0) {
809                         /* Record statistics for this interface address. */
810                         if (ia != NULL) {
811                                 ia->ia_ifa.if_opackets++;
812                                 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
813                         }
814                         
815                         error = (*ifp->if_output)(ifp, m,
816                             (struct sockaddr *)dst, ro->ro_rt);
817                 } else
818                         m_freem(m);
819         }
820
821         if (error == 0)
822                 ipstat.ips_fragmented++;
823
824 done:
825         if (ro == &iproute && ro->ro_rt) {
826                 RTFREE(ro->ro_rt);
827         }
828 #ifdef IPSEC
829         if (sp != NULL) {
830                 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
831                         printf("DP ip_output call free SP:%p\n", sp));
832                 key_freesp(sp);
833         }
834 #endif
835 #ifdef FAST_IPSEC
836         if (sp != NULL)
837                 KEY_FREESP(&sp);
838 #endif
839         return (error);
840 bad:
841         m_freem(m);
842         goto done;
843 }
844
845 /*
846  * Create a chain of fragments which fit the given mtu. m_frag points to the
847  * mbuf to be fragmented; on return it points to the chain with the fragments.
848  * Return 0 if no error. If error, m_frag may contain a partially built
849  * chain of fragments that should be freed by the caller.
850  *
851  * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
852  * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
853  */
854 int
855 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
856             u_long if_hwassist_flags, int sw_csum)
857 {
858         int error = 0;
859         int hlen = ip->ip_hl << 2;
860         int len = (mtu - hlen) & ~7;    /* size of payload in each fragment */
861         int off;
862         struct mbuf *m0 = *m_frag;      /* the original packet          */
863         int firstlen;
864         struct mbuf **mnext;
865         int nfrags;
866
867         if (ip->ip_off & IP_DF) {       /* Fragmentation not allowed */
868                 ipstat.ips_cantfrag++;
869                 return EMSGSIZE;
870         }
871
872         /*
873          * Must be able to put at least 8 bytes per fragment.
874          */
875         if (len < 8)
876                 return EMSGSIZE;
877
878         /*
879          * If the interface will not calculate checksums on
880          * fragmented packets, then do it here.
881          */
882         if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
883             (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
884                 in_delayed_cksum(m0);
885                 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
886         }
887
888         if (len > PAGE_SIZE) {
889                 /* 
890                  * Fragment large datagrams such that each segment 
891                  * contains a multiple of PAGE_SIZE amount of data, 
892                  * plus headers. This enables a receiver to perform 
893                  * page-flipping zero-copy optimizations.
894                  *
895                  * XXX When does this help given that sender and receiver
896                  * could have different page sizes, and also mtu could
897                  * be less than the receiver's page size ?
898                  */
899                 int newlen;
900                 struct mbuf *m;
901
902                 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
903                         off += m->m_len;
904
905                 /*
906                  * firstlen (off - hlen) must be aligned on an 
907                  * 8-byte boundary
908                  */
909                 if (off < hlen)
910                         goto smart_frag_failure;
911                 off = ((off - hlen) & ~7) + hlen;
912                 newlen = (~PAGE_MASK) & mtu;
913                 if ((newlen + sizeof (struct ip)) > mtu) {
914                         /* we failed, go back the default */
915 smart_frag_failure:
916                         newlen = len;
917                         off = hlen + len;
918                 }
919                 len = newlen;
920
921         } else {
922                 off = hlen + len;
923         }
924
925         firstlen = off - hlen;
926         mnext = &m0->m_nextpkt;         /* pointer to next packet */
927
928         /*
929          * Loop through length of segment after first fragment,
930          * make new header and copy data of each part and link onto chain.
931          * Here, m0 is the original packet, m is the fragment being created.
932          * The fragments are linked off the m_nextpkt of the original
933          * packet, which after processing serves as the first fragment.
934          */
935         for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
936                 struct ip *mhip;        /* ip header on the fragment */
937                 struct mbuf *m;
938                 int mhlen = sizeof (struct ip);
939
940                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
941                 if (m == NULL) {
942                         error = ENOBUFS;
943                         ipstat.ips_odropped++;
944                         goto done;
945                 }
946                 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
947                 /*
948                  * In the first mbuf, leave room for the link header, then
949                  * copy the original IP header including options. The payload
950                  * goes into an additional mbuf chain returned by m_copy().
951                  */
952                 m->m_data += max_linkhdr;
953                 mhip = mtod(m, struct ip *);
954                 *mhip = *ip;
955                 if (hlen > sizeof (struct ip)) {
956                         mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
957                         mhip->ip_v = IPVERSION;
958                         mhip->ip_hl = mhlen >> 2;
959                 }
960                 m->m_len = mhlen;
961                 /* XXX do we need to add ip->ip_off below ? */
962                 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
963                 if (off + len >= ip->ip_len) {  /* last fragment */
964                         len = ip->ip_len - off;
965                         m->m_flags |= M_LASTFRAG;
966                 } else
967                         mhip->ip_off |= IP_MF;
968                 mhip->ip_len = htons((u_short)(len + mhlen));
969                 m->m_next = m_copy(m0, off, len);
970                 if (m->m_next == NULL) {        /* copy failed */
971                         m_free(m);
972                         error = ENOBUFS;        /* ??? */
973                         ipstat.ips_odropped++;
974                         goto done;
975                 }
976                 m->m_pkthdr.len = mhlen + len;
977                 m->m_pkthdr.rcvif = (struct ifnet *)0;
978 #ifdef MAC
979                 mac_create_fragment(m0, m);
980 #endif
981                 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
982                 mhip->ip_off = htons(mhip->ip_off);
983                 mhip->ip_sum = 0;
984                 if (sw_csum & CSUM_DELAY_IP)
985                         mhip->ip_sum = in_cksum(m, mhlen);
986                 *mnext = m;
987                 mnext = &m->m_nextpkt;
988         }
989         ipstat.ips_ofragments += nfrags;
990
991         /* set first marker for fragment chain */
992         m0->m_flags |= M_FIRSTFRAG | M_FRAG;
993         m0->m_pkthdr.csum_data = nfrags;
994
995         /*
996          * Update first fragment by trimming what's been copied out
997          * and updating header.
998          */
999         m_adj(m0, hlen + firstlen - ip->ip_len);
1000         m0->m_pkthdr.len = hlen + firstlen;
1001         ip->ip_len = htons((u_short)m0->m_pkthdr.len);
1002         ip->ip_off |= IP_MF;
1003         ip->ip_off = htons(ip->ip_off);
1004         ip->ip_sum = 0;
1005         if (sw_csum & CSUM_DELAY_IP)
1006                 ip->ip_sum = in_cksum(m0, hlen);
1007
1008 done:
1009         *m_frag = m0;
1010         return error;
1011 }
1012
1013 void
1014 in_delayed_cksum(struct mbuf *m)
1015 {
1016         struct ip *ip;
1017         u_short csum, offset;
1018
1019         ip = mtod(m, struct ip *);
1020         offset = ip->ip_hl << 2 ;
1021         csum = in_cksum_skip(m, ip->ip_len, offset);
1022         if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1023                 csum = 0xffff;
1024         offset += m->m_pkthdr.csum_data;        /* checksum offset */
1025
1026         if (offset + sizeof(u_short) > m->m_len) {
1027                 printf("delayed m_pullup, m->len: %d  off: %d  p: %d\n",
1028                     m->m_len, offset, ip->ip_p);
1029                 /*
1030                  * XXX
1031                  * this shouldn't happen, but if it does, the
1032                  * correct behavior may be to insert the checksum
1033                  * in the existing chain instead of rearranging it.
1034                  */
1035                 m = m_pullup(m, offset + sizeof(u_short));
1036         }
1037         *(u_short *)(m->m_data + offset) = csum;
1038 }
1039
1040 /*
1041  * Insert IP options into preformed packet.
1042  * Adjust IP destination as required for IP source routing,
1043  * as indicated by a non-zero in_addr at the start of the options.
1044  *
1045  * XXX This routine assumes that the packet has no options in place.
1046  */
1047 static struct mbuf *
1048 ip_insertoptions(m, opt, phlen)
1049         register struct mbuf *m;
1050         struct mbuf *opt;
1051         int *phlen;
1052 {
1053         register struct ipoption *p = mtod(opt, struct ipoption *);
1054         struct mbuf *n;
1055         register struct ip *ip = mtod(m, struct ip *);
1056         unsigned optlen;
1057
1058         optlen = opt->m_len - sizeof(p->ipopt_dst);
1059         if (optlen + ip->ip_len > IP_MAXPACKET) {
1060                 *phlen = 0;
1061                 return (m);             /* XXX should fail */
1062         }
1063         if (p->ipopt_dst.s_addr)
1064                 ip->ip_dst = p->ipopt_dst;
1065         if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1066                 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1067                 if (n == NULL) {
1068                         *phlen = 0;
1069                         return (m);
1070                 }
1071                 n->m_pkthdr.rcvif = (struct ifnet *)0;
1072 #ifdef MAC
1073                 mac_create_mbuf_from_mbuf(m, n);
1074 #endif
1075                 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1076                 m->m_len -= sizeof(struct ip);
1077                 m->m_data += sizeof(struct ip);
1078                 n->m_next = m;
1079                 m = n;
1080                 m->m_len = optlen + sizeof(struct ip);
1081                 m->m_data += max_linkhdr;
1082                 bcopy(ip, mtod(m, void *), sizeof(struct ip));
1083         } else {
1084                 m->m_data -= optlen;
1085                 m->m_len += optlen;
1086                 m->m_pkthdr.len += optlen;
1087                 bcopy(ip, mtod(m, void *), sizeof(struct ip));
1088         }
1089         ip = mtod(m, struct ip *);
1090         bcopy(p->ipopt_list, ip + 1, optlen);
1091         *phlen = sizeof(struct ip) + optlen;
1092         ip->ip_v = IPVERSION;
1093         ip->ip_hl = *phlen >> 2;
1094         ip->ip_len += optlen;
1095         return (m);
1096 }
1097
1098 /*
1099  * Copy options from ip to jp,
1100  * omitting those not copied during fragmentation.
1101  */
1102 int
1103 ip_optcopy(ip, jp)
1104         struct ip *ip, *jp;
1105 {
1106         register u_char *cp, *dp;
1107         int opt, optlen, cnt;
1108
1109         cp = (u_char *)(ip + 1);
1110         dp = (u_char *)(jp + 1);
1111         cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1112         for (; cnt > 0; cnt -= optlen, cp += optlen) {
1113                 opt = cp[0];
1114                 if (opt == IPOPT_EOL)
1115                         break;
1116                 if (opt == IPOPT_NOP) {
1117                         /* Preserve for IP mcast tunnel's LSRR alignment. */
1118                         *dp++ = IPOPT_NOP;
1119                         optlen = 1;
1120                         continue;
1121                 }
1122
1123                 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
1124                     ("ip_optcopy: malformed ipv4 option"));
1125                 optlen = cp[IPOPT_OLEN];
1126                 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
1127                     ("ip_optcopy: malformed ipv4 option"));
1128
1129                 /* bogus lengths should have been caught by ip_dooptions */
1130                 if (optlen > cnt)
1131                         optlen = cnt;
1132                 if (IPOPT_COPIED(opt)) {
1133                         bcopy(cp, dp, optlen);
1134                         dp += optlen;
1135                 }
1136         }
1137         for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1138                 *dp++ = IPOPT_EOL;
1139         return (optlen);
1140 }
1141
1142 /*
1143  * IP socket option processing.
1144  */
1145 int
1146 ip_ctloutput(so, sopt)
1147         struct socket *so;
1148         struct sockopt *sopt;
1149 {
1150         struct  inpcb *inp = sotoinpcb(so);
1151         int     error, optval;
1152
1153         error = optval = 0;
1154         if (sopt->sopt_level != IPPROTO_IP) {
1155                 return (EINVAL);
1156         }
1157
1158         switch (sopt->sopt_dir) {
1159         case SOPT_SET:
1160                 switch (sopt->sopt_name) {
1161                 case IP_OPTIONS:
1162 #ifdef notyet
1163                 case IP_RETOPTS:
1164 #endif
1165                 {
1166                         struct mbuf *m;
1167                         if (sopt->sopt_valsize > MLEN) {
1168                                 error = EMSGSIZE;
1169                                 break;
1170                         }
1171                         MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER);
1172                         if (m == NULL) {
1173                                 error = ENOBUFS;
1174                                 break;
1175                         }
1176                         m->m_len = sopt->sopt_valsize;
1177                         error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1178                                             m->m_len);
1179                         
1180                         return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1181                                            m));
1182                 }
1183
1184                 case IP_TOS:
1185                 case IP_TTL:
1186                 case IP_RECVOPTS:
1187                 case IP_RECVRETOPTS:
1188                 case IP_RECVDSTADDR:
1189                 case IP_RECVTTL:
1190                 case IP_RECVIF:
1191                 case IP_FAITH:
1192                 case IP_ONESBCAST:
1193                         error = sooptcopyin(sopt, &optval, sizeof optval,
1194                                             sizeof optval);
1195                         if (error)
1196                                 break;
1197
1198                         switch (sopt->sopt_name) {
1199                         case IP_TOS:
1200                                 inp->inp_ip_tos = optval;
1201                                 break;
1202
1203                         case IP_TTL:
1204                                 inp->inp_ip_ttl = optval;
1205                                 break;
1206 #define OPTSET(bit) do {                                                \
1207         INP_LOCK(inp);                                                  \
1208         if (optval)                                                     \
1209                 inp->inp_flags |= bit;                                  \
1210         else                                                            \
1211                 inp->inp_flags &= ~bit;                                 \
1212         INP_UNLOCK(inp);                                                \
1213 } while (0)
1214
1215                         case IP_RECVOPTS:
1216                                 OPTSET(INP_RECVOPTS);
1217                                 break;
1218
1219                         case IP_RECVRETOPTS:
1220                                 OPTSET(INP_RECVRETOPTS);
1221                                 break;
1222
1223                         case IP_RECVDSTADDR:
1224                                 OPTSET(INP_RECVDSTADDR);
1225                                 break;
1226
1227                         case IP_RECVTTL:
1228                                 OPTSET(INP_RECVTTL);
1229                                 break;
1230
1231                         case IP_RECVIF:
1232                                 OPTSET(INP_RECVIF);
1233                                 break;
1234
1235                         case IP_FAITH:
1236                                 OPTSET(INP_FAITH);
1237                                 break;
1238
1239                         case IP_ONESBCAST:
1240                                 OPTSET(INP_ONESBCAST);
1241                                 break;
1242                         }
1243                         break;
1244 #undef OPTSET
1245
1246                 case IP_MULTICAST_IF:
1247                 case IP_MULTICAST_VIF:
1248                 case IP_MULTICAST_TTL:
1249                 case IP_MULTICAST_LOOP:
1250                 case IP_ADD_MEMBERSHIP:
1251                 case IP_DROP_MEMBERSHIP:
1252                         error = ip_setmoptions(sopt, &inp->inp_moptions);
1253                         break;
1254
1255                 case IP_PORTRANGE:
1256                         error = sooptcopyin(sopt, &optval, sizeof optval,
1257                                             sizeof optval);
1258                         if (error)
1259                                 break;
1260
1261                         INP_LOCK(inp);
1262                         switch (optval) {
1263                         case IP_PORTRANGE_DEFAULT:
1264                                 inp->inp_flags &= ~(INP_LOWPORT);
1265                                 inp->inp_flags &= ~(INP_HIGHPORT);
1266                                 break;
1267
1268                         case IP_PORTRANGE_HIGH:
1269                                 inp->inp_flags &= ~(INP_LOWPORT);
1270                                 inp->inp_flags |= INP_HIGHPORT;
1271                                 break;
1272
1273                         case IP_PORTRANGE_LOW:
1274                                 inp->inp_flags &= ~(INP_HIGHPORT);
1275                                 inp->inp_flags |= INP_LOWPORT;
1276                                 break;
1277
1278                         default:
1279                                 error = EINVAL;
1280                                 break;
1281                         }
1282                         INP_UNLOCK(inp);
1283                         break;
1284
1285 #if defined(IPSEC) || defined(FAST_IPSEC)
1286                 case IP_IPSEC_POLICY:
1287                 {
1288                         caddr_t req;
1289                         size_t len = 0;
1290                         int priv;
1291                         struct mbuf *m;
1292                         int optname;
1293
1294                         if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1295                                 break;
1296                         if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1297                                 break;
1298                         priv = (sopt->sopt_td != NULL &&
1299                                 suser(sopt->sopt_td) != 0) ? 0 : 1;
1300                         req = mtod(m, caddr_t);
1301                         len = m->m_len;
1302                         optname = sopt->sopt_name;
1303                         error = ipsec4_set_policy(inp, optname, req, len, priv);
1304                         m_freem(m);
1305                         break;
1306                 }
1307 #endif /*IPSEC*/
1308
1309                 default:
1310                         error = ENOPROTOOPT;
1311                         break;
1312                 }
1313                 break;
1314
1315         case SOPT_GET:
1316                 switch (sopt->sopt_name) {
1317                 case IP_OPTIONS:
1318                 case IP_RETOPTS:
1319                         if (inp->inp_options)
1320                                 error = sooptcopyout(sopt, 
1321                                                      mtod(inp->inp_options,
1322                                                           char *),
1323                                                      inp->inp_options->m_len);
1324                         else
1325                                 sopt->sopt_valsize = 0;
1326                         break;
1327
1328                 case IP_TOS:
1329                 case IP_TTL:
1330                 case IP_RECVOPTS:
1331                 case IP_RECVRETOPTS:
1332                 case IP_RECVDSTADDR:
1333                 case IP_RECVTTL:
1334                 case IP_RECVIF:
1335                 case IP_PORTRANGE:
1336                 case IP_FAITH:
1337                 case IP_ONESBCAST:
1338                         switch (sopt->sopt_name) {
1339
1340                         case IP_TOS:
1341                                 optval = inp->inp_ip_tos;
1342                                 break;
1343
1344                         case IP_TTL:
1345                                 optval = inp->inp_ip_ttl;
1346                                 break;
1347
1348 #define OPTBIT(bit)     (inp->inp_flags & bit ? 1 : 0)
1349
1350                         case IP_RECVOPTS:
1351                                 optval = OPTBIT(INP_RECVOPTS);
1352                                 break;
1353
1354                         case IP_RECVRETOPTS:
1355                                 optval = OPTBIT(INP_RECVRETOPTS);
1356                                 break;
1357
1358                         case IP_RECVDSTADDR:
1359                                 optval = OPTBIT(INP_RECVDSTADDR);
1360                                 break;
1361
1362                         case IP_RECVTTL:
1363                                 optval = OPTBIT(INP_RECVTTL);
1364                                 break;
1365
1366                         case IP_RECVIF:
1367                                 optval = OPTBIT(INP_RECVIF);
1368                                 break;
1369
1370                         case IP_PORTRANGE:
1371                                 if (inp->inp_flags & INP_HIGHPORT)
1372                                         optval = IP_PORTRANGE_HIGH;
1373                                 else if (inp->inp_flags & INP_LOWPORT)
1374                                         optval = IP_PORTRANGE_LOW;
1375                                 else
1376                                         optval = 0;
1377                                 break;
1378
1379                         case IP_FAITH:
1380                                 optval = OPTBIT(INP_FAITH);
1381                                 break;
1382
1383                         case IP_ONESBCAST:
1384                                 optval = OPTBIT(INP_ONESBCAST);
1385                                 break;
1386                         }
1387                         error = sooptcopyout(sopt, &optval, sizeof optval);
1388                         break;
1389
1390                 case IP_MULTICAST_IF:
1391                 case IP_MULTICAST_VIF:
1392                 case IP_MULTICAST_TTL:
1393                 case IP_MULTICAST_LOOP:
1394                 case IP_ADD_MEMBERSHIP:
1395                 case IP_DROP_MEMBERSHIP:
1396                         error = ip_getmoptions(sopt, inp->inp_moptions);
1397                         break;
1398
1399 #if defined(IPSEC) || defined(FAST_IPSEC)
1400                 case IP_IPSEC_POLICY:
1401                 {
1402                         struct mbuf *m = NULL;
1403                         caddr_t req = NULL;
1404                         size_t len = 0;
1405
1406                         if (m != 0) {
1407                                 req = mtod(m, caddr_t);
1408                                 len = m->m_len;
1409                         }
1410                         error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1411                         if (error == 0)
1412                                 error = soopt_mcopyout(sopt, m); /* XXX */
1413                         if (error == 0)
1414                                 m_freem(m);
1415                         break;
1416                 }
1417 #endif /*IPSEC*/
1418
1419                 default:
1420                         error = ENOPROTOOPT;
1421                         break;
1422                 }
1423                 break;
1424         }
1425         return (error);
1426 }
1427
1428 /*
1429  * Set up IP options in pcb for insertion in output packets.
1430  * Store in mbuf with pointer in pcbopt, adding pseudo-option
1431  * with destination address if source routed.
1432  */
1433 static int
1434 ip_pcbopts(optname, pcbopt, m)
1435         int optname;
1436         struct mbuf **pcbopt;
1437         register struct mbuf *m;
1438 {
1439         register int cnt, optlen;
1440         register u_char *cp;
1441         u_char opt;
1442
1443         /* turn off any old options */
1444         if (*pcbopt)
1445                 (void)m_free(*pcbopt);
1446         *pcbopt = 0;
1447         if (m == (struct mbuf *)0 || m->m_len == 0) {
1448                 /*
1449                  * Only turning off any previous options.
1450                  */
1451                 if (m)
1452                         (void)m_free(m);
1453                 return (0);
1454         }
1455
1456         if (m->m_len % sizeof(int32_t))
1457                 goto bad;
1458         /*
1459          * IP first-hop destination address will be stored before
1460          * actual options; move other options back
1461          * and clear it when none present.
1462          */
1463         if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1464                 goto bad;
1465         cnt = m->m_len;
1466         m->m_len += sizeof(struct in_addr);
1467         cp = mtod(m, u_char *) + sizeof(struct in_addr);
1468         bcopy(mtod(m, void *), cp, (unsigned)cnt);
1469         bzero(mtod(m, void *), sizeof(struct in_addr));
1470
1471         for (; cnt > 0; cnt -= optlen, cp += optlen) {
1472                 opt = cp[IPOPT_OPTVAL];
1473                 if (opt == IPOPT_EOL)
1474                         break;
1475                 if (opt == IPOPT_NOP)
1476                         optlen = 1;
1477                 else {
1478                         if (cnt < IPOPT_OLEN + sizeof(*cp))
1479                                 goto bad;
1480                         optlen = cp[IPOPT_OLEN];
1481                         if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1482                                 goto bad;
1483                 }
1484                 switch (opt) {
1485
1486                 default:
1487                         break;
1488
1489                 case IPOPT_LSRR:
1490                 case IPOPT_SSRR:
1491                         /*
1492                          * user process specifies route as:
1493                          *      ->A->B->C->D
1494                          * D must be our final destination (but we can't
1495                          * check that since we may not have connected yet).
1496                          * A is first hop destination, which doesn't appear in
1497                          * actual IP option, but is stored before the options.
1498                          */
1499                         if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1500                                 goto bad;
1501                         m->m_len -= sizeof(struct in_addr);
1502                         cnt -= sizeof(struct in_addr);
1503                         optlen -= sizeof(struct in_addr);
1504                         cp[IPOPT_OLEN] = optlen;
1505                         /*
1506                          * Move first hop before start of options.
1507                          */
1508                         bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1509                             sizeof(struct in_addr));
1510                         /*
1511                          * Then copy rest of options back
1512                          * to close up the deleted entry.
1513                          */
1514                         bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
1515                             &cp[IPOPT_OFFSET+1],
1516                             (unsigned)cnt - (IPOPT_MINOFF - 1));
1517                         break;
1518                 }
1519         }
1520         if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1521                 goto bad;
1522         *pcbopt = m;
1523         return (0);
1524
1525 bad:
1526         (void)m_free(m);
1527         return (EINVAL);
1528 }
1529
1530 /*
1531  * XXX
1532  * The whole multicast option thing needs to be re-thought.
1533  * Several of these options are equally applicable to non-multicast
1534  * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1535  * standard option (IP_TTL).
1536  */
1537
1538 /*
1539  * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1540  */
1541 static struct ifnet *
1542 ip_multicast_if(a, ifindexp)
1543         struct in_addr *a;
1544         int *ifindexp;
1545 {
1546         int ifindex;
1547         struct ifnet *ifp;
1548
1549         if (ifindexp)
1550                 *ifindexp = 0;
1551         if (ntohl(a->s_addr) >> 24 == 0) {
1552                 ifindex = ntohl(a->s_addr) & 0xffffff;
1553                 if (ifindex < 0 || if_index < ifindex)
1554                         return NULL;
1555                 ifp = ifnet_byindex(ifindex);
1556                 if (ifindexp)
1557                         *ifindexp = ifindex;
1558         } else {
1559                 INADDR_TO_IFP(*a, ifp);
1560         }
1561         return ifp;
1562 }
1563
1564 /*
1565  * Set the IP multicast options in response to user setsockopt().
1566  */
1567 static int
1568 ip_setmoptions(sopt, imop)
1569         struct sockopt *sopt;
1570         struct ip_moptions **imop;
1571 {
1572         int error = 0;
1573         int i;
1574         struct in_addr addr;
1575         struct ip_mreq mreq;
1576         struct ifnet *ifp;
1577         struct ip_moptions *imo = *imop;
1578         struct route ro;
1579         struct sockaddr_in *dst;
1580         int ifindex;
1581         int s;
1582
1583         if (imo == NULL) {
1584                 /*
1585                  * No multicast option buffer attached to the pcb;
1586                  * allocate one and initialize to default values.
1587                  */
1588                 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
1589                     M_WAITOK);
1590
1591                 if (imo == NULL)
1592                         return (ENOBUFS);
1593                 *imop = imo;
1594                 imo->imo_multicast_ifp = NULL;
1595                 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1596                 imo->imo_multicast_vif = -1;
1597                 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1598                 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1599                 imo->imo_num_memberships = 0;
1600         }
1601
1602         switch (sopt->sopt_name) {
1603         /* store an index number for the vif you wanna use in the send */
1604         case IP_MULTICAST_VIF:
1605                 if (legal_vif_num == 0) {
1606                         error = EOPNOTSUPP;
1607                         break;
1608                 }
1609                 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1610                 if (error)
1611                         break;
1612                 if (!legal_vif_num(i) && (i != -1)) {
1613                         error = EINVAL;
1614                         break;
1615                 }
1616                 imo->imo_multicast_vif = i;
1617                 break;
1618
1619         case IP_MULTICAST_IF:
1620                 /*
1621                  * Select the interface for outgoing multicast packets.
1622                  */
1623                 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1624                 if (error)
1625                         break;
1626                 /*
1627                  * INADDR_ANY is used to remove a previous selection.
1628                  * When no interface is selected, a default one is
1629                  * chosen every time a multicast packet is sent.
1630                  */
1631                 if (addr.s_addr == INADDR_ANY) {
1632                         imo->imo_multicast_ifp = NULL;
1633                         break;
1634                 }
1635                 /*
1636                  * The selected interface is identified by its local
1637                  * IP address.  Find the interface and confirm that
1638                  * it supports multicasting.
1639                  */
1640                 s = splimp();
1641                 ifp = ip_multicast_if(&addr, &ifindex);
1642                 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1643                         splx(s);
1644                         error = EADDRNOTAVAIL;
1645                         break;
1646                 }
1647                 imo->imo_multicast_ifp = ifp;
1648                 if (ifindex)
1649                         imo->imo_multicast_addr = addr;
1650                 else
1651                         imo->imo_multicast_addr.s_addr = INADDR_ANY;
1652                 splx(s);
1653                 break;
1654
1655         case IP_MULTICAST_TTL:
1656                 /*
1657                  * Set the IP time-to-live for outgoing multicast packets.
1658                  * The original multicast API required a char argument,
1659                  * which is inconsistent with the rest of the socket API.
1660                  * We allow either a char or an int.
1661                  */
1662                 if (sopt->sopt_valsize == 1) {
1663                         u_char ttl;
1664                         error = sooptcopyin(sopt, &ttl, 1, 1);
1665                         if (error)
1666                                 break;
1667                         imo->imo_multicast_ttl = ttl;
1668                 } else {
1669                         u_int ttl;
1670                         error = sooptcopyin(sopt, &ttl, sizeof ttl, 
1671                                             sizeof ttl);
1672                         if (error)
1673                                 break;
1674                         if (ttl > 255)
1675                                 error = EINVAL;
1676                         else
1677                                 imo->imo_multicast_ttl = ttl;
1678                 }
1679                 break;
1680
1681         case IP_MULTICAST_LOOP:
1682                 /*
1683                  * Set the loopback flag for outgoing multicast packets.
1684                  * Must be zero or one.  The original multicast API required a
1685                  * char argument, which is inconsistent with the rest
1686                  * of the socket API.  We allow either a char or an int.
1687                  */
1688                 if (sopt->sopt_valsize == 1) {
1689                         u_char loop;
1690                         error = sooptcopyin(sopt, &loop, 1, 1);
1691                         if (error)
1692                                 break;
1693                         imo->imo_multicast_loop = !!loop;
1694                 } else {
1695                         u_int loop;
1696                         error = sooptcopyin(sopt, &loop, sizeof loop,
1697                                             sizeof loop);
1698                         if (error)
1699                                 break;
1700                         imo->imo_multicast_loop = !!loop;
1701                 }
1702                 break;
1703
1704         case IP_ADD_MEMBERSHIP:
1705                 /*
1706                  * Add a multicast group membership.
1707                  * Group must be a valid IP multicast address.
1708                  */
1709                 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1710                 if (error)
1711                         break;
1712
1713                 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1714                         error = EINVAL;
1715                         break;
1716                 }
1717                 s = splimp();
1718                 /*
1719                  * If no interface address was provided, use the interface of
1720                  * the route to the given multicast address.
1721                  */
1722                 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1723                         bzero((caddr_t)&ro, sizeof(ro));
1724                         dst = (struct sockaddr_in *)&ro.ro_dst;
1725                         dst->sin_len = sizeof(*dst);
1726                         dst->sin_family = AF_INET;
1727                         dst->sin_addr = mreq.imr_multiaddr;
1728                         rtalloc_ign(&ro, RTF_CLONING);
1729                         if (ro.ro_rt == NULL) {
1730                                 error = EADDRNOTAVAIL;
1731                                 splx(s);
1732                                 break;
1733                         }
1734                         ifp = ro.ro_rt->rt_ifp;
1735                         RTFREE(ro.ro_rt);
1736                 }
1737                 else {
1738                         ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1739                 }
1740
1741                 /*
1742                  * See if we found an interface, and confirm that it
1743                  * supports multicast.
1744                  */
1745                 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1746                         error = EADDRNOTAVAIL;
1747                         splx(s);
1748                         break;
1749                 }
1750                 /*
1751                  * See if the membership already exists or if all the
1752                  * membership slots are full.
1753                  */
1754                 for (i = 0; i < imo->imo_num_memberships; ++i) {
1755                         if (imo->imo_membership[i]->inm_ifp == ifp &&
1756                             imo->imo_membership[i]->inm_addr.s_addr
1757                                                 == mreq.imr_multiaddr.s_addr)
1758                                 break;
1759                 }
1760                 if (i < imo->imo_num_memberships) {
1761                         error = EADDRINUSE;
1762                         splx(s);
1763                         break;
1764                 }
1765                 if (i == IP_MAX_MEMBERSHIPS) {
1766                         error = ETOOMANYREFS;
1767                         splx(s);
1768                         break;
1769                 }
1770                 /*
1771                  * Everything looks good; add a new record to the multicast
1772                  * address list for the given interface.
1773                  */
1774                 if ((imo->imo_membership[i] =
1775                     in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1776                         error = ENOBUFS;
1777                         splx(s);
1778                         break;
1779                 }
1780                 ++imo->imo_num_memberships;
1781                 splx(s);
1782                 break;
1783
1784         case IP_DROP_MEMBERSHIP:
1785                 /*
1786                  * Drop a multicast group membership.
1787                  * Group must be a valid IP multicast address.
1788                  */
1789                 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1790                 if (error)
1791                         break;
1792
1793                 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1794                         error = EINVAL;
1795                         break;
1796                 }
1797
1798                 s = splimp();
1799                 /*
1800                  * If an interface address was specified, get a pointer
1801                  * to its ifnet structure.
1802                  */
1803                 if (mreq.imr_interface.s_addr == INADDR_ANY)
1804                         ifp = NULL;
1805                 else {
1806                         ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1807                         if (ifp == NULL) {
1808                                 error = EADDRNOTAVAIL;
1809                                 splx(s);
1810                                 break;
1811                         }
1812                 }
1813                 /*
1814                  * Find the membership in the membership array.
1815                  */
1816                 for (i = 0; i < imo->imo_num_memberships; ++i) {
1817                         if ((ifp == NULL ||
1818                              imo->imo_membership[i]->inm_ifp == ifp) &&
1819                              imo->imo_membership[i]->inm_addr.s_addr ==
1820                              mreq.imr_multiaddr.s_addr)
1821                                 break;
1822                 }
1823                 if (i == imo->imo_num_memberships) {
1824                         error = EADDRNOTAVAIL;
1825                         splx(s);
1826                         break;
1827                 }
1828                 /*
1829                  * Give up the multicast address record to which the
1830                  * membership points.
1831                  */
1832                 in_delmulti(imo->imo_membership[i]);
1833                 /*
1834                  * Remove the gap in the membership array.
1835                  */
1836                 for (++i; i < imo->imo_num_memberships; ++i)
1837                         imo->imo_membership[i-1] = imo->imo_membership[i];
1838                 --imo->imo_num_memberships;
1839                 splx(s);
1840                 break;
1841
1842         default:
1843                 error = EOPNOTSUPP;
1844                 break;
1845         }
1846
1847         /*
1848          * If all options have default values, no need to keep the mbuf.
1849          */
1850         if (imo->imo_multicast_ifp == NULL &&
1851             imo->imo_multicast_vif == -1 &&
1852             imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1853             imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1854             imo->imo_num_memberships == 0) {
1855                 free(*imop, M_IPMOPTS);
1856                 *imop = NULL;
1857         }
1858
1859         return (error);
1860 }
1861
1862 /*
1863  * Return the IP multicast options in response to user getsockopt().
1864  */
1865 static int
1866 ip_getmoptions(sopt, imo)
1867         struct sockopt *sopt;
1868         register struct ip_moptions *imo;
1869 {
1870         struct in_addr addr;
1871         struct in_ifaddr *ia;
1872         int error, optval;
1873         u_char coptval;
1874
1875         error = 0;
1876         switch (sopt->sopt_name) {
1877         case IP_MULTICAST_VIF: 
1878                 if (imo != NULL)
1879                         optval = imo->imo_multicast_vif;
1880                 else
1881                         optval = -1;
1882                 error = sooptcopyout(sopt, &optval, sizeof optval);
1883                 break;
1884
1885         case IP_MULTICAST_IF:
1886                 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1887                         addr.s_addr = INADDR_ANY;
1888                 else if (imo->imo_multicast_addr.s_addr) {
1889                         /* return the value user has set */
1890                         addr = imo->imo_multicast_addr;
1891                 } else {
1892                         IFP_TO_IA(imo->imo_multicast_ifp, ia);
1893                         addr.s_addr = (ia == NULL) ? INADDR_ANY
1894                                 : IA_SIN(ia)->sin_addr.s_addr;
1895                 }
1896                 error = sooptcopyout(sopt, &addr, sizeof addr);
1897                 break;
1898
1899         case IP_MULTICAST_TTL:
1900                 if (imo == 0)
1901                         optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1902                 else
1903                         optval = coptval = imo->imo_multicast_ttl;
1904                 if (sopt->sopt_valsize == 1)
1905                         error = sooptcopyout(sopt, &coptval, 1);
1906                 else
1907                         error = sooptcopyout(sopt, &optval, sizeof optval);
1908                 break;
1909
1910         case IP_MULTICAST_LOOP:
1911                 if (imo == 0)
1912                         optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1913                 else
1914                         optval = coptval = imo->imo_multicast_loop;
1915                 if (sopt->sopt_valsize == 1)
1916                         error = sooptcopyout(sopt, &coptval, 1);
1917                 else
1918                         error = sooptcopyout(sopt, &optval, sizeof optval);
1919                 break;
1920
1921         default:
1922                 error = ENOPROTOOPT;
1923                 break;
1924         }
1925         return (error);
1926 }
1927
1928 /*
1929  * Discard the IP multicast options.
1930  */
1931 void
1932 ip_freemoptions(imo)
1933         register struct ip_moptions *imo;
1934 {
1935         register int i;
1936
1937         if (imo != NULL) {
1938                 for (i = 0; i < imo->imo_num_memberships; ++i)
1939                         in_delmulti(imo->imo_membership[i]);
1940                 free(imo, M_IPMOPTS);
1941         }
1942 }
1943
1944 /*
1945  * Routine called from ip_output() to loop back a copy of an IP multicast
1946  * packet to the input queue of a specified interface.  Note that this
1947  * calls the output routine of the loopback "driver", but with an interface
1948  * pointer that might NOT be a loopback interface -- evil, but easier than
1949  * replicating that code here.
1950  */
1951 static void
1952 ip_mloopback(ifp, m, dst, hlen)
1953         struct ifnet *ifp;
1954         register struct mbuf *m;
1955         register struct sockaddr_in *dst;
1956         int hlen;
1957 {
1958         register struct ip *ip;
1959         struct mbuf *copym;
1960
1961         copym = m_copy(m, 0, M_COPYALL);
1962         if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1963                 copym = m_pullup(copym, hlen);
1964         if (copym != NULL) {
1965                 /* If needed, compute the checksum and mark it as valid. */
1966                 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1967                         in_delayed_cksum(copym);
1968                         copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1969                         copym->m_pkthdr.csum_flags |=
1970                             CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1971                         copym->m_pkthdr.csum_data = 0xffff;
1972                 }
1973                 /*
1974                  * We don't bother to fragment if the IP length is greater
1975                  * than the interface's MTU.  Can this possibly matter?
1976                  */
1977                 ip = mtod(copym, struct ip *);
1978                 ip->ip_len = htons(ip->ip_len);
1979                 ip->ip_off = htons(ip->ip_off);
1980                 ip->ip_sum = 0;
1981                 ip->ip_sum = in_cksum(copym, hlen);
1982                 /*
1983                  * NB:
1984                  * It's not clear whether there are any lingering
1985                  * reentrancy problems in other areas which might
1986                  * be exposed by using ip_input directly (in
1987                  * particular, everything which modifies the packet
1988                  * in-place).  Yet another option is using the
1989                  * protosw directly to deliver the looped back
1990                  * packet.  For the moment, we'll err on the side
1991                  * of safety by using if_simloop().
1992                  */
1993 #if 1 /* XXX */
1994                 if (dst->sin_family != AF_INET) {
1995                         printf("ip_mloopback: bad address family %d\n",
1996                                                 dst->sin_family);
1997                         dst->sin_family = AF_INET;
1998                 }
1999 #endif
2000
2001 #ifdef notdef
2002                 copym->m_pkthdr.rcvif = ifp;
2003                 ip_input(copym);
2004 #else
2005                 if_simloop(ifp, copym, dst->sin_family, 0);
2006 #endif
2007         }
2008 }