/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet/in_systm.h>	/* For ECN definitions. */
#include <netinet/ip.h>		/* For ECN definitions. */

#include <security/mac/mac_framework.h>
/* Reassembly headers are stored in hash buckets. */
#define	IP6REASS_NHASH_LOG2	10
#define	IP6REASS_NHASH		(1 << IP6REASS_NHASH_LOG2)
#define	IP6REASS_HMASK		(IP6REASS_NHASH - 1)
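/*
 * Note: with IP6REASS_NHASH_LOG2 == 10 this yields 1024 buckets.  Because
 * the bucket count is a power of two, "hash & IP6REASS_HMASK" selects a
 * bucket without a (more expensive) modulo operation.
 */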
static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *,
	    uint32_t bucket __unused);
static void frag6_deq(struct ip6asfrag *, uint32_t bucket __unused);
static void frag6_insque_head(struct ip6q *, struct ip6q *,
	    uint32_t bucket);
static void frag6_remque(struct ip6q *, uint32_t bucket);
static void frag6_freef(struct ip6q *, uint32_t bucket);

static MALLOC_DEFINE(M_FRAG6, "frag6", "IPv6 fragment reassembly header");
/* System wide (global) maximum and count of packets in reassembly queues. */
static int ip6_maxfrags;
static volatile u_int frag6_nfrags = 0;

/* Maximum and current packets in per-VNET reassembly queue. */
VNET_DEFINE_STATIC(int, ip6_maxfragpackets);
VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets);
#define	V_ip6_maxfragpackets	VNET(ip6_maxfragpackets)
#define	V_frag6_nfragpackets	VNET(frag6_nfragpackets)

/* Maximum per-VNET reassembly queues per bucket and fragments per packet. */
VNET_DEFINE_STATIC(int, ip6_maxfragbucketsize);
VNET_DEFINE_STATIC(int, ip6_maxfragsperpacket);
#define	V_ip6_maxfragbucketsize	VNET(ip6_maxfragbucketsize)
#define	V_ip6_maxfragsperpacket	VNET(ip6_maxfragsperpacket)

/* Per-VNET reassembly queue buckets. */
VNET_DEFINE_STATIC(struct ip6qbucket, ip6qb[IP6REASS_NHASH]);
VNET_DEFINE_STATIC(uint32_t, ip6qb_hashseed);
#define	V_ip6qb			VNET(ip6qb)
#define	V_ip6qb_hashseed	VNET(ip6qb_hashseed)

#define	IP6QB_LOCK(_b)		mtx_lock(&V_ip6qb[(_b)].lock)
#define	IP6QB_TRYLOCK(_b)	mtx_trylock(&V_ip6qb[(_b)].lock)
#define	IP6QB_LOCK_ASSERT(_b)	mtx_assert(&V_ip6qb[(_b)].lock, MA_OWNED)
#define	IP6QB_UNLOCK(_b)	mtx_unlock(&V_ip6qb[(_b)].lock)
#define	IP6QB_HEAD(_b)		(&V_ip6qb[(_b)].ip6q)
/*
 * By default, limit the number of IP6 fragments across all reassembly
 * queues to 1/32 of the total number of mbuf clusters.
 *
 * Limit the total number of reassembly queues per VNET to the
 * IP6 fragment limit, but ensure the limit will not allow any bucket
 * to grow above 100 items.  (The bucket limit is
 * IP6_MAXFRAGPACKETS / (IP6REASS_NHASH / 2), so 50 is the correct
 * multiplier to reach a 100-item limit.)
 * The 100-item limit was chosen as brief testing seems to show that
 * this produces "reasonable" performance on some subset of systems
 * under DDoS attack.
 */
#define	IP6_MAXFRAGS		(nmbclusters / 32)
#define	IP6_MAXFRAGPACKETS	(imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50))
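/*
 * Worked example of the bucket arithmetic above: when IP6_MAXFRAGPACKETS
 * is capped at IP6REASS_NHASH * 50, frag6_set_bucketsize() computes
 * (IP6REASS_NHASH * 50) / (IP6REASS_NHASH / 2) == 100 entries per bucket.
 */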
/*
 * Sysctls and helper function.
 */
SYSCTL_DECL(_net_inet6_ip6);

static void
frag6_set_bucketsize(void)
{
	int i;

	if ((i = V_ip6_maxfragpackets) > 0)
		V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1);
}
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
    CTLFLAG_RW, &ip6_maxfrags, 0,
    "Maximum allowed number of outstanding IPv6 packet fragments. "
    "A value of 0 means no fragmented packets will be accepted, while a "
    "value of -1 means no limit");
static int
sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = V_ip6_maxfragpackets;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || !req->newptr)
		return (error);
	V_ip6_maxfragpackets = val;
	frag6_set_bucketsize();
	return (0);
}
SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
    sysctl_ip6_maxfragpackets, "I",
    "Default maximum number of outstanding fragmented IPv6 packets. "
    "A value of 0 means no fragmented packets will be accepted, while a "
    "value of -1 means no limit");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0,
    "Maximum allowed number of fragments per packet");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0,
    "Maximum number of reassembly queues per hash bucket");
/*
 * Remove the IPv6 fragmentation header from the mbuf.
 */
int
ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
{
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct mbuf *t;

	/* Delete frag6 header. */
	if (m->m_len >= offset + sizeof(struct ip6_frag)) {
		/* This is the only possible case with !PULLDOWN_TEST. */
		bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
		    offset);
		m->m_data += sizeof(struct ip6_frag);
		m->m_len -= sizeof(struct ip6_frag);
	} else {
		/* This comes with no copy if the boundary is on cluster. */
		if ((t = m_split(m, offset, wait)) == NULL)
			return (ENOMEM);
		m_adj(t, sizeof(struct ip6_frag));
		m_cat(m, t);
	}

	m->m_flags |= M_FRAGMENTED;
	return (0);
}
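/*
 * Note the two strategies above: in the contiguous case the headers in
 * front of the fragment header are slid forward by eight bytes (bcopy()
 * is overlap-safe, like memmove()) and m_data is then advanced past the
 * stale space; otherwise m_split()/m_adj()/m_cat() carve the fragment
 * header out without copying any payload held in clusters.
 */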
/*
 * Free a fragment reassembly header and all associated datagrams.
 */
static void
frag6_freef(struct ip6q *q6, uint32_t bucket)
{
	struct ip6asfrag *af6, *down6;

	IP6QB_LOCK_ASSERT(bucket);

	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
	     af6 = down6) {
		struct mbuf *m = IP6_REASS_MBUF(af6);

		down6 = af6->ip6af_down;
		frag6_deq(af6, bucket);

		/*
		 * Return ICMP time exceeded error for the 1st fragment.
		 * Just free other fragments.
		 */
		if (af6->ip6af_off == 0) {
			struct ip6_hdr *ip6;

			/* Adjust pointer. */
			ip6 = mtod(m, struct ip6_hdr *);

			/* Restore source and destination addresses. */
			ip6->ip6_src = q6->ip6q_src;
			ip6->ip6_dst = q6->ip6q_dst;

			icmp6_error(m, ICMP6_TIME_EXCEEDED,
			    ICMP6_TIME_EXCEED_REASSEMBLY, 0);
		} else
			m_freem(m);
		free(af6, M_FRAG6);
	}
	frag6_remque(q6, bucket);
	atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
	mac_ip6q_destroy(q6);
#endif
	free(q6, M_FRAG6);
	atomic_subtract_int(&V_frag6_nfragpackets, 1);
}
/*
 * As in RFC 2460, RFC 8200's fragmentation and reassembly rules do not
 * agree with each other about the handling of the next header field in
 * the fragment header.  While the sender will use the same value for all
 * of the fragmented packets, the receiver is advised not to check them
 * for consistency.
 *
 * Fragment rules (p18,p19):
 *	(2) A Fragment header containing:
 *	The Next Header value that identifies the first header
 *	after the Per-Fragment headers of the original packet.
 *		-> next header field is same for all fragments
 *
 * Reassembly rule (p20):
 *	The Next Header field of the last header of the Per-Fragment
 *	headers is obtained from the Next Header field of the first
 *	fragment's Fragment header.
 *		-> should grab it from the first fragment only
 *
 * The following note also contradicts the fragment rule: no one is going
 * to send fragments of the same packet with differing next header fields.
 *
 * Additional note (p22) [not an error]:
 *	The Next Header values in the Fragment headers of different
 *	fragments of the same original packet may differ.  Only the value
 *	from the Offset zero fragment packet is used for reassembly.
 *		-> should grab it from the first fragment only
 *
 * There is no explicit reason given in the RFC.  Historical reason maybe?
 */
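/*
 * Accordingly, this implementation records ip6q_nxt only from the
 * fragment with offset zero (see the fragoff == 0 case in frag6_input()
 * below) and ignores the next header values carried by later fragments.
 */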
int
frag6_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp, *t;
	struct ip6_hdr *ip6;
	struct ip6_frag *ip6f;
	struct ip6q *head, *q6;
	struct ip6asfrag *af6, *ip6af, *af6dwn;
	struct in6_ifaddr *ia;
	int offset = *offp, nxt, i, next;
	int fragoff, frgpartlen;	/* Must be larger than uint16_t. */
	uint32_t hashkey[(sizeof(struct in6_addr) * 2 +
	    sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)];
	uint32_t bucket, *hashkeyp;
	struct ifnet *dstifp;
	uint8_t ecn, ecn0;
#ifdef RSS
	struct m_tag *mtag;
	struct ip6_direct_ctx *ip6dc;
#endif
	ip6 = mtod(m, struct ip6_hdr *);
#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE);
	ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
#else
	IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
	if (ip6f == NULL)
		return (IPPROTO_DONE);
#endif
	dstifp = NULL;
	/* Find the destination interface of the packet. */
	ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
	if (ia != NULL) {
		dstifp = ia->ia_ifp;
		ifa_free(&ia->ia_ifa);
	}
	/* Jumbo payload cannot contain a fragment header. */
	if (ip6->ip6_plen == 0) {
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
		in6_ifstat_inc(dstifp, ifs6_reass_fail);
		return (IPPROTO_DONE);
	}
	/*
	 * Check that the fragment's payload length is a multiple of
	 * 8 octets (unless it is the last fragment).
	 * sizeof(struct ip6_frag) == 8
	 * sizeof(struct ip6_hdr) == 40
	 */
	if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
	    (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
		    offsetof(struct ip6_hdr, ip6_plen));
		in6_ifstat_inc(dstifp, ifs6_reass_fail);
		return (IPPROTO_DONE);
	}
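	/*
	 * Why the arithmetic above works: "offset" still counts from the
	 * start of the packet and thus includes the 40-byte IPv6 header,
	 * while ip6_plen excludes it.  Since both 40 and the 8-byte
	 * fragment header are multiples of 8, (ntohs(ip6->ip6_plen) -
	 * offset) is congruent, modulo 8, to the fragment's payload length.
	 */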
	IP6STAT_INC(ip6s_fragments);
	in6_ifstat_inc(dstifp, ifs6_reass_reqd);

	/* Offset now points to data portion. */
	offset += sizeof(struct ip6_frag);
	/*
	 * Handle "atomic" fragments (offset and m bit set to 0) upfront,
	 * unrelated to any reassembly.  Still need to remove the frag hdr.
	 * See RFC 6946 and section 4.5 of RFC 8200.
	 */
	if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
		IP6STAT_INC(ip6s_atomicfrags);
		/* XXX-BZ handle correctly. */
		in6_ifstat_inc(dstifp, ifs6_reass_ok);
		*offp = offset;
		m->m_flags |= M_FRAGMENTED;
		return (ip6f->ip6f_nxt);
	}
	/* Get fragment length and discard 0-byte fragments. */
	frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
	if (frgpartlen == 0) {
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
		    offsetof(struct ip6_hdr, ip6_plen));
		in6_ifstat_inc(dstifp, ifs6_reass_fail);
		IP6STAT_INC(ip6s_fragdropped);
		return (IPPROTO_DONE);
	}
	/* Generate a hash value for fragment bucket selection. */
	hashkeyp = hashkey;
	memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr));
	hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
	memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr));
	hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
	*hashkeyp = ip6f->ip6f_ident;
	bucket = jenkins_hash32(hashkey, nitems(hashkey), V_ip6qb_hashseed);
	bucket &= IP6REASS_HMASK;
	head = IP6QB_HEAD(bucket);
	IP6QB_LOCK(bucket);
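	/*
	 * Note: keying the Jenkins hash with the per-VNET random seed
	 * (V_ip6qb_hashseed, set from arc4random() in frag6_init()) makes
	 * the src/dst/ident -> bucket mapping unpredictable to remote
	 * senders, so they cannot deliberately aim all of their fragments
	 * at a single bucket.
	 */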
394 * Enforce upper bound on number of fragments for the entire system.
395 * If maxfrag is 0, never accept fragments.
396 * If maxfrag is -1, accept all fragments without limitation.
398 if (ip6_maxfrags < 0)
400 else if (atomic_load_int(&frag6_nfrags) >= (u_int)ip6_maxfrags)
	for (q6 = head->ip6q_next; q6 != head; q6 = q6->ip6q_next)
		if (ip6f->ip6f_ident == q6->ip6q_ident &&
		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)
#ifdef MAC
		    && mac_ip6q_match(m, q6)
#endif
		    )
			break;

	if (q6 == head) {
		/* The first fragment to arrive creates a reassembly queue. */
		/*
		 * Enforce upper bound on number of fragmented packets
		 * for which we attempt reassembly;
		 * If maxfragpackets is 0, never accept fragments.
		 * If maxfragpackets is -1, accept all fragments without
		 * limitation.
		 */
		if (V_ip6_maxfragpackets < 0)
			;
		else if (V_ip6qb[bucket].count >= V_ip6_maxfragbucketsize ||
		    atomic_load_int(&V_frag6_nfragpackets) >=
		    (u_int)V_ip6_maxfragpackets)
			goto dropfrag;
		atomic_add_int(&V_frag6_nfragpackets, 1);
		/* Allocate IPv6 fragment packet queue entry. */
		q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FRAG6,
		    M_NOWAIT | M_ZERO);
		if (q6 == NULL)
			goto dropfrag;
#ifdef MAC
		if (mac_ip6q_init(q6, M_NOWAIT) != 0) {
			free(q6, M_FRAG6);
			goto dropfrag;
		}
		mac_ip6q_create(m, q6);
#endif
		frag6_insque_head(q6, head, bucket);

		/* ip6q_nxt will be filled afterwards, from 1st fragment. */
		q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
#ifdef notyet
		q6->ip6q_nxtp = (u_char *)nxtp;
#endif
		q6->ip6q_ident = ip6f->ip6f_ident;
		q6->ip6q_ttl = IPV6_FRAGTTL;
		q6->ip6q_src = ip6->ip6_src;
		q6->ip6q_dst = ip6->ip6_dst;
		q6->ip6q_ecn =
		    (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
		q6->ip6q_unfrglen = -1;	/* The 1st fragment has not arrived. */

		q6->ip6q_nfrag = 0;
	}
	/*
	 * If it is the 1st fragment, record the length of the
	 * unfragmentable part and the next header of the fragment header.
	 */
	fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
	if (fragoff == 0) {
		q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
		    sizeof(struct ip6_frag);
		q6->ip6q_nxt = ip6f->ip6f_nxt;
	}
	/*
	 * Check that the reassembled packet would not exceed 65535 bytes
	 * in size.
	 * If it would exceed, discard the fragment and return an ICMP error.
	 */
	if (q6->ip6q_unfrglen >= 0) {
		/* The 1st fragment has already arrived. */
		if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
			icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
			    offset - sizeof(struct ip6_frag) +
			    offsetof(struct ip6_frag, ip6f_offlg));
			IP6QB_UNLOCK(bucket);
			return (IPPROTO_DONE);
		}
	} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
		    offset - sizeof(struct ip6_frag) +
		    offsetof(struct ip6_frag, ip6f_offlg));
		IP6QB_UNLOCK(bucket);
		return (IPPROTO_DONE);
	}
	/*
	 * If it is the first fragment, do the above check for each
	 * fragment already stored in the reassembly queue.
	 */
	if (fragoff == 0) {
		for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
		     af6 = af6dwn) {
			af6dwn = af6->ip6af_down;

			if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
			    IPV6_MAXPACKET) {
				struct mbuf *merr = IP6_REASS_MBUF(af6);
				struct ip6_hdr *ip6err;
				int erroff = af6->ip6af_offset;

				/* Dequeue the fragment. */
				frag6_deq(af6, bucket);
				free(af6, M_FRAG6);

				/* Adjust pointer. */
				ip6err = mtod(merr, struct ip6_hdr *);

				/*
				 * Restore source and destination addresses
				 * in the erroneous IPv6 header.
				 */
				ip6err->ip6_src = q6->ip6q_src;
				ip6err->ip6_dst = q6->ip6q_dst;

				icmp6_error(merr, ICMP6_PARAM_PROB,
				    ICMP6_PARAMPROB_HEADER,
				    erroff - sizeof(struct ip6_frag) +
				    offsetof(struct ip6_frag, ip6f_offlg));
			}
		}
	}
	/* Allocate an IPv6 fragment queue entry for this fragmented part. */
	ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FRAG6,
	    M_NOWAIT | M_ZERO);
	if (ip6af == NULL)
		goto dropfrag;
	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
	ip6af->ip6af_off = fragoff;
	ip6af->ip6af_frglen = frgpartlen;
	ip6af->ip6af_offset = offset;
	IP6_REASS_MBUF(ip6af) = m;
	af6 = (struct ip6asfrag *)q6;
	/* Do duplicate, condition, and boundary checks. */

	/*
	 * Handle ECN by comparing this segment with the first one;
	 * if CE is set, do not lose CE.
	 * Drop if CE and not-ECT are mixed for the same packet.
	 */
	ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
	ecn0 = q6->ip6q_ecn;
	if (ecn == IPTOS_ECN_CE) {
		if (ecn0 == IPTOS_ECN_NOTECT) {
			free(ip6af, M_FRAG6);
			goto dropfrag;
		}
		if (ecn0 != IPTOS_ECN_CE)
			q6->ip6q_ecn = IPTOS_ECN_CE;
	}
	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
		free(ip6af, M_FRAG6);
		goto dropfrag;
	}
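	/*
	 * In summary: a CE mark is sticky (once any fragment carries CE,
	 * the reassembled packet will), while a Not-ECT fragment arriving
	 * for an ECN-capable queue, or a CE fragment arriving for a
	 * Not-ECT queue, causes the arriving fragment to be dropped
	 * (cf. the reassembly rules of RFC 3168).
	 */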
	/* Find a fragmented part which begins after this one does. */
	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
	     af6 = af6->ip6af_down)
		if (af6->ip6af_off > ip6af->ip6af_off)
			break;
	/*
	 * If the incoming fragment overlaps some existing fragments in
	 * the reassembly queue, drop both the new fragment and the
	 * entire reassembly queue.  However, if the new fragment
	 * is an exact duplicate of an existing fragment, only silently
	 * drop the existing fragment and leave the fragmentation queue
	 * unchanged, as allowed by the RFC.  (RFC 8200, 4.5)
	 */
	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
		    - ip6af->ip6af_off;
		if (i > 0) {
			free(ip6af, M_FRAG6);
			goto dropfrag;
		}
	}
	if (af6 != (struct ip6asfrag *)q6) {
		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
		if (i > 0) {
			free(ip6af, M_FRAG6);
			goto dropfrag;
		}
	}
#ifdef MAC
	mac_ip6q_update(m, q6);
#endif
	/*
	 * Stick new segment in its place; check for complete reassembly.
	 * If not complete, check fragment limit.  Move to front of packet
	 * queue, as we are the most recently active fragmented packet.
	 */
	frag6_enq(ip6af, af6->ip6af_up, bucket);
	atomic_add_int(&frag6_nfrags, 1);
	q6->ip6q_nfrag++;
	next = 0;
	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
	     af6 = af6->ip6af_down) {
		if (af6->ip6af_off != next) {
			if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
				IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag);
				frag6_freef(q6, bucket);
			}
			IP6QB_UNLOCK(bucket);
			return (IPPROTO_DONE);
		}
		next += af6->ip6af_frglen;
	}
	if (af6->ip6af_up->ip6af_mff) {
		if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
			IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag);
			frag6_freef(q6, bucket);
		}
		IP6QB_UNLOCK(bucket);
		return (IPPROTO_DONE);
	}
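	/*
	 * Reaching this point means the scan above found neither a hole
	 * (every af6->ip6af_off matched the running "next" offset) nor a
	 * set "more fragments" bit on the last fragment, so all pieces of
	 * the original packet have arrived.
	 */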
	/* Reassembly is complete; concatenate fragments. */
	ip6af = q6->ip6q_down;
	t = m = IP6_REASS_MBUF(ip6af);
	af6 = ip6af->ip6af_down;
	frag6_deq(ip6af, bucket);
	while (af6 != (struct ip6asfrag *)q6) {
		m->m_pkthdr.csum_flags &=
		    IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
		m->m_pkthdr.csum_data +=
		    IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;

		af6dwn = af6->ip6af_down;
		frag6_deq(af6, bucket);
		while (t->m_next)
			t = t->m_next;
		m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
		m_demote_pkthdr(IP6_REASS_MBUF(af6));
		m_cat(t, IP6_REASS_MBUF(af6));
		free(af6, M_FRAG6);
		af6 = af6dwn;
	}

	while (m->m_pkthdr.csum_data & 0xffff0000)
		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
		    (m->m_pkthdr.csum_data >> 16);
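	/*
	 * The loop above folds the carries that accumulated while summing
	 * the fragments' partial checksums back into the low 16 bits, as
	 * one's-complement arithmetic requires: e.g. a csum_data of
	 * 0x1fffe folds to 0xfffe + 0x1 == 0xffff.
	 */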
	/* Adjust offset to point where the original next header starts. */
	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
	free(ip6af, M_FRAG6);
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
	if (q6->ip6q_ecn == IPTOS_ECN_CE)
		ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
	nxt = q6->ip6q_nxt;
	if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
		frag6_remque(q6, bucket);
		atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
		mac_ip6q_destroy(q6);
#endif
		free(q6, M_FRAG6);
		atomic_subtract_int(&V_frag6_nfragpackets, 1);
		goto dropfrag;
	}
	/* Set nxt (the next header field value) back to the original. */
	m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
	    (caddr_t)&nxt);

	frag6_remque(q6, bucket);
	atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
	mac_ip6q_reassemble(q6, m);
	mac_ip6q_destroy(q6);
#endif
	free(q6, M_FRAG6);
	atomic_subtract_int(&V_frag6_nfragpackets, 1);
	if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
		int plen = 0;
		for (t = m; t; t = t->m_next)
			plen += t->m_len;
		m->m_pkthdr.len = plen;
	}
#ifdef RSS
	mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc),
	    M_NOWAIT);
	if (mtag == NULL)
		goto dropfrag;

	ip6dc = (struct ip6_direct_ctx *)(mtag + 1);
	ip6dc->ip6dc_nxt = nxt;
	ip6dc->ip6dc_off = offset;

	m_tag_prepend(m, mtag);
#endif
	IP6QB_UNLOCK(bucket);
	IP6STAT_INC(ip6s_reassembled);
	in6_ifstat_inc(dstifp, ifs6_reass_ok);

#ifdef RSS
	/* Queue/dispatch for reprocessing. */
	netisr_dispatch(NETISR_IPV6_DIRECT, m);
	return (IPPROTO_DONE);
#endif
	/* Tell launch routine the next header. */
	*mp = m;
	*offp = offset;

	return (nxt);

dropfrag:
	IP6QB_UNLOCK(bucket);
	in6_ifstat_inc(dstifp, ifs6_reass_fail);
	IP6STAT_INC(ip6s_fragdropped);
	m_freem(m);
	return (IPPROTO_DONE);
}
/*
 * IPv6 reassembly timer processing;
 * if a timer expires on a reassembly queue, discard it.
 */
void
frag6_slowtimo(void)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct ip6q *head, *q6;
	uint32_t bucket;

	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
			IP6QB_LOCK(bucket);
			head = IP6QB_HEAD(bucket);
			q6 = head->ip6q_next;
			if (q6 == NULL) {
				/*
				 * XXXJTL: This should never happen. This
				 * should turn into an assertion.
				 */
				IP6QB_UNLOCK(bucket);
				continue;
			}
			while (q6 != head) {
				--q6->ip6q_ttl;
				q6 = q6->ip6q_next;
				if (q6->ip6q_prev->ip6q_ttl == 0) {
					IP6STAT_ADD(ip6s_fragtimeout,
					    q6->ip6q_prev->ip6q_nfrag);
					/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
					frag6_freef(q6->ip6q_prev, bucket);
				}
			}
			/*
			 * If we are over the maximum number of fragments
			 * (due to the limit being lowered), drain off
			 * enough to get down to the new limit.
			 * Note that we drain all reassembly queues if
			 * maxfragpackets is 0 (fragmentation is disabled),
			 * and do not enforce a limit when maxfragpackets
			 * is negative.
			 */
			while ((V_ip6_maxfragpackets == 0 ||
			    (V_ip6_maxfragpackets > 0 &&
			    V_ip6qb[bucket].count > V_ip6_maxfragbucketsize)) &&
			    head->ip6q_prev != head) {
				IP6STAT_ADD(ip6s_fragoverflow,
				    q6->ip6q_prev->ip6q_nfrag);
				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
				frag6_freef(head->ip6q_prev, bucket);
			}
			IP6QB_UNLOCK(bucket);
		}
		/*
		 * If we are still over the maximum number of fragmented
		 * packets, drain off enough to get down to the new limit.
		 */
		bucket = 0;
		while (V_ip6_maxfragpackets >= 0 &&
		    atomic_load_int(&V_frag6_nfragpackets) >
		    (u_int)V_ip6_maxfragpackets) {
			IP6QB_LOCK(bucket);
			head = IP6QB_HEAD(bucket);
			if (head->ip6q_prev != head) {
				IP6STAT_ADD(ip6s_fragoverflow,
				    q6->ip6q_prev->ip6q_nfrag);
				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
				frag6_freef(head->ip6q_prev, bucket);
			}
			IP6QB_UNLOCK(bucket);
			bucket = (bucket + 1) % IP6REASS_NHASH;
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
 * Eventhandler to adjust limits in case nmbclusters changes.
 */
static void
frag6_change(void *tag)
{
	VNET_ITERATOR_DECL(vnet_iter);

	ip6_maxfrags = IP6_MAXFRAGS;
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
		frag6_set_bucketsize();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
 * Initialise reassembly queue and fragment identifier.
 */
void
frag6_init(void)
{
	struct ip6q *q6;
	uint32_t bucket;

	V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
	frag6_set_bucketsize();
	for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
		q6 = IP6QB_HEAD(bucket);
		q6->ip6q_next = q6->ip6q_prev = q6;
		mtx_init(&V_ip6qb[bucket].lock, "ip6qlock", NULL, MTX_DEF);
		V_ip6qb[bucket].count = 0;
	}
	V_ip6qb_hashseed = arc4random();
	V_ip6_maxfragsperpacket = 64;
	if (!IS_DEFAULT_VNET(curvnet))
		return;

	ip6_maxfrags = IP6_MAXFRAGS;
	EVENTHANDLER_REGISTER(nmbclusters_change,
	    frag6_change, NULL, EVENTHANDLER_PRI_ANY);
}
/*
 * Drain off all datagram fragments.
 */
void
frag6_drain(void)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct ip6q *head;
	uint32_t bucket;

	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
			if (IP6QB_TRYLOCK(bucket) == 0)
				continue;
			head = IP6QB_HEAD(bucket);
			while (head->ip6q_next != head) {
				IP6STAT_INC(ip6s_fragdropped);
				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
				frag6_freef(head->ip6q_next, bucket);
			}
			IP6QB_UNLOCK(bucket);
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
 * Put an IPv6 fragment on a reassembly chain.
 * Like insque, but pointers in middle of structure.
 */
static void
frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6,
    uint32_t bucket __unused)
{

	IP6QB_LOCK_ASSERT(bucket);

	af6->ip6af_up = up6;
	af6->ip6af_down = up6->ip6af_down;
	up6->ip6af_down->ip6af_up = af6;
	up6->ip6af_down = af6;
}
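/*
 * After frag6_enq(af6, up6, ...) the chain reads
 * up6 <-> af6 <-> (old up6->ip6af_down); i.e. af6 is spliced in
 * immediately below up6 in the down direction.
 */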
/*
 * To frag6_enq as remque is to insque.
 */
static void
frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused)
{

	IP6QB_LOCK_ASSERT(bucket);

	af6->ip6af_up->ip6af_down = af6->ip6af_down;
	af6->ip6af_down->ip6af_up = af6->ip6af_up;
}
static void
frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket)
{

	IP6QB_LOCK_ASSERT(bucket);
	KASSERT(IP6QB_HEAD(bucket) == old,
	    ("%s: attempt to insert at head of wrong bucket"
	    " (bucket=%u, old=%p)", __func__, bucket, old));

	new->ip6q_prev = old;
	new->ip6q_next = old->ip6q_next;
	old->ip6q_next->ip6q_prev = new;
	old->ip6q_next = new;
	V_ip6qb[bucket].count++;
}
static void
frag6_remque(struct ip6q *p6, uint32_t bucket)
{

	IP6QB_LOCK_ASSERT(bucket);

	p6->ip6q_prev->ip6q_next = p6->ip6q_next;
	p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
	V_ip6qb[bucket].count--;
}