/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2007, Myricom Inc.
5 * Copyright (c) 2008, Intel Corporation.
6 * Copyright (c) 2012 The FreeBSD Foundation
7 * Copyright (c) 2016 Mellanox Technologies.
10 * Portions of this software were developed by Bjoern Zeeb
11 * under sponsorship from the FreeBSD Foundation.
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
39 #include "opt_inet6.h"
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
50 #include <net/if_var.h>
51 #include <net/ethernet.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/in.h>
56 #include <netinet/ip6.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip_var.h>
59 #include <netinet/tcp.h>
60 #include <netinet/tcp_seq.h>
61 #include <netinet/tcp_lro.h>
62 #include <netinet/tcp_var.h>
64 #include <netinet6/ip6_var.h>
66 #include <machine/in_cksum.h>
68 static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures");
70 #define TCP_LRO_UPDATE_CSUM 1
71 #ifndef TCP_LRO_UPDATE_CSUM
72 #define TCP_LRO_INVALID_CSUM 0x0000
75 static void tcp_lro_rx_done(struct lro_ctrl *lc);
76 static int tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m,
77 uint32_t csum, int use_hash);
79 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
82 static unsigned tcp_lro_entries = TCP_LRO_ENTRIES;
83 SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, entries,
84 CTLFLAG_RDTUN | CTLFLAG_MPSAFE, &tcp_lro_entries, 0,
85 "default number of LRO entries");
88 tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_head *bucket,
92 LIST_INSERT_HEAD(&lc->lro_active, le, next);
93 LIST_INSERT_HEAD(bucket, le, hash_next);
97 tcp_lro_active_remove(struct lro_entry *le)
100 LIST_REMOVE(le, next); /* active list */
101 LIST_REMOVE(le, hash_next); /* hash bucket */
105 tcp_lro_init(struct lro_ctrl *lc)
107 return (tcp_lro_init_args(lc, NULL, tcp_lro_entries, 0));
111 tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
112 unsigned lro_entries, unsigned lro_mbufs)
114 struct lro_entry *le;
116 unsigned i, elements;
118 lc->lro_bad_csum = 0;
121 lc->lro_mbuf_count = 0;
122 lc->lro_mbuf_max = lro_mbufs;
123 lc->lro_cnt = lro_entries;
124 lc->lro_ackcnt_lim = TCP_LRO_ACKCNT_MAX;
125 lc->lro_length_lim = TCP_LRO_LENGTH_MAX;
127 LIST_INIT(&lc->lro_free);
128 LIST_INIT(&lc->lro_active);
130 /* create hash table to accelerate entry lookup */
131 if (lro_entries > lro_mbufs)
132 elements = lro_entries;
134 elements = lro_mbufs;
135 lc->lro_hash = phashinit_flags(elements, M_LRO, &lc->lro_hashsz,
137 if (lc->lro_hash == NULL) {
138 memset(lc, 0, sizeof(*lc));
142 /* compute size to allocate */
143 size = (lro_mbufs * sizeof(struct lro_mbuf_sort)) +
144 (lro_entries * sizeof(*le));
145 lc->lro_mbuf_data = (struct lro_mbuf_sort *)
146 malloc(size, M_LRO, M_NOWAIT | M_ZERO);
148 /* check for out of memory */
149 if (lc->lro_mbuf_data == NULL) {
150 free(lc->lro_hash, M_LRO);
151 memset(lc, 0, sizeof(*lc));
154 /* compute offset for LRO entries */
155 le = (struct lro_entry *)
156 (lc->lro_mbuf_data + lro_mbufs);
158 /* setup linked list */
159 for (i = 0; i != lro_entries; i++)
160 LIST_INSERT_HEAD(&lc->lro_free, le + i, next);
166 tcp_lro_free(struct lro_ctrl *lc)
168 struct lro_entry *le;
171 /* reset LRO free list */
172 LIST_INIT(&lc->lro_free);
174 /* free active mbufs, if any */
175 while ((le = LIST_FIRST(&lc->lro_active)) != NULL) {
176 tcp_lro_active_remove(le);
180 /* free hash table */
181 free(lc->lro_hash, M_LRO);
185 /* free mbuf array, if any */
186 for (x = 0; x != lc->lro_mbuf_count; x++)
187 m_freem(lc->lro_mbuf_data[x].mb);
188 lc->lro_mbuf_count = 0;
190 /* free allocated memory, if any */
191 free(lc->lro_mbuf_data, M_LRO);
192 lc->lro_mbuf_data = NULL;
195 #ifdef TCP_LRO_UPDATE_CSUM
197 tcp_lro_csum_th(struct tcphdr *th)
202 ch = th->th_sum = 0x0000;
213 ch = (ch >> 16) + (ch & 0xffff);
215 return (ch & 0xffff);
219 tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
220 uint16_t tcp_data_len, uint16_t csum)
227 /* Remove length from checksum. */
228 switch (le->eh_type) {
234 ip6 = (struct ip6_hdr *)l3hdr;
235 if (le->append_cnt == 0)
240 cx = ntohs(ip6->ip6_plen);
241 cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0);
251 ip4 = (struct ip *)l3hdr;
252 if (le->append_cnt == 0)
255 cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4),
257 cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr,
264 cs = 0; /* Keep compiler happy. */
270 /* Remove TCP header csum. */
271 cs = ~tcp_lro_csum_th(th);
274 c = (c >> 16) + (c & 0xffff);
281 tcp_lro_rx_done(struct lro_ctrl *lc)
283 struct lro_entry *le;
285 while ((le = LIST_FIRST(&lc->lro_active)) != NULL) {
286 tcp_lro_active_remove(le);
287 tcp_lro_flush(lc, le);
292 tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
294 struct lro_entry *le, *le_tmp;
297 if (LIST_EMPTY(&lc->lro_active))
301 timevalsub(&tv, timeout);
302 LIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
303 if (timevalcmp(&tv, &le->mtime, >=)) {
304 tcp_lro_active_remove(le);
305 tcp_lro_flush(lc, le);
311 tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
314 if (le->append_cnt > 0) {
318 p_len = htons(le->p_len);
319 switch (le->eh_type) {
326 ip6->ip6_plen = p_len;
327 th = (struct tcphdr *)(ip6 + 1);
328 le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
330 le->p_len += ETHER_HDR_LEN + sizeof(*ip6);
338 #ifdef TCP_LRO_UPDATE_CSUM
344 #ifdef TCP_LRO_UPDATE_CSUM
345 /* Fix IP header checksum for new length. */
351 cl = (cl >> 16) + (cl & 0xffff);
355 ip4->ip_sum = TCP_LRO_INVALID_CSUM;
358 th = (struct tcphdr *)(ip4 + 1);
359 le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
360 CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID;
361 le->p_len += ETHER_HDR_LEN;
366 th = NULL; /* Keep compiler happy. */
368 le->m_head->m_pkthdr.csum_data = 0xffff;
369 le->m_head->m_pkthdr.len = le->p_len;
371 /* Incorporate the latest ACK into the TCP header. */
372 th->th_ack = le->ack_seq;
373 th->th_win = le->window;
374 /* Incorporate latest timestamp into the TCP header. */
375 if (le->timestamp != 0) {
378 ts_ptr = (uint32_t *)(th + 1);
379 ts_ptr[1] = htonl(le->tsval);
380 ts_ptr[2] = le->tsecr;
382 #ifdef TCP_LRO_UPDATE_CSUM
383 /* Update the TCP header checksum. */
384 le->ulp_csum += p_len;
385 le->ulp_csum += tcp_lro_csum_th(th);
386 while (le->ulp_csum > 0xffff)
387 le->ulp_csum = (le->ulp_csum >> 16) +
388 (le->ulp_csum & 0xffff);
389 th->th_sum = (le->ulp_csum & 0xffff);
390 th->th_sum = ~th->th_sum;
392 th->th_sum = TCP_LRO_INVALID_CSUM;
396 le->m_head->m_pkthdr.lro_nsegs = le->append_cnt + 1;
397 (*lc->ifp->if_input)(lc->ifp, le->m_head);
398 lc->lro_queued += le->append_cnt + 1;
400 bzero(le, sizeof(*le));
401 LIST_INSERT_HEAD(&lc->lro_free, le, next);
404 #ifdef HAVE_INLINE_FLSLL
405 #define tcp_lro_msb_64(x) (1ULL << (flsll(x) - 1))
407 static inline uint64_t
408 tcp_lro_msb_64(uint64_t x)
416 return (x & ~(x >> 1));
421 * The tcp_lro_sort() routine is comparable to qsort(), except it has
422 * a worst case complexity limit of O(MIN(N,64)*N), where N is the
423 * number of elements to sort and 64 is the number of sequence bits
424 * available. The algorithm is bit-slicing the 64-bit sequence number,
425 * sorting one bit at a time from the most significant bit until the
426 * least significant one, skipping the constant bits. This is
427 * typically called a radix sort.
430 tcp_lro_sort(struct lro_mbuf_sort *parray, uint32_t size)
432 struct lro_mbuf_sort temp;
439 /* for small arrays insertion sort is faster */
441 for (x = 1; x < size; x++) {
443 for (y = x; y > 0 && temp.seq < parray[y - 1].seq; y--)
444 parray[y] = parray[y - 1];
450 /* compute sequence bits which are constant */
453 for (x = 0; x != size; x++) {
454 ones |= parray[x].seq;
455 zeros |= ~parray[x].seq;
458 /* compute bits which are not constant into "ones" */
463 /* pick the most significant bit which is not constant */
464 ones = tcp_lro_msb_64(ones);
467 * Move entries having cleared sequence bits to the beginning
470 for (x = y = 0; y != size; y++) {
472 if (parray[y].seq & ones)
476 parray[x] = parray[y];
481 KASSERT(x != 0 && x != size, ("Memory is corrupted\n"));
484 tcp_lro_sort(parray, x);
493 tcp_lro_flush_all(struct lro_ctrl *lc)
499 /* check if no mbufs to flush */
500 if (lc->lro_mbuf_count == 0)
503 /* sort all mbufs according to stream */
504 tcp_lro_sort(lc->lro_mbuf_data, lc->lro_mbuf_count);
506 /* input data into LRO engine, stream by stream */
508 for (x = 0; x != lc->lro_mbuf_count; x++) {
512 mb = lc->lro_mbuf_data[x].mb;
514 /* get sequence number, masking away the packet index */
515 nseq = lc->lro_mbuf_data[x].seq & (-1ULL << 24);
517 /* check for new stream */
521 /* flush active streams */
525 /* add packet to LRO engine */
526 if (tcp_lro_rx2(lc, mb, 0, 0) != 0) {
527 /* input packet to network layer */
528 (*lc->ifp->if_input)(lc->ifp, mb);
534 /* flush active streams */
537 lc->lro_mbuf_count = 0;
542 tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
546 /* XXX-BZ we should check the flow-label. */
548 /* XXX-BZ We do not yet support ext. hdrs. */
549 if (ip6->ip6_nxt != IPPROTO_TCP)
550 return (TCP_LRO_NOT_SUPPORTED);
552 /* Find the TCP header. */
553 *th = (struct tcphdr *)(ip6 + 1);
561 tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
567 if (ip4->ip_p != IPPROTO_TCP)
568 return (TCP_LRO_NOT_SUPPORTED);
570 /* Ensure there are no options. */
571 if ((ip4->ip_hl << 2) != sizeof (*ip4))
572 return (TCP_LRO_CANNOT);
574 /* .. and the packet is not fragmented. */
575 if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
576 return (TCP_LRO_CANNOT);
578 /* Legacy IP has a header checksum that needs to be correct. */
579 csum_flags = m->m_pkthdr.csum_flags;
580 if (csum_flags & CSUM_IP_CHECKED) {
581 if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
583 return (TCP_LRO_CANNOT);
586 csum = in_cksum_hdr(ip4);
587 if (__predict_false((csum) != 0)) {
589 return (TCP_LRO_CANNOT);
593 /* Find the TCP header (we assured there are no IP options). */
594 *th = (struct tcphdr *)(ip4 + 1);
601 tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
603 struct lro_entry *le;
604 struct ether_header *eh;
606 struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */
609 struct ip *ip4 = NULL; /* Keep compiler happy. */
612 void *l3hdr = NULL; /* Keep compiler happy. */
615 int error, ip_len, l;
616 uint16_t eh_type, tcp_data_len;
617 struct lro_head *bucket;
620 /* We expect a contiguous header [eh, ip, tcp]. */
622 eh = mtod(m, struct ether_header *);
623 eh_type = ntohs(eh->ether_type);
628 CURVNET_SET(lc->ifp->if_vnet);
629 if (V_ip6_forwarding != 0) {
630 /* XXX-BZ stats but changing lro_ctrl is a problem. */
632 return (TCP_LRO_CANNOT);
635 l3hdr = ip6 = (struct ip6_hdr *)(eh + 1);
636 error = tcp_lro_rx_ipv6(lc, m, ip6, &th);
639 tcp_data_len = ntohs(ip6->ip6_plen);
640 ip_len = sizeof(*ip6) + tcp_data_len;
647 CURVNET_SET(lc->ifp->if_vnet);
648 if (V_ipforwarding != 0) {
649 /* XXX-BZ stats but changing lro_ctrl is a problem. */
651 return (TCP_LRO_CANNOT);
654 l3hdr = ip4 = (struct ip *)(eh + 1);
655 error = tcp_lro_rx_ipv4(lc, m, ip4, &th);
658 ip_len = ntohs(ip4->ip_len);
659 tcp_data_len = ip_len - sizeof(*ip4);
663 /* XXX-BZ what happens in case of VLAN(s)? */
665 return (TCP_LRO_NOT_SUPPORTED);
669 * If the frame is padded beyond the end of the IP packet, then we must
670 * trim the extra bytes off.
672 l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len);
675 /* Truncated packet. */
676 return (TCP_LRO_CANNOT);
682 * Check TCP header constraints.
684 /* Ensure no bits set besides ACK or PSH. */
685 if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) {
686 if (th->th_flags & TH_SYN)
687 return (TCP_LRO_CANNOT);
689 * Make sure that previously seen segements/ACKs are delivered
690 * before this segement, e.g. FIN.
695 /* XXX-BZ We lose a ACK|PUSH flag concatenating multiple segments. */
696 /* XXX-BZ Ideally we'd flush on PUSH? */
699 * Check for timestamps.
700 * Since the only option we handle are timestamps, we only have to
701 * handle the simple case of aligned timestamps.
703 l = (th->th_off << 2);
706 ts_ptr = (uint32_t *)(th + 1);
707 if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
708 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
709 TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) {
711 * Make sure that previously seen segements/ACKs are delivered
712 * before this segement.
717 /* If the driver did not pass in the checksum, set it now. */
721 seq = ntohl(th->th_seq);
724 bucket = &lc->lro_hash[0];
725 } else if (M_HASHTYPE_ISHASH(m)) {
726 bucket = &lc->lro_hash[m->m_pkthdr.flowid % lc->lro_hashsz];
733 hash = ip4->ip_src.s_addr + ip4->ip_dst.s_addr;
738 hash = ip6->ip6_src.s6_addr32[0] +
739 ip6->ip6_dst.s6_addr32[0];
740 hash += ip6->ip6_src.s6_addr32[1] +
741 ip6->ip6_dst.s6_addr32[1];
742 hash += ip6->ip6_src.s6_addr32[2] +
743 ip6->ip6_dst.s6_addr32[2];
744 hash += ip6->ip6_src.s6_addr32[3] +
745 ip6->ip6_dst.s6_addr32[3];
752 hash += th->th_sport + th->th_dport;
753 bucket = &lc->lro_hash[hash % lc->lro_hashsz];
756 /* Try to find a matching previous segment. */
757 LIST_FOREACH(le, bucket, hash_next) {
758 if (le->eh_type != eh_type)
760 if (le->source_port != th->th_sport ||
761 le->dest_port != th->th_dport)
766 if (bcmp(&le->source_ip6, &ip6->ip6_src,
767 sizeof(struct in6_addr)) != 0 ||
768 bcmp(&le->dest_ip6, &ip6->ip6_dst,
769 sizeof(struct in6_addr)) != 0)
775 if (le->source_ip4 != ip4->ip_src.s_addr ||
776 le->dest_ip4 != ip4->ip_dst.s_addr)
783 /* Timestamps mismatch; this is a FIN, etc */
784 tcp_lro_active_remove(le);
785 tcp_lro_flush(lc, le);
786 return (TCP_LRO_CANNOT);
789 /* Flush now if appending will result in overflow. */
790 if (le->p_len > (lc->lro_length_lim - tcp_data_len)) {
791 tcp_lro_active_remove(le);
792 tcp_lro_flush(lc, le);
796 /* Try to append the new segment. */
797 if (__predict_false(seq != le->next_seq ||
798 (tcp_data_len == 0 &&
799 le->ack_seq == th->th_ack &&
800 le->window == th->th_win))) {
801 /* Out of order packet or duplicate ACK. */
802 tcp_lro_active_remove(le);
803 tcp_lro_flush(lc, le);
804 return (TCP_LRO_CANNOT);
808 uint32_t tsval = ntohl(*(ts_ptr + 1));
809 /* Make sure timestamp values are increasing. */
810 /* XXX-BZ flip and use TSTMP_GEQ macro for this? */
811 if (__predict_false(le->tsval > tsval ||
813 return (TCP_LRO_CANNOT);
815 le->tsecr = *(ts_ptr + 2);
817 if (tcp_data_len || SEQ_GT(ntohl(th->th_ack), ntohl(le->ack_seq))) {
818 le->next_seq += tcp_data_len;
819 le->ack_seq = th->th_ack;
820 le->window = th->th_win;
822 } else if (th->th_ack == le->ack_seq) {
823 le->window = WIN_MAX(le->window, th->th_win);
826 /* no data and old ack */
831 #ifdef TCP_LRO_UPDATE_CSUM
832 le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th,
833 tcp_data_len, ~csum);
836 if (tcp_data_len == 0) {
839 * Flush this LRO entry, if this ACK should not
840 * be further delayed.
842 if (le->append_cnt >= lc->lro_ackcnt_lim) {
843 tcp_lro_active_remove(le);
844 tcp_lro_flush(lc, le);
849 le->p_len += tcp_data_len;
852 * Adjust the mbuf so that m_data points to the first byte of
853 * the ULP payload. Adjust the mbuf to avoid complications and
854 * append new segment to existing mbuf chain.
856 m_adj(m, m->m_pkthdr.len - tcp_data_len);
859 le->m_tail->m_next = m;
860 le->m_tail = m_last(m);
863 * If a possible next full length packet would cause an
864 * overflow, pro-actively flush now.
866 if (le->p_len > (lc->lro_length_lim - lc->ifp->if_mtu)) {
867 tcp_lro_active_remove(le);
868 tcp_lro_flush(lc, le);
870 getmicrotime(&le->mtime);
877 * Nothing to flush, but this segment can not be further
878 * aggregated/delayed.
880 return (TCP_LRO_CANNOT);
883 /* Try to find an empty slot. */
884 if (LIST_EMPTY(&lc->lro_free))
885 return (TCP_LRO_NO_ENTRIES);
887 /* Start a new segment chain. */
888 le = LIST_FIRST(&lc->lro_free);
889 LIST_REMOVE(le, next);
890 tcp_lro_active_insert(lc, bucket, le);
891 getmicrotime(&le->mtime);
893 /* Start filling in details. */
898 le->source_ip6 = ip6->ip6_src;
899 le->dest_ip6 = ip6->ip6_dst;
900 le->eh_type = eh_type;
901 le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6);
907 le->source_ip4 = ip4->ip_src.s_addr;
908 le->dest_ip4 = ip4->ip_dst.s_addr;
909 le->eh_type = eh_type;
910 le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
914 le->source_port = th->th_sport;
915 le->dest_port = th->th_dport;
917 le->next_seq = seq + tcp_data_len;
918 le->ack_seq = th->th_ack;
919 le->window = th->th_win;
922 le->tsval = ntohl(*(ts_ptr + 1));
923 le->tsecr = *(ts_ptr + 2);
926 #ifdef TCP_LRO_UPDATE_CSUM
928 * Do not touch the csum of the first packet. However save the
929 * "adjusted" checksum of just the source and destination addresses,
930 * the next header and the TCP payload. The length and TCP header
931 * parts may change, so we remove those from the saved checksum and
932 * re-add with final values on tcp_lro_flush() if needed.
934 KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n",
935 __func__, le, le->ulp_csum));
937 le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
939 th->th_sum = csum; /* Restore checksum on first packet. */
943 le->m_tail = m_last(m);
949 tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
952 return tcp_lro_rx2(lc, m, csum, 1);
956 tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
959 if (__predict_false(lc->ifp == NULL || lc->lro_mbuf_data == NULL ||
960 lc->lro_mbuf_max == 0)) {
966 /* check if packet is not LRO capable */
967 if (__predict_false(mb->m_pkthdr.csum_flags == 0 ||
968 (lc->ifp->if_capenable & IFCAP_LRO) == 0)) {
970 /* input packet to network layer */
971 (*lc->ifp->if_input) (lc->ifp, mb);
975 /* create sequence number */
976 lc->lro_mbuf_data[lc->lro_mbuf_count].seq =
977 (((uint64_t)M_HASHTYPE_GET(mb)) << 56) |
978 (((uint64_t)mb->m_pkthdr.flowid) << 24) |
979 ((uint64_t)lc->lro_mbuf_count);
982 lc->lro_mbuf_data[lc->lro_mbuf_count].mb = mb;
984 /* flush if array is full */
985 if (__predict_false(++lc->lro_mbuf_count == lc->lro_mbuf_max))
986 tcp_lro_flush_all(lc);