1 /******************************************************************************
3 Copyright (c) 2007-2008, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
43 #include <net/ethernet.h>
44 #include <net/if_media.h>
46 #include <netinet/in_systm.h>
47 #include <netinet/in.h>
48 #include <netinet/ip.h>
49 #include <netinet/tcp.h>
51 #include <machine/bus.h>
52 #include <machine/in_cksum.h>
54 #include <dev/mxge/mxge_mcp.h>
55 #include <dev/mxge/if_mxge_var.h>
58 /* Assume len is a multiple of 4 */
/*
 * mxge_csum_generic: checksum helper over the 16-bit words at raw.
 * The visible callers pass byte counts for len (sizeof (*ip), tcp_hdr_len).
 * Only the tail of the routine is visible in this excerpt: the accumulated
 * sum is folded to 16 bits and returned as a uint16_t.  Callers compare
 * (result ^ 0xffff) against 0 to validate an IP header.
 * NOTE(review): the accumulation loop and the declaration of csum are not
 * visible here; confirm the width of csum against the full file.
 */
60 mxge_csum_generic(uint16_t *raw, int len)
/*
 * Fold the high 16 bits into the low 16 bits.  The fold is done twice so
 * that a carry produced by the first addition is absorbed by the second.
 */
71 csum = (csum >> 16) + (csum & 0xffff);
72 csum = (csum >> 16) + (csum & 0xffff);
73 return (uint16_t)csum;
/*
 * mxge_lro_flush: hand the mbuf chain collected in lro to the interface
 * input routine and return the entry to the slice's free list.
 *
 *   ss  - slice state; supplies the softc (ss->sc), the lro_queued counter
 *         and the lro_free list.
 *   lro - the LRO entry being flushed.
 *
 * When lro->append_cnt is non-zero the headers of the first packet are
 * rewritten to describe the merged packet: the IP total length, the mbuf
 * checksum flags and packet length, the TCP ack/window, the TCP timestamp
 * words (if lro->timestamp is set) and the TCP checksum.
 * NOTE(review): the closing brace of that conditional is not visible in
 * this excerpt, so exactly which statements it guards should be confirmed
 * against the full file.
 */
78 mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
80 mxge_softc_t *mgp = ss->sc;
85 uint32_t tcplen, tcp_csum;
87 if (lro->append_cnt) {
88 /* incorporate the new len into the ip header and
89 * re-calculate the checksum */
/* lro->len counts from the Ethernet header, so strip it for ip_len */
91 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
94 mxge_csum_generic((uint16_t*)ip,
/*
 * Presumably tells the stack that the IP and TCP checksums of the merged
 * packet have already been verified; see mbuf(9) for the CSUM_* flags.
 */
97 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
98 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
99 lro->m_head->m_pkthdr.csum_data = 0xffff;
100 lro->m_head->m_pkthdr.len = lro->len;
102 /* incorporate the latest ack into the tcp header */
103 tcp = (struct tcphdr *) (ip + 1);
104 tcp->th_ack = lro->ack_seq;
105 tcp->th_win = lro->window;
106 /* incorporate latest timestamp into the tcp header */
107 if (lro->timestamp) {
108 ts_ptr = (uint32_t *)(tcp + 1);
/* tsval is converted with htonl here; tsecr is written back unconverted */
109 ts_ptr[1] = htonl(lro->tsval);
110 ts_ptr[2] = lro->tsecr;
113 * update checksum in tcp header by re-calculating the
114 * tcp pseudoheader checksum, and adding it to the checksum
115 * of the tcp payload data
/* tcplen = TCP header plus payload: total minus IP and Ethernet headers */
118 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
119 tcp_csum = lro->data_csum;
120 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
121 htons(tcplen + IPPROTO_TCP));
122 tcp_csum += mxge_csum_generic((uint16_t*)tcp,
/* fold twice so the carry from the first fold is absorbed */
124 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
125 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
/*
 * NOTE(review): this IOLog call looks like a debugging aid; whether it is
 * compiled in cannot be determined from this excerpt.
 */
127 IOLog("pseudo = 0x%x, generic = 0x%x, sum = %x\n",
128 in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
129 htons(tcplen + IPPROTO_TCP)),
130 mxge_csum_generic((uint16_t*)tcp,
132 htons(0xffff ^ tcp_csum));
/* the TCP checksum field holds the one's complement of the folded sum */
134 tcp->th_sum = 0xffff ^ tcp_csum;
137 (*ifp->if_input)(mgp->ifp, lro->m_head);
/* count the first packet plus every segment appended to it */
138 ss->lro_queued += lro->append_cnt + 1;
/* put the entry at the head of the slice's free list */
143 SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
/*
 * mxge_lro_rx: try to merge a received frame into an LRO entry.
 *
 *   ss     - slice state holding the lro_active and lro_free lists.
 *   m_head - first mbuf of the received frame, starting at the Ethernet
 *            header.
 *   csum   - checksum supplied by the caller for the frame; the comments
 *            in the body refer to it as the hardware checksum.
 *
 * Checks visible in this excerpt: the frame must be IPv4 carrying TCP, the
 * IP header must have no options, the packet must not be fragmented, the
 * IP header checksum must verify, no TCP flags other than ACK and PSH may
 * be set, and the only TCP option accepted is the aligned timestamp option.
 *
 * Frames padded beyond the IP length are trimmed.  The active list is then
 * searched for an entry with the same ports and addresses.  A segment
 * whose sequence number is not the expected one causes that entry to be
 * removed from the active list and flushed.  Otherwise the entry's
 * sequence, ack, window, timestamp, payload checksum, length and mbuf
 * chain are updated, and the entry is flushed once its length exceeds
 * 65535 - if_mtu.  The code after the search takes an entry from the free
 * list, moves it to the active list and seeds it from this frame.
 *
 * NOTE(review): the return statements and several branch bodies are not
 * visible in this excerpt, so the return-value contract is not described
 * here; confirm it against the full file.
 */
147 mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
149 struct ether_header *eh;
153 struct mbuf *m_nxt, *m_tail;
154 struct lro_entry *lro;
155 int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
157 uint32_t seq, tmp_csum, device_mtu;
159 eh = mtod(m_head, struct ether_header *);
160 if (eh->ether_type != htons(ETHERTYPE_IP))
162 ip = (struct ip *) (eh + 1);
163 if (ip->ip_p != IPPROTO_TCP)
166 /* ensure there are no options */
167 if ((ip->ip_hl << 2) != sizeof (*ip))
170 /* .. and the packet is not fragmented */
171 if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
174 /* verify that the IP header checksum is correct */
175 tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
176 if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
181 /* find the TCP header */
182 tcp = (struct tcphdr *) (ip + 1);
184 /* ensure no bits set besides ack or psh */
185 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
188 /* check for timestamps. Since the only option we handle are
189 timestamps, we only have to handle the simple case of
190 aligned timestamps */
192 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
193 tcp_hdr_len = sizeof (*tcp) + opt_bytes;
194 ts_ptr = (uint32_t *)(tcp + 1);
195 if (opt_bytes != 0) {
/*
 * The first option word is compared against NOP, NOP, TIMESTAMP, length.
 * ntohl is applied to a host-order constant here; it performs the same
 * byte swap that htonl would.
 */
196 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
197 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
/* payload bytes = IP total length minus the TCP and IP header lengths */
201 ip_len = ntohs(ip->ip_len);
202 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
206 * If frame is padded beyond the end of the IP packet,
207 * then we must trim the extra bytes off the end.
/* trim = bytes in the mbuf packet beyond Ethernet header + IP datagram */
209 tot_len = m_head->m_pkthdr.len;
210 trim = tot_len - (ip_len + ETHER_HDR_LEN);
213 /* truncated packet */
/* a negative count makes m_adj remove bytes from the end of the chain */
216 m_adj(m_head, -trim);
217 tot_len = m_head->m_pkthdr.len;
221 m_tail = NULL; /* -Wuninitialized */
222 while (m_nxt != NULL) {
224 m_nxt = m_tail->m_next;
/*
 * hlen works out to the Ethernet, IP and TCP header bytes that precede
 * the payload (frame length minus payload length).
 */
227 hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
228 seq = ntohl(tcp->th_seq);
230 SLIST_FOREACH(lro, &ss->lro_active, next) {
231 if (lro->source_port == tcp->th_sport &&
232 lro->dest_port == tcp->th_dport &&
233 lro->source_ip == ip->ip_src.s_addr &&
234 lro->dest_ip == ip->ip_dst.s_addr) {
235 /* Try to append it */
237 if (__predict_false(seq != lro->next_seq)) {
238 /* out of order packet */
239 SLIST_REMOVE(&ss->lro_active, lro,
241 mxge_lro_flush(ss, lro);
/*
 * NOTE(review): the comparison below is a plain greater-than on the
 * timestamp values; confirm whether timestamp wraparound needs handling.
 */
246 uint32_t tsval = ntohl(*(ts_ptr + 1));
247 /* make sure timestamp values are increasing */
248 if (__predict_false(lro->tsval > tsval ||
249 *(ts_ptr + 2) == 0)) {
253 lro->tsecr = *(ts_ptr + 2);
/*
 * Advance the expected sequence number and remember the newest ack and
 * window; mxge_lro_flush copies them into the TCP header.
 */
256 lro->next_seq += tcp_data_len;
257 lro->ack_seq = tcp->th_ack;
258 lro->window = tcp->th_win;
260 if (tcp_data_len == 0) {
264 /* subtract off the checksum of the tcp header
265 * from the hardware checksum, and add it to the
266 * stored tcp data checksum. Byteswap the checksum
267 * if the total length so far is odd
269 tmp_csum = mxge_csum_generic((uint16_t*)tcp,
271 csum = csum + (tmp_csum ^ 0xffff);
272 csum = (csum & 0xffff) + (csum >> 16);
273 csum = (csum & 0xffff) + (csum >> 16);
274 if (lro->len & 0x1) {
275 /* Odd number of bytes so far, flip bytes */
276 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
/* accumulate into the running payload checksum, folding carries twice */
278 csum = csum + lro->data_csum;
279 csum = (csum & 0xffff) + (csum >> 16);
280 csum = (csum & 0xffff) + (csum >> 16);
281 lro->data_csum = csum;
283 lro->len += tcp_data_len;
285 /* adjust mbuf so that m->m_data points to
286 the first byte of the payload */
288 /* append mbuf chain */
289 lro->m_tail->m_next = m_head;
290 /* advance the last pointer */
291 lro->m_tail = m_tail;
292 /* flush packet if required */
293 device_mtu = ss->sc->ifp->if_mtu;
/* flush once less than one MTU of room remains below 65535 bytes */
294 if (lro->len > (65535 - device_mtu)) {
295 SLIST_REMOVE(&ss->lro_active, lro,
297 mxge_lro_flush(ss, lro);
/* starting a new chain needs an unused entry from the free list */
303 if (SLIST_EMPTY(&ss->lro_free))
306 /* start a new chain */
307 lro = SLIST_FIRST(&ss->lro_free);
308 SLIST_REMOVE_HEAD(&ss->lro_free, next);
309 SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
310 lro->source_port = tcp->th_sport;
311 lro->dest_port = tcp->th_dport;
312 lro->source_ip = ip->ip_src.s_addr;
313 lro->dest_ip = ip->ip_dst.s_addr;
314 lro->next_seq = seq + tcp_data_len;
/* mss records the payload size of the segment that started the chain */
315 lro->mss = tcp_data_len;
316 lro->ack_seq = tcp->th_ack;
317 lro->window = tcp->th_win;
319 /* save the checksum of just the TCP payload by
320 * subtracting off the checksum of the TCP header from
321 * the entire hardware checksum
322 * Since IP header checksum is correct, checksum over
323 * the IP header is -0. Subtracting -0 is unnecessary.
325 tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len);
326 csum = csum + (tmp_csum ^ 0xffff);
327 csum = (csum & 0xffff) + (csum >> 16);
328 csum = (csum & 0xffff) + (csum >> 16);
329 lro->data_csum = csum;
332 /* record timestamp if it is present */
/* tsval is stored after ntohl; tsecr is stored exactly as received */
335 lro->tsval = ntohl(*(ts_ptr + 1));
336 lro->tsecr = *(ts_ptr + 2);
/* keep both ends of the chain; later segments are linked after m_tail */
339 lro->m_head = m_head;
340 lro->m_tail = m_tail;
344 This file uses Myri10GE driver indentation.