1 /******************************************************************************
3 Copyright (c) 2007-2008, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
43 #include <net/ethernet.h>
44 #include <net/if_media.h>
46 #include <netinet/in_systm.h>
47 #include <netinet/in.h>
48 #include <netinet/ip.h>
49 #include <netinet/tcp.h>
51 #include <machine/bus.h>
52 #include <machine/in_cksum.h>
54 #include <dev/mxge/mxge_mcp.h>
55 #include <dev/mxge/if_mxge_var.h>
/*
 * Checksum helper used on the IP and TCP headers in this file.
 * NOTE(review): presumably accumulates the 16-bit words found at raw for
 * len bytes -- confirm against the summation loop.
 * The folded sum is returned as-is, without being complemented; the
 * callers in this file XOR the result with 0xffff themselves.
 */
61 /* Assume len is a multiple of 4 */
63 mxge_csum_generic(uint16_t *raw, int len)
/*
 * Fold the accumulator down to 16 bits: the first pass adds everything
 * above bit 15 into the low 16 bits, the second pass absorbs a carry
 * that the first addition may have produced.
 */
74 csum = (csum >> 16) + (csum & 0xffff);
75 csum = (csum >> 16) + (csum & 0xffff);
/* only the low 16 bits are handed back */
76 return (uint16_t)csum;
/*
 * Finish an LRO entry.  When segments were appended to the head packet
 * (append_cnt != 0), its IP and TCP headers are rewritten to describe the
 * merged packet: total length, ack, window, timestamps and checksums.
 * The head mbuf is handed to the interface input routine and the entry
 * is put back on the slice's free list.
 *
 * ss:  slice state holding the lro_free list and the lro_queued counter
 * lro: entry to flush
 */
81 mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
83 mxge_softc_t *mgp = ss->sc;
88 uint32_t tcplen, tcp_csum;
/* the headers only need patching when something was merged */
90 if (lro->append_cnt) {
91 /* incorporate the new len into the ip header and
92 * re-calculate the checksum */
/* lro->len includes the Ethernet header, ip_len must not */
94 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
97 mxge_csum_generic((uint16_t*)ip,
/*
 * Flag the merged packet's IP and data checksums as already checked,
 * with csum_data set to 0xffff, and make the packet header length
 * match the merged length.
 */
100 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
101 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
102 lro->m_head->m_pkthdr.csum_data = 0xffff;
103 lro->m_head->m_pkthdr.len = lro->len;
105 /* incorporate the latest ack into the tcp header */
/* ack_seq and window were saved straight from a TCP header, so no swap */
106 tcp = (struct tcphdr *) (ip + 1);
107 tcp->th_ack = lro->ack_seq;
108 tcp->th_win = lro->window;
109 /* incorporate latest timestamp into the tcp header */
/*
 * Word 0 of the option area is left alone.  tsval is kept in host
 * order in the entry (it is compared numerically in mxge_lro_rx), hence
 * the htonl(); tsecr is stored exactly as it was read from the packet
 * and is written back unchanged.
 */
110 if (lro->timestamp) {
111 ts_ptr = (uint32_t *)(tcp + 1);
112 ts_ptr[1] = htonl(lro->tsval);
113 ts_ptr[2] = lro->tsecr;
116 * update checksum in tcp header by re-calculating the
117 * tcp pseudoheader checksum, and adding it to the checksum
118 * of the tcp payload data
/* TCP length = merged frame minus the IP header and the Ethernet header */
121 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
/* start from the accumulated payload sum, then add pseudo header and TCP header */
122 tcp_csum = lro->data_csum;
123 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
124 htons(tcplen + IPPROTO_TCP));
125 tcp_csum += mxge_csum_generic((uint16_t*)tcp,
/* fold twice so the carry of the first fold is absorbed as well */
127 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
128 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
/*
 * NOTE(review): diagnostic print of the checksum pieces -- presumably
 * debug-only or compiled out; confirm.
 */
130 IOLog("pseudo = 0x%x, generic = 0x%x, sum = %x\n",
131 in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
132 htons(tcplen + IPPROTO_TCP)),
133 mxge_csum_generic((uint16_t*)tcp,
135 htons(0xffff ^ tcp_csum));
/* the header carries the complement of the folded sum */
137 tcp->th_sum = 0xffff ^ tcp_csum;
/* pass the packet to the interface input routine */
140 (*ifp->if_input)(mgp->ifp, lro->m_head);
/* account for the head packet plus every segment appended to it */
141 ss->lro_queued += lro->append_cnt + 1;
/* the entry becomes available for a new chain */
146 SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
/*
 * Try to merge a received frame into an LRO chain.
 *
 * The frame is examined as Ethernet + IPv4 (no IP options, not
 * fragmented, valid header checksum) + TCP (no flags other than ACK/PSH,
 * no options other than an aligned timestamp).  It is then matched
 * against the slice's active entries by ports and addresses: an in-order
 * segment is appended to the matching entry, otherwise an entry is taken
 * from the free list to start a new chain.
 *
 * ss:     slice state holding the lro_active and lro_free lists
 * m_head: received mbuf chain, starting at the Ethernet header
 * csum:   checksum value for the frame supplied by the caller; the
 *         comments below refer to it as the hardware checksum
 *
 * NOTE(review): the return value and the statements executed when a
 * check fails should be confirmed against the full function body.
 */
150 mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
152 struct ether_header *eh;
156 struct mbuf *m_nxt, *m_tail;
157 struct lro_entry *lro;
158 int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
160 uint32_t seq, tmp_csum, device_mtu;
/* the ethertype is compared in network byte order; only IPv4 passes */
162 eh = mtod(m_head, struct ether_header *);
163 if (eh->ether_type != htons(ETHERTYPE_IP))
/* the IP header directly follows the Ethernet header */
165 ip = (struct ip *) (eh + 1);
166 if (ip->ip_p != IPPROTO_TCP)
169 /* ensure there are no options */
170 if ((ip->ip_hl << 2) != sizeof (*ip))
173 /* .. and the packet is not fragmented */
/* either the more-fragments flag or a non-zero fragment offset trips this */
174 if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
177 /* verify that the IP header checksum is correct */
/* a header with a correct checksum sums to 0xffff */
178 tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
179 if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
184 /* find the TCP header */
185 tcp = (struct tcphdr *) (ip + 1);
187 /* ensure no bits set besides ack or psh */
188 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
191 /* check for timestamps. Since the only option we handle is
192 the timestamp option, we only have to handle the simple case of
193 aligned timestamps */
/* th_off counts 32-bit words; whatever exceeds the fixed header is options */
195 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
196 tcp_hdr_len = sizeof (*tcp) + opt_bytes;
197 ts_ptr = (uint32_t *)(tcp + 1);
/*
 * If options are present they must be exactly TCPOLEN_TSTAMP_APPA bytes
 * and the first option word must be NOP, NOP, TIMESTAMP, length.
 */
198 if (opt_bytes != 0) {
199 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
200 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
/* payload bytes = IP total length minus the TCP header and the IP header */
204 ip_len = ntohs(ip->ip_len);
205 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
209 * If frame is padded beyond the end of the IP packet,
210 * then we must trim the extra bytes off the end.
/* trim = bytes in the mbuf chain beyond Ethernet header + IP packet */
212 tot_len = m_head->m_pkthdr.len;
213 trim = tot_len - (ip_len + ETHER_HDR_LEN);
216 /* truncated packet */
219 m_adj(m_head, -trim);
220 tot_len = m_head->m_pkthdr.len;
/* walk the mbuf chain to its end */
224 m_tail = NULL; /* -Wuninitialized */
225 while (m_nxt != NULL) {
227 m_nxt = m_tail->m_next;
/* hlen = everything in front of the TCP payload (Ethernet, IP and TCP headers) */
230 hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
231 seq = ntohl(tcp->th_seq);
/* look for an active entry with the same ports and addresses */
233 SLIST_FOREACH(lro, &ss->lro_active, next) {
234 if (lro->source_port == tcp->th_sport &&
235 lro->dest_port == tcp->th_dport &&
236 lro->source_ip == ip->ip_src.s_addr &&
237 lro->dest_ip == ip->ip_dst.s_addr) {
238 /* Try to append it */
/* the segment must start exactly where the chain currently ends */
240 if (__predict_false(seq != lro->next_seq)) {
241 /* out of order packet */
242 SLIST_REMOVE(&ss->lro_active, lro,
244 mxge_lro_flush(ss, lro);
/* tsval is compared in host order; a zero echo reply also fails the check */
249 uint32_t tsval = ntohl(*(ts_ptr + 1));
250 /* make sure timestamp values are increasing */
251 if (__predict_false(lro->tsval > tsval ||
252 *(ts_ptr + 2) == 0)) {
/* tsecr is kept exactly as read from the packet, without byte swapping */
256 lro->tsecr = *(ts_ptr + 2);
/* remember the newest ack and window; both stay as read from the header */
259 lro->next_seq += tcp_data_len;
260 lro->ack_seq = tcp->th_ack;
261 lro->window = tcp->th_win;
263 if (tcp_data_len == 0) {
267 /* subtract off the checksum of the tcp header
268 * from the hardware checksum, and add it to the
269 * stored tcp data checksum. Byteswap the checksum
270 * if the total length so far is odd
272 tmp_csum = mxge_csum_generic((uint16_t*)tcp,
274 csum = csum + (tmp_csum ^ 0xffff);
275 csum = (csum & 0xffff) + (csum >> 16);
276 csum = (csum & 0xffff) + (csum >> 16);
277 if (lro->len & 0x1) {
278 /* Odd number of bytes so far, flip bytes */
279 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
281 csum = csum + lro->data_csum;
282 csum = (csum & 0xffff) + (csum >> 16);
283 csum = (csum & 0xffff) + (csum >> 16);
284 lro->data_csum = csum;
/* the merged length grows by the payload only, not by the headers */
286 lro->len += tcp_data_len;
288 /* adjust mbuf so that m->m_data points to
289 the first byte of the payload */
291 /* append mbuf chain */
292 lro->m_tail->m_next = m_head;
293 /* advance the last pointer */
294 lro->m_tail = m_tail;
295 /* flush packet if required */
/*
 * Flush once the merged length exceeds 65535 minus the interface MTU.
 * NOTE(review): presumably so that another MTU-sized frame can never
 * push the length past the 16-bit ip_len field -- confirm.
 */
296 device_mtu = ss->sc->ifp->if_mtu;
297 if (lro->len > (65535 - device_mtu)) {
298 SLIST_REMOVE(&ss->lro_active, lro,
300 mxge_lro_flush(ss, lro);
/* no matching entry: a new chain needs an entry from the free list */
306 if (SLIST_EMPTY(&ss->lro_free))
309 /* start a new chain */
310 lro = SLIST_FIRST(&ss->lro_free);
311 SLIST_REMOVE_HEAD(&ss->lro_free, next);
312 SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
/* ports and addresses are stored as found in the headers (no byte swap) */
313 lro->source_port = tcp->th_sport;
314 lro->dest_port = tcp->th_dport;
315 lro->source_ip = ip->ip_src.s_addr;
316 lro->dest_ip = ip->ip_dst.s_addr;
/* next_seq is kept in host order; mss records this first segment's payload size */
317 lro->next_seq = seq + tcp_data_len;
318 lro->mss = tcp_data_len;
319 lro->ack_seq = tcp->th_ack;
320 lro->window = tcp->th_win;
322 /* save the checksum of just the TCP payload by
323 * subtracting off the checksum of the TCP header from
324 * the entire hardware checksum
325 * Since IP header checksum is correct, checksum over
326 * the IP header is -0. Subtracting -0 is unnecessary.
328 tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len);
329 csum = csum + (tmp_csum ^ 0xffff);
330 csum = (csum & 0xffff) + (csum >> 16);
331 csum = (csum & 0xffff) + (csum >> 16);
332 lro->data_csum = csum;
335 /* record timestamp if it is present */
338 lro->tsval = ntohl(*(ts_ptr + 1));
339 lro->tsecr = *(ts_ptr + 2);
/* the new chain starts out as just this frame's mbufs */
342 lro->m_head = m_head;
343 lro->m_tail = m_tail;
349 This file uses Myri10GE driver indentation.