1 /******************************************************************************
3 Copyright (c) 2007-2008, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
43 #include <net/ethernet.h>
44 #include <net/if_media.h>
46 #include <netinet/in_systm.h>
47 #include <netinet/in.h>
48 #include <netinet/ip.h>
49 #include <netinet/tcp.h>
51 #include <machine/bus.h>
52 #include <machine/in_cksum.h>
54 #include <dev/mxge/mxge_mcp.h>
55 #include <dev/mxge/if_mxge_var.h>
/*
 * Checksum helper over the 16-bit words starting at raw.
 * The part visible here folds the accumulator twice (everything above
 * bit 15 is added back into the low 16 bits; the second fold absorbs a
 * carry the first one may produce) and returns the low 16 bits.
 * NOTE(review): the local declarations and the summation loop are not
 * visible in this excerpt -- confirm how len is consumed in the full file.
 */
61 /* Assume len is a multiple of 4 */
63 mxge_csum_generic(uint16_t *raw, int len)
74 csum = (csum >> 16) + (csum & 0xffff);
75 csum = (csum >> 16) + (csum & 0xffff);
76 return (uint16_t)csum;
/*
 * Finish an LRO entry and pass its mbuf chain up the stack.
 *
 * ss:  slice state owning the entry (supplies the softc, the lro_queued
 *      counter and the lro_free list).
 * lro: the entry to flush.
 *
 * The visible code, guarded by a test of lro->append_cnt, rewrites the IP
 * total length, sets the mbuf checksum flags/csum_data and packet length,
 * copies the latest ack, window and (if lro->timestamp) timestamp values
 * into the TCP header, and recomputes th_sum.  It then calls the
 * interface's if_input routine with lro->m_head, adds append_cnt + 1 to
 * ss->lro_queued and inserts the entry at the head of ss->lro_free.
 * NOTE(review): closing braces are missing from this excerpt, so the exact
 * extent of the append_cnt branch should be confirmed in the full file.
 */
81 mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
83 mxge_softc_t *mgp = ss->sc;
88 uint32_t tcplen, tcp_csum;
90 if (lro->append_cnt) {
91 /* incorporate the new len into the ip header and
92 * re-calculate the checksum */
/* lro->len includes the Ethernet header (it is also used as the mbuf
 * packet length below), so that header is subtracted for ip_len */
94 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
97 mxge_csum_generic((uint16_t*)ip,
100 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
101 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
102 lro->m_head->m_pkthdr.csum_data = 0xffff;
103 lro->m_head->m_pkthdr.len = lro->len;
105 /* incorporate the latest ack into the tcp header */
106 tcp = (struct tcphdr *) (ip + 1);
107 tcp->th_ack = lro->ack_seq;
108 tcp->th_win = lro->window;
109 /* incorporate latest timestamp into the tcp header */
110 if (lro->timestamp) {
111 ts_ptr = (uint32_t *)(tcp + 1);
/* tsval is recorded through ntohl() in mxge_lro_rx, so it is converted
 * back here; tsecr is recorded exactly as read from the packet and is
 * therefore stored without conversion */
112 ts_ptr[1] = htonl(lro->tsval);
113 ts_ptr[2] = lro->tsecr;
116 * update checksum in tcp header by re-calculating the
117 * tcp pseudoheader checksum, and adding it to the checksum
118 * of the tcp payload data
121 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
122 tcp_csum = lro->data_csum;
123 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
124 htons(tcplen + IPPROTO_TCP));
125 tcp_csum += mxge_csum_generic((uint16_t*)tcp,
127 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
128 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
/* NOTE(review): the IOLog call below looks like debug output; any
 * preprocessor guard around it is not visible in this excerpt */
130 IOLog("pseudo = 0x%x, generic = 0x%x, sum = %x\n",
131 in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
132 htons(tcplen + IPPROTO_TCP)),
133 mxge_csum_generic((uint16_t*)tcp,
135 htons(0xffff ^ tcp_csum));
/* store the one's complement of the folded 16-bit sum */
137 tcp->th_sum = 0xffff ^ tcp_csum;
140 (*ifp->if_input)(mgp->ifp, lro->m_head);
/* count the head packet plus every appended segment */
141 ss->lro_queued += lro->append_cnt + 1;
146 SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
/*
 * Try to merge a received frame into an LRO entry.
 *
 * ss:     slice state holding the lro_active and lro_free lists.
 * m_head: mbuf chain of the received frame, starting at the Ethernet header.
 * csum:   hardware-computed checksum for the frame; the TCP header's
 *         contribution is removed below to leave a payload-only checksum.
 *
 * The visible code tests that the frame is IPv4 TCP with no IP options,
 * is not fragmented, has a valid IP header checksum, carries only the
 * ACK/PSH flags and has either no TCP options or exactly the aligned
 * timestamp option.  It then searches ss->lro_active for an entry with the
 * same ports and addresses and appends to it, or takes an entry from
 * ss->lro_free and starts a new chain.
 * NOTE(review): the statements executed when a test fails, the return
 * values and the brace structure are not visible in this excerpt --
 * confirm them against the full file.
 */
150 mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
152 struct ether_header *eh;
156 struct mbuf *m_nxt, *m_tail;
157 struct lro_entry *lro;
158 int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
160 uint32_t seq, tmp_csum, device_mtu;
162 eh = mtod(m_head, struct ether_header *);
163 if (eh->ether_type != htons(ETHERTYPE_IP))
165 ip = (struct ip *) (eh + 1);
166 if (ip->ip_p != IPPROTO_TCP)
169 /* ensure there are no options */
170 if ((ip->ip_hl << 2) != sizeof (*ip))
173 /* .. and the packet is not fragmented */
174 if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
177 /* verify that the IP header checksum is correct */
178 tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
179 if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
184 /* find the TCP header */
185 tcp = (struct tcphdr *) (ip + 1);
187 /* ensure no bits set besides ack or psh */
188 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
191 /* check for timestamps. Since the only option we handle are
192 timestamps, we only have to handle the simple case of
193 aligned timestamps */
195 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
196 tcp_hdr_len = sizeof (*tcp) + opt_bytes;
197 ts_ptr = (uint32_t *)(tcp + 1);
198 if (opt_bytes != 0) {
199 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
200 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
/* payload size = IP total length minus the TCP header (with options)
 * and the option-less IP header */
204 ip_len = ntohs(ip->ip_len);
205 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
209 * If frame is padded beyond the end of the IP packet,
210 * then we must trim the extra bytes off the end.
212 tot_len = m_head->m_pkthdr.len;
213 trim = tot_len - (ip_len + ETHER_HDR_LEN);
216 /* truncated packet */
219 m_adj(m_head, -trim);
220 tot_len = m_head->m_pkthdr.len;
224 m_tail = NULL; /* -Wuninitialized */
225 while (m_nxt != NULL) {
227 m_nxt = m_tail->m_next;
/* hlen works out to the Ethernet + IP + TCP header bytes, i.e. the
 * frame length with the TCP payload removed */
230 hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
231 seq = ntohl(tcp->th_seq);
/* look for an active entry with the same ports and addresses */
233 SLIST_FOREACH(lro, &ss->lro_active, next) {
234 if (lro->source_port == tcp->th_sport &&
235 lro->dest_port == tcp->th_dport &&
236 lro->source_ip == ip->ip_src.s_addr &&
237 lro->dest_ip == ip->ip_dst.s_addr) {
238 /* Try to append it */
240 if (__predict_false(seq != lro->next_seq ||
241 (tcp_data_len == 0 &&
242 lro->ack_seq == tcp->th_ack))) {
243 /* out of order packet or dup ack */
244 SLIST_REMOVE(&ss->lro_active, lro,
246 mxge_lro_flush(ss, lro);
251 uint32_t tsval = ntohl(*(ts_ptr + 1));
252 /* make sure timestamp values are increasing */
253 if (__predict_false(lro->tsval > tsval ||
254 *(ts_ptr + 2) == 0)) {
258 lro->tsecr = *(ts_ptr + 2);
261 lro->next_seq += tcp_data_len;
262 lro->ack_seq = tcp->th_ack;
263 lro->window = tcp->th_win;
265 if (tcp_data_len == 0) {
269 /* subtract off the checksum of the tcp header
270 * from the hardware checksum, and add it to the
271 * stored tcp data checksum. Byteswap the checksum
272 * if the total length so far is odd
274 tmp_csum = mxge_csum_generic((uint16_t*)tcp,
276 csum = csum + (tmp_csum ^ 0xffff);
277 csum = (csum & 0xffff) + (csum >> 16);
278 csum = (csum & 0xffff) + (csum >> 16);
279 if (lro->len & 0x1) {
280 /* Odd number of bytes so far, flip bytes */
281 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
283 csum = csum + lro->data_csum;
284 csum = (csum & 0xffff) + (csum >> 16);
285 csum = (csum & 0xffff) + (csum >> 16);
286 lro->data_csum = csum;
288 lro->len += tcp_data_len;
290 /* adjust mbuf so that m->m_data points to
291 the first byte of the payload */
293 /* append mbuf chain */
294 lro->m_tail->m_next = m_head;
295 /* advance the last pointer */
296 lro->m_tail = m_tail;
297 /* flush packet if required */
298 device_mtu = ss->sc->ifp->if_mtu;
/* flush once the aggregate exceeds 65535 minus the interface MTU */
299 if (lro->len > (65535 - device_mtu)) {
300 SLIST_REMOVE(&ss->lro_active, lro,
302 mxge_lro_flush(ss, lro);
308 if (SLIST_EMPTY(&ss->lro_free))
311 /* start a new chain */
312 lro = SLIST_FIRST(&ss->lro_free);
313 SLIST_REMOVE_HEAD(&ss->lro_free, next);
314 SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
315 lro->source_port = tcp->th_sport;
316 lro->dest_port = tcp->th_dport;
317 lro->source_ip = ip->ip_src.s_addr;
318 lro->dest_ip = ip->ip_dst.s_addr;
319 lro->next_seq = seq + tcp_data_len;
320 lro->mss = tcp_data_len;
321 lro->ack_seq = tcp->th_ack;
322 lro->window = tcp->th_win;
324 /* save the checksum of just the TCP payload by
325 * subtracting off the checksum of the TCP header from
326 * the entire hardware checksum
327 * Since IP header checksum is correct, checksum over
328 * the IP header is -0. Subtracting -0 is unnecessary.
330 tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len);
331 csum = csum + (tmp_csum ^ 0xffff);
332 csum = (csum & 0xffff) + (csum >> 16);
333 csum = (csum & 0xffff) + (csum >> 16);
334 lro->data_csum = csum;
337 /* record timestamp if it is present */
340 lro->tsval = ntohl(*(ts_ptr + 1));
341 lro->tsecr = *(ts_ptr + 2);
344 lro->m_head = m_head;
345 lro->m_tail = m_tail;
351 This file uses Myri10GE driver indentation.