]> CyberLeo.Net >> Repos - FreeBSD/releng/9.0.git/blob - sys/dev/mxge/mxge_lro.c
Copy stable/9 to releng/9.0 as part of the FreeBSD 9.0-RELEASE release
[FreeBSD/releng/9.0.git] / sys / dev / mxge / mxge_lro.c
1 /******************************************************************************
2
3 Copyright (c) 2007-2008, Myricom Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11
12  2. Neither the name of the Myricom Inc, nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/endian.h>
36 #include <sys/mbuf.h>
37 #include <sys/kernel.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/bus.h>
41
42 #include <net/if.h>
43 #include <net/ethernet.h>
44 #include <net/if_media.h>
45
46 #include <netinet/in_systm.h>
47 #include <netinet/in.h>
48 #include <netinet/ip.h>
49 #include <netinet/tcp.h>
50
51 #include <machine/bus.h>
52 #include <machine/in_cksum.h>
53
54 #include <dev/mxge/mxge_mcp.h>
55 #include <dev/mxge/if_mxge_var.h>
56
57 #include "opt_inet.h"
58
59 #ifdef INET
60
/*
 * Add up a run of 16-bit words and fold the total down to 16 bits.
 *
 * raw: first 16-bit word to include in the sum
 * len: number of bytes to sum; assumed to be a multiple of 4, because
 *      two 16-bit words are consumed on every pass
 *
 * Returns the folded (not complemented) 16-bit sum.
 */
static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
        uint32_t sum = 0;
        int remaining;

        for (remaining = len; remaining > 0; remaining -= 4) {
                sum += raw[0];
                sum += raw[1];
                raw += 2;
        }

        /* fold twice: the first fold can itself carry into bit 16 */
        sum = (sum & 0xffff) + (sum >> 16);
        sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)sum;
}
78
79
80 void
81 mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
82 {
83         mxge_softc_t *mgp = ss->sc;
84         struct ifnet *ifp;
85         struct ip *ip;
86         struct tcphdr *tcp;
87         uint32_t *ts_ptr;
88         uint32_t tcplen, tcp_csum;
89
90         if (lro->append_cnt) {
91                 /* incorporate the new len into the ip header and
92                  * re-calculate the checksum */
93                 ip = lro->ip;
94                 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
95                 ip->ip_sum = 0;
96                 ip->ip_sum = 0xffff ^ 
97                         mxge_csum_generic((uint16_t*)ip,
98                                               sizeof (*ip));
99
100                 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
101                         CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
102                 lro->m_head->m_pkthdr.csum_data = 0xffff;
103                 lro->m_head->m_pkthdr.len = lro->len;
104
105                 /* incorporate the latest ack into the tcp header */
106                 tcp = (struct tcphdr *) (ip + 1);
107                 tcp->th_ack = lro->ack_seq;
108                 tcp->th_win = lro->window;
109                 /* incorporate latest timestamp into the tcp header */
110                 if (lro->timestamp) {
111                         ts_ptr = (uint32_t *)(tcp + 1);
112                         ts_ptr[1] = htonl(lro->tsval);
113                         ts_ptr[2] = lro->tsecr;
114                 }
115                 /* 
116                  * update checksum in tcp header by re-calculating the
117                  * tcp pseudoheader checksum, and adding it to the checksum
118                  * of the tcp payload data 
119                  */
120                 tcp->th_sum = 0;
121                 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
122                 tcp_csum = lro->data_csum;
123                 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
124                                       htons(tcplen + IPPROTO_TCP));
125                 tcp_csum += mxge_csum_generic((uint16_t*)tcp,
126                                                   tcp->th_off << 2);
127                 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
128                 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
129 #if 0
130                 IOLog("pseudo = 0x%x, generic = 0x%x, sum = %x\n", 
131                       in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
132                                 htons(tcplen + IPPROTO_TCP)),
133                       mxge_csum_generic((uint16_t*)tcp,
134                                             tcp->th_off << 2),
135                       htons(0xffff ^ tcp_csum));
136 #endif
137                 tcp->th_sum = 0xffff ^ tcp_csum;
138         }
139         ifp = mgp->ifp;
140         (*ifp->if_input)(mgp->ifp, lro->m_head);
141         ss->lro_queued += lro->append_cnt + 1;
142         ss->lro_flushed++;
143         lro->m_head = NULL;
144         lro->timestamp = 0;
145         lro->append_cnt = 0;
146         SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
147 }
148
149 int
150 mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
151 {
152         struct ether_header *eh;
153         struct ip *ip;
154         struct tcphdr *tcp;
155         uint32_t *ts_ptr;
156         struct mbuf *m_nxt, *m_tail;
157         struct lro_entry *lro;
158         int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
159         int opt_bytes, trim;
160         uint32_t seq, tmp_csum, device_mtu;
161
162         eh = mtod(m_head, struct ether_header *);
163         if (eh->ether_type != htons(ETHERTYPE_IP))
164                 return 1;
165         ip = (struct ip *) (eh + 1);
166         if (ip->ip_p != IPPROTO_TCP)
167                 return 1;
168         
169         /* ensure there are no options */
170         if ((ip->ip_hl << 2) != sizeof (*ip))
171                 return -1;
172
173         /* .. and the packet is not fragmented */
174         if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
175                 return -1;
176
177         /* verify that the IP header checksum is correct */
178         tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
179         if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
180                 ss->lro_bad_csum++;
181                 return -1;
182         }
183
184         /* find the TCP header */
185         tcp = (struct tcphdr *) (ip + 1);
186
187         /* ensure no bits set besides ack or psh */
188         if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
189                 return -1;
190
191         /* check for timestamps. Since the only option we handle are
192            timestamps, we only have to handle the simple case of
193            aligned timestamps */
194
195         opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
196         tcp_hdr_len =  sizeof (*tcp) + opt_bytes;
197         ts_ptr = (uint32_t *)(tcp + 1);
198         if (opt_bytes != 0) {
199                 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
200                     (*ts_ptr !=  ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
201                         return -1;
202         }
203
204         ip_len = ntohs(ip->ip_len);
205         tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
206         
207
208         /* 
209          * If frame is padded beyond the end of the IP packet,
210          * then we must trim the extra bytes off the end.
211          */
212         tot_len = m_head->m_pkthdr.len;
213         trim = tot_len - (ip_len + ETHER_HDR_LEN);
214         if (trim != 0) {
215                 if (trim < 0) {
216                         /* truncated packet */
217                         return -1;
218                 }
219                 m_adj(m_head, -trim);
220                 tot_len = m_head->m_pkthdr.len;
221         }
222
223         m_nxt = m_head;
224         m_tail = NULL; /* -Wuninitialized */
225         while (m_nxt != NULL) {
226                 m_tail = m_nxt;
227                 m_nxt = m_tail->m_next;
228         }
229
230         hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
231         seq = ntohl(tcp->th_seq);
232
233         SLIST_FOREACH(lro, &ss->lro_active, next) {
234                 if (lro->source_port == tcp->th_sport && 
235                     lro->dest_port == tcp->th_dport &&
236                     lro->source_ip == ip->ip_src.s_addr && 
237                     lro->dest_ip == ip->ip_dst.s_addr) {
238                         /* Try to append it */
239
240                         if (__predict_false(seq != lro->next_seq ||
241                                     (tcp_data_len == 0 &&
242                                      lro->ack_seq == tcp->th_ack))) {
243                                 /* out of order packet or dup ack */
244                                 SLIST_REMOVE(&ss->lro_active, lro,
245                                              lro_entry, next);
246                                 mxge_lro_flush(ss, lro);
247                                 return -1;
248                         }
249
250                         if (opt_bytes) {
251                                 uint32_t tsval = ntohl(*(ts_ptr + 1));
252                                 /* make sure timestamp values are increasing */
253                                 if (__predict_false(lro->tsval > tsval || 
254                                              *(ts_ptr + 2) == 0)) {
255                                         return -1;
256                                 }
257                                 lro->tsval = tsval;
258                                 lro->tsecr = *(ts_ptr + 2);
259                         }
260
261                         lro->next_seq += tcp_data_len;
262                         lro->ack_seq = tcp->th_ack;
263                         lro->window = tcp->th_win;
264                         lro->append_cnt++;
265                         if (tcp_data_len == 0) {
266                                 m_freem(m_head);
267                                 return 0;
268                         }
269                         /* subtract off the checksum of the tcp header
270                          * from the hardware checksum, and add it to the
271                          * stored tcp data checksum.  Byteswap the checksum
272                          * if the total length so far is odd 
273                          */
274                         tmp_csum = mxge_csum_generic((uint16_t*)tcp,
275                                                          tcp_hdr_len);
276                         csum = csum + (tmp_csum ^ 0xffff);
277                         csum = (csum & 0xffff) + (csum >> 16);
278                         csum = (csum & 0xffff) + (csum >> 16);
279                         if (lro->len & 0x1) {
280                                 /* Odd number of bytes so far, flip bytes */
281                                 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
282                         }
283                         csum = csum + lro->data_csum;
284                         csum = (csum & 0xffff) + (csum >> 16);
285                         csum = (csum & 0xffff) + (csum >> 16);
286                         lro->data_csum = csum;
287
288                         lro->len += tcp_data_len;
289
290                         /* adjust mbuf so that m->m_data points to
291                            the first byte of the payload */
292                         m_adj(m_head, hlen);
293                         /* append mbuf chain */
294                         lro->m_tail->m_next = m_head;
295                         /* advance the last pointer */
296                         lro->m_tail = m_tail;
297                         /* flush packet if required */
298                         device_mtu = ss->sc->ifp->if_mtu;
299                         if (lro->len > (65535 - device_mtu)) {
300                                 SLIST_REMOVE(&ss->lro_active, lro,
301                                              lro_entry, next);
302                                 mxge_lro_flush(ss, lro);
303                         }
304                         return 0;
305                 }
306         }
307
308         if (SLIST_EMPTY(&ss->lro_free))
309             return -1;
310
311         /* start a new chain */
312         lro = SLIST_FIRST(&ss->lro_free);
313         SLIST_REMOVE_HEAD(&ss->lro_free, next);
314         SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
315         lro->source_port = tcp->th_sport;
316         lro->dest_port = tcp->th_dport;
317         lro->source_ip = ip->ip_src.s_addr;
318         lro->dest_ip = ip->ip_dst.s_addr;
319         lro->next_seq = seq + tcp_data_len;
320         lro->mss = tcp_data_len;
321         lro->ack_seq = tcp->th_ack;
322         lro->window = tcp->th_win;
323
324         /* save the checksum of just the TCP payload by
325          * subtracting off the checksum of the TCP header from
326          * the entire hardware checksum 
327          * Since IP header checksum is correct, checksum over
328          * the IP header is -0.  Substracting -0 is unnecessary.
329          */
330         tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len);
331         csum = csum + (tmp_csum ^ 0xffff);
332         csum = (csum & 0xffff) + (csum >> 16);
333         csum = (csum & 0xffff) + (csum >> 16);
334         lro->data_csum = csum;
335         
336         lro->ip = ip;
337         /* record timestamp if it is present */
338         if (opt_bytes) {
339                 lro->timestamp = 1;
340                 lro->tsval = ntohl(*(ts_ptr + 1));
341                 lro->tsecr = *(ts_ptr + 2);
342         }
343         lro->len = tot_len;
344         lro->m_head = m_head;
345         lro->m_tail = m_tail;
346         return 0;
347 }
348
349 #endif /* INET */
350 /*
351   This file uses Myri10GE driver indentation.
352
353   Local Variables:
354   c-file-style:"linux"
355   tab-width:8
356   End:
357 */