/*
 * CyberLeo.Net >> Repos - FreeBSD/releng/7.2.git/blob - sys/dev/mxge/mxge_lro.c
 * Create releng/7.2 from stable/7 in preparation for 7.2-RELEASE.
 * [FreeBSD/releng/7.2.git] / sys / dev / mxge / mxge_lro.c
 */
/******************************************************************************

Copyright (c) 2007-2008, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/endian.h>
36 #include <sys/mbuf.h>
37 #include <sys/kernel.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/bus.h>
41
42 #include <net/if.h>
43 #include <net/ethernet.h>
44 #include <net/if_media.h>
45
46 #include <netinet/in_systm.h>
47 #include <netinet/in.h>
48 #include <netinet/ip.h>
49 #include <netinet/tcp.h>
50
51 #include <machine/bus.h>
52 #include <machine/in_cksum.h>
53
54 #include <dev/mxge/mxge_mcp.h>
55 #include <dev/mxge/if_mxge_var.h>
56
57
/*
 * Compute a folded 16-bit ones'-complement sum over a buffer.
 *
 * raw: start of the data, read as 16-bit words in native byte order.
 * len: length in bytes.  It is assumed to be a multiple of 4, because
 *      every iteration consumes two 16-bit words; a len <= 0 yields 0.
 *
 * Returns the folded sum.  The result is NOT inverted; callers that
 * need a checksum field value XOR it with 0xffff themselves.
 */
static uint16_t
mxge_csum_generic(const uint16_t *raw, int len)
{
        uint32_t csum;

        csum = 0;
        while (len > 0) {
                csum += raw[0];
                csum += raw[1];
                raw += 2;
                len -= 4;
        }
        /* fold twice: the first fold may itself carry out of bit 15 */
        csum = (csum >> 16) + (csum & 0xffff);
        csum = (csum >> 16) + (csum & 0xffff);
        return (uint16_t)csum;
}
75
76
77 void
78 mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
79 {
80         mxge_softc_t *mgp = ss->sc;
81         struct ifnet *ifp;
82         struct ip *ip;
83         struct tcphdr *tcp;
84         uint32_t *ts_ptr;
85         uint32_t tcplen, tcp_csum;
86
87         if (lro->append_cnt) {
88                 /* incorporate the new len into the ip header and
89                  * re-calculate the checksum */
90                 ip = lro->ip;
91                 ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
92                 ip->ip_sum = 0;
93                 ip->ip_sum = 0xffff ^ 
94                         mxge_csum_generic((uint16_t*)ip,
95                                               sizeof (*ip));
96
97                 lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
98                         CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
99                 lro->m_head->m_pkthdr.csum_data = 0xffff;
100                 lro->m_head->m_pkthdr.len = lro->len;
101
102                 /* incorporate the latest ack into the tcp header */
103                 tcp = (struct tcphdr *) (ip + 1);
104                 tcp->th_ack = lro->ack_seq;
105                 tcp->th_win = lro->window;
106                 /* incorporate latest timestamp into the tcp header */
107                 if (lro->timestamp) {
108                         ts_ptr = (uint32_t *)(tcp + 1);
109                         ts_ptr[1] = htonl(lro->tsval);
110                         ts_ptr[2] = lro->tsecr;
111                 }
112                 /* 
113                  * update checksum in tcp header by re-calculating the
114                  * tcp pseudoheader checksum, and adding it to the checksum
115                  * of the tcp payload data 
116                  */
117                 tcp->th_sum = 0;
118                 tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
119                 tcp_csum = lro->data_csum;
120                 tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
121                                       htons(tcplen + IPPROTO_TCP));
122                 tcp_csum += mxge_csum_generic((uint16_t*)tcp,
123                                                   tcp->th_off << 2);
124                 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
125                 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
126 #if 0
127                 IOLog("pseudo = 0x%x, generic = 0x%x, sum = %x\n", 
128                       in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
129                                 htons(tcplen + IPPROTO_TCP)),
130                       mxge_csum_generic((uint16_t*)tcp,
131                                             tcp->th_off << 2),
132                       htons(0xffff ^ tcp_csum));
133 #endif
134                 tcp->th_sum = 0xffff ^ tcp_csum;
135         }
136         ifp = mgp->ifp;
137         (*ifp->if_input)(mgp->ifp, lro->m_head);
138         ss->lro_queued += lro->append_cnt + 1;
139         ss->lro_flushed++;
140         lro->m_head = NULL;
141         lro->timestamp = 0;
142         lro->append_cnt = 0;
143         SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
144 }
145
146 int
147 mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
148 {
149         struct ether_header *eh;
150         struct ip *ip;
151         struct tcphdr *tcp;
152         uint32_t *ts_ptr;
153         struct mbuf *m_nxt, *m_tail;
154         struct lro_entry *lro;
155         int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
156         int opt_bytes, trim;
157         uint32_t seq, tmp_csum, device_mtu;
158
159         eh = mtod(m_head, struct ether_header *);
160         if (eh->ether_type != htons(ETHERTYPE_IP))
161                 return 1;
162         ip = (struct ip *) (eh + 1);
163         if (ip->ip_p != IPPROTO_TCP)
164                 return 1;
165         
166         /* ensure there are no options */
167         if ((ip->ip_hl << 2) != sizeof (*ip))
168                 return -1;
169
170         /* .. and the packet is not fragmented */
171         if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
172                 return -1;
173
174         /* verify that the IP header checksum is correct */
175         tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
176         if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
177                 ss->lro_bad_csum++;
178                 return -1;
179         }
180
181         /* find the TCP header */
182         tcp = (struct tcphdr *) (ip + 1);
183
184         /* ensure no bits set besides ack or psh */
185         if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
186                 return -1;
187
188         /* check for timestamps. Since the only option we handle are
189            timestamps, we only have to handle the simple case of
190            aligned timestamps */
191
192         opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
193         tcp_hdr_len =  sizeof (*tcp) + opt_bytes;
194         ts_ptr = (uint32_t *)(tcp + 1);
195         if (opt_bytes != 0) {
196                 if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
197                     (*ts_ptr !=  ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
198                         return -1;
199         }
200
201         ip_len = ntohs(ip->ip_len);
202         tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
203         
204
205         /* 
206          * If frame is padded beyond the end of the IP packet,
207          * then we must trim the extra bytes off the end.
208          */
209         tot_len = m_head->m_pkthdr.len;
210         trim = tot_len - (ip_len + ETHER_HDR_LEN);
211         if (trim != 0) {
212                 if (trim < 0) {
213                         /* truncated packet */
214                         return -1;
215                 }
216                 m_adj(m_head, -trim);
217                 tot_len = m_head->m_pkthdr.len;
218         }
219
220         m_nxt = m_head;
221         m_tail = NULL; /* -Wuninitialized */
222         while (m_nxt != NULL) {
223                 m_tail = m_nxt;
224                 m_nxt = m_tail->m_next;
225         }
226
227         hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
228         seq = ntohl(tcp->th_seq);
229
230         SLIST_FOREACH(lro, &ss->lro_active, next) {
231                 if (lro->source_port == tcp->th_sport && 
232                     lro->dest_port == tcp->th_dport &&
233                     lro->source_ip == ip->ip_src.s_addr && 
234                     lro->dest_ip == ip->ip_dst.s_addr) {
235                         /* Try to append it */
236
237                         if (__predict_false(seq != lro->next_seq)) {
238                                 /* out of order packet */
239                                 SLIST_REMOVE(&ss->lro_active, lro,
240                                              lro_entry, next);
241                                 mxge_lro_flush(ss, lro);
242                                 return -1;
243                         }
244
245                         if (opt_bytes) {
246                                 uint32_t tsval = ntohl(*(ts_ptr + 1));
247                                 /* make sure timestamp values are increasing */
248                                 if (__predict_false(lro->tsval > tsval || 
249                                              *(ts_ptr + 2) == 0)) {
250                                         return -1;
251                                 }
252                                 lro->tsval = tsval;
253                                 lro->tsecr = *(ts_ptr + 2);
254                         }
255
256                         lro->next_seq += tcp_data_len;
257                         lro->ack_seq = tcp->th_ack;
258                         lro->window = tcp->th_win;
259                         lro->append_cnt++;
260                         if (tcp_data_len == 0) {
261                                 m_freem(m_head);
262                                 return 0;
263                         }
264                         /* subtract off the checksum of the tcp header
265                          * from the hardware checksum, and add it to the
266                          * stored tcp data checksum.  Byteswap the checksum
267                          * if the total length so far is odd 
268                          */
269                         tmp_csum = mxge_csum_generic((uint16_t*)tcp,
270                                                          tcp_hdr_len);
271                         csum = csum + (tmp_csum ^ 0xffff);
272                         csum = (csum & 0xffff) + (csum >> 16);
273                         csum = (csum & 0xffff) + (csum >> 16);
274                         if (lro->len & 0x1) {
275                                 /* Odd number of bytes so far, flip bytes */
276                                 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
277                         }
278                         csum = csum + lro->data_csum;
279                         csum = (csum & 0xffff) + (csum >> 16);
280                         csum = (csum & 0xffff) + (csum >> 16);
281                         lro->data_csum = csum;
282
283                         lro->len += tcp_data_len;
284
285                         /* adjust mbuf so that m->m_data points to
286                            the first byte of the payload */
287                         m_adj(m_head, hlen);
288                         /* append mbuf chain */
289                         lro->m_tail->m_next = m_head;
290                         /* advance the last pointer */
291                         lro->m_tail = m_tail;
292                         /* flush packet if required */
293                         device_mtu = ss->sc->ifp->if_mtu;
294                         if (lro->len > (65535 - device_mtu)) {
295                                 SLIST_REMOVE(&ss->lro_active, lro,
296                                              lro_entry, next);
297                                 mxge_lro_flush(ss, lro);
298                         }
299                         return 0;
300                 }
301         }
302
303         if (SLIST_EMPTY(&ss->lro_free))
304             return -1;
305
306         /* start a new chain */
307         lro = SLIST_FIRST(&ss->lro_free);
308         SLIST_REMOVE_HEAD(&ss->lro_free, next);
309         SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
310         lro->source_port = tcp->th_sport;
311         lro->dest_port = tcp->th_dport;
312         lro->source_ip = ip->ip_src.s_addr;
313         lro->dest_ip = ip->ip_dst.s_addr;
314         lro->next_seq = seq + tcp_data_len;
315         lro->mss = tcp_data_len;
316         lro->ack_seq = tcp->th_ack;
317         lro->window = tcp->th_win;
318
319         /* save the checksum of just the TCP payload by
320          * subtracting off the checksum of the TCP header from
321          * the entire hardware checksum 
322          * Since IP header checksum is correct, checksum over
323          * the IP header is -0.  Substracting -0 is unnecessary.
324          */
325         tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len);
326         csum = csum + (tmp_csum ^ 0xffff);
327         csum = (csum & 0xffff) + (csum >> 16);
328         csum = (csum & 0xffff) + (csum >> 16);
329         lro->data_csum = csum;
330         
331         lro->ip = ip;
332         /* record timestamp if it is present */
333         if (opt_bytes) {
334                 lro->timestamp = 1;
335                 lro->tsval = ntohl(*(ts_ptr + 1));
336                 lro->tsecr = *(ts_ptr + 2);
337         }
338         lro->len = tot_len;
339         lro->m_head = m_head;
340         lro->m_tail = m_tail;
341         return 0;
342 }
/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/