]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/netmap/netmap_offloadings.c
Update DTS files from Linux 4.12
[FreeBSD/FreeBSD.git] / sys / dev / netmap / netmap_offloadings.c
1 /*
2  * Copyright (C) 2014-2015 Vincenzo Maffione
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *      documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 /* $FreeBSD$ */
28
29 #if defined(__FreeBSD__)
30 #include <sys/cdefs.h> /* prerequisite */
31
32 #include <sys/types.h>
33 #include <sys/errno.h>
34 #include <sys/param.h>  /* defines used in kernel.h */
35 #include <sys/kernel.h> /* types used in module initialization */
36 #include <sys/sockio.h>
37 #include <sys/malloc.h>
38 #include <sys/socketvar.h>      /* struct socket */
39 #include <sys/socket.h> /* sockaddrs */
40 #include <net/if.h>
41 #include <net/if_var.h>
42 #include <machine/bus.h>        /* bus_dmamap_* */
43 #include <sys/endian.h>
44
45 #elif defined(linux)
46
47 #include "bsd_glue.h"
48
49 #elif defined(__APPLE__)
50
51 #warning OSX support is only partial
52 #include "osx_glue.h"
53
54 #else
55
56 #error  Unsupported platform
57
58 #endif /* unsupported */
59
60 #include <net/netmap.h>
61 #include <dev/netmap/netmap_kern.h>
62
63
64
65 /* This routine is called by bdg_mismatch_datapath() when it finishes
66  * accumulating bytes for a segment, in order to fix some fields in the
67  * segment headers (which still contain the same content as the header
68  * of the original GSO packet). 'pkt' points to the beginning of the IP
69  * header of the segment, while 'len' is the length of the IP packet.
70  */
71 static void
72 gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
73                 u_int idx, u_int segmented_bytes, u_int last_segment)
74 {
75         struct nm_iphdr *iph = (struct nm_iphdr *)(pkt);
76         struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt);
77         uint16_t *check = NULL;
78         uint8_t *check_data = NULL;
79
80         if (ipv4) {
81                 /* Set the IPv4 "Total Length" field. */
82                 iph->tot_len = htobe16(len);
83                 ND("ip total length %u", be16toh(ip->tot_len));
84
85                 /* Set the IPv4 "Identification" field. */
86                 iph->id = htobe16(be16toh(iph->id) + idx);
87                 ND("ip identification %u", be16toh(iph->id));
88
89                 /* Compute and insert the IPv4 header checksum. */
90                 iph->check = 0;
91                 iph->check = nm_os_csum_ipv4(iph);
92                 ND("IP csum %x", be16toh(iph->check));
93         } else {
94                 /* Set the IPv6 "Payload Len" field. */
95                 ip6h->payload_len = htobe16(len-iphlen);
96         }
97
98         if (tcp) {
99                 struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen);
100
101                 /* Set the TCP sequence number. */
102                 tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
103                 ND("tcp seq %u", be32toh(tcph->seq));
104
105                 /* Zero the PSH and FIN TCP flags if this is not the last
106                    segment. */
107                 if (!last_segment)
108                         tcph->flags &= ~(0x8 | 0x1);
109                 ND("last_segment %u", last_segment);
110
111                 check = &tcph->check;
112                 check_data = (uint8_t *)tcph;
113         } else { /* UDP */
114                 struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen);
115
116                 /* Set the UDP 'Length' field. */
117                 udph->len = htobe16(len-iphlen);
118
119                 check = &udph->check;
120                 check_data = (uint8_t *)udph;
121         }
122
123         /* Compute and insert TCP/UDP checksum. */
124         *check = 0;
125         if (ipv4)
126                 nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check);
127         else
128                 nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check);
129
130         ND("TCP/UDP csum %x", be16toh(*check));
131 }
132
133 static int
134 vnet_hdr_is_bad(struct nm_vnet_hdr *vh)
135 {
136         uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
137
138         return (
139                 (gso_type != VIRTIO_NET_HDR_GSO_NONE &&
140                  gso_type != VIRTIO_NET_HDR_GSO_TCPV4 &&
141                  gso_type != VIRTIO_NET_HDR_GSO_UDP &&
142                  gso_type != VIRTIO_NET_HDR_GSO_TCPV6)
143                 ||
144                  (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM
145                                | VIRTIO_NET_HDR_F_DATA_VALID))
146                );
147 }
148
149 /* The VALE mismatch datapath implementation. */
150 void
151 bdg_mismatch_datapath(struct netmap_vp_adapter *na,
152                       struct netmap_vp_adapter *dst_na,
153                       const struct nm_bdg_fwd *ft_p,
154                       struct netmap_ring *dst_ring,
155                       u_int *j, u_int lim, u_int *howmany)
156 {
157         struct netmap_slot *dst_slot = NULL;
158         struct nm_vnet_hdr *vh = NULL;
159         const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags;
160
161         /* Source and destination pointers. */
162         uint8_t *dst, *src;
163         size_t src_len, dst_len;
164
165         /* Indices and counters for the destination ring. */
166         u_int j_start = *j;
167         u_int j_cur = j_start;
168         u_int dst_slots = 0;
169
170         if (unlikely(ft_p == ft_end)) {
171                 RD(3, "No source slots to process");
172                 return;
173         }
174
175         /* Init source and dest pointers. */
176         src = ft_p->ft_buf;
177         src_len = ft_p->ft_len;
178         dst_slot = &dst_ring->slot[j_cur];
179         dst = NMB(&dst_na->up, dst_slot);
180         dst_len = src_len;
181
182         /* If the source port uses the offloadings, while destination doesn't,
183          * we grab the source virtio-net header and do the offloadings here.
184          */
185         if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) {
186                 vh = (struct nm_vnet_hdr *)src;
187                 /* Initial sanity check on the source virtio-net header. If
188                  * something seems wrong, just drop the packet. */
189                 if (src_len < na->up.virt_hdr_len) {
190                         RD(3, "Short src vnet header, dropping");
191                         return;
192                 }
193                 if (vnet_hdr_is_bad(vh)) {
194                         RD(3, "Bad src vnet header, dropping");
195                         return;
196                 }
197         }
198
199         /* We are processing the first input slot and there is a mismatch
200          * between source and destination virt_hdr_len (SHL and DHL).
201          * When the a client is using virtio-net headers, the header length
202          * can be:
203          *    - 10: the header corresponds to the struct nm_vnet_hdr
204          *    - 12: the first 10 bytes correspond to the struct
205          *          virtio_net_hdr, and the last 2 bytes store the
206          *          "mergeable buffers" info, which is an optional
207          *          hint that can be zeroed for compatibility
208          *
209          * The destination header is therefore built according to the
210          * following table:
211          *
212          * SHL | DHL | destination header
213          * -----------------------------
214          *   0 |  10 | zero
215          *   0 |  12 | zero
216          *  10 |   0 | doesn't exist
217          *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
218          *  12 |   0 | doesn't exist
219          *  12 |  10 | copied from the first 10 bytes of source header
220          */
221         bzero(dst, dst_na->up.virt_hdr_len);
222         if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len)
223                 memcpy(dst, src, sizeof(struct nm_vnet_hdr));
224         /* Skip the virtio-net headers. */
225         src += na->up.virt_hdr_len;
226         src_len -= na->up.virt_hdr_len;
227         dst += dst_na->up.virt_hdr_len;
228         dst_len = dst_na->up.virt_hdr_len + src_len;
229
230         /* Here it could be dst_len == 0 (which implies src_len == 0),
231          * so we avoid passing a zero length fragment.
232          */
233         if (dst_len == 0) {
234                 ft_p++;
235                 src = ft_p->ft_buf;
236                 src_len = ft_p->ft_len;
237                 dst_len = src_len;
238         }
239
240         if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
241                 u_int gso_bytes = 0;
242                 /* Length of the GSO packet header. */
243                 u_int gso_hdr_len = 0;
244                 /* Pointer to the GSO packet header. Assume it is in a single fragment. */
245                 uint8_t *gso_hdr = NULL;
246                 /* Index of the current segment. */
247                 u_int gso_idx = 0;
248                 /* Payload data bytes segmented so far (e.g. TCP data bytes). */
249                 u_int segmented_bytes = 0;
250                 /* Is this an IPv4 or IPv6 GSO packet? */
251                 u_int ipv4 = 0;
252                 /* Length of the IP header (20 if IPv4, 40 if IPv6). */
253                 u_int iphlen = 0;
254                 /* Length of the Ethernet header (18 if 802.1q, otherwise 14). */
255                 u_int ethhlen = 14;
256                 /* Is this a TCP or an UDP GSO packet? */
257                 u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
258                                 == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
259
260                 /* Segment the GSO packet contained into the input slots (frags). */
261                 for (;;) {
262                         size_t copy;
263
264                         if (dst_slots >= *howmany) {
265                                 /* We still have work to do, but we've run out of
266                                  * dst slots, so we have to drop the packet. */
267                                 RD(3, "Not enough slots, dropping GSO packet");
268                                 return;
269                         }
270
271                         /* Grab the GSO header if we don't have it. */
272                         if (!gso_hdr) {
273                                 uint16_t ethertype;
274
275                                 gso_hdr = src;
276
277                                 /* Look at the 'Ethertype' field to see if this packet
278                                  * is IPv4 or IPv6, taking into account VLAN
279                                  * encapsulation. */
280                                 for (;;) {
281                                         if (src_len < ethhlen) {
282                                                 RD(3, "Short GSO fragment [eth], dropping");
283                                                 return;
284                                         }
285                                         ethertype = be16toh(*((uint16_t *)
286                                                             (gso_hdr + ethhlen - 2)));
287                                         if (ethertype != 0x8100) /* not 802.1q */
288                                                 break;
289                                         ethhlen += 4;
290                                 }
291                                 switch (ethertype) {
292                                         case 0x0800:  /* IPv4 */
293                                         {
294                                                 struct nm_iphdr *iph = (struct nm_iphdr *)
295                                                                         (gso_hdr + ethhlen);
296
297                                                 if (src_len < ethhlen + 20) {
298                                                         RD(3, "Short GSO fragment "
299                                                               "[IPv4], dropping");
300                                                         return;
301                                                 }
302                                                 ipv4 = 1;
303                                                 iphlen = 4 * (iph->version_ihl & 0x0F);
304                                                 break;
305                                         }
306                                         case 0x86DD:  /* IPv6 */
307                                                 ipv4 = 0;
308                                                 iphlen = 40;
309                                                 break;
310                                         default:
311                                                 RD(3, "Unsupported ethertype, "
312                                                       "dropping GSO packet");
313                                                 return;
314                                 }
315                                 ND(3, "type=%04x", ethertype);
316
317                                 if (src_len < ethhlen + iphlen) {
318                                         RD(3, "Short GSO fragment [IP], dropping");
319                                         return;
320                                 }
321
322                                 /* Compute gso_hdr_len. For TCP we need to read the
323                                  * content of the 'Data Offset' field.
324                                  */
325                                 if (tcp) {
326                                         struct nm_tcphdr *tcph = (struct nm_tcphdr *)
327                                                                 (gso_hdr + ethhlen + iphlen);
328
329                                         if (src_len < ethhlen + iphlen + 20) {
330                                                 RD(3, "Short GSO fragment "
331                                                                 "[TCP], dropping");
332                                                 return;
333                                         }
334                                         gso_hdr_len = ethhlen + iphlen +
335                                                       4 * (tcph->doff >> 4);
336                                 } else {
337                                         gso_hdr_len = ethhlen + iphlen + 8; /* UDP */
338                                 }
339
340                                 if (src_len < gso_hdr_len) {
341                                         RD(3, "Short GSO fragment [TCP/UDP], dropping");
342                                         return;
343                                 }
344
345                                 ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
346                                                                    dst_na->mfs);
347
348                                 /* Advance source pointers. */
349                                 src += gso_hdr_len;
350                                 src_len -= gso_hdr_len;
351                                 if (src_len == 0) {
352                                         ft_p++;
353                                         if (ft_p == ft_end)
354                                                 break;
355                                         src = ft_p->ft_buf;
356                                         src_len = ft_p->ft_len;
357                                 }
358                         }
359
360                         /* Fill in the header of the current segment. */
361                         if (gso_bytes == 0) {
362                                 memcpy(dst, gso_hdr, gso_hdr_len);
363                                 gso_bytes = gso_hdr_len;
364                         }
365
366                         /* Fill in data and update source and dest pointers. */
367                         copy = src_len;
368                         if (gso_bytes + copy > dst_na->mfs)
369                                 copy = dst_na->mfs - gso_bytes;
370                         memcpy(dst + gso_bytes, src, copy);
371                         gso_bytes += copy;
372                         src += copy;
373                         src_len -= copy;
374
375                         /* A segment is complete or we have processed all the
376                            the GSO payload bytes. */
377                         if (gso_bytes >= dst_na->mfs ||
378                                 (src_len == 0 && ft_p + 1 == ft_end)) {
379                                 /* After raw segmentation, we must fix some header
380                                  * fields and compute checksums, in a protocol dependent
381                                  * way. */
382                                 gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen,
383                                                 ipv4, iphlen, tcp,
384                                                 gso_idx, segmented_bytes,
385                                                 src_len == 0 && ft_p + 1 == ft_end);
386
387                                 ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
388                                 dst_slot->len = gso_bytes;
389                                 dst_slot->flags = 0;
390                                 dst_slots++;
391                                 segmented_bytes += gso_bytes - gso_hdr_len;
392
393                                 gso_bytes = 0;
394                                 gso_idx++;
395
396                                 /* Next destination slot. */
397                                 j_cur = nm_next(j_cur, lim);
398                                 dst_slot = &dst_ring->slot[j_cur];
399                                 dst = NMB(&dst_na->up, dst_slot);
400                         }
401
402                         /* Next input slot. */
403                         if (src_len == 0) {
404                                 ft_p++;
405                                 if (ft_p == ft_end)
406                                         break;
407                                 src = ft_p->ft_buf;
408                                 src_len = ft_p->ft_len;
409                         }
410                 }
411                 ND(3, "%d bytes segmented", segmented_bytes);
412
413         } else {
414                 /* Address of a checksum field into a destination slot. */
415                 uint16_t *check = NULL;
416                 /* Accumulator for an unfolded checksum. */
417                 rawsum_t csum = 0;
418
419                 /* Process a non-GSO packet. */
420
421                 /* Init 'check' if necessary. */
422                 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
423                         if (unlikely(vh->csum_offset + vh->csum_start > src_len))
424                                 D("invalid checksum request");
425                         else
426                                 check = (uint16_t *)(dst + vh->csum_start +
427                                                 vh->csum_offset);
428                 }
429
430                 while (ft_p != ft_end) {
431                         /* Init/update the packet checksum if needed. */
432                         if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
433                                 if (!dst_slots)
434                                         csum = nm_os_csum_raw(src + vh->csum_start,
435                                                                 src_len - vh->csum_start, 0);
436                                 else
437                                         csum = nm_os_csum_raw(src, src_len, csum);
438                         }
439
440                         /* Round to a multiple of 64 */
441                         src_len = (src_len + 63) & ~63;
442
443                         if (ft_p->ft_flags & NS_INDIRECT) {
444                                 if (copyin(src, dst, src_len)) {
445                                         /* Invalid user pointer, pretend len is 0. */
446                                         dst_len = 0;
447                                 }
448                         } else {
449                                 memcpy(dst, src, (int)src_len);
450                         }
451                         dst_slot->len = dst_len;
452                         dst_slots++;
453
454                         /* Next destination slot. */
455                         j_cur = nm_next(j_cur, lim);
456                         dst_slot = &dst_ring->slot[j_cur];
457                         dst = NMB(&dst_na->up, dst_slot);
458
459                         /* Next source slot. */
460                         ft_p++;
461                         src = ft_p->ft_buf;
462                         dst_len = src_len = ft_p->ft_len;
463                 }
464
465                 /* Finalize (fold) the checksum if needed. */
466                 if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
467                         *check = nm_os_csum_fold(csum);
468                 }
469                 ND(3, "using %u dst_slots", dst_slots);
470
471                 /* A second pass on the destination slots to set the slot flags,
472                  * using the right number of destination slots.
473                  */
474                 while (j_start != j_cur) {
475                         dst_slot = &dst_ring->slot[j_start];
476                         dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG;
477                         j_start = nm_next(j_start, lim);
478                 }
479                 /* Clear NS_MOREFRAG flag on last entry. */
480                 dst_slot->flags = (dst_slots << 8);
481         }
482
483         /* Update howmany and j. This is to commit the use of
484          * those slots in the destination ring. */
485         if (unlikely(dst_slots > *howmany)) {
486                 D("Slot allocation error: This is a bug");
487         }
488         *j = j_cur;
489         *howmany -= dst_slots;
490 }