]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/netmap/netmap_offloadings.c
zfs: merge openzfs/zfs@2e2a46e0a
[FreeBSD/FreeBSD.git] / sys / dev / netmap / netmap_offloadings.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (C) 2014-2015 Vincenzo Maffione
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *   2. Redistributions in binary form must reproduce the above copyright
13  *      notice, this list of conditions and the following disclaimer in the
14  *      documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29
30 #if defined(__FreeBSD__)
31 #include <sys/cdefs.h> /* prerequisite */
32
33 #include <sys/types.h>
34 #include <sys/errno.h>
35 #include <sys/param.h>  /* defines used in kernel.h */
36 #include <sys/kernel.h> /* types used in module initialization */
37 #include <sys/sockio.h>
38 #include <sys/malloc.h>
39 #include <sys/socketvar.h>      /* struct socket */
40 #include <sys/socket.h> /* sockaddrs */
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <machine/bus.h>        /* bus_dmamap_* */
44 #include <sys/endian.h>
45
46 #elif defined(linux)
47
48 #include "bsd_glue.h"
49
50 #elif defined(__APPLE__)
51
52 #warning OSX support is only partial
53 #include "osx_glue.h"
54
55 #else
56
57 #error  Unsupported platform
58
59 #endif /* unsupported */
60
61 #include <net/netmap.h>
62 #include <dev/netmap/netmap_kern.h>
63
64
65
66 /* This routine is called by bdg_mismatch_datapath() when it finishes
67  * accumulating bytes for a segment, in order to fix some fields in the
68  * segment headers (which still contain the same content as the header
69  * of the original GSO packet). 'pkt' points to the beginning of the IP
70  * header of the segment, while 'len' is the length of the IP packet.
71  */
72 static void
73 gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
74                 u_int idx, u_int segmented_bytes, u_int last_segment)
75 {
76         struct nm_iphdr *iph = (struct nm_iphdr *)(pkt);
77         struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt);
78         uint16_t *check = NULL;
79         uint8_t *check_data = NULL;
80
81         if (ipv4) {
82                 /* Set the IPv4 "Total Length" field. */
83                 iph->tot_len = htobe16(len);
84                 nm_prdis("ip total length %u", be16toh(ip->tot_len));
85
86                 /* Set the IPv4 "Identification" field. */
87                 iph->id = htobe16(be16toh(iph->id) + idx);
88                 nm_prdis("ip identification %u", be16toh(iph->id));
89
90                 /* Compute and insert the IPv4 header checksum. */
91                 iph->check = 0;
92                 iph->check = nm_os_csum_ipv4(iph);
93                 nm_prdis("IP csum %x", be16toh(iph->check));
94         } else {
95                 /* Set the IPv6 "Payload Len" field. */
96                 ip6h->payload_len = htobe16(len-iphlen);
97         }
98
99         if (tcp) {
100                 struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen);
101
102                 /* Set the TCP sequence number. */
103                 tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
104                 nm_prdis("tcp seq %u", be32toh(tcph->seq));
105
106                 /* Zero the PSH and FIN TCP flags if this is not the last
107                    segment. */
108                 if (!last_segment)
109                         tcph->flags &= ~(0x8 | 0x1);
110                 nm_prdis("last_segment %u", last_segment);
111
112                 check = &tcph->check;
113                 check_data = (uint8_t *)tcph;
114         } else { /* UDP */
115                 struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen);
116
117                 /* Set the UDP 'Length' field. */
118                 udph->len = htobe16(len-iphlen);
119
120                 check = &udph->check;
121                 check_data = (uint8_t *)udph;
122         }
123
124         /* Compute and insert TCP/UDP checksum. */
125         *check = 0;
126         if (ipv4)
127                 nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check);
128         else
129                 nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check);
130
131         nm_prdis("TCP/UDP csum %x", be16toh(*check));
132 }
133
134 static inline int
135 vnet_hdr_is_bad(struct nm_vnet_hdr *vh)
136 {
137         uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
138
139         return (
140                 (gso_type != VIRTIO_NET_HDR_GSO_NONE &&
141                  gso_type != VIRTIO_NET_HDR_GSO_TCPV4 &&
142                  gso_type != VIRTIO_NET_HDR_GSO_UDP &&
143                  gso_type != VIRTIO_NET_HDR_GSO_TCPV6)
144                 ||
145                  (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM
146                                | VIRTIO_NET_HDR_F_DATA_VALID))
147                );
148 }
149
150 /* The VALE mismatch datapath implementation. */
151 void
152 bdg_mismatch_datapath(struct netmap_vp_adapter *na,
153                       struct netmap_vp_adapter *dst_na,
154                       const struct nm_bdg_fwd *ft_p,
155                       struct netmap_ring *dst_ring,
156                       u_int *j, u_int lim, u_int *howmany)
157 {
158         struct netmap_slot *dst_slot = NULL;
159         struct nm_vnet_hdr *vh = NULL;
160         const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags;
161
162         /* Source and destination pointers. */
163         uint8_t *dst, *src;
164         size_t src_len, dst_len;
165
166         /* Indices and counters for the destination ring. */
167         u_int j_start = *j;
168         u_int j_cur = j_start;
169         u_int dst_slots = 0;
170
171         if (unlikely(ft_p == ft_end)) {
172                 nm_prlim(1, "No source slots to process");
173                 return;
174         }
175
176         /* Init source and dest pointers. */
177         src = ft_p->ft_buf;
178         src_len = ft_p->ft_len;
179         dst_slot = &dst_ring->slot[j_cur];
180         dst = NMB(&dst_na->up, dst_slot);
181         dst_len = src_len;
182
183         /* If the source port uses the offloadings, while destination doesn't,
184          * we grab the source virtio-net header and do the offloadings here.
185          */
186         if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) {
187                 vh = (struct nm_vnet_hdr *)src;
188                 /* Initial sanity check on the source virtio-net header. If
189                  * something seems wrong, just drop the packet. */
190                 if (src_len < na->up.virt_hdr_len) {
191                         nm_prlim(1, "Short src vnet header, dropping");
192                         return;
193                 }
194                 if (unlikely(vnet_hdr_is_bad(vh))) {
195                         nm_prlim(1, "Bad src vnet header, dropping");
196                         return;
197                 }
198         }
199
200         /* We are processing the first input slot and there is a mismatch
201          * between source and destination virt_hdr_len (SHL and DHL).
202          * When the a client is using virtio-net headers, the header length
203          * can be:
204          *    - 10: the header corresponds to the struct nm_vnet_hdr
205          *    - 12: the first 10 bytes correspond to the struct
206          *          virtio_net_hdr, and the last 2 bytes store the
207          *          "mergeable buffers" info, which is an optional
208          *          hint that can be zeroed for compatibility
209          *
210          * The destination header is therefore built according to the
211          * following table:
212          *
213          * SHL | DHL | destination header
214          * -----------------------------
215          *   0 |  10 | zero
216          *   0 |  12 | zero
217          *  10 |   0 | doesn't exist
218          *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
219          *  12 |   0 | doesn't exist
220          *  12 |  10 | copied from the first 10 bytes of source header
221          */
222         bzero(dst, dst_na->up.virt_hdr_len);
223         if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len)
224                 memcpy(dst, src, sizeof(struct nm_vnet_hdr));
225         /* Skip the virtio-net headers. */
226         src += na->up.virt_hdr_len;
227         src_len -= na->up.virt_hdr_len;
228         dst += dst_na->up.virt_hdr_len;
229         dst_len = dst_na->up.virt_hdr_len + src_len;
230
231         /* Here it could be dst_len == 0 (which implies src_len == 0),
232          * so we avoid passing a zero length fragment.
233          */
234         if (dst_len == 0) {
235                 ft_p++;
236                 src = ft_p->ft_buf;
237                 src_len = ft_p->ft_len;
238                 dst_len = src_len;
239         }
240
241         if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
242                 u_int gso_bytes = 0;
243                 /* Length of the GSO packet header. */
244                 u_int gso_hdr_len = 0;
245                 /* Pointer to the GSO packet header. Assume it is in a single fragment. */
246                 uint8_t *gso_hdr = NULL;
247                 /* Index of the current segment. */
248                 u_int gso_idx = 0;
249                 /* Payload data bytes segmented so far (e.g. TCP data bytes). */
250                 u_int segmented_bytes = 0;
251                 /* Is this an IPv4 or IPv6 GSO packet? */
252                 u_int ipv4 = 0;
253                 /* Length of the IP header (20 if IPv4, 40 if IPv6). */
254                 u_int iphlen = 0;
255                 /* Length of the Ethernet header (18 if 802.1q, otherwise 14). */
256                 u_int ethhlen = 14;
257                 /* Is this a TCP or an UDP GSO packet? */
258                 u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
259                                 == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
260
261                 /* Segment the GSO packet contained into the input slots (frags). */
262                 for (;;) {
263                         size_t copy;
264
265                         if (dst_slots >= *howmany) {
266                                 /* We still have work to do, but we've run out of
267                                  * dst slots, so we have to drop the packet. */
268                                 nm_prdis(1, "Not enough slots, dropping GSO packet");
269                                 return;
270                         }
271
272                         /* Grab the GSO header if we don't have it. */
273                         if (!gso_hdr) {
274                                 uint16_t ethertype;
275
276                                 gso_hdr = src;
277
278                                 /* Look at the 'Ethertype' field to see if this packet
279                                  * is IPv4 or IPv6, taking into account VLAN
280                                  * encapsulation. */
281                                 for (;;) {
282                                         if (src_len < ethhlen) {
283                                                 nm_prlim(1, "Short GSO fragment [eth], dropping");
284                                                 return;
285                                         }
286                                         ethertype = be16toh(*((uint16_t *)
287                                                             (gso_hdr + ethhlen - 2)));
288                                         if (ethertype != 0x8100) /* not 802.1q */
289                                                 break;
290                                         ethhlen += 4;
291                                 }
292                                 switch (ethertype) {
293                                         case 0x0800:  /* IPv4 */
294                                         {
295                                                 struct nm_iphdr *iph = (struct nm_iphdr *)
296                                                                         (gso_hdr + ethhlen);
297
298                                                 if (src_len < ethhlen + 20) {
299                                                         nm_prlim(1, "Short GSO fragment "
300                                                               "[IPv4], dropping");
301                                                         return;
302                                                 }
303                                                 ipv4 = 1;
304                                                 iphlen = 4 * (iph->version_ihl & 0x0F);
305                                                 break;
306                                         }
307                                         case 0x86DD:  /* IPv6 */
308                                                 ipv4 = 0;
309                                                 iphlen = 40;
310                                                 break;
311                                         default:
312                                                 nm_prlim(1, "Unsupported ethertype, "
313                                                       "dropping GSO packet");
314                                                 return;
315                                 }
316                                 nm_prdis(3, "type=%04x", ethertype);
317
318                                 if (src_len < ethhlen + iphlen) {
319                                         nm_prlim(1, "Short GSO fragment [IP], dropping");
320                                         return;
321                                 }
322
323                                 /* Compute gso_hdr_len. For TCP we need to read the
324                                  * content of the 'Data Offset' field.
325                                  */
326                                 if (tcp) {
327                                         struct nm_tcphdr *tcph = (struct nm_tcphdr *)
328                                                                 (gso_hdr + ethhlen + iphlen);
329
330                                         if (src_len < ethhlen + iphlen + 20) {
331                                                 nm_prlim(1, "Short GSO fragment "
332                                                                 "[TCP], dropping");
333                                                 return;
334                                         }
335                                         gso_hdr_len = ethhlen + iphlen +
336                                                       4 * (tcph->doff >> 4);
337                                 } else {
338                                         gso_hdr_len = ethhlen + iphlen + 8; /* UDP */
339                                 }
340
341                                 if (src_len < gso_hdr_len) {
342                                         nm_prlim(1, "Short GSO fragment [TCP/UDP], dropping");
343                                         return;
344                                 }
345
346                                 nm_prdis(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
347                                                                    dst_na->mfs);
348
349                                 /* Advance source pointers. */
350                                 src += gso_hdr_len;
351                                 src_len -= gso_hdr_len;
352                                 if (src_len == 0) {
353                                         ft_p++;
354                                         if (ft_p == ft_end)
355                                                 break;
356                                         src = ft_p->ft_buf;
357                                         src_len = ft_p->ft_len;
358                                 }
359                         }
360
361                         /* Fill in the header of the current segment. */
362                         if (gso_bytes == 0) {
363                                 memcpy(dst, gso_hdr, gso_hdr_len);
364                                 gso_bytes = gso_hdr_len;
365                         }
366
367                         /* Fill in data and update source and dest pointers. */
368                         copy = src_len;
369                         if (gso_bytes + copy > dst_na->mfs)
370                                 copy = dst_na->mfs - gso_bytes;
371                         memcpy(dst + gso_bytes, src, copy);
372                         gso_bytes += copy;
373                         src += copy;
374                         src_len -= copy;
375
376                         /* A segment is complete or we have processed all the
377                            the GSO payload bytes. */
378                         if (gso_bytes >= dst_na->mfs ||
379                                 (src_len == 0 && ft_p + 1 == ft_end)) {
380                                 /* After raw segmentation, we must fix some header
381                                  * fields and compute checksums, in a protocol dependent
382                                  * way. */
383                                 gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen,
384                                                 ipv4, iphlen, tcp,
385                                                 gso_idx, segmented_bytes,
386                                                 src_len == 0 && ft_p + 1 == ft_end);
387
388                                 nm_prdis("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
389                                 dst_slot->len = gso_bytes;
390                                 dst_slot->flags = 0;
391                                 dst_slots++;
392                                 segmented_bytes += gso_bytes - gso_hdr_len;
393
394                                 gso_bytes = 0;
395                                 gso_idx++;
396
397                                 /* Next destination slot. */
398                                 j_cur = nm_next(j_cur, lim);
399                                 dst_slot = &dst_ring->slot[j_cur];
400                                 dst = NMB(&dst_na->up, dst_slot);
401                         }
402
403                         /* Next input slot. */
404                         if (src_len == 0) {
405                                 ft_p++;
406                                 if (ft_p == ft_end)
407                                         break;
408                                 src = ft_p->ft_buf;
409                                 src_len = ft_p->ft_len;
410                         }
411                 }
412                 nm_prdis(3, "%d bytes segmented", segmented_bytes);
413
414         } else {
415                 /* Address of a checksum field into a destination slot. */
416                 uint16_t *check = NULL;
417                 /* Accumulator for an unfolded checksum. */
418                 rawsum_t csum = 0;
419
420                 /* Process a non-GSO packet. */
421
422                 /* Init 'check' if necessary. */
423                 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
424                         if (unlikely(vh->csum_offset + vh->csum_start > src_len))
425                                 nm_prerr("invalid checksum request");
426                         else
427                                 check = (uint16_t *)(dst + vh->csum_start +
428                                                 vh->csum_offset);
429                 }
430
431                 while (ft_p != ft_end) {
432                         /* Init/update the packet checksum if needed. */
433                         if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
434                                 if (!dst_slots)
435                                         csum = nm_os_csum_raw(src + vh->csum_start,
436                                                                 src_len - vh->csum_start, 0);
437                                 else
438                                         csum = nm_os_csum_raw(src, src_len, csum);
439                         }
440
441                         /* Round to a multiple of 64 */
442                         src_len = (src_len + 63) & ~63;
443
444                         if (ft_p->ft_flags & NS_INDIRECT) {
445                                 if (copyin(src, dst, src_len)) {
446                                         /* Invalid user pointer, pretend len is 0. */
447                                         dst_len = 0;
448                                 }
449                         } else {
450                                 memcpy(dst, src, (int)src_len);
451                         }
452                         dst_slot->len = dst_len;
453                         dst_slots++;
454
455                         /* Next destination slot. */
456                         j_cur = nm_next(j_cur, lim);
457                         dst_slot = &dst_ring->slot[j_cur];
458                         dst = NMB(&dst_na->up, dst_slot);
459
460                         /* Next source slot. */
461                         ft_p++;
462                         src = ft_p->ft_buf;
463                         dst_len = src_len = ft_p->ft_len;
464                 }
465
466                 /* Finalize (fold) the checksum if needed. */
467                 if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
468                         *check = nm_os_csum_fold(csum);
469                 }
470                 nm_prdis(3, "using %u dst_slots", dst_slots);
471
472                 /* A second pass on the destination slots to set the slot flags,
473                  * using the right number of destination slots.
474                  */
475                 while (j_start != j_cur) {
476                         dst_slot = &dst_ring->slot[j_start];
477                         dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG;
478                         j_start = nm_next(j_start, lim);
479                 }
480                 /* Clear NS_MOREFRAG flag on last entry. */
481                 dst_slot->flags = (dst_slots << 8);
482         }
483
484         /* Update howmany and j. This is to commit the use of
485          * those slots in the destination ring. */
486         if (unlikely(dst_slots > *howmany)) {
487                 nm_prerr("bug: slot allocation error");
488         }
489         *j = j_cur;
490         *howmany -= dst_slots;
491 }