2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (C) 2014-2015 Vincenzo Maffione
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #if defined(__FreeBSD__)
32 #include <sys/cdefs.h> /* prerequisite */
34 #include <sys/types.h>
35 #include <sys/errno.h>
36 #include <sys/param.h> /* defines used in kernel.h */
37 #include <sys/kernel.h> /* types used in module initialization */
38 #include <sys/sockio.h>
39 #include <sys/malloc.h>
40 #include <sys/socketvar.h> /* struct socket */
41 #include <sys/socket.h> /* sockaddrs */
43 #include <net/if_var.h>
44 #include <machine/bus.h> /* bus_dmamap_* */
45 #include <sys/endian.h>
51 #elif defined(__APPLE__)
53 #warning OSX support is only partial
58 #error Unsupported platform
60 #endif /* unsupported */
62 #include <net/netmap.h>
63 #include <dev/netmap/netmap_kern.h>
67 /* This routine is called by bdg_mismatch_datapath() when it finishes
68 * accumulating bytes for a segment, in order to fix some fields in the
69 * segment headers (which still contain the same content as the header
70 * of the original GSO packet). 'pkt' points to the beginning of the IP
71 * header of the segment, while 'len' is the length of the IP packet.
74 gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
75 u_int idx, u_int segmented_bytes, u_int last_segment)
77 struct nm_iphdr *iph = (struct nm_iphdr *)(pkt);
78 struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt);
79 uint16_t *check = NULL;
80 uint8_t *check_data = NULL;
83 /* Set the IPv4 "Total Length" field. */
84 iph->tot_len = htobe16(len);
85 ND("ip total length %u", be16toh(ip->tot_len));
87 /* Set the IPv4 "Identification" field. */
88 iph->id = htobe16(be16toh(iph->id) + idx);
89 ND("ip identification %u", be16toh(iph->id));
91 /* Compute and insert the IPv4 header checksum. */
93 iph->check = nm_os_csum_ipv4(iph);
94 ND("IP csum %x", be16toh(iph->check));
96 /* Set the IPv6 "Payload Len" field. */
97 ip6h->payload_len = htobe16(len-iphlen);
101 struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen);
103 /* Set the TCP sequence number. */
104 tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
105 ND("tcp seq %u", be32toh(tcph->seq));
107 /* Zero the PSH and FIN TCP flags if this is not the last
110 tcph->flags &= ~(0x8 | 0x1);
111 ND("last_segment %u", last_segment);
113 check = &tcph->check;
114 check_data = (uint8_t *)tcph;
116 struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen);
118 /* Set the UDP 'Length' field. */
119 udph->len = htobe16(len-iphlen);
121 check = &udph->check;
122 check_data = (uint8_t *)udph;
125 /* Compute and insert TCP/UDP checksum. */
128 nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check);
130 nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check);
132 ND("TCP/UDP csum %x", be16toh(*check));
136 vnet_hdr_is_bad(struct nm_vnet_hdr *vh)
138 uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
141 (gso_type != VIRTIO_NET_HDR_GSO_NONE &&
142 gso_type != VIRTIO_NET_HDR_GSO_TCPV4 &&
143 gso_type != VIRTIO_NET_HDR_GSO_UDP &&
144 gso_type != VIRTIO_NET_HDR_GSO_TCPV6)
146 (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM
147 | VIRTIO_NET_HDR_F_DATA_VALID))
151 /* The VALE mismatch datapath implementation. */
153 bdg_mismatch_datapath(struct netmap_vp_adapter *na,
154 struct netmap_vp_adapter *dst_na,
155 const struct nm_bdg_fwd *ft_p,
156 struct netmap_ring *dst_ring,
157 u_int *j, u_int lim, u_int *howmany)
159 struct netmap_slot *dst_slot = NULL;
160 struct nm_vnet_hdr *vh = NULL;
161 const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags;
163 /* Source and destination pointers. */
165 size_t src_len, dst_len;
167 /* Indices and counters for the destination ring. */
169 u_int j_cur = j_start;
172 if (unlikely(ft_p == ft_end)) {
173 RD(1, "No source slots to process");
177 /* Init source and dest pointers. */
179 src_len = ft_p->ft_len;
180 dst_slot = &dst_ring->slot[j_cur];
181 dst = NMB(&dst_na->up, dst_slot);
184 /* If the source port uses the offloadings, while destination doesn't,
185 * we grab the source virtio-net header and do the offloadings here.
187 if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) {
188 vh = (struct nm_vnet_hdr *)src;
189 /* Initial sanity check on the source virtio-net header. If
190 * something seems wrong, just drop the packet. */
191 if (src_len < na->up.virt_hdr_len) {
192 RD(1, "Short src vnet header, dropping");
195 if (unlikely(vnet_hdr_is_bad(vh))) {
196 RD(1, "Bad src vnet header, dropping");
201 /* We are processing the first input slot and there is a mismatch
202 * between source and destination virt_hdr_len (SHL and DHL).
203 * When a client is using virtio-net headers, the header length
205 * - 10: the header corresponds to the struct nm_vnet_hdr
206 * - 12: the first 10 bytes correspond to the struct
207 * virtio_net_hdr, and the last 2 bytes store the
208 * "mergeable buffers" info, which is an optional
209 * hint that can be zeroed for compatibility
211 * The destination header is therefore built according to the
214 * SHL | DHL | destination header
215 * -----------------------------
218 * 10 | 0 | doesn't exist
219 * 10 | 12 | first 10 bytes are copied from source header, last 2 are zero
220 * 12 | 0 | doesn't exist
221 * 12 | 10 | copied from the first 10 bytes of source header
223 bzero(dst, dst_na->up.virt_hdr_len);
224 if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len)
225 memcpy(dst, src, sizeof(struct nm_vnet_hdr));
226 /* Skip the virtio-net headers. */
227 src += na->up.virt_hdr_len;
228 src_len -= na->up.virt_hdr_len;
229 dst += dst_na->up.virt_hdr_len;
230 dst_len = dst_na->up.virt_hdr_len + src_len;
232 /* Here it could be dst_len == 0 (which implies src_len == 0),
233 * so we avoid passing a zero length fragment.
238 src_len = ft_p->ft_len;
242 if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
244 /* Length of the GSO packet header. */
245 u_int gso_hdr_len = 0;
246 /* Pointer to the GSO packet header. Assume it is in a single fragment. */
247 uint8_t *gso_hdr = NULL;
248 /* Index of the current segment. */
250 /* Payload data bytes segmented so far (e.g. TCP data bytes). */
251 u_int segmented_bytes = 0;
252 /* Is this an IPv4 or IPv6 GSO packet? */
254 /* Length of the IP header (20 if IPv4, 40 if IPv6). */
256 /* Length of the Ethernet header (18 if 802.1q, otherwise 14). */
258 /* Is this a TCP or an UDP GSO packet? */
259 u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
260 == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
262 /* Segment the GSO packet contained into the input slots (frags). */
266 if (dst_slots >= *howmany) {
267 /* We still have work to do, but we've run out of
268 * dst slots, so we have to drop the packet. */
269 ND(1, "Not enough slots, dropping GSO packet");
273 /* Grab the GSO header if we don't have it. */
279 /* Look at the 'Ethertype' field to see if this packet
280 * is IPv4 or IPv6, taking into account VLAN
283 if (src_len < ethhlen) {
284 RD(1, "Short GSO fragment [eth], dropping");
287 ethertype = be16toh(*((uint16_t *)
288 (gso_hdr + ethhlen - 2)));
289 if (ethertype != 0x8100) /* not 802.1q */
294 case 0x0800: /* IPv4 */
296 struct nm_iphdr *iph = (struct nm_iphdr *)
299 if (src_len < ethhlen + 20) {
300 RD(1, "Short GSO fragment "
305 iphlen = 4 * (iph->version_ihl & 0x0F);
308 case 0x86DD: /* IPv6 */
313 RD(1, "Unsupported ethertype, "
314 "dropping GSO packet");
317 ND(3, "type=%04x", ethertype);
319 if (src_len < ethhlen + iphlen) {
320 RD(1, "Short GSO fragment [IP], dropping");
324 /* Compute gso_hdr_len. For TCP we need to read the
325 * content of the 'Data Offset' field.
328 struct nm_tcphdr *tcph = (struct nm_tcphdr *)
329 (gso_hdr + ethhlen + iphlen);
331 if (src_len < ethhlen + iphlen + 20) {
332 RD(1, "Short GSO fragment "
336 gso_hdr_len = ethhlen + iphlen +
337 4 * (tcph->doff >> 4);
339 gso_hdr_len = ethhlen + iphlen + 8; /* UDP */
342 if (src_len < gso_hdr_len) {
343 RD(1, "Short GSO fragment [TCP/UDP], dropping");
347 ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
350 /* Advance source pointers. */
352 src_len -= gso_hdr_len;
358 src_len = ft_p->ft_len;
362 /* Fill in the header of the current segment. */
363 if (gso_bytes == 0) {
364 memcpy(dst, gso_hdr, gso_hdr_len);
365 gso_bytes = gso_hdr_len;
368 /* Fill in data and update source and dest pointers. */
370 if (gso_bytes + copy > dst_na->mfs)
371 copy = dst_na->mfs - gso_bytes;
372 memcpy(dst + gso_bytes, src, copy);
377 /* A segment is complete or we have processed all the
378 GSO payload bytes. */
379 if (gso_bytes >= dst_na->mfs ||
380 (src_len == 0 && ft_p + 1 == ft_end)) {
381 /* After raw segmentation, we must fix some header
382 * fields and compute checksums, in a protocol dependent
384 gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen,
386 gso_idx, segmented_bytes,
387 src_len == 0 && ft_p + 1 == ft_end);
389 ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
390 dst_slot->len = gso_bytes;
393 segmented_bytes += gso_bytes - gso_hdr_len;
398 /* Next destination slot. */
399 j_cur = nm_next(j_cur, lim);
400 dst_slot = &dst_ring->slot[j_cur];
401 dst = NMB(&dst_na->up, dst_slot);
404 /* Next input slot. */
410 src_len = ft_p->ft_len;
413 ND(3, "%d bytes segmented", segmented_bytes);
416 /* Address of a checksum field into a destination slot. */
417 uint16_t *check = NULL;
418 /* Accumulator for an unfolded checksum. */
421 /* Process a non-GSO packet. */
423 /* Init 'check' if necessary. */
424 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
425 if (unlikely(vh->csum_offset + vh->csum_start > src_len))
426 D("invalid checksum request");
428 check = (uint16_t *)(dst + vh->csum_start +
432 while (ft_p != ft_end) {
433 /* Init/update the packet checksum if needed. */
434 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
436 csum = nm_os_csum_raw(src + vh->csum_start,
437 src_len - vh->csum_start, 0);
439 csum = nm_os_csum_raw(src, src_len, csum);
442 /* Round to a multiple of 64 */
443 src_len = (src_len + 63) & ~63;
445 if (ft_p->ft_flags & NS_INDIRECT) {
446 if (copyin(src, dst, src_len)) {
447 /* Invalid user pointer, pretend len is 0. */
451 memcpy(dst, src, (int)src_len);
453 dst_slot->len = dst_len;
456 /* Next destination slot. */
457 j_cur = nm_next(j_cur, lim);
458 dst_slot = &dst_ring->slot[j_cur];
459 dst = NMB(&dst_na->up, dst_slot);
461 /* Next source slot. */
464 dst_len = src_len = ft_p->ft_len;
467 /* Finalize (fold) the checksum if needed. */
468 if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
469 *check = nm_os_csum_fold(csum);
471 ND(3, "using %u dst_slots", dst_slots);
473 /* A second pass on the destination slots to set the slot flags,
474 * using the right number of destination slots.
476 while (j_start != j_cur) {
477 dst_slot = &dst_ring->slot[j_start];
478 dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG;
479 j_start = nm_next(j_start, lim);
481 /* Clear NS_MOREFRAG flag on last entry. */
482 dst_slot->flags = (dst_slots << 8);
485 /* Update howmany and j. This is to commit the use of
486 * those slots in the destination ring. */
487 if (unlikely(dst_slots > *howmany)) {
488 D("Slot allocation error: This is a bug");
491 *howmany -= dst_slots;