2 * Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #if defined(__FreeBSD__)
29 #include <sys/cdefs.h> /* prerequisite */
31 #include <sys/types.h>
32 #include <sys/errno.h>
33 #include <sys/param.h> /* defines used in kernel.h */
34 #include <sys/malloc.h> /* types used in module initialization */
35 #include <sys/kernel.h> /* types used in module initialization */
36 #include <sys/sockio.h>
37 #include <sys/socketvar.h> /* struct socket */
38 #include <sys/socket.h> /* sockaddrs */
40 #include <net/if_var.h>
41 #include <machine/bus.h> /* bus_dmamap_* */
42 #include <sys/endian.h>
48 #elif defined(__APPLE__)
50 #warning OSX support is only partial
55 #error Unsupported platform
57 #endif /* unsupported */
59 #include <net/netmap.h>
60 #include <dev/netmap/netmap_kern.h>
64 /* This routine is called by bdg_mismatch_datapath() when it finishes
65 * accumulating bytes for a segment, in order to fix some fields in the
66 * segment headers (which still contain the same content as the header
67 * of the original GSO packet). 'buf' points to the beginning (e.g.
68 * the ethernet header) of the segment, and 'len' is its length.
 *
 * Parameters, as used by the visible code:
 *   buf             - the IP header is accessed at buf + 14 (through both
 *                     'iph' and 'ip6h', which alias the same address) and
 *                     the TCP/UDP header at buf + 14 + iphlen.
 *   len             - len-14 becomes the IPv4 total length; len-14-iphlen
 *                     becomes the IPv6 payload length, the UDP length and
 *                     the length handed to the checksum helpers.
 *   idx             - added to the IPv4 identification field.
 *   segmented_bytes - added to the TCP sequence number.
 *   last_segment    - only logged in the visible lines; presumably it
 *                     guards the clearing of PSH/FIN (TODO confirm).
 *   tcp             - presumably selects the TCP or the UDP handling;
 *                     the test itself is not visible here (TODO confirm).
 *   iphlen          - IP header length; the else branch below is annotated
 *                     as the iphlen == 40 case and fills the IPv6 header.
 *
 * NOTE(review): the embedded line numbers skip values (69, 73, 78-79, 89,
 * 95-97, 105-106, ...), so the end of this comment, the braces and the
 * branch conditions are not part of this listing.
70 static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
71 u_int segmented_bytes, u_int last_segment,
72 u_int tcp, u_int iphlen)
74 struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
75 struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
76 uint16_t *check = NULL;
77 uint8_t *check_data = NULL;
80 /* Set the IPv4 "Total Length" field. */
/* The stored length excludes the 14 bytes that precede the IP header. */
81 iph->tot_len = htobe16(len-14);
/* NOTE(review): no variable named ip is declared in the visible code (the
 * local is iph); presumably ND() discards its arguments - confirm. */
82 ND("ip total length %u", be16toh(ip->tot_len));
84 /* Set the IPv4 "Identification" field. */
/* Each segment gets the identification already in the header plus idx. */
85 iph->id = htobe16(be16toh(iph->id) + idx);
86 ND("ip identification %u", be16toh(iph->id));
88 /* Compute and insert the IPv4 header checksum. */
90 iph->check = nm_csum_ipv4(iph);
91 ND("IP csum %x", be16toh(iph->check));
92 } else {/* if (iphlen == 40) */
93 /* Set the IPv6 "Payload Len" field. */
/* Unlike the IPv4 total length, this value also excludes the IP header. */
94 ip6h->payload_len = htobe16(len-14-iphlen);
98 struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);
100 /* Set the TCP sequence number. */
/* The header still carries the sequence number it was copied with, so the
 * offset passed in segmented_bytes is added to it. */
101 tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
102 ND("tcp seq %u", be32toh(tcph->seq));
104 /* Zero the PSH and FIN TCP flags if this is not the last
107 tcph->flags &= ~(0x8 | 0x1);
108 ND("last_segment %u", last_segment);
110 check = &tcph->check;
111 check_data = (uint8_t *)tcph;
113 struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);
115 /* Set the UDP 'Length' field. */
116 udph->len = htobe16(len-14-iphlen);
/* Remember where the UDP checksum lives and where the checksummed data
 * starts, for the common checksum step below. */
118 check = &udph->check;
119 check_data = (uint8_t *)udph;
122 /* Compute and insert TCP/UDP checksum. */
/* Separate helpers exist for IPv4 and IPv6; both receive the IP header, the
 * start of the TCP/UDP header, the length from that header to the end of
 * the segment, and the address of the checksum field. */
125 nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
127 nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);
129 ND("TCP/UDP csum %x", be16toh(*check));
133 /* The VALE mismatch datapath implementation. */
/*
 * Moves one packet, held in the ft_p->ft_frags source fragments starting at
 * ft_p, into slots of the destination ring starting at slot *j, when the
 * source and destination ports use different virtio-net header lengths.
 *
 * If the source has a virtio-net header and the destination has none, the
 * header is kept in vh and the offloads it requests are performed here:
 *   - GSO (vh->gso_type != VIRTIO_NET_HDR_GSO_NONE): the payload is cut into
 *     segments of at most dst_na->mfs bytes, each one starting with a copy
 *     of the GSO header and fixed up by gso_fix_segment();
 *   - otherwise the fragments are copied slot by slot and, when
 *     VIRTIO_NET_HDR_F_NEEDS_CSUM is set, a checksum is accumulated and
 *     stored at the end.
 * A final pass writes the slot flags and *howmany is updated.
 *
 * Parameters:
 *   na       - source port adapter (its virt_hdr_len is the source header
 *              length).
 *   dst_na   - destination port adapter (virt_hdr_len, mfs and buffers of
 *              the destination slots are taken from it).
 *   ft_p     - first source fragment; ft_buf, ft_len and ft_flags are read.
 *   ring     - destination ring whose slots are filled.
 *   j        - in/out: index of the current destination slot, advanced with
 *              nm_next(*j, lim).
 *   lim      - second argument of nm_next() when advancing slot indexes.
 *   howmany  - in/out: decremented by the number of destination slots used.
 *
 * NOTE(review): this listing skips many original lines (the embedded line
 * numbers are not contiguous), so several declarations (src, dst, gso_bytes,
 * copy, csum, dst_slots, j_start, ...), braces and conditions are not
 * visible here.
 */
134 void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
135 struct netmap_vp_adapter *dst_na,
136 struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
137 u_int *j, u_int lim, u_int *howmany)
139 struct netmap_slot *slot = NULL;
140 struct nm_vnet_hdr *vh = NULL;
141 /* Number of source slots to process. */
142 u_int frags = ft_p->ft_frags;
143 struct nm_bdg_fwd *ft_end = ft_p + frags;
145 /* Source and destination pointers. */
147 size_t src_len, dst_len;
152 /* If the source port uses the offloadings, while destination doesn't,
153 * we grab the source virtio-net header and do the offloadings here.
155 if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
156 vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
159 /* Init source and dest pointers. */
161 src_len = ft_p->ft_len;
162 slot = &ring->slot[*j];
163 dst = NMB(&dst_na->up, slot);
166 /* We are processing the first input slot and there is a mismatch
167 * between source and destination virt_hdr_len (SHL and DHL).
168 * When a client is using virtio-net headers, the header length
170 * - 10: the header corresponds to the struct nm_vnet_hdr
171 * - 12: the first 10 bytes correspond to the struct
172 * virtio_net_hdr, and the last 2 bytes store the
173 * "mergeable buffers" info, which is an optional
174 * hint that can be zeroed for compatibility
176 * The destination header is therefore built according to the
179 * SHL | DHL | destination header
180 * -----------------------------
183 * 10 | 0 | doesn't exist
184 * 10 | 12 | first 10 bytes are copied from source header, last 2 are zero
185 * 12 | 0 | doesn't exist
186 * 12 | 10 | copied from the first 10 bytes of source header
188 bzero(dst, dst_na->virt_hdr_len);
189 if (na->virt_hdr_len && dst_na->virt_hdr_len)
190 memcpy(dst, src, sizeof(struct nm_vnet_hdr));
191 /* Skip the virtio-net headers. */
192 src += na->virt_hdr_len;
193 src_len -= na->virt_hdr_len;
194 dst += dst_na->virt_hdr_len;
/* Destination length: the destination header plus what is left of the
 * first source fragment once its own header has been skipped. */
195 dst_len = dst_na->virt_hdr_len + src_len;
197 /* Here it could be dst_len == 0 (which implies src_len == 0),
198 * so we avoid passing a zero length fragment.
203 src_len = ft_p->ft_len;
207 if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
209 /* Length of the GSO packet header. */
210 u_int gso_hdr_len = 0;
211 /* Pointer to the GSO packet header. Assume it is in a single fragment. */
212 uint8_t *gso_hdr = NULL;
213 /* Index of the current segment. */
215 /* Payload data bytes segmented so far (e.g. TCP data bytes). */
216 u_int segmented_bytes = 0;
217 /* Length of the IP header (20 if IPv4, 40 if IPv6). */
219 /* Is this a TCP or an UDP GSO packet? */
/* The ECN bit is masked out before the comparison; every GSO type other
 * than UDP is treated as TCP. */
220 u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
221 == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
223 /* Segment the GSO packet contained in the input slots (frags). */
224 while (ft_p != ft_end) {
227 /* Grab the GSO header if we don't have it. */
233 /* Look at the 'Ethertype' field to see if this packet
236 ethertype = be16toh(*((uint16_t *)(gso_hdr + 12)));
237 if (ethertype == 0x0800)
239 else /* if (ethertype == 0x86DD) */
241 ND(3, "type=%04x", ethertype);
243 /* Compute gso_hdr_len. For TCP we need to read the
244 * content of the 'Data Offset' field.
247 struct nm_tcphdr *tcph =
248 (struct nm_tcphdr *)&gso_hdr[14+iphlen];
250 gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
252 gso_hdr_len = 14 + iphlen + 8; /* UDP */
254 ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
257 /* Advance source pointers. */
259 src_len -= gso_hdr_len;
265 src_len = ft_p->ft_len;
270 /* Fill in the header of the current segment. */
/* gso_bytes == 0 marks the start of a new segment: it begins with a copy
 * of the GSO header, and the header bytes count towards gso_bytes. */
271 if (gso_bytes == 0) {
272 memcpy(dst, gso_hdr, gso_hdr_len);
273 gso_bytes = gso_hdr_len;
276 /* Fill in data and update source and dest pointers. */
/* Clamp the copy so that the segment never grows beyond dst_na->mfs. */
278 if (gso_bytes + copy > dst_na->mfs)
279 copy = dst_na->mfs - gso_bytes;
280 memcpy(dst + gso_bytes, src, copy);
285 /* A segment is complete or we have processed all the
286 GSO payload bytes. */
287 if (gso_bytes >= dst_na->mfs ||
288 (src_len == 0 && ft_p + 1 == ft_end)) {
289 /* After raw segmentation, we must fix some header
290 * fields and compute checksums, in a protocol dependent
292 gso_fix_segment(dst, gso_bytes, gso_idx,
294 src_len == 0 && ft_p + 1 == ft_end,
297 ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
298 slot->len = gso_bytes;
300 segmented_bytes += gso_bytes - gso_hdr_len;
304 /* Next destination slot. */
305 *j = nm_next(*j, lim);
306 slot = &ring->slot[*j];
307 dst = NMB(&dst_na->up, slot);
313 /* Next input slot. */
319 src_len = ft_p->ft_len;
322 ND(3, "%d bytes segmented", segmented_bytes);
325 /* Address of a checksum field in a destination slot. */
326 uint16_t *check = NULL;
327 /* Accumulator for an unfolded checksum. */
330 /* Process a non-GSO packet. */
332 /* Init 'check' if necessary. */
/* NOTE(review): only the D() log is visible for the invalid request;
 * presumably the assignment to check sits in an else branch on a line that
 * is missing from this listing, leaving check NULL on error - confirm. */
333 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
334 if (unlikely(vh->csum_offset + vh->csum_start > src_len))
335 D("invalid checksum request");
337 check = (uint16_t *)(dst + vh->csum_start +
341 while (ft_p != ft_end) {
342 /* Init/update the packet checksum if needed. */
343 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
/* The first call starts a fresh sum at offset csum_start of the fragment;
 * the second one extends the running sum over a whole fragment. The test
 * that selects between them is not visible in this listing. */
345 csum = nm_csum_raw(src + vh->csum_start,
346 src_len - vh->csum_start, 0);
348 csum = nm_csum_raw(src, src_len, csum);
351 /* Round to a multiple of 64 */
/* NOTE(review): the rounded-up value is the length used by the copies
 * below, so up to 63 bytes beyond ft_len are read and written; presumably
 * both buffers are large enough for that - confirm. */
352 src_len = (src_len + 63) & ~63;
/* Fragments flagged NS_INDIRECT carry a user pointer and are fetched with
 * copyin(); the others are copied with memcpy(). */
354 if (ft_p->ft_flags & NS_INDIRECT) {
355 if (copyin(src, dst, src_len)) {
356 /* Invalid user pointer, pretend len is 0. */
360 memcpy(dst, src, (int)src_len);
366 /* Next destination slot. */
367 *j = nm_next(*j, lim);
368 slot = &ring->slot[*j];
369 dst = NMB(&dst_na->up, slot);
371 /* Next source slot. */
374 dst_len = src_len = ft_p->ft_len;
378 /* Finalize (fold) the checksum if needed. */
/* check is only non-NULL when a checksum location was set up above. */
379 if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
380 *check = nm_csum_fold(csum);
382 ND(3, "using %u dst_slots", dst_slots);
384 /* A second pass on the destination slots to set the slot flags,
385 * using the right number of destination slots.
387 while (j_start != *j) {
388 slot = &ring->slot[j_start];
389 slot->flags = (dst_slots << 8)| NS_MOREFRAG;
390 j_start = nm_next(j_start, lim);
392 /* Clear NS_MOREFRAG flag on last entry. */
/* The slot count stays encoded in the bits above the low byte. */
393 slot->flags = (dst_slots << 8);
396 /* Update howmany. */
/* Clamp first so that the unsigned subtraction below cannot wrap. */
397 if (unlikely(dst_slots > *howmany)) {
398 dst_slots = *howmany;
399 D("Slot allocation error: Should never happen");
401 *howmany -= dst_slots;