2 * SPDX-License-Identifier: BSD-3-Clause
4 * This header is BSD licensed so anyone can use the definitions to implement
5 * compatible drivers/servers.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of IBM nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 /* The feature bitmap for virtio net */
37 #define VIRTIO_NET_F_CSUM 0x00001 /* Host handles pkts w/ partial csum */
38 #define VIRTIO_NET_F_GUEST_CSUM 0x00002 /* Guest handles pkts w/ partial csum*/
39 #define VIRTIO_NET_F_MAC 0x00020 /* Host has given MAC address. */
40 #define VIRTIO_NET_F_GSO 0x00040 /* Host handles pkts w/ any GSO type */
41 #define VIRTIO_NET_F_GUEST_TSO4 0x00080 /* Guest can handle TSOv4 in. */
42 #define VIRTIO_NET_F_GUEST_TSO6 0x00100 /* Guest can handle TSOv6 in. */
43 #define VIRTIO_NET_F_GUEST_ECN 0x00200 /* Guest can handle TSO[6] w/ ECN in.*/
44 #define VIRTIO_NET_F_GUEST_UFO 0x00400 /* Guest can handle UFO in. */
45 #define VIRTIO_NET_F_HOST_TSO4 0x00800 /* Host can handle TSOv4 in. */
46 #define VIRTIO_NET_F_HOST_TSO6 0x01000 /* Host can handle TSOv6 in. */
47 #define VIRTIO_NET_F_HOST_ECN 0x02000 /* Host can handle TSO[6] w/ ECN in. */
48 #define VIRTIO_NET_F_HOST_UFO 0x04000 /* Host can handle UFO in. */
49 #define VIRTIO_NET_F_MRG_RXBUF 0x08000 /* Host can merge receive buffers. */
50 #define VIRTIO_NET_F_STATUS 0x10000 /* virtio_net_config.status available*/
51 #define VIRTIO_NET_F_CTRL_VQ 0x20000 /* Control channel available */
52 #define VIRTIO_NET_F_CTRL_RX 0x40000 /* Control channel RX mode support */
53 #define VIRTIO_NET_F_CTRL_VLAN 0x80000 /* Control channel VLAN filtering */
54 #define VIRTIO_NET_F_CTRL_RX_EXTRA 0x100000 /* Extra RX mode control support */
55 #define VIRTIO_NET_F_GUEST_ANNOUNCE 0x200000 /* Announce device on network */
56 #define VIRTIO_NET_F_MQ 0x400000 /* Device supports RFS */
57 #define VIRTIO_NET_F_CTRL_MAC_ADDR 0x800000 /* Set MAC address */
59 #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
61 struct virtio_net_config {
62 /* The config defining mac address (if VIRTIO_NET_F_MAC) */
63 uint8_t mac[ETHER_ADDR_LEN];
64 /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
66 /* Maximum number of each of transmit and receive queues;
67 * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ.
68 * Legal values are between 1 and 0x8000.
70 uint16_t max_virtqueue_pairs;
74 * This is the first element of the scatter-gather list. If you don't
75 * specify GSO or CSUM features, you can simply ignore the header.
77 struct virtio_net_hdr {
78 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start,csum_offset*/
79 #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
81 #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
82 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
83 #define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
84 #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
85 #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
87 uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */
88 uint16_t gso_size; /* Bytes to append to hdr_len per frame */
89 uint16_t csum_start; /* Position to start checksumming from */
90 uint16_t csum_offset; /* Offset after that to place checksum */
94 * This is the version of the header to use when the MRG_RXBUF
95 * feature has been negotiated.
97 struct virtio_net_hdr_mrg_rxbuf {
98 struct virtio_net_hdr hdr;
99 uint16_t num_buffers; /* Number of merged rx buffers */
103 * Control virtqueue data structures
105 * The control virtqueue expects a header in the first sg entry
106 * and an ack/status response in the last entry. Data for the
107 * command goes in between.
109 struct virtio_net_ctrl_hdr {
114 #define VIRTIO_NET_OK 0
115 #define VIRTIO_NET_ERR 1
118 * Control the RX mode, ie. promiscuous, allmulti, etc...
119 * All commands require an "out" sg entry containing a 1 byte
120 * state value, zero = disable, non-zero = enable. Commands
121 * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
122 * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
124 #define VIRTIO_NET_CTRL_RX 0
125 #define VIRTIO_NET_CTRL_RX_PROMISC 0
126 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1
127 #define VIRTIO_NET_CTRL_RX_ALLUNI 2
128 #define VIRTIO_NET_CTRL_RX_NOMULTI 3
129 #define VIRTIO_NET_CTRL_RX_NOUNI 4
130 #define VIRTIO_NET_CTRL_RX_NOBCAST 5
133 * Control the MAC filter table.
135 * The MAC filter table is managed by the hypervisor, the guest should
136 * assume the size is infinite. Filtering should be considered
137 * non-perfect, ie. based on hypervisor resources, the guest may
138 * receive packets from sources not specified in the filter list.
140 * In addition to the class/cmd header, the TABLE_SET command requires
141 * two out scatterlists. Each contains a 4 byte count of entries followed
142 * by a concatenated byte stream of ETHER_ADDR_LEN-byte MAC addresses. The
143 * first sg list contains unicast addresses, the second is for multicast.
144 * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
147 * The ADDR_SET command requires one out scatterlist; it contains a
148 * 6-byte MAC address. This functionality is present if the
149 * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
151 struct virtio_net_ctrl_mac {
153 uint8_t macs[][ETHER_ADDR_LEN];
156 #define VIRTIO_NET_CTRL_MAC 1
157 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0
158 #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1
161 * Control VLAN filtering
163 * The VLAN filter table is controlled via a simple ADD/DEL interface.
164 * VLAN IDs not added may be filtered by the hypervisor. Del is the
165 * opposite of add. Both commands expect an out entry containing a 2
166 * byte VLAN ID. VLAN filtering is available with the
167 * VIRTIO_NET_F_CTRL_VLAN feature bit.
169 #define VIRTIO_NET_CTRL_VLAN 2
170 #define VIRTIO_NET_CTRL_VLAN_ADD 0
171 #define VIRTIO_NET_CTRL_VLAN_DEL 1
174 * Control link announce acknowledgement
176 * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
177 * driver has received the notification; device would clear the
178 * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
181 #define VIRTIO_NET_CTRL_ANNOUNCE 3
182 #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
185 * Control Receive Flow Steering
187 * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET enables Receive Flow
188 * Steering, specifying the number of the transmit and receive queues
189 * that will be used. After the command is consumed and acked by the
190 * device, the device will not steer new packets on receive virtqueues
191 * other than specified nor read from transmit virtqueues other than
192 * specified. Accordingly, driver should not transmit new packets on
193 * virtqueues other than specified.
195 struct virtio_net_ctrl_mq {
196 uint16_t virtqueue_pairs;
199 #define VIRTIO_NET_CTRL_MQ 4
200 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0
201 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1
202 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000
205 * Use the checksum offset in the VirtIO header to set the
206 * correct CSUM_* flags.
209 virtio_net_rx_csum_by_offset(struct mbuf *m, uint16_t eth_type, int ip_start,
210 struct virtio_net_hdr *hdr)
212 #if defined(INET) || defined(INET6)
213 int offset = hdr->csum_start + hdr->csum_offset;
216 /* Only do a basic sanity check on the offset. */
220 if (__predict_false(offset < ip_start + sizeof(struct ip)))
226 if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
231 /* Here we should increment the rx_csum_bad_ethtype counter. */
236 * Use the offset to determine the appropriate CSUM_* flags. This is
237 * a bit dirty, but we can get by with it since the checksum offsets
238 * happen to be different. We assume the host does not do IPv4
239 * header checksum offloading.
241 switch (hdr->csum_offset) {
242 case offsetof(struct udphdr, uh_sum):
243 case offsetof(struct tcphdr, th_sum):
244 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
245 m->m_pkthdr.csum_data = 0xFFFF;
248 /* Here we should increment the rx_csum_bad_offset counter. */
256 virtio_net_rx_csum_by_parse(struct mbuf *m, uint16_t eth_type, int ip_start,
257 struct virtio_net_hdr *hdr)
265 if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
267 ip = (struct ip *)(m->m_data + ip_start);
269 offset = ip_start + (ip->ip_hl << 2);
275 if (__predict_false(m->m_len < ip_start +
276 sizeof(struct ip6_hdr)))
278 offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
279 if (__predict_false(offset < 0))
284 /* Here we should increment the rx_csum_bad_ethtype counter. */
290 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
292 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
293 m->m_pkthdr.csum_data = 0xFFFF;
296 if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
298 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
299 m->m_pkthdr.csum_data = 0xFFFF;
303 * For the remaining protocols, FreeBSD does not support
304 * checksum offloading, so the checksum will be recomputed.
307 if_printf(ifp, "cksum offload of unsupported "
308 "protocol eth_type=%#x proto=%d csum_start=%d "
309 "csum_offset=%d\n", __func__, eth_type, proto,
310 hdr->csum_start, hdr->csum_offset);
319 * Set the appropriate CSUM_* flags. Unfortunately, the information
320 * provided is not directly useful to us. The VirtIO header gives the
321 * offset of the checksum, which is all Linux needs, but this is not
322 * how FreeBSD does things. We are forced to peek inside the packet
325 * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
326 * could accept the offsets and let the stack figure it out.
329 virtio_net_rx_csum(struct mbuf *m, struct virtio_net_hdr *hdr)
331 struct ether_header *eh;
332 struct ether_vlan_header *evh;
336 if ((hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
337 VIRTIO_NET_HDR_F_DATA_VALID)) == 0) {
341 eh = mtod(m, struct ether_header *);
342 eth_type = ntohs(eh->ether_type);
343 if (eth_type == ETHERTYPE_VLAN) {
344 /* BMV: We should handle nested VLAN tags too. */
345 evh = mtod(m, struct ether_vlan_header *);
346 eth_type = ntohs(evh->evl_proto);
347 offset = sizeof(struct ether_vlan_header);
349 offset = sizeof(struct ether_header);
351 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
352 error = virtio_net_rx_csum_by_offset(m, eth_type, offset, hdr);
354 error = virtio_net_rx_csum_by_parse(m, eth_type, offset, hdr);
360 virtio_net_tx_offload_ctx(struct mbuf *m, int *etype, int *proto, int *start)
362 struct ether_vlan_header *evh;
365 evh = mtod(m, struct ether_vlan_header *);
366 if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
367 /* BMV: We should handle nested VLAN tags too. */
368 *etype = ntohs(evh->evl_proto);
369 offset = sizeof(struct ether_vlan_header);
371 *etype = ntohs(evh->evl_encap_proto);
372 offset = sizeof(struct ether_header);
378 struct ip *ip, iphdr;
379 if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
380 m_copydata(m, offset, sizeof(struct ip),
384 ip = (struct ip *)(m->m_data + offset);
386 *start = offset + (ip->ip_hl << 2);
393 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
394 /* Assert the network stack sent us a valid packet. */
395 KASSERT(*start > offset,
396 ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
397 *start, offset, *proto));
401 /* Here we should increment the tx_csum_bad_ethtype counter. */
409 virtio_net_tx_offload_tso(if_t ifp, struct mbuf *m, int eth_type,
410 int offset, bool allow_ecn, struct virtio_net_hdr *hdr)
412 static struct timeval lastecn;
414 struct tcphdr *tcp, tcphdr;
416 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
417 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
420 tcp = (struct tcphdr *)(m->m_data + offset);
422 hdr->hdr_len = offset + (tcp->th_off << 2);
423 hdr->gso_size = m->m_pkthdr.tso_segsz;
424 hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
425 VIRTIO_NET_HDR_GSO_TCPV6;
427 if (tcp->th_flags & TH_CWR) {
429 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
430 * ECN support is not on a per-interface basis, but globally via
431 * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
434 if (ppsratecheck(&lastecn, &curecn, 1))
436 "TSO with ECN not negotiated with host\n");
439 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
442 /* Here we should increment tx_tso counter. */
447 static inline struct mbuf *
448 virtio_net_tx_offload(if_t ifp, struct mbuf *m, bool allow_ecn,
449 struct virtio_net_hdr *hdr)
451 int flags, etype, csum_start, proto, error;
453 flags = m->m_pkthdr.csum_flags;
455 error = virtio_net_tx_offload_ctx(m, &etype, &proto, &csum_start);
459 if ((etype == ETHERTYPE_IP && (flags & (CSUM_TCP | CSUM_UDP))) ||
460 (etype == ETHERTYPE_IPV6 &&
461 (flags & (CSUM_TCP_IPV6 | CSUM_UDP_IPV6)))) {
463 * We could compare the IP protocol vs the CSUM_ flag too,
464 * but that really should not be necessary.
466 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
467 hdr->csum_start = csum_start;
468 hdr->csum_offset = m->m_pkthdr.csum_data;
469 /* Here we should increment the tx_csum counter. */
472 if (flags & CSUM_TSO) {
473 if (__predict_false(proto != IPPROTO_TCP)) {
474 /* Likely failed to correctly parse the mbuf.
475 * Here we should increment the tx_tso_not_tcp
480 KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
481 ("%s: mbuf %p TSO without checksum offload %#x",
482 __func__, m, flags));
484 error = virtio_net_tx_offload_tso(ifp, m, etype, csum_start,
497 #endif /* _VIRTIO_NET_H */