2 * Copyright (C) 2013-2016 Luigi Rizzo
3 * Copyright (C) 2013-2016 Giuseppe Lettieri
4 * Copyright (C) 2013-2016 Vincenzo Maffione
5 * Copyright (C) 2015 Stefano Garzarella
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #define NETMAP_VIRT_CSB_SIZE 4096 /* CSB size in bytes; it bounds how many struct ptnet_ring entries the CSB rings[] array holds */
37 /* ptnetmap features (the values are distinct single bits; presumably OR-ed into a feature mask -- confirm) */
38 #define PTNETMAP_F_BASE 1
39 #define PTNETMAP_F_FULL 2 /* not used */
40 #define PTNETMAP_F_VNET_HDR 4
43 * ptnetmap_memdev: device used to expose memory into the guest VM
45 * These macros are used in the hypervisor frontend (QEMU, bhyve) and in the
46 * guest device driver.
49 /* PCI identifiers and PCI BARs for the ptnetmap memdev
50 * and ptnetmap network interface. */
51 #define PTNETMAP_MEMDEV_NAME "ptnetmap-memdev"
52 #define PTNETMAP_PCI_VENDOR_ID 0x3333 /* TODO change vendor_id */
53 #define PTNETMAP_PCI_DEVICE_ID 0x0001 /* memory device */
54 #define PTNETMAP_PCI_NETIF_ID 0x0002 /* ptnet network interface */
55 #define PTNETMAP_IO_PCI_BAR 0 /* BAR index; presumably the BAR with the I/O registers -- confirm */
56 #define PTNETMAP_MEM_PCI_BAR 1 /* BAR index; presumably the BAR exposing the shared memory -- confirm */
57 #define PTNETMAP_MSIX_PCI_BAR 2 /* BAR index; presumably the BAR used for MSI-X -- confirm */
59 /* Registers for the ptnetmap memdev (the values below look like byte offsets into the I/O BAR -- confirm) */
61 #define PTNETMAP_IO_PCI_FEATURES 0 /* XXX should be removed */
63 #define PTNETMAP_IO_PCI_MEMSIZE 4 /* size of the netmap memory shared
64 * between guest and host */
66 #define PTNETMAP_IO_PCI_HOSTID 8 /* memory allocator ID in netmap host */
67 #define PTNETMAP_IO_SIZE 10 /* presumably the total size of the register area (which would make HOSTID 2 bytes wide) -- confirm */
70 * ptnetmap configuration
72 * The hypervisor (QEMU or bhyve) sends this struct to the host netmap
73 * module through an ioctl() command when it wants to start the ptnetmap
77 #define PTNETMAP_CFG_FEAT_CSB 0x0001 /* PTNETMAP_CFG_FEAT_*: distinct single-bit flags */
78 #define PTNETMAP_CFG_FEAT_EVENTFD 0x0002
79 #define PTNETMAP_CFG_FEAT_IOCTL 0x0004
81 void *ptrings; /* ptrings inside CSB */
82 uint32_t num_rings; /* number of entries */
83 struct ptnet_ring_cfg entries[0]; /* per-ptring configuration; zero-length trailing array, so the entries are laid out right after the fixed fields (presumably num_rings of them -- confirm) */
87 * Functions used to store the ptnetmap_cfg pointer into (and load it from) the nmreq.
88 * The user-space application writes the pointer of ptnetmap_cfg
89 * (user-space buffer) starting from nr_arg1 field, so that the kernel
90 * can read it with copyin (copy_from_user).
93 ptnetmap_write_cfg(struct nmreq *nmr, struct ptnetmap_cfg *cfg)
95 uintptr_t *nmr_ptncfg = (uintptr_t *)&nmr->nr_arg1; /* view the nmreq storage that starts at nr_arg1 as a pointer-sized integer */
96 *nmr_ptncfg = (uintptr_t)cfg; /* store the address of cfg there; NOTE(review): assumes sizeof(uintptr_t) bytes are available (and suitably aligned) from nr_arg1 onwards -- confirm against struct nmreq */
99 /* ptnetmap control commands (presumably the values written to the PTNET_IO_PTCTL register -- confirm) */
100 #define PTNETMAP_PTCTL_CONFIG 1
101 #define PTNETMAP_PTCTL_FINALIZE 2
102 #define PTNETMAP_PTCTL_IFNEW 3
103 #define PTNETMAP_PTCTL_IFDELETE 4
104 #define PTNETMAP_PTCTL_RINGSCREATE 5
105 #define PTNETMAP_PTCTL_RINGSDELETE 6
106 #define PTNETMAP_PTCTL_DEREF 7
107 #define PTNETMAP_PTCTL_TXSYNC 8
108 #define PTNETMAP_PTCTL_RXSYNC 9
109 #define PTNETMAP_PTCTL_REGIF 10
110 #define PTNETMAP_PTCTL_UNREGIF 11
111 #define PTNETMAP_PTCTL_HOSTMEMID 12
114 /* I/O registers for the ptnet device (the values below step by 4; presumably byte offsets of 32-bit registers -- confirm). */
115 #define PTNET_IO_PTFEAT 0
116 #define PTNET_IO_PTCTL 4
117 #define PTNET_IO_PTSTS 8
119 #define PTNET_IO_MAC_LO 16
120 #define PTNET_IO_MAC_HI 20
121 #define PTNET_IO_CSBBAH 24 /* presumably the high half of the CSB base address -- confirm */
122 #define PTNET_IO_CSBBAL 28 /* presumably the low half of the CSB base address -- confirm */
123 #define PTNET_IO_NIFP_OFS 32
124 #define PTNET_IO_NUM_TX_RINGS 36
125 #define PTNET_IO_NUM_RX_RINGS 40
126 #define PTNET_IO_NUM_TX_SLOTS 44
127 #define PTNET_IO_NUM_RX_SLOTS 48
128 #define PTNET_IO_VNET_HDR_LEN 52
129 #define PTNET_IO_END 56 /* first value past PTNET_IO_VNET_HDR_LEN (52 + 4) */
130 #define PTNET_IO_KICK_BASE 128 /* presumably where the per-ring kick registers start -- confirm */
131 #define PTNET_IO_MASK 0xff
133 /* If defined, CSB is allocated by the guest, not by the host. */
134 #define PTNET_CSB_ALLOC
136 /* ptnetmap ring fields shared between guest and host.
 * Annotation legend, as the per-field comments below suggest (confirm):
 * GW/GR = the guest writes/reads the field, HW/HR = the host writes/reads it. */
138 /* XXX revise the layout to minimize cache bounces. */
139 uint32_t head; /* GW+ HR+ the head of the guest netmap_ring */
140 uint32_t cur; /* GW+ HR+ the cur of the guest netmap_ring */
141 uint32_t guest_need_kick; /* GW+ HR+ host-->guest notification enable */
143 uint32_t hwcur; /* GR+ HW+ the hwcur of the host netmap_kring */
144 uint32_t hwtail; /* GR+ HW+ the hwtail of the host netmap_kring */
145 uint32_t host_need_kick; /* GR+ HW+ guest-->host notification enable */
146 uint32_t sync_flags; /* GW+ HR+ the flags of the guest [tx|rx]sync() */
149 /* CSB for the ptnet device: holds as many struct ptnet_ring entries as fit in NETMAP_VIRT_CSB_SIZE bytes. */
151 struct ptnet_ring rings[NETMAP_VIRT_CSB_SIZE/sizeof(struct ptnet_ring)];
154 #if defined (WITH_PTNETMAP_HOST) || defined (WITH_PTNETMAP_GUEST)
156 /* Return l_elem - r_elem with wraparound: the difference is computed in
 * signed 64-bit arithmetic and, when it is negative, num_slots is added
 * to it before it is returned as a uint32_t. */
157 static inline uint32_t
158 ptn_sub(uint32_t l_elem, uint32_t r_elem, uint32_t num_slots)
162 res = (int64_t)(l_elem) - r_elem;
164 return (res < 0) ? res + num_slots : res;
166 #endif /* WITH_PTNETMAP_HOST || WITH_PTNETMAP_GUEST */
168 #ifdef WITH_PTNETMAP_HOST
170 * ptnetmap kernel thread routines
/*
 * Functions to read and write CSB fields in the host, built on
 * get_user()/put_user().
 *
 * csb:   pointer expression designating the CSB ring in user memory
 * field: name of the member to access
 * r:     lvalue that receives the value read (CSB_READ)
 * v:     value to store (CSB_WRITE)
 *
 * Each macro evaluates to the result of the underlying get_user() or
 * put_user() call. The csb argument is parenthesized so that any pointer
 * expression (e.g. "base + i") can be passed, not just a plain identifier.
 */
#define CSB_READ(csb, field, r) (get_user(r, &(csb)->field))
#define CSB_WRITE(csb, field, v) (put_user(v, &(csb)->field))
/*
 * CSB field accessors built on fuword32()/suword32().
 *
 * CSB_READ assigns the value returned by fuword32() to the lvalue r;
 * CSB_WRITE evaluates to the result of suword32(). The csb and r
 * arguments are parenthesized so that arbitrary expressions (e.g.
 * "base + i") can be passed safely.
 */
#define CSB_READ(csb, field, r) ((r) = fuword32(&(csb)->field))
#define CSB_WRITE(csb, field, v) (suword32(&(csb)->field, v))
183 * HOST read/write kring pointers from/in CSB
186 /* Host: read the kring pointers written by the guest (head, cur, sync_flags) from the CSB into g_ring->head, g_ring->cur and g_ring->flags */
188 ptnetmap_host_read_kring_csb(struct ptnet_ring __user *ptr,
189 struct netmap_ring *g_ring,
192 uint32_t old_head = g_ring->head, old_cur = g_ring->cur;
193 uint32_t d, inc_h, inc_c;
195 //mb(); /* Force pending memory operations to complete before reading the CSB */
198 * We must first read head and then cur with a barrier in the
199 * middle, because cur can exceed head, but not vice versa.
200 * The guest must first write cur and then head with a barrier.
206 * STORE(cur) LOAD(head)
207 * mb() ----------- mb()
208 * STORE(head) LOAD(cur)
210 * This approach ensures that every head that we read is
211 * associated with the correct cur. In this way head can not exceed cur.
213 CSB_READ(ptr, head, g_ring->head);
215 CSB_READ(ptr, cur, g_ring->cur);
216 CSB_READ(ptr, sync_flags, g_ring->flags);
219 * Even with the previous barrier, it is still possible that we read an
220 * updated cur and an old head.
221 * To detect this situation, we can check if the new cur overtakes
222 * the (apparently) new head.
224 d = ptn_sub(old_cur, old_head, num_slots); /* previous distance */
225 inc_c = ptn_sub(g_ring->cur, old_cur, num_slots); /* increase of cur */
226 inc_h = ptn_sub(g_ring->head, old_head, num_slots); /* increase of head */
228 if (unlikely(inc_c > num_slots - d + inc_h)) { /* cur overtakes head */
229 ND(1,"ERROR cur overtakes head - old_cur: %u cur: %u old_head: %u head: %u",
230 old_cur, g_ring->cur, old_head, g_ring->head);
231 g_ring->cur = nm_prev(g_ring->head, num_slots - 1); /* discard the inconsistent cur and derive it from the head just read (presumably the slot preceding head -- confirm nm_prev() semantics) */
236 /* Host: write the kring pointers (hwcur, hwtail) into the CSB */
238 ptnetmap_host_write_kring_csb(struct ptnet_ring __user *ptr, uint32_t hwcur,
241 /* We must write hwtail before hwcur (see below). */
242 CSB_WRITE(ptr, hwtail, hwtail);
244 CSB_WRITE(ptr, hwcur, hwcur);
246 //mb(); /* Force pending memory operations to complete before sending the notification */
249 #endif /* WITH_PTNETMAP_HOST */
251 #ifdef WITH_PTNETMAP_GUEST
253 * GUEST read/write kring pointers from/in CSB.
254 * To be used in the device driver.
257 /* Guest: write the kring pointers (cur, head) into the CSB */
259 ptnetmap_guest_write_kring_csb(struct ptnet_ring *ptr, uint32_t cur,
262 /* We must write cur before head for synchronization reasons (see above) */
267 //mb(); /* Force pending memory operations to complete before sending the notification */
270 /* Guest: read the kring pointers (hwcur, hwtail) from the CSB into kring->nr_hwcur and kring->nr_hwtail */
272 ptnetmap_guest_read_kring_csb(struct ptnet_ring *ptr, struct netmap_kring *kring)
274 uint32_t old_hwcur = kring->nr_hwcur, old_hwtail = kring->nr_hwtail;
275 uint32_t num_slots = kring->nkr_num_slots;
276 uint32_t d, inc_hc, inc_ht;
278 //mb(); /* Force pending memory operations to complete before reading the CSB */
281 * We must first read hwcur and then hwtail with a barrier in the
282 * middle, because hwtail can exceed hwcur, but not vice versa.
283 * The host must first write hwtail and then hwcur with a barrier.
289 * STORE(hwtail) LOAD(hwcur)
290 * mb() --------- mb()
291 * STORE(hwcur) LOAD(hwtail)
293 * This approach ensures that every hwcur that the guest reads is
294 * associated with the correct hwtail. In this way hwcur can not exceed
297 kring->nr_hwcur = ptr->hwcur;
299 kring->nr_hwtail = ptr->hwtail;
302 * Even with the previous barrier, it is still possible that we read an
303 * updated hwtail and an old hwcur.
304 * To detect this situation, we can check if the new hwtail overtakes
305 * the (apparently) new hwcur.
307 d = ptn_sub(old_hwtail, old_hwcur, num_slots); /* previous distance */
308 inc_ht = ptn_sub(kring->nr_hwtail, old_hwtail, num_slots); /* increase of hwtail */
309 inc_hc = ptn_sub(kring->nr_hwcur, old_hwcur, num_slots); /* increase of hwcur */
311 if (unlikely(inc_ht > num_slots - d + inc_hc)) { /* hwtail overtakes hwcur */
312 ND(1, "ERROR hwtail overtakes hwcur - old_hwtail: %u hwtail: %u old_hwcur: %u hwcur: %u",
313 old_hwtail, kring->nr_hwtail, old_hwcur, kring->nr_hwcur);
314 kring->nr_hwtail = nm_prev(kring->nr_hwcur, num_slots - 1); /* discard the inconsistent hwtail and derive it from the hwcur just read (presumably the slot preceding hwcur -- confirm nm_prev() semantics) */
315 //kring->nr_hwtail = kring->nr_hwcur;
319 /* ptnetmap_memdev routines used to talk with the ptnetmap_memdev device driver */
320 struct ptnetmap_memdev; /* forward declaration: only pointers to it are used here */
321 int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **); /* presumably reports the mapping through the two pointer arguments (physical address, mapped address) and returns an error code -- confirm */
322 void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *); /* presumably undoes nm_os_pt_memdev_iomap() -- confirm */
323 #endif /* WITH_PTNETMAP_GUEST */
325 #endif /* NETMAP_VIRT_H */