2 * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * Functions and macros to manipulate netmap structures and packets
31 * in userspace. See netmap(4) for more information.
33 * The address of the struct netmap_if, say nifp, is computed from the
34 * value returned from ioctl(.., NIOCREG, ...) and the mmap region:
35 * ioctl(fd, NIOCREG, &req);
36 * mem = mmap(0, ... );
37 * nifp = NETMAP_IF(mem, req.nr_nifp);
38 * (so simple, we could just do it manually)
41 * struct netmap_ring *NETMAP_TXRING(nifp, index)
42 * struct netmap_ring *NETMAP_RXRING(nifp, index)
 * we can access ring->head, ring->cur, ring->tail, ring->flags
45 * ring->slot[i] gives us the i-th slot (we can access
46 * directly len, flags, buf_idx)
48 * char *buf = NETMAP_BUF(ring, x) returns a pointer to
49 * the buffer numbered x
51 * All ring indexes (head, cur, tail) should always move forward.
52 * To compute the next index in a circular ring you can use
53 * i = nm_ring_next(ring, i);
 * To ease porting apps from pcap to netmap we supply a few functions
56 * that can be called to open, close, read and write on netmap in a way
57 * similar to libpcap. Note that the read/write function depend on
58 * an ioctl()/select()/poll() being issued to refill rings or push
61 * In order to use these, include #define NETMAP_WITH_LIBS
62 * in the source file that invokes these functions.
65 #ifndef _NET_NETMAP_USER_H_
66 #define _NET_NETMAP_USER_H_
69 #include <net/if.h> /* IFNAMSIZ */
/* likely()/unlikely() annotate branch probability for the compiler;
 * guard against a prior definition (e.g. from kernel headers).
 */
#ifndef likely
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)
#endif /* likely and unlikely */
76 #include <net/netmap.h>
/* apply an offset (in bytes) to a pointer and cast to the desired type */
#define _NETMAP_OFFSET(type, ptr, offset) \
	((type)(void *)((char *)(ptr) + (offset)))

/* locate the netmap_if inside the mmap()ed region */
#define NETMAP_IF(_base, _ofs)	_NETMAP_OFFSET(struct netmap_if *, _base, _ofs)

/* tx rings come first in ring_ofs[]... */
#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
	nifp, (nifp)->ring_ofs[index] )

/* ...rx rings follow, after ni_tx_rings hw rings plus the host ring */
#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
	nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )

/* address of buffer 'index' relative to the ring */
#define NETMAP_BUF(ring, index)				\
	((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))

/* inverse of NETMAP_BUF: buffer index from a buffer address */
#define NETMAP_BUF_IDX(ring, buf)			\
	( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
		(ring)->nr_buf_size )
98 static inline uint32_t
99 nm_ring_next(struct netmap_ring *r, uint32_t i)
101 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1);
106 * Return 1 if we have pending transmissions in the tx ring.
107 * When everything is complete ring->cur = ring->tail + 1 (modulo ring size)
110 nm_tx_pending(struct netmap_ring *r)
112 return nm_ring_next(r, r->tail) != r->cur;
116 static inline uint32_t
117 nm_ring_space(struct netmap_ring *ring)
119 int ret = ring->tail - ring->cur;
121 ret += ring->num_slots;
126 #ifdef NETMAP_WITH_LIBS
/*
 * Support for simple I/O libraries.
 * Include other system headers required for compiling this.
 */
132 #ifndef HAVE_NETMAP_WITH_LIBS
133 #define HAVE_NETMAP_WITH_LIBS
135 #include <sys/time.h>
136 #include <sys/mman.h>
137 #include <string.h> /* memset */
138 #include <sys/ioctl.h>
139 #include <sys/errno.h> /* EINVAL */
140 #include <fcntl.h> /* O_RDWR */
141 #include <unistd.h> /* close() */
145 struct nm_hdr_t { /* same as pcap_pkthdr */
151 struct nm_stat_t { // pcap_stat
160 #define NM_ERRBUF_SIZE 512
163 struct nm_desc_t *self;
167 struct netmap_if *nifp;
168 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring;
169 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring;
170 struct nmreq req; /* also contains the nr_name = ifname */
173 struct netmap_ring *tx, *rx; /* shortcuts to base hw/sw rings */
175 /* parameters from pcap_open_live */
181 /* save flags so we can restore them on close */
187 char msg[NM_ERRBUF_SIZE];
/*
 * when the descriptor is open correctly, d->self == d
 * Eventually we should also use some magic number.
 */
#define P2NMD(p)		((struct nm_desc_t *)(p))
#define IS_NETMAP_DESC(d)	(P2NMD(d)->self == P2NMD(d))
#define NETMAP_FD(d)		(P2NMD(d)->fd)
/*
 * this is a slightly optimized copy routine which rounds
 * to multiple of 64 bytes and is often faster than dealing
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *src = (const uint64_t *)_src;
	uint64_t *dst = (uint64_t *)_dst;

	/* for large packets the bulk memcpy wins; the unrolled loop
	 * below only pays off for short, cache-resident packets */
	if (unlikely(l >= 1024)) {
		memcpy(dst, src, l);
		return;
	}
	for (; likely(l > 0); l-=64) {
		/* 8 x 8 bytes = one 64-byte chunk per iteration */
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}
231 * The callback, invoked on each received packet. Same as libpcap
233 typedef void (*nm_cb_t)(u_char *, const struct nm_hdr_t *, const u_char *d);
236 *--- the pcap-like API ---
238 * nm_open() opens a file descriptor, binds to a port and maps memory.
240 * ifname (netmap:foo or vale:foo) is the port name
241 * flags can be NETMAP_SW_RING or NETMAP_HW_RING etc.
242 * ring_no only used if NETMAP_HW_RING is specified, is interpreted
243 * as a string or integer indicating the ring number
244 * ring_flags is stored in all ring flags (e.g. for transparent mode)
245 * to open. If successful, t opens the fd and maps the memory.
248 static struct nm_desc_t *nm_open(const char *ifname,
249 const char *ring_no, int flags, int ring_flags);
252 * nm_close() closes and restores the port to its previous state
255 static int nm_close(struct nm_desc_t *);
258 * nm_inject() is the same as pcap_inject()
259 * nm_dispatch() is the same as pcap_dispatch()
260 * nm_nextpkt() is the same as pcap_next()
263 static int nm_inject(struct nm_desc_t *, const void *, size_t);
264 static int nm_dispatch(struct nm_desc_t *, int, nm_cb_t, u_char *);
265 static u_char *nm_nextpkt(struct nm_desc_t *, struct nm_hdr_t *);
269 * Try to open, return descriptor if successful, NULL otherwise.
270 * An invalid netmap name will return errno = 0;
272 static struct nm_desc_t *
273 nm_open(const char *ifname, const char *ring_name, int flags, int ring_flags)
279 if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
280 errno = 0; /* name not recognised */
283 if (ifname[0] == 'n')
285 port = strchr(ifname, '-');
287 namelen = strlen(ifname);
289 namelen = port - ifname;
290 flags &= ~(NETMAP_SW_RING | NETMAP_HW_RING | NETMAP_RING_MASK);
292 flags |= NETMAP_SW_RING;
296 if (namelen >= sizeof(d->req.nr_name))
297 namelen = sizeof(d->req.nr_name) - 1;
299 d = (struct nm_desc_t *)calloc(1, sizeof(*d));
304 d->self = d; /* set this early so nm_close() works */
305 d->fd = open("/dev/netmap", O_RDWR);
309 if (flags & NETMAP_SW_RING) {
310 d->req.nr_ringid = NETMAP_SW_RING;
313 if (flags & NETMAP_HW_RING) /* interpret ring as int */
314 r = (uintptr_t)ring_name;
315 else /* interpret ring as numeric string */
316 r = ring_name ? atoi(ring_name) : ~0;
317 r = (r < NETMAP_RING_MASK) ? (r | NETMAP_HW_RING) : 0;
318 d->req.nr_ringid = r; /* set the ring */
320 d->req.nr_ringid |= (flags & ~NETMAP_RING_MASK);
321 d->req.nr_version = NETMAP_API;
322 memcpy(d->req.nr_name, ifname, namelen);
323 d->req.nr_name[namelen] = '\0';
324 if (ioctl(d->fd, NIOCREGIF, &d->req)) {
328 d->memsize = d->req.nr_memsize;
329 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
333 d->nifp = NETMAP_IF(d->mem, d->req.nr_offset);
334 if (d->req.nr_ringid & NETMAP_SW_RING) {
335 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
336 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
337 } else if (d->req.nr_ringid & NETMAP_HW_RING) {
338 /* XXX check validity */
339 d->first_tx_ring = d->last_tx_ring =
340 d->first_rx_ring = d->last_rx_ring =
341 d->req.nr_ringid & NETMAP_RING_MASK;
343 d->first_tx_ring = d->last_rx_ring = 0;
344 d->last_tx_ring = d->req.nr_tx_rings - 1;
345 d->last_rx_ring = d->req.nr_rx_rings - 1;
347 d->tx = NETMAP_TXRING(d->nifp, 0);
348 d->rx = NETMAP_RXRING(d->nifp, 0);
349 d->cur_tx_ring = d->first_tx_ring;
350 d->cur_rx_ring = d->first_rx_ring;
351 for (n = d->first_tx_ring; n <= d->last_tx_ring; n++) {
352 d->tx[n].flags |= ring_flags;
354 for (n = d->first_rx_ring; n <= d->last_rx_ring; n++) {
355 d->rx[n].flags |= ring_flags;
367 nm_close(struct nm_desc_t *d)
370 * ugly trick to avoid unused warnings
372 static void *__xxzt[] __attribute__ ((unused)) =
373 { (void *)nm_open, (void *)nm_inject,
374 (void *)nm_dispatch, (void *)nm_nextpkt } ;
376 if (d == NULL || d->self != d)
379 munmap(d->mem, d->memsize);
382 bzero(d, sizeof(*d));
389 * Same prototype as pcap_inject(), only need to cast.
392 nm_inject(struct nm_desc_t *d, const void *buf, size_t size)
394 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1;
396 for (c = 0; c < n ; c++) {
397 /* compute current ring to use */
398 struct netmap_ring *ring;
400 uint32_t ri = d->cur_tx_ring + c;
402 if (ri > d->last_tx_ring)
403 ri = d->first_tx_ring;
404 ring = NETMAP_TXRING(d->nifp, ri);
405 if (nm_ring_empty(ring)) {
409 idx = ring->slot[i].buf_idx;
410 ring->slot[i].len = size;
411 pkt_copy(buf, NETMAP_BUF(ring, idx), size);
413 ring->head = ring->cur = nm_ring_next(ring, i);
421 * Same prototype as pcap_dispatch(), only need to cast.
424 nm_dispatch(struct nm_desc_t *d, int cnt, nm_cb_t cb, u_char *arg)
426 int n = d->last_rx_ring - d->first_rx_ring + 1;
427 int c, got = 0, ri = d->cur_rx_ring;
431 /* cnt == -1 means infinite, but rings have a finite amount
432 * of buffers and the int is large enough that we never wrap,
433 * so we can omit checking for -1
435 for (c=0; c < n && cnt != got; c++) {
436 /* compute current ring to use */
437 struct netmap_ring *ring;
439 ri = d->cur_rx_ring + c;
440 if (ri > d->last_rx_ring)
441 ri = d->first_rx_ring;
442 ring = NETMAP_RXRING(d->nifp, ri);
443 for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
445 u_int idx = ring->slot[i].buf_idx;
446 u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
448 // __builtin_prefetch(buf);
449 d->hdr.len = d->hdr.caplen = ring->slot[i].len;
450 d->hdr.ts = ring->ts;
451 cb(arg, &d->hdr, buf);
452 ring->head = ring->cur = nm_ring_next(ring, i);
460 nm_nextpkt(struct nm_desc_t *d, struct nm_hdr_t *hdr)
462 int ri = d->cur_rx_ring;
465 /* compute current ring to use */
466 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);
467 if (!nm_ring_empty(ring)) {
469 u_int idx = ring->slot[i].buf_idx;
470 u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
472 // __builtin_prefetch(buf);
474 hdr->len = hdr->caplen = ring->slot[i].len;
475 ring->cur = nm_ring_next(ring, i);
476 /* we could postpone advancing head if we want
477 * to hold the buffer. This can be supported in
480 ring->head = ring->cur;
485 if (ri > d->last_rx_ring)
486 ri = d->first_rx_ring;
487 } while (ri != d->cur_rx_ring);
488 return NULL; /* nothing found */
491 #endif /* !HAVE_NETMAP_WITH_LIBS */
493 #endif /* NETMAP_WITH_LIBS */
495 #endif /* _NET_NETMAP_USER_H_ */