sys/dev/netmap/netmap_generic.c

   1 /*
   2  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions
   6  * are met:
   7  *   1. Redistributions of source code must retain the above copyright
   8  *      notice, this list of conditions and the following disclaimer.
   9  *   2. Redistributions in binary form must reproduce the above copyright
  10  *      notice, this list of conditions and the following disclaimer in the
  11  *      documentation and/or other materials provided with the distribution.
  12  *
  13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  23  * SUCH DAMAGE.
  24  */
  25
  26 /*
  27  * This module implements netmap support on top of standard,
  28  * unmodified device drivers.
  29  *
  30  * A NIOCREGIF request is handled here if the device does not
  31  * have native support. TX and RX rings are emulated as follows:
  32  *
  33  * NIOCREGIF
  34  *      We preallocate a block of TX mbufs (roughly as many as
  35  *      tx descriptors; the number is not critical) to speed up
  36  *      operation during transmissions. The refcount on most of
  37  *      these buffers is artificially bumped up so we can recycle
  38  *      them more easily. Also, the destructor is intercepted
  39  *      so we use it as an interrupt notification to wake up
  40  *      processes blocked on a poll().
  41  *
  42  *      For each receive ring we allocate one "struct mbq"
  43  *      (an mbuf tailq plus a spinlock). We intercept packets
  44  *      (through if_input)
  45  *      on the receive path and put them in the mbq from which
  46  *      netmap receive routines can grab them.
  47  *
  48  * TX:
  49  *      in the generic_txsync() routine, netmap buffers are copied
  50  *      (or linked, in a future) to the preallocated mbufs
  51  *      and pushed to the transmit queue. Some of these mbufs
  52  *      (those with NS_REPORT, or otherwise every half ring)
  53  *      have the refcount=1, others have refcount=2.
  54  *      When the destructor is invoked, we take that as
  55  *      a notification that all mbufs up to that one in
  56  *      the specific ring have been completed, and generate
  57  *      the equivalent of a transmit interrupt.
  58  *
  59  * RX:
  60  *
  61  */
  62
  63 #ifdef __FreeBSD__
  64
  65 #include <sys/cdefs.h> /* prerequisite */
  66 __FBSDID("$FreeBSD$");
  67
  68 #include <sys/types.h>
  69 #include <sys/errno.h>
  70 #include <sys/malloc.h>
  71 #include <sys/lock.h>   /* PROT_EXEC */
  72 #include <sys/rwlock.h>
  73 #include <sys/socket.h> /* sockaddrs */
  74 #include <sys/selinfo.h>
  75 #include <net/if.h>
  76 #include <net/if_var.h>
  77 #include <machine/bus.h>        /* bus_dmamap_* in netmap_kern.h */
  78
  79 // XXX temporary - D() defined here
  80 #include <net/netmap.h>
  81 #include <dev/netmap/netmap_kern.h>
  82 #include <dev/netmap/netmap_mem2.h>
  83
  84 #define rtnl_lock() D("rtnl_lock called");
  85 #define rtnl_unlock() D("rtnl_unlock called");
  86 #define MBUF_TXQ(m)     ((m)->m_pkthdr.flowid)
  87 #define smp_mb()
  88
  89 /*
  90  * mbuf wrappers
  91  */
  92
  93 /*
  94  * we allocate an EXT_PACKET
  95  */
  96 #define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)
  97
  98 /* mbuf destructor, also need to change the type to EXT_EXTREF,
  99  * add an M_NOFREE flag, and then clear the flag and
 100  * chain into uma_zfree(zone_pack, mf)
 101  * (or reinstall the buffer ?)
 102  */
 103 #define SET_MBUF_DESTRUCTOR(m, fn)      do {            \
 104         (m)->m_ext.ext_free = (void *)fn;       \
 105         (m)->m_ext.ext_type = EXT_EXTREF;       \
 106 } while (0)
 107
 108
 109 #define GET_MBUF_REFCNT(m)      ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)
 110
 111
 112
 113 #else /* linux */
 114
 115 #include "bsd_glue.h"
 116
 117 #include <linux/rtnetlink.h>    /* rtnl_[un]lock() */
 118 #include <linux/ethtool.h>      /* struct ethtool_ops, get_ringparam */
 119 #include <linux/hrtimer.h>
 120
 121 //#define RATE  /* Enables communication statistics. */
 122
 123 //#define REG_RESET
 124
 125 #endif /* linux */
 126
 127
 128 /* Common headers. */
 129 #include <net/netmap.h>
 130 #include <dev/netmap/netmap_kern.h>
 131 #include <dev/netmap/netmap_mem2.h>
 132
 133
 134
 135 /* ======================== usage stats =========================== */
 136
 137 #ifdef RATE
 138 #define IFRATE(x) x
 139 struct rate_stats {
 140         unsigned long txpkt;
 141         unsigned long txsync;
 142         unsigned long txirq;
 143         unsigned long rxpkt;
 144         unsigned long rxirq;
 145         unsigned long rxsync;
 146 };
 147
 148 struct rate_context {
 149         unsigned refcount;
 150         struct timer_list timer;
 151         struct rate_stats new;
 152         struct rate_stats old;
 153 };
 154
 155 #define RATE_PRINTK(_NAME_) \
 156         printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
 157 #define RATE_PERIOD  2
 158 static void rate_callback(unsigned long arg)
 159 {
 160         struct rate_context * ctx = (struct rate_context *)arg;
 161         struct rate_stats cur = ctx->new;
 162         int r;
 163
 164         RATE_PRINTK(txpkt);
 165         RATE_PRINTK(txsync);
 166         RATE_PRINTK(txirq);
 167         RATE_PRINTK(rxpkt);
 168         RATE_PRINTK(rxsync);
 169         RATE_PRINTK(rxirq);
 170         printk("\n");
 171
 172         ctx->old = cur;
 173         r = mod_timer(&ctx->timer, jiffies +
 174                         msecs_to_jiffies(RATE_PERIOD * 1000));
 175         if (unlikely(r))
 176                 D("[v1000] Error: mod_timer()");
 177 }
 178
 179 static struct rate_context rate_ctx;
 180
 181 #else /* !RATE */
 182 #define IFRATE(x)
 183 #endif /* !RATE */
 184
 185
 186 /* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
 187 #define GENERIC_BUF_SIZE        netmap_buf_size    /* Size of the mbufs in the Tx pool. */
 188
 189 /*
 190  * Wrapper used by the generic adapter layer to notify
 191  * the poller threads. Differently from netmap_rx_irq(), we check
 192  * only IFCAP_NETMAP instead of NAF_NATIVE_ON to enable the irq.
 193  */
 194 static void
 195 netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
 196 {
 197         if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
 198                 return;
 199
 200         netmap_common_irq(ifp, q, work_done);
 201 }
 202
 203
 204 /* Enable/disable netmap mode for a generic network interface. */
 205 static int
 206 generic_netmap_register(struct netmap_adapter *na, int enable)
 207 {
 208         struct ifnet *ifp = na->ifp;
 209         struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
 210         struct mbuf *m;
 211         int error;
 212         int i, r;
 213
 214         if (!na)
 215                 return EINVAL;
 216
 217 #ifdef REG_RESET
 218         error = ifp->netdev_ops->ndo_stop(ifp);
 219         if (error) {
 220                 return error;
 221         }
 222 #endif /* REG_RESET */
 223
 224         if (enable) { /* Enable netmap mode. */
 225                 /* Initialize the rx queue, as generic_rx_handler() can
 226                  * be called as soon as netmap_catch_rx() returns.
 227                  */
 228                 for (r=0; r<na->num_rx_rings; r++) {
 229                         mbq_safe_init(&na->rx_rings[r].rx_queue);
 230                 }
 231
 232                 /* Init the mitigation timer. */
 233                 netmap_mitigation_init(gna);
 234
 235                 /*
 236                  * Preallocate packet buffers for the tx rings.
 237                  */
 238                 for (r=0; r<na->num_tx_rings; r++)
 239                         na->tx_rings[r].tx_pool = NULL;
 240                 for (r=0; r<na->num_tx_rings; r++) {
 241                         na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
 242                                         M_DEVBUF, M_NOWAIT | M_ZERO);
 243                         if (!na->tx_rings[r].tx_pool) {
 244                                 D("tx_pool allocation failed");
 245                                 error = ENOMEM;
 246                                 goto free_tx_pools;
 247                         }
 248                         for (i=0; i<na->num_tx_desc; i++)
 249                                 na->tx_rings[r].tx_pool[i] = NULL;
 250                         for (i=0; i<na->num_tx_desc; i++) {
 251                                 m = netmap_get_mbuf(GENERIC_BUF_SIZE);
 252                                 if (!m) {
 253                                         D("tx_pool[%d] allocation failed", i);
 254                                         error = ENOMEM;
 255                                         goto free_tx_pools;
 256                                 }
 257                                 na->tx_rings[r].tx_pool[i] = m;
 258                         }
 259                 }
 260                 rtnl_lock();
 261                 /* Prepare to intercept incoming traffic. */
 262                 error = netmap_catch_rx(na, 1);
 263                 if (error) {
 264                         D("netdev_rx_handler_register() failed (%d)", error);
 265                         goto register_handler;
 266                 }
 267                 ifp->if_capenable |= IFCAP_NETMAP;
 268
 269                 /* Make netmap control the packet steering. */
 270                 netmap_catch_tx(gna, 1);
 271
 272                 rtnl_unlock();
 273
 274 #ifdef RATE
 275                 if (rate_ctx.refcount == 0) {
 276                         D("setup_timer()");
 277                         memset(&rate_ctx, 0, sizeof(rate_ctx));
 278                         setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
 279                         if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
 280                                 D("Error: mod_timer()");
 281                         }
 282                 }
 283                 rate_ctx.refcount++;
 284 #endif /* RATE */
 285
 286         } else if (na->tx_rings[0].tx_pool) {
 287                 /* Disable netmap mode. We enter here only if the previous
 288                    generic_netmap_register(na, 1) was successfull.
 289                    If it was not, na->tx_rings[0].tx_pool was set to NULL by the
 290                    error handling code below. */
 291                 rtnl_lock();
 292
 293                 ifp->if_capenable &= ~IFCAP_NETMAP;
 294
 295                 /* Release packet steering control. */
 296                 netmap_catch_tx(gna, 0);
 297
 298                 /* Do not intercept packets on the rx path. */
 299                 netmap_catch_rx(na, 0);
 300
 301                 rtnl_unlock();
 302
 303                 /* Free the mbufs going to the netmap rings */
 304                 for (r=0; r<na->num_rx_rings; r++) {
 305                         mbq_safe_purge(&na->rx_rings[r].rx_queue);
 306                         mbq_safe_destroy(&na->rx_rings[r].rx_queue);
 307                 }
 308
 309                 netmap_mitigation_cleanup(gna);
 310
 311                 for (r=0; r<na->num_tx_rings; r++) {
 312                         for (i=0; i<na->num_tx_desc; i++) {
 313                                 m_freem(na->tx_rings[r].tx_pool[i]);
 314                         }
 315                         free(na->tx_rings[r].tx_pool, M_DEVBUF);
 316                 }
 317
 318 #ifdef RATE
 319                 if (--rate_ctx.refcount == 0) {
 320                         D("del_timer()");
 321                         del_timer(&rate_ctx.timer);
 322                 }
 323 #endif
 324         }
 325
 326 #ifdef REG_RESET
 327         error = ifp->netdev_ops->ndo_open(ifp);
 328         if (error) {
 329                 goto free_tx_pools;
 330         }
 331 #endif
 332
 333         return 0;
 334
 335 register_handler:
 336         rtnl_unlock();
 337 free_tx_pools:
 338         for (r=0; r<na->num_tx_rings; r++) {
 339                 if (na->tx_rings[r].tx_pool == NULL)
 340                         continue;
 341                 for (i=0; i<na->num_tx_desc; i++)
 342                         if (na->tx_rings[r].tx_pool[i])
 343                                 m_freem(na->tx_rings[r].tx_pool[i]);
 344                 free(na->tx_rings[r].tx_pool, M_DEVBUF);
 345                 na->tx_rings[r].tx_pool = NULL;
 346         }
 347         netmap_mitigation_cleanup(gna);
 348         for (r=0; r<na->num_rx_rings; r++) {
 349                 mbq_safe_destroy(&na->rx_rings[r].rx_queue);
 350         }
 351
 352         return error;
 353 }
 354
 355 /*
 356  * Callback invoked when the device driver frees an mbuf used
 357  * by netmap to transmit a packet. This usually happens when
 358  * the NIC notifies the driver that transmission is completed.
 359  */
 360 static void
 361 generic_mbuf_destructor(struct mbuf *m)
 362 {
 363         if (netmap_verbose)
 364                 D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
 365         netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
 366 #ifdef __FreeBSD__
 367         m->m_ext.ext_type = EXT_PACKET;
 368         m->m_ext.ext_free = NULL;
 369         if (*(m->m_ext.ref_cnt) == 0)
 370                 *(m->m_ext.ref_cnt) = 1;
 371         uma_zfree(zone_pack, m);
 372 #endif /* __FreeBSD__ */
 373         IFRATE(rate_ctx.new.txirq++);
 374 }
 375
 376 /* Record completed transmissions and update hwtail.
 377  *
 378  * The oldest tx buffer not yet completed is at nr_hwtail + 1,
 379  * nr_hwcur is the first unsent buffer.
 380  */
 381 static u_int
 382 generic_netmap_tx_clean(struct netmap_kring *kring)
 383 {
 384         u_int const lim = kring->nkr_num_slots - 1;
 385         u_int nm_i = nm_next(kring->nr_hwtail, lim);
 386         u_int hwcur = kring->nr_hwcur;
 387         u_int n = 0;
 388         struct mbuf **tx_pool = kring->tx_pool;
 389
 390         while (nm_i != hwcur) { /* buffers not completed */
 391                 struct mbuf *m = tx_pool[nm_i];
 392
 393                 if (unlikely(m == NULL)) {
 394                         /* this is done, try to replenish the entry */
 395                         tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
 396                         if (unlikely(m == NULL)) {
 397                                 D("mbuf allocation failed, XXX error");
 398                                 // XXX how do we proceed ? break ?
 399                                 return -ENOMEM;
 400                         }
 401                 } else if (GET_MBUF_REFCNT(m) != 1) {
 402                         break; /* This mbuf is still busy: its refcnt is 2. */
 403                 }
 404                 n++;
 405                 nm_i = nm_next(nm_i, lim);
 406         }
 407         kring->nr_hwtail = nm_prev(nm_i, lim);
 408         ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);
 409
 410         return n;
 411 }
 412
 413
 414 /*
 415  * We have pending packets in the driver between nr_hwtail +1 and hwcur.
 416  * Compute a position in the middle, to be used to generate
 417  * a notification.
 418  */
 419 static inline u_int
 420 generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
 421 {
 422         u_int n = kring->nkr_num_slots;
 423         u_int ntc = nm_next(kring->nr_hwtail, n-1);
 424         u_int e;
 425
 426         if (hwcur >= ntc) {
 427                 e = (hwcur + ntc) / 2;
 428         } else { /* wrap around */
 429                 e = (hwcur + n + ntc) / 2;
 430                 if (e >= n) {
 431                         e -= n;
 432                 }
 433         }
 434
 435         if (unlikely(e >= n)) {
 436                 D("This cannot happen");
 437                 e = 0;
 438         }
 439
 440         return e;
 441 }
 442
 443 /*
 444  * We have pending packets in the driver between nr_hwtail+1 and hwcur.
 445  * Schedule a notification approximately in the middle of the two.
 446  * There is a race but this is only called within txsync which does
 447  * a double check.
 448  */
 449 static void
 450 generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
 451 {
 452         struct mbuf *m;
 453         u_int e;
 454
 455         if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) {
 456                 return; /* all buffers are free */
 457         }
 458         e = generic_tx_event_middle(kring, hwcur);
 459
 460         m = kring->tx_pool[e];
 461         if (m == NULL) {
 462                 /* This can happen if there is already an event on the netmap
 463                    slot 'e': There is nothing to do. */
 464                 return;
 465         }
 466         ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
 467         kring->tx_pool[e] = NULL;
 468         SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
 469
 470         // XXX wmb() ?
 471         /* Decrement the refcount an free it if we have the last one. */
 472         m_freem(m);
 473         smp_mb();
 474 }
 475
 476
 477 /*
 478  * generic_netmap_txsync() transforms netmap buffers into mbufs
 479  * and passes them to the standard device driver
 480  * (ndo_start_xmit() or ifp->if_transmit() ).
 481  * On linux this is not done directly, but using dev_queue_xmit(),
 482  * since it implements the TX flow control (and takes some locks).
 483  */
 484 static int
 485 generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
 486 {
 487         struct ifnet *ifp = na->ifp;
 488         struct netmap_kring *kring = &na->tx_rings[ring_nr];
 489         struct netmap_ring *ring = kring->ring;
 490         u_int nm_i;     /* index into the netmap ring */ // j
 491         u_int const lim = kring->nkr_num_slots - 1;
 492         u_int const head = kring->rhead;
 493
 494         IFRATE(rate_ctx.new.txsync++);
 495
 496         // TODO: handle the case of mbuf allocation failure
 497
 498         rmb();
 499
 500         /*
 501          * First part: process new packets to send.
 502          */
 503         nm_i = kring->nr_hwcur;
 504         if (nm_i != head) {     /* we have new packets to send */
 505                 while (nm_i != head) {
 506                         struct netmap_slot *slot = &ring->slot[nm_i];
 507                         u_int len = slot->len;
 508                         void *addr = NMB(slot);
 509
 510                         /* device-specific */
 511                         struct mbuf *m;
 512                         int tx_ret;
 513
 514                         NM_CHECK_ADDR_LEN(addr, len);
 515
 516                         /* Tale a mbuf from the tx pool and copy in the user packet. */
 517                         m = kring->tx_pool[nm_i];
 518                         if (unlikely(!m)) {
 519                                 RD(5, "This should never happen");
 520                                 kring->tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
 521                                 if (unlikely(m == NULL)) {
 522                                         D("mbuf allocation failed");
 523                                         break;
 524                                 }
 525                         }
 526                         /* XXX we should ask notifications when NS_REPORT is set,
 527                          * or roughly every half frame. We can optimize this
 528                          * by lazily requesting notifications only when a
 529                          * transmission fails. Probably the best way is to
 530                          * break on failures and set notifications when
 531                          * ring->cur == ring->tail || nm_i != cur
 532                          */
 533                         tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
 534                         if (unlikely(tx_ret)) {
 535                                 RD(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
 536                                                 tx_ret, nm_i, head, kring->nr_hwtail);
 537                                 /*
 538                                  * No room for this mbuf in the device driver.
 539                                  * Request a notification FOR A PREVIOUS MBUF,
 540                                  * then call generic_netmap_tx_clean(kring) to do the
 541                                  * double check and see if we can free more buffers.
 542                                  * If there is space continue, else break;
 543                                  * NOTE: the double check is necessary if the problem
 544                                  * occurs in the txsync call after selrecord().
 545                                  * Also, we need some way to tell the caller that not
 546                                  * all buffers were queued onto the device (this was
 547                                  * not a problem with native netmap driver where space
 548                                  * is preallocated). The bridge has a similar problem
 549                                  * and we solve it there by dropping the excess packets.
 550                                  */
 551                                 generic_set_tx_event(kring, nm_i);
 552                                 if (generic_netmap_tx_clean(kring)) { /* space now available */
 553                                         continue;
 554                                 } else {
 555                                         break;
 556                                 }
 557                         }
 558                         slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
 559                         nm_i = nm_next(nm_i, lim);
 560                 }
 561
 562                 /* Update hwcur to the next slot to transmit. */
 563                 kring->nr_hwcur = nm_i; /* not head, we could break early */
 564
 565                 IFRATE(rate_ctx.new.txpkt += ntx);
 566         }
 567
 568         /*
 569          * Second, reclaim completed buffers
 570          */
 571         if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
 572                 /* No more available slots? Set a notification event
 573                  * on a netmap slot that will be cleaned in the future.
 574                  * No doublecheck is performed, since txsync() will be
 575                  * called twice by netmap_poll().
 576                  */
 577                 generic_set_tx_event(kring, nm_i);
 578         }
 579         ND("tx #%d, hwtail = %d", n, kring->nr_hwtail);
 580
 581         generic_netmap_tx_clean(kring);
 582
 583         nm_txsync_finalize(kring);
 584
 585         return 0;
 586 }
 587
 588
 589 /*
 590  * This handler is registered (through netmap_catch_rx())
 591  * within the attached network interface
 592  * in the RX subsystem, so that every mbuf passed up by
 593  * the driver can be stolen to the network stack.
 594  * Stolen packets are put in a queue where the
 595  * generic_netmap_rxsync() callback can extract them.
 596  */
 597 void
 598 generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
 599 {
 600         struct netmap_adapter *na = NA(ifp);
 601         struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
 602         u_int work_done;
 603         u_int rr = 0; // receive ring number
 604
 605         /* limit the size of the queue */
 606         if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
 607                 m_freem(m);
 608         } else {
 609                 mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
 610         }
 611
 612         if (netmap_generic_mit < 32768) {
 613                 /* no rx mitigation, pass notification up */
 614                 netmap_generic_irq(na->ifp, rr, &work_done);
 615                 IFRATE(rate_ctx.new.rxirq++);
 616         } else {
 617                 /* same as send combining, filter notification if there is a
 618                  * pending timer, otherwise pass it up and start a timer.
 619                  */
 620                 if (likely(netmap_mitigation_active(gna))) {
 621                         /* Record that there is some pending work. */
 622                         gna->mit_pending = 1;
 623                 } else {
 624                         netmap_generic_irq(na->ifp, rr, &work_done);
 625                         IFRATE(rate_ctx.new.rxirq++);
 626                         netmap_mitigation_start(gna);
 627                 }
 628         }
 629 }
 630
 631 /*
 632  * generic_netmap_rxsync() extracts mbufs from the queue filled by
 633  * generic_netmap_rx_handler() and puts their content in the netmap
 634  * receive ring.
 635  * Access must be protected because the rx handler is asynchronous,
 636  */
 637 static int
 638 generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
 639 {
 640         struct netmap_kring *kring = &na->rx_rings[ring_nr];
 641         struct netmap_ring *ring = kring->ring;
 642         u_int nm_i;     /* index into the netmap ring */ //j,
 643         u_int n;
 644         u_int const lim = kring->nkr_num_slots - 1;
 645         u_int const head = nm_rxsync_prologue(kring);
 646         int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
 647
 648         if (head > lim)
 649                 return netmap_ring_reinit(kring);
 650
 651         /*
 652          * First part: import newly received packets.
 653          */
 654         if (netmap_no_pendintr || force_update) {
 655                 /* extract buffers from the rx queue, stop at most one
 656                  * slot before nr_hwcur (stop_i)
 657                  */
 658                 uint16_t slot_flags = kring->nkr_slot_flags;
 659                 u_int stop_i = nm_prev(kring->nr_hwcur, lim);
 660
 661                 nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
 662                 for (n = 0; nm_i != stop_i; n++) {
 663                         int len;
 664                         void *addr = NMB(&ring->slot[nm_i]);
 665                         struct mbuf *m;
 666
 667                         /* we only check the address here on generic rx rings */
 668                         if (addr == netmap_buffer_base) { /* Bad buffer */
 669                                 return netmap_ring_reinit(kring);
 670                         }
 671                         /*
 672                          * Call the locked version of the function.
 673                          * XXX Ideally we could grab a batch of mbufs at once
 674                          * and save some locking overhead.
 675                          */
 676                         m = mbq_safe_dequeue(&kring->rx_queue);
 677                         if (!m) /* no more data */
 678                                 break;
 679                         len = MBUF_LEN(m);
 680                         m_copydata(m, 0, len, addr);
 681                         ring->slot[nm_i].len = len;
 682                         ring->slot[nm_i].flags = slot_flags;
 683                         m_freem(m);
 684                         nm_i = nm_next(nm_i, lim);
 685                         n++;
 686                 }
 687                 if (n) {
 688                         kring->nr_hwtail = nm_i;
 689                         IFRATE(rate_ctx.new.rxpkt += n);
 690                 }
 691                 kring->nr_kflags &= ~NKR_PENDINTR;
 692         }
 693
 694         // XXX should we invert the order ?
 695         /*
 696          * Second part: skip past packets that userspace has released.
 697          */
 698         nm_i = kring->nr_hwcur;
 699         if (nm_i != head) {
 700                 /* Userspace has released some packets. */
 701                 for (n = 0; nm_i != head; n++) {
 702                         struct netmap_slot *slot = &ring->slot[nm_i];
 703
 704                         slot->flags &= ~NS_BUF_CHANGED;
 705                         nm_i = nm_next(nm_i, lim);
 706                 }
 707                 kring->nr_hwcur = head;
 708         }
 709         /* tell userspace that there might be new packets. */
 710         nm_rxsync_finalize(kring);
 711         IFRATE(rate_ctx.new.rxsync++);
 712
 713         return 0;
 714 }
 715
 716 static void
 717 generic_netmap_dtor(struct netmap_adapter *na)
 718 {
 719         struct ifnet *ifp = na->ifp;
 720         struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
 721         struct netmap_adapter *prev_na = gna->prev;
 722
 723         if (prev_na != NULL) {
 724                 D("Released generic NA %p", gna);
 725                 if_rele(na->ifp);
 726                 netmap_adapter_put(prev_na);
 727         }
 728         if (ifp != NULL) {
 729                 WNA(ifp) = prev_na;
 730                 D("Restored native NA %p", prev_na);
 731                 na->ifp = NULL;
 732         }
 733 }
 734
 735 /*
 736  * generic_netmap_attach() makes it possible to use netmap on
 737  * a device without native netmap support.
 738  * This is less performant than native support but potentially
 739  * faster than raw sockets or similar schemes.
 740  *
 741  * In this "emulated" mode, netmap rings do not necessarily
 742  * have the same size as those in the NIC. We use a default
 743  * value and possibly override it if the OS has ways to fetch the
 744  * actual configuration.
 745  */
 746 int
 747 generic_netmap_attach(struct ifnet *ifp)
 748 {
 749         struct netmap_adapter *na;
 750         struct netmap_generic_adapter *gna;
 751         int retval;
 752         u_int num_tx_desc, num_rx_desc;
 753
 754         num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
 755
 756         generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
 757         ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
 758
 759         gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
 760         if (gna == NULL) {
 761                 D("no memory on attach, give up");
 762                 return ENOMEM;
 763         }
 764         na = (struct netmap_adapter *)gna;
 765         na->ifp = ifp;
 766         na->num_tx_desc = num_tx_desc;
 767         na->num_rx_desc = num_rx_desc;
 768         na->nm_register = &generic_netmap_register;
 769         na->nm_txsync = &generic_netmap_txsync;
 770         na->nm_rxsync = &generic_netmap_rxsync;
 771         na->nm_dtor = &generic_netmap_dtor;
 772         /* when using generic, IFCAP_NETMAP is set so we force
 773          * NAF_SKIP_INTR to use the regular interrupt handler
 774          */
 775         na->na_flags = NAF_SKIP_INTR;
 776
 777         ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
 778                         ifp->num_tx_queues, ifp->real_num_tx_queues,
 779                         ifp->tx_queue_len);
 780         ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
 781                         ifp->num_rx_queues, ifp->real_num_rx_queues);
 782
 783         generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
 784
 785         retval = netmap_attach_common(na);
 786         if (retval) {
 787                 free(gna, M_DEVBUF);
 788         }
 789
 790         return retval;
 791 }