1 /*-
2  * Copyright (c) 2010-2011 Solarflare Communications, Inc.
3  * All rights reserved.
4  *
5  * This software was developed in part by Philip Paeps under contract for
6  * Solarflare Communications, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/types.h>
34 #include <sys/mbuf.h>
35 #include <sys/smp.h>
36 #include <sys/socket.h>
37 #include <sys/sysctl.h>
38 #include <sys/limits.h>
39 #include <sys/syslog.h>
40
41 #include <net/ethernet.h>
42 #include <net/if.h>
43 #include <net/if_vlan_var.h>
44
45 #include <netinet/in.h>
46 #include <netinet/ip.h>
47 #include <netinet/ip6.h>
48 #include <netinet/tcp.h>
49
50 #include <machine/in_cksum.h>
51
52 #include "common/efx.h"
53
54
55 #include "sfxge.h"
56 #include "sfxge_rx.h"
57
58 #define RX_REFILL_THRESHOLD(_entries)   (EFX_RXQ_LIMIT(_entries) * 9 / 10)
59
60 SYSCTL_NODE(_hw_sfxge, OID_AUTO, lro, CTLFLAG_RD, NULL,
61             "Large receive offload (LRO) parameters");
62
63 #define SFXGE_LRO_PARAM(_param) SFXGE_PARAM(lro._param)
64
65 /* Size of the LRO hash table.  Must be a power of 2.  A larger table
66  * means we can accelerate a larger number of streams.
67  */
68 static unsigned lro_table_size = 128;
69 TUNABLE_INT(SFXGE_LRO_PARAM(table_size), &lro_table_size);
70 SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, table_size, CTLFLAG_RDTUN,
71             &lro_table_size, 0,
72             "Size of the LRO hash table (must be a power of 2)");
73
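/* Example (a sketch, not shipped configuration): the table size can be set at
 * boot time from loader.conf, e.g.
 *
 *      hw.sfxge.lro.table_size="256"
 *
 * Because the size is a power of 2, bucket lookup reduces to a mask, as in
 * sfxge_lro_new_conn() below:
 *
 *      bucket = conn_hash & (lro_table_size - 1);
 *
 * sfxge_rx_init() rejects sizes that are not a power of 2.
 */
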
74 /* Maximum length of a hash chain.  If chains get too long then the lookup
75  * time increases and may exceed the benefit of LRO.
76  */
77 static unsigned lro_chain_max = 20;
78 TUNABLE_INT(SFXGE_LRO_PARAM(chain_max), &lro_chain_max);
79 SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, chain_max, CTLFLAG_RDTUN,
80             &lro_chain_max, 0,
81             "The maximum length of a hash chain");
82
83 /* Maximum time (in ticks) that a connection can be idle before its LRO
84  * state is discarded.
85  */
86 static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */
87 TUNABLE_INT(SFXGE_LRO_PARAM(idle_ticks), &lro_idle_ticks);
88 SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, idle_ticks, CTLFLAG_RDTUN,
89             &lro_idle_ticks, 0,
90             "The maximum time (in ticks) that a connection can be idle "
91             "before its LRO state is discarded");
92
93 /* Number of packets with payload that must arrive in-order before a
94  * connection is eligible for LRO.  The idea is we should avoid coalescing
95  * segments when the sender is in slow-start because reducing the ACK rate
96  * can damage performance.
97  */
98 static int lro_slow_start_packets = 2000;
99 TUNABLE_INT(SFXGE_LRO_PARAM(slow_start_packets), &lro_slow_start_packets);
100 SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, slow_start_packets, CTLFLAG_RDTUN,
101             &lro_slow_start_packets, 0,
102             "Number of packets with payload that must arrive in-order before "
103             "a connection is eligible for LRO");
104
105 /* Number of packets with payload that must arrive in-order following loss
106  * before a connection is eligible for LRO.  The idea is we should avoid
107  * coalescing segments when the sender is recovering from loss, because
108  * reducing the ACK rate can damage performance.
109  */
110 static int lro_loss_packets = 20;
111 TUNABLE_INT(SFXGE_LRO_PARAM(loss_packets), &lro_loss_packets);
112 SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, loss_packets, CTLFLAG_RDTUN,
113             &lro_loss_packets, 0,
114             "Number of packets with payload that must arrive in-order "
115             "following loss before a connection is eligible for LRO");
116
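/* Both thresholds act on sfxge_lro_conn::n_in_order_pkts: the count must reach
 * lro_slow_start_packets before merging starts, and each out-of-order segment
 * subtracts lro_loss_packets from it (see sfxge_lro_try_merge()).
 */
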
117 /* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */
118 #define SFXGE_LRO_L2_ID_VLAN 0x4000
119 #define SFXGE_LRO_L2_ID_IPV6 0x8000
120 #define SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
121 #define SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))
122
123 /* Compare IPv6 addresses, avoiding conditional branches */
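/* Returns zero iff the two addresses are equal; a nonzero result carries no
 * ordering information, so callers should only test it against zero.
 */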
124 static unsigned long ipv6_addr_cmp(const struct in6_addr *left,
125                                    const struct in6_addr *right)
126 {
127 #if LONG_BIT == 64
128         const uint64_t *left64 = (const uint64_t *)left;
129         const uint64_t *right64 = (const uint64_t *)right;
130         return (left64[0] - right64[0]) | (left64[1] - right64[1]);
131 #else
132         return (left->s6_addr32[0] - right->s6_addr32[0]) |
133                (left->s6_addr32[1] - right->s6_addr32[1]) |
134                (left->s6_addr32[2] - right->s6_addr32[2]) |
135                (left->s6_addr32[3] - right->s6_addr32[3]);
136 #endif
137 }
138
139 void
140 sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
141 {
142
143         rxq->flush_state = SFXGE_FLUSH_DONE;
144 }
145
146 void
147 sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
148 {
149
150         rxq->flush_state = SFXGE_FLUSH_FAILED;
151 }
152
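/* RSS Toeplitz hash key.  This appears to be the standard 40-byte default key
 * from the Microsoft RSS specification, used to seed the NIC's flow hashing.
 */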
153 static uint8_t toep_key[] = {
154         0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
155         0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
156         0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
157         0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
158         0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
159 };
160
161 static void
162 sfxge_rx_post_refill(void *arg)
163 {
164         struct sfxge_rxq *rxq = arg;
165         struct sfxge_softc *sc;
166         unsigned int index;
167         struct sfxge_evq *evq;
168         uint16_t magic;
169
170         sc = rxq->sc;
171         index = rxq->index;
172         evq = sc->evq[index];
173
174         magic = SFXGE_MAGIC_RX_QREFILL | index;
175
176         /* This is guaranteed due to the start/stop order of rx and ev */
177         KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
178             ("evq not started"));
179         KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
180             ("rxq not started"));
181         efx_ev_qpost(evq->common, magic);
182 }
183
184 static void
185 sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying)
186 {
187         /* Initially retry after 100 ms, but back off in case of
188          * repeated failures as we probably have to wait for the
189          * administrator to raise the pool limit. */
190         if (retrying)
191                 rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz);
192         else
193                 rxq->refill_delay = hz / 10;
194
195         callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay,
196                              sfxge_rx_post_refill, rxq);
197 }
198
199 static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
200 {
201         struct mb_args args;
202         struct mbuf *m;
203
204         /* Allocate mbuf structure */
205         args.flags = M_PKTHDR;
206         args.type = MT_DATA;
207         m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT);
208
209         /* Allocate (and attach) packet buffer */
210         if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) {
211                 uma_zfree(zone_mbuf, m);
212                 m = NULL;
213         }
214
215         return (m);
216 }
217
218 #define SFXGE_REFILL_BATCH  64
219
220 static void
221 sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying)
222 {
223         struct sfxge_softc *sc;
224         unsigned int index;
225         struct sfxge_evq *evq;
226         unsigned int batch;
227         unsigned int rxfill;
228         unsigned int mblksize;
229         int ntodo;
230         efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
231
232         sc = rxq->sc;
233         index = rxq->index;
234         evq = sc->evq[index];
235
236         prefetch_read_many(sc->enp);
237         prefetch_read_many(rxq->common);
238
239         SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);
240
241         if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
242                 return;
243
244         rxfill = rxq->added - rxq->completed;
245         KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries),
246             ("rxfill > EFX_RXQ_LIMIT(rxq->entries)"));
247         ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target);
248         KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries),
249             ("ntodo > EFX_RXQ_LIMIT(rxq->entries)"));
250
251         if (ntodo == 0)
252                 return;
253
254         batch = 0;
255         mblksize = sc->rx_buffer_size;
256         while (ntodo-- > 0) {
257                 unsigned int id;
258                 struct sfxge_rx_sw_desc *rx_desc;
259                 bus_dma_segment_t seg;
260                 struct mbuf *m;
261
262                 id = (rxq->added + batch) & rxq->ptr_mask;
263                 rx_desc = &rxq->queue[id];
264                 KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL"));
265
266                 rx_desc->flags = EFX_DISCARD;
267                 m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc);
268                 if (m == NULL)
269                         break;
270                 sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg);
271                 addr[batch++] = seg.ds_addr;
272
273                 if (batch == SFXGE_REFILL_BATCH) {
274                         efx_rx_qpost(rxq->common, addr, mblksize, batch,
275                             rxq->completed, rxq->added);
276                         rxq->added += batch;
277                         batch = 0;
278                 }
279         }
280
281         if (ntodo != 0)
282                 sfxge_rx_schedule_refill(rxq, retrying);
283
284         if (batch != 0) {
285                 efx_rx_qpost(rxq->common, addr, mblksize, batch,
286                     rxq->completed, rxq->added);
287                 rxq->added += batch;
288         }
289
290         /* Make the descriptors visible to the hardware */
291         bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map,
292                         BUS_DMASYNC_PREWRITE);
293
294         efx_rx_qpush(rxq->common, rxq->added);
295 }
296
297 void
298 sfxge_rx_qrefill(struct sfxge_rxq *rxq)
299 {
300
301         if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
302                 return;
303
304         /* Make sure the queue is full */
305         sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE);
306 }
307
308 static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m)
309 {
310         struct ifnet *ifp = sc->ifnet;
311
312         m->m_pkthdr.rcvif = ifp;
313         m->m_pkthdr.csum_data = 0xffff;
314         ifp->if_input(ifp, m);
315 }
316
317 static void
318 sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc)
319 {
320         struct mbuf *m = rx_desc->mbuf;
321         int csum_flags;
322
323         /* Convert checksum flags */
324         csum_flags = (rx_desc->flags & EFX_CKSUM_IPV4) ?
325                 (CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
326         if (rx_desc->flags & EFX_CKSUM_TCPUDP)
327                 csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
328
329         /* The hash covers a 4-tuple for TCP only */
330         if (rx_desc->flags & EFX_PKT_TCP) {
331                 m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
332                                                        mtod(m, uint8_t *));
333                 m->m_flags |= M_FLOWID;
334         }
335         m->m_data += sc->rx_prefix_size;
336         m->m_len = rx_desc->size - sc->rx_prefix_size;
337         m->m_pkthdr.len = m->m_len;
338         m->m_pkthdr.csum_flags = csum_flags;
339         __sfxge_rx_deliver(sc, rx_desc->mbuf);
340
341         rx_desc->flags = EFX_DISCARD;
342         rx_desc->mbuf = NULL;
343 }
344
345 static void
346 sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
347 {
348         struct sfxge_softc *sc = st->sc;
349         struct mbuf *m = c->mbuf;
350         struct tcphdr *c_th;
351         int csum_flags;
352
353         KASSERT(m, ("no mbuf to deliver"));
354
355         ++st->n_bursts;
356
357         /* Finish off packet munging and recalculate IP header checksum. */
358         if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
359                 struct ip *iph = c->nh;
360                 iph->ip_len = htons(iph->ip_len);
361                 iph->ip_sum = 0;
362                 iph->ip_sum = in_cksum_hdr(iph);
363                 c_th = (struct tcphdr *)(iph + 1);
364                 csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
365                               CSUM_IP_CHECKED | CSUM_IP_VALID);
366         } else {
367                 struct ip6_hdr *iph = c->nh;
368                 iph->ip6_plen = htons(iph->ip6_plen);
369                 c_th = (struct tcphdr *)(iph + 1);
370                 csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
371         }
372
373         c_th->th_win = c->th_last->th_win;
374         c_th->th_ack = c->th_last->th_ack;
375         if (c_th->th_off == c->th_last->th_off) {
376                 /* Copy TCP options (take care to avoid going negative). */
377                 int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
378                 memcpy(c_th + 1, c->th_last + 1, optlen);
379         }
380
381         m->m_pkthdr.flowid = c->conn_hash;
382         m->m_flags |= M_FLOWID;
383
384         m->m_pkthdr.csum_flags = csum_flags;
385         __sfxge_rx_deliver(sc, m);
386
387         c->mbuf = NULL;
388         c->delivered = 1;
389 }
390
391 /* Drop the given connection, and add it to the free list. */
392 static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
393 {
394         unsigned bucket;
395
396         KASSERT(!c->mbuf, ("found orphaned mbuf"));
397
398         if (c->next_buf.mbuf != NULL) {
399                 sfxge_rx_deliver(rxq->sc, &c->next_buf);
400                 LIST_REMOVE(c, active_link);
401         }
402
403         bucket = c->conn_hash & rxq->lro.conns_mask;
404         KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
405         --rxq->lro.conns_n[bucket];
406         TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
407         TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
408 }
409
410 /* Stop tracking connections that have gone idle in order to keep hash
411  * chains short.
412  */
413 static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
414 {
415         struct sfxge_lro_conn *c;
416         unsigned i;
417
418         KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
419                 ("found active connections"));
420
421         rxq->lro.last_purge_ticks = now;
422         for (i = 0; i <= rxq->lro.conns_mask; ++i) {
423                 if (TAILQ_EMPTY(&rxq->lro.conns[i]))
424                         continue;
425
426                 c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
427                 if (now - c->last_pkt_ticks > lro_idle_ticks) {
428                         ++rxq->lro.n_drop_idle;
429                         sfxge_lro_drop(rxq, c);
430                 }
431         }
432 }
433
434 static void
435 sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
436                 struct mbuf *mbuf, struct tcphdr *th)
437 {
438         struct tcphdr *c_th;
439
440         /* Tack the new mbuf onto the chain. */
441         KASSERT(!mbuf->m_next, ("mbuf already chained"));
442         c->mbuf_tail->m_next = mbuf;
443         c->mbuf_tail = mbuf;
444
445         /* Increase length appropriately */
446         c->mbuf->m_pkthdr.len += mbuf->m_len;
447
448         /* Update the connection state flags */
449         if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
450                 struct ip *iph = c->nh;
451                 iph->ip_len += mbuf->m_len;
452                 c_th = (struct tcphdr *)(iph + 1);
453         } else {
454                 struct ip6_hdr *iph = c->nh;
455                 iph->ip6_plen += mbuf->m_len;
456                 c_th = (struct tcphdr *)(iph + 1);
457         }
458         c_th->th_flags |= (th->th_flags & TH_PUSH);
459         c->th_last = th;
460         ++st->n_merges;
461
462         /* Pass packet up now if another segment could overflow the IP
463          * length.
464          */
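        /* (The 9200-byte margin appears to allow for one more maximum-size
         * jumbo segment before the coalesced IP total length would exceed the
         * 16-bit limit of 65535.)
         */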
465         if (c->mbuf->m_pkthdr.len > 65536 - 9200)
466                 sfxge_lro_deliver(st, c);
467 }
468
469 static void
470 sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
471                 struct mbuf *mbuf, void *nh, struct tcphdr *th)
472 {
473         /* Start the chain */
474         c->mbuf = mbuf;
475         c->mbuf_tail = c->mbuf;
476         c->nh = nh;
477         c->th_last = th;
478
479         mbuf->m_pkthdr.len = mbuf->m_len;
480
481         /* Mangle header fields for later processing */
482         if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
483                 struct ip *iph = nh;
484                 iph->ip_len = ntohs(iph->ip_len);
485         } else {
486                 struct ip6_hdr *iph = nh;
487                 iph->ip6_plen = ntohs(iph->ip6_plen);
488         }
489 }
490
491 /* Try to merge or otherwise hold or deliver (as appropriate) the
492  * packet buffered for this connection (c->next_buf).  Return a flag
493  * indicating whether the connection is still active for LRO purposes.
494  */
495 static int
496 sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
497 {
498         struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
499         char *eh = c->next_eh;
500         int data_length, hdr_length, dont_merge;
501         unsigned th_seq, pkt_length;
502         struct tcphdr *th;
503         unsigned now;
504
505         if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
506                 struct ip *iph = c->next_nh;
507                 th = (struct tcphdr *)(iph + 1);
508                 pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
509         } else {
510                 struct ip6_hdr *iph = c->next_nh;
511                 th = (struct tcphdr *)(iph + 1);
512                 pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
513         }
514
515         hdr_length = (char *) th + th->th_off * 4 - eh;
516         data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
517                        hdr_length);
518         th_seq = ntohl(th->th_seq);
519         dont_merge = ((data_length <= 0)
520                       | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));
521
522         /* Check for options other than aligned timestamp. */
523         if (th->th_off != 5) {
524                 const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
525                 if (th->th_off == 8 &&
526                     opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
527                                         (TCPOPT_NOP << 16) |
528                                         (TCPOPT_TIMESTAMP << 8) |
529                                         TCPOLEN_TIMESTAMP)) {
530                         /* timestamp option -- okay */
531                 } else {
532                         dont_merge = 1;
533                 }
534         }
535
536         if (__predict_false(th_seq != c->next_seq)) {
537                 /* Out-of-order, so start counting again. */
538                 if (c->mbuf != NULL)
539                         sfxge_lro_deliver(&rxq->lro, c);
540                 c->n_in_order_pkts -= lro_loss_packets;
541                 c->next_seq = th_seq + data_length;
542                 ++rxq->lro.n_misorder;
543                 goto deliver_buf_out;
544         }
545         c->next_seq = th_seq + data_length;
546
547         now = ticks;
548         if (now - c->last_pkt_ticks > lro_idle_ticks) {
549                 ++rxq->lro.n_drop_idle;
550                 if (c->mbuf != NULL)
551                         sfxge_lro_deliver(&rxq->lro, c);
552                 sfxge_lro_drop(rxq, c);
553                 return (0);
554         }
555         c->last_pkt_ticks = ticks;
556
557         if (c->n_in_order_pkts < lro_slow_start_packets) {
558                 /* May be in slow-start, so don't merge. */
559                 ++rxq->lro.n_slow_start;
560                 ++c->n_in_order_pkts;
561                 goto deliver_buf_out;
562         }
563
564         if (__predict_false(dont_merge)) {
565                 if (c->mbuf != NULL)
566                         sfxge_lro_deliver(&rxq->lro, c);
567                 if (th->th_flags & (TH_FIN | TH_RST)) {
568                         ++rxq->lro.n_drop_closed;
569                         sfxge_lro_drop(rxq, c);
570                         return (0);
571                 }
572                 goto deliver_buf_out;
573         }
574
575         rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;
576
577         if (__predict_true(c->mbuf != NULL)) {
578                 /* Remove headers and any padding */
579                 rx_buf->mbuf->m_data += hdr_length;
580                 rx_buf->mbuf->m_len = data_length;
581
582                 sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
583         } else {
584                 /* Remove any padding */
585                 rx_buf->mbuf->m_len = pkt_length;
586
587                 sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
588         }
589
590         rx_buf->mbuf = NULL;
591         return (1);
592
593  deliver_buf_out:
594         sfxge_rx_deliver(rxq->sc, rx_buf);
595         return (1);
596 }
597
598 static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
599                                uint16_t l2_id, void *nh, struct tcphdr *th)
600 {
601         unsigned bucket = conn_hash & st->conns_mask;
602         struct sfxge_lro_conn *c;
603
604         if (st->conns_n[bucket] >= lro_chain_max) {
605                 ++st->n_too_many;
606                 return;
607         }
608
609         if (!TAILQ_EMPTY(&st->free_conns)) {
610                 c = TAILQ_FIRST(&st->free_conns);
611                 TAILQ_REMOVE(&st->free_conns, c, link);
612         } else {
613                 c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
614                 if (c == NULL)
615                         return;
616                 c->mbuf = NULL;
617                 c->next_buf.mbuf = NULL;
618         }
619
620         /* Create the connection tracking data */
621         ++st->conns_n[bucket];
622         TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
623         c->l2_id = l2_id;
624         c->conn_hash = conn_hash;
625         c->source = th->th_sport;
626         c->dest = th->th_dport;
627         c->n_in_order_pkts = 0;
628         c->last_pkt_ticks = *(volatile int *)&ticks;
629         c->delivered = 0;
630         ++st->n_new_stream;
631         /* NB. We don't initialise c->next_seq, and it doesn't matter what
632          * value it has.  Most likely the next packet received for this
633          * connection will not match -- no harm done.
634          */
635 }
636
637 /* Process mbuf and decide whether to dispatch it to the stack now or
638  * later.
639  */
640 static void
641 sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
642 {
643         struct sfxge_softc *sc = rxq->sc;
644         struct mbuf *m = rx_buf->mbuf;
645         struct ether_header *eh;
646         struct sfxge_lro_conn *c;
647         uint16_t l2_id;
648         uint16_t l3_proto;
649         void *nh;
650         struct tcphdr *th;
651         uint32_t conn_hash;
652         unsigned bucket;
653
654         /* Get the hardware hash */
655         conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
656                                       mtod(m, uint8_t *));
657
658         eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
659         if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
660                 struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
661                 l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
662                         SFXGE_LRO_L2_ID_VLAN;
663                 l3_proto = veh->evl_proto;
664                 nh = veh + 1;
665         } else {
666                 l2_id = 0;
667                 l3_proto = eh->ether_type;
668                 nh = eh + 1;
669         }
670
671         /* Check whether this is a suitable packet (unfragmented
672          * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header and
673          * length, and compute a hash if necessary.  If not, return.
674          */
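        /* (For IPv4 the tests below are OR-ed differences, so a single branch
         * sends packets that are not TCP, carry IP options (ip_hl != 5) or
         * are fragmented straight to deliver_now.)
         */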
675         if (l3_proto == htons(ETHERTYPE_IP)) {
676                 struct ip *iph = nh;
677                 if ((iph->ip_p - IPPROTO_TCP) |
678                     (iph->ip_hl - (sizeof(*iph) >> 2u)) |
679                     (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
680                         goto deliver_now;
681                 th = (struct tcphdr *)(iph + 1);
682         } else if (l3_proto == htons(ETHERTYPE_IPV6)) {
683                 struct ip6_hdr *iph = nh;
684                 if (iph->ip6_nxt != IPPROTO_TCP)
685                         goto deliver_now;
686                 l2_id |= SFXGE_LRO_L2_ID_IPV6;
687                 th = (struct tcphdr *)(iph + 1);
688         } else {
689                 goto deliver_now;
690         }
691
692         bucket = conn_hash & rxq->lro.conns_mask;
693
694         TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
695                 if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
696                         continue;
697                 if ((c->source - th->th_sport) | (c->dest - th->th_dport))
698                         continue;
699                 if (c->mbuf != NULL) {
700                         if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
701                                 struct ip *c_iph, *iph = nh;
702                                 c_iph = c->nh;
703                                 if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
704                                     (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
705                                         continue;
706                         } else {
707                                 struct ip6_hdr *c_iph, *iph = nh;
708                                 c_iph = c->nh;
709                                 if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
710                                     ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
711                                         continue;
712                         }
713                 }
714
715                 /* Re-insert at head of list to reduce lookup time. */
716                 TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
717                 TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);
718
719                 if (c->next_buf.mbuf != NULL) {
720                         if (!sfxge_lro_try_merge(rxq, c))
721                                 goto deliver_now;
722                 } else {
723                         LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
724                             active_link);
725                 }
726                 c->next_buf = *rx_buf;
727                 c->next_eh = eh;
728                 c->next_nh = nh;
729
730                 rx_buf->mbuf = NULL;
731                 rx_buf->flags = EFX_DISCARD;
732                 return;
733         }
734
735         sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
736  deliver_now:
737         sfxge_rx_deliver(sc, rx_buf);
738 }
739
740 static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
741 {
742         struct sfxge_lro_state *st = &rxq->lro;
743         struct sfxge_lro_conn *c;
744         unsigned t;
745
746         while (!LIST_EMPTY(&st->active_conns)) {
747                 c = LIST_FIRST(&st->active_conns);
748                 if (!c->delivered && c->mbuf != NULL)
749                         sfxge_lro_deliver(st, c);
750                 if (sfxge_lro_try_merge(rxq, c)) {
751                         if (c->mbuf != NULL)
752                                 sfxge_lro_deliver(st, c);
753                         LIST_REMOVE(c, active_link);
754                 }
755                 c->delivered = 0;
756         }
757
758         t = *(volatile int *)&ticks;
759         if (__predict_false(t != st->last_purge_ticks))
760                 sfxge_lro_purge_idle(rxq, t);
761 }
762
763 void
764 sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
765 {
766         struct sfxge_softc *sc = rxq->sc;
767         int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO;
768         unsigned int index;
769         struct sfxge_evq *evq;
770         unsigned int completed;
771         unsigned int level;
772         struct mbuf *m;
773         struct sfxge_rx_sw_desc *prev = NULL;
774
775         index = rxq->index;
776         evq = sc->evq[index];
777
778         SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);
779
780         completed = rxq->completed;
781         while (completed != rxq->pending) {
782                 unsigned int id;
783                 struct sfxge_rx_sw_desc *rx_desc;
784
785                 id = completed++ & rxq->ptr_mask;
786                 rx_desc = &rxq->queue[id];
787                 m = rx_desc->mbuf;
788
789                 if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
790                         goto discard;
791
792                 if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
793                         goto discard;
794
795                 prefetch_read_many(mtod(m, caddr_t));
796
797                 /* Check for loopback packets */
798                 if (!(rx_desc->flags & EFX_PKT_IPV4) &&
799                     !(rx_desc->flags & EFX_PKT_IPV6)) {
800                         struct ether_header *etherhp;
801
802                         /*LINTED*/
803                         etherhp = mtod(m, struct ether_header *);
804
805                         if (etherhp->ether_type ==
806                             htons(SFXGE_ETHERTYPE_LOOPBACK)) {
807                                 EFSYS_PROBE(loopback);
808
809                                 rxq->loopback++;
810                                 goto discard;
811                         }
812                 }
813
814                 /* Pass packet up the stack or into LRO (pipelined) */
815                 if (prev != NULL) {
816                         if (lro_enabled)
817                                 sfxge_lro(rxq, prev);
818                         else
819                                 sfxge_rx_deliver(sc, prev);
820                 }
821                 prev = rx_desc;
822                 continue;
823
824 discard:
825                 /* Return the packet to the pool */
826                 m_free(m);
827                 rx_desc->mbuf = NULL;
828         }
829         rxq->completed = completed;
830
831         level = rxq->added - rxq->completed;
832
833         /* Pass last packet up the stack or into LRO */
834         if (prev != NULL) {
835                 if (lro_enabled)
836                         sfxge_lro(rxq, prev);
837                 else
838                         sfxge_rx_deliver(sc, prev);
839         }
840
841         /*
842          * If there are any pending flows and this is the end of the
843          * poll then they must be completed.
844          */
845         if (eop)
846                 sfxge_lro_end_of_burst(rxq);
847
848         /* Top up the queue if necessary */
849         if (level < rxq->refill_threshold)
850                 sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE);
851 }
852
853 static void
854 sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
855 {
856         struct sfxge_rxq *rxq;
857         struct sfxge_evq *evq;
858         unsigned int count;
859
860         rxq = sc->rxq[index];
861         evq = sc->evq[index];
862
863         SFXGE_EVQ_LOCK(evq);
864
865         KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
866             ("rxq not started"));
867
868         rxq->init_state = SFXGE_RXQ_INITIALIZED;
869
870         callout_stop(&rxq->refill_callout);
871
872 again:
873         rxq->flush_state = SFXGE_FLUSH_PENDING;
874
875         /* Flush the receive queue */
876         efx_rx_qflush(rxq->common);
877
878         SFXGE_EVQ_UNLOCK(evq);
879
880         count = 0;
881         do {
882                 /* Spin for 100 ms */
883                 DELAY(100000);
884
885                 if (rxq->flush_state != SFXGE_FLUSH_PENDING)
886                         break;
887
888         } while (++count < 20);
889
890         SFXGE_EVQ_LOCK(evq);
891
892         if (rxq->flush_state == SFXGE_FLUSH_FAILED)
893                 goto again;
894
895         rxq->flush_state = SFXGE_FLUSH_DONE;
896
897         rxq->pending = rxq->added;
898         sfxge_rx_qcomplete(rxq, B_TRUE);
899
900         KASSERT(rxq->completed == rxq->pending,
901             ("rxq->completed != rxq->pending"));
902
903         rxq->added = 0;
904         rxq->pending = 0;
905         rxq->completed = 0;
906         rxq->loopback = 0;
907
908         /* Destroy the common code receive queue. */
909         efx_rx_qdestroy(rxq->common);
910
911         efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
912             EFX_RXQ_NBUFS(sc->rxq_entries));
913
914         SFXGE_EVQ_UNLOCK(evq);
915 }
916
917 static int
918 sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
919 {
920         struct sfxge_rxq *rxq;
921         efsys_mem_t *esmp;
922         struct sfxge_evq *evq;
923         int rc;
924
925         rxq = sc->rxq[index];
926         esmp = &rxq->mem;
927         evq = sc->evq[index];
928
929         KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
930             ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
931         KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
932             ("evq->init_state != SFXGE_EVQ_STARTED"));
933
934         /* Program the buffer table. */
935         if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
936             EFX_RXQ_NBUFS(sc->rxq_entries))) != 0)
937                 return (rc);
938
939         /* Create the common code receive queue. */
940         if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
941             esmp, sc->rxq_entries, rxq->buf_base_id, evq->common,
942             &rxq->common)) != 0)
943                 goto fail;
944
945         SFXGE_EVQ_LOCK(evq);
946
947         /* Enable the receive queue. */
948         efx_rx_qenable(rxq->common);
949
950         rxq->init_state = SFXGE_RXQ_STARTED;
951
952         /* Try to fill the queue from the pool. */
953         sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE);
954
955         SFXGE_EVQ_UNLOCK(evq);
956
957         return (0);
958
959 fail:
960         efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
961             EFX_RXQ_NBUFS(sc->rxq_entries));
962         return (rc);
963 }
964
965 void
966 sfxge_rx_stop(struct sfxge_softc *sc)
967 {
968         int index;
969
970         /* Stop the receive queue(s) */
971         index = sc->rxq_count;
972         while (--index >= 0)
973                 sfxge_rx_qstop(sc, index);
974
975         sc->rx_prefix_size = 0;
976         sc->rx_buffer_size = 0;
977
978         efx_rx_fini(sc->enp);
979 }
980
981 int
982 sfxge_rx_start(struct sfxge_softc *sc)
983 {
984         struct sfxge_intr *intr;
985         int index;
986         int rc;
987
988         intr = &sc->intr;
989
990         /* Initialize the common code receive module. */
991         if ((rc = efx_rx_init(sc->enp)) != 0)
992                 return (rc);
993
994         /* Calculate the receive packet buffer size. */
995         sc->rx_prefix_size = EFX_RX_PREFIX_SIZE;
996         sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) +
997                               sc->rx_prefix_size);
998
999         /* Select zone for packet buffers */
1000         if (sc->rx_buffer_size <= MCLBYTES)
1001                 sc->rx_buffer_zone = zone_clust;
1002         else if (sc->rx_buffer_size <= MJUMPAGESIZE)
1003                 sc->rx_buffer_zone = zone_jumbop;
1004         else if (sc->rx_buffer_size <= MJUM9BYTES)
1005                 sc->rx_buffer_zone = zone_jumbo9;
1006         else
1007                 sc->rx_buffer_zone = zone_jumbo16;
1008
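        /* For example, with the default 1500-byte MTU the PDU plus prefix fits
         * in a standard 2KB cluster, so zone_clust is chosen; a 9000-byte
         * jumbo MTU would typically land in zone_jumbo9.
         */
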
1009         /*
1010          * Set up the scale table.  Enable all hash types and hash insertion.
1011          */
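        /* The indirection table round-robins RSS buckets across the receive
         * queues, e.g. with four queues the entries read 0,1,2,3,0,1,2,3,...
         */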
1012         for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
1013                 sc->rx_indir_table[index] = index % sc->rxq_count;
1014         if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
1015                                        SFXGE_RX_SCALE_MAX)) != 0)
1016                 goto fail;
1017         (void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
1018             (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
1019             (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);
1020
1021         if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key,
1022             sizeof(toep_key))) != 0)
1023                 goto fail;
1024
1025         /* Start the receive queue(s). */
1026         for (index = 0; index < sc->rxq_count; index++) {
1027                 if ((rc = sfxge_rx_qstart(sc, index)) != 0)
1028                         goto fail2;
1029         }
1030
1031         return (0);
1032
1033 fail2:
1034         while (--index >= 0)
1035                 sfxge_rx_qstop(sc, index);
1036
1037 fail:
1038         efx_rx_fini(sc->enp);
1039
1040         return (rc);
1041 }
1042
1043 static void sfxge_lro_init(struct sfxge_rxq *rxq)
1044 {
1045         struct sfxge_lro_state *st = &rxq->lro;
1046         unsigned i;
1047
1048         st->conns_mask = lro_table_size - 1;
1049         KASSERT(!((st->conns_mask + 1) & st->conns_mask),
1050                 ("lro_table_size must be a power of 2"));
1051         st->sc = rxq->sc;
1052         st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
1053                            M_SFXGE, M_WAITOK);
1054         st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
1055                              M_SFXGE, M_WAITOK);
1056         for (i = 0; i <= st->conns_mask; ++i) {
1057                 TAILQ_INIT(&st->conns[i]);
1058                 st->conns_n[i] = 0;
1059         }
1060         LIST_INIT(&st->active_conns);
1061         TAILQ_INIT(&st->free_conns);
1062 }
1063
1064 static void sfxge_lro_fini(struct sfxge_rxq *rxq)
1065 {
1066         struct sfxge_lro_state *st = &rxq->lro;
1067         struct sfxge_lro_conn *c;
1068         unsigned i;
1069
1070         /* Return cleanly if sfxge_lro_init() has not been called. */
1071         if (st->conns == NULL)
1072                 return;
1073
1074         KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));
1075
1076         for (i = 0; i <= st->conns_mask; ++i) {
1077                 while (!TAILQ_EMPTY(&st->conns[i])) {
1078                         c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
1079                         sfxge_lro_drop(rxq, c);
1080                 }
1081         }
1082
1083         while (!TAILQ_EMPTY(&st->free_conns)) {
1084                 c = TAILQ_FIRST(&st->free_conns);
1085                 TAILQ_REMOVE(&st->free_conns, c, link);
1086                 KASSERT(!c->mbuf, ("found orphaned mbuf"));
1087                 free(c, M_SFXGE);
1088         }
1089
1090         free(st->conns_n, M_SFXGE);
1091         free(st->conns, M_SFXGE);
1092         st->conns = NULL;
1093 }
1094
1095 static void
1096 sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
1097 {
1098         struct sfxge_rxq *rxq;
1099
1100         rxq = sc->rxq[index];
1101
1102         KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
1103             ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
1104
1105         /* Free the context array and the flow table. */
1106         free(rxq->queue, M_SFXGE);
1107         sfxge_lro_fini(rxq);
1108
1109         /* Release DMA memory. */
1110         sfxge_dma_free(&rxq->mem);
1111
1112         sc->rxq[index] = NULL;
1113
1114         free(rxq, M_SFXGE);
1115 }
1116
1117 static int
1118 sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
1119 {
1120         struct sfxge_rxq *rxq;
1121         struct sfxge_evq *evq;
1122         efsys_mem_t *esmp;
1123         int rc;
1124
1125         KASSERT(index < sc->rxq_count, ("index >= %d", sc->rxq_count));
1126
1127         rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
1128         rxq->sc = sc;
1129         rxq->index = index;
1130         rxq->entries = sc->rxq_entries;
1131         rxq->ptr_mask = rxq->entries - 1;
1132         rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries);
1133
1134         sc->rxq[index] = rxq;
1135         esmp = &rxq->mem;
1136
1137         evq = sc->evq[index];
1138
1139         /* Allocate and zero DMA space. */
1140         if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0)
1141                 return (rc);
1142
1143         /* Allocate buffer table entries. */
1144         sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries),
1145                                  &rxq->buf_base_id);
1146
1147         /* Allocate the context array and the flow table. */
1148         rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries,
1149             M_SFXGE, M_WAITOK | M_ZERO);
1150         sfxge_lro_init(rxq);
1151
1152         callout_init(&rxq->refill_callout, B_TRUE);
1153
1154         rxq->init_state = SFXGE_RXQ_INITIALIZED;
1155
1156         return (0);
1157 }
1158
1159 static const struct {
1160         const char *name;
1161         size_t offset;
1162 } sfxge_rx_stats[] = {
1163 #define SFXGE_RX_STAT(name, member) \
1164         { #name, offsetof(struct sfxge_rxq, member) }
1165         SFXGE_RX_STAT(lro_merges, lro.n_merges),
1166         SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
1167         SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
1168         SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
1169         SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
1170         SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
1171         SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
1172         SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
1173 };
1174
1175 static int
1176 sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
1177 {
1178         struct sfxge_softc *sc = arg1;
1179         unsigned int id = arg2;
1180         unsigned int sum, index;
1181
1182         /* Sum across all RX queues */
1183         sum = 0;
1184         for (index = 0; index < sc->rxq_count; index++)
1185                 sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
1186                                          sfxge_rx_stats[id].offset);
1187
1188         return (SYSCTL_OUT(req, &sum, sizeof(sum)));
1189 }
1190
1191 static void
1192 sfxge_rx_stat_init(struct sfxge_softc *sc)
1193 {
1194         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
1195         struct sysctl_oid_list *stat_list;
1196         unsigned int id;
1197
1198         stat_list = SYSCTL_CHILDREN(sc->stats_node);
1199
1200         for (id = 0; id < nitems(sfxge_rx_stats); id++) {
1201                 SYSCTL_ADD_PROC(
1202                         ctx, stat_list,
1203                         OID_AUTO, sfxge_rx_stats[id].name,
1204                         CTLTYPE_UINT|CTLFLAG_RD,
1205                         sc, id, sfxge_rx_stat_handler, "IU",
1206                         "");
1207         }
1208 }
1209
1210 void
1211 sfxge_rx_fini(struct sfxge_softc *sc)
1212 {
1213         int index;
1214
1215         index = sc->rxq_count;
1216         while (--index >= 0)
1217                 sfxge_rx_qfini(sc, index);
1218
1219         sc->rxq_count = 0;
1220 }
1221
1222 int
1223 sfxge_rx_init(struct sfxge_softc *sc)
1224 {
1225         struct sfxge_intr *intr;
1226         int index;
1227         int rc;
1228
1229         if (!ISP2(lro_table_size)) {
1230                 log(LOG_ERR, "%s=%u must be a power of 2\n",
1231                     SFXGE_LRO_PARAM(table_size), lro_table_size);
1232                 rc = EINVAL;
1233                 goto fail_lro_table_size;
1234         }
1235
1236         if (lro_idle_ticks == 0)
1237                 lro_idle_ticks = hz / 10 + 1; /* 100 ms */
1238
1239         intr = &sc->intr;
1240
1241         sc->rxq_count = intr->n_alloc;
1242
1243         KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
1244             ("intr->state != SFXGE_INTR_INITIALIZED"));
1245
1246         /* Initialize the receive queue(s) - one per interrupt. */
1247         for (index = 0; index < sc->rxq_count; index++) {
1248                 if ((rc = sfxge_rx_qinit(sc, index)) != 0)
1249                         goto fail;
1250         }
1251
1252         sfxge_rx_stat_init(sc);
1253
1254         return (0);
1255
1256 fail:
1257         /* Tear down the receive queue(s). */
1258         while (--index >= 0)
1259                 sfxge_rx_qfini(sc, index);
1260
1261         sc->rxq_count = 0;
1262
1263 fail_lro_table_size:
1264         return (rc);
1265 }