1 /**************************************************************************
3 Copyright (c) 2007-2009, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
51 #include <sys/sched.h>
54 #include <sys/syslog.h>
55 #include <sys/socket.h>
58 #include <net/ethernet.h>
60 #include <net/if_vlan_var.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/in.h>
64 #include <netinet/ip.h>
65 #include <netinet/tcp.h>
67 #include <dev/pci/pcireg.h>
68 #include <dev/pci/pcivar.h>
73 #include <cxgb_include.h>
77 int multiq_tx_enable = 1;
79 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
80 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
81 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
    "size of per-queue mbuf ring");
85 static int cxgb_tx_coalesce_force = 0;
86 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
    &cxgb_tx_coalesce_force, 0,
    "coalesce small packets into a single work request regardless of ring state");
#define COALESCE_START_DEFAULT	(TX_ETH_Q_SIZE >> 1)
#define COALESCE_START_MAX	(TX_ETH_Q_SIZE - (TX_ETH_Q_SIZE >> 3))
#define COALESCE_STOP_DEFAULT	(TX_ETH_Q_SIZE >> 2)
#define COALESCE_STOP_MIN	(TX_ETH_Q_SIZE >> 5)
#define TX_RECLAIM_DEFAULT	(TX_ETH_Q_SIZE >> 5)
#define TX_RECLAIM_MAX		(TX_ETH_Q_SIZE >> 2)
#define TX_RECLAIM_MIN		(TX_ETH_Q_SIZE >> 6)
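/*
 * Worked example, assuming the usual TX_ETH_Q_SIZE of 1024: coalescing
 * may be enabled once 512 descriptors are in use (the start threshold is
 * capped at 896) and is disabled again below 256 (floor of 32); reclaim
 * runs once 32 descriptors are reclaimable, clamped to the [16, 256] range.
 */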
100 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
102 &cxgb_tx_coalesce_enable_start);
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_start, 0,
    "coalesce enable threshold");
106 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
107 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_stop, 0,
    "coalesce disable threshold");
111 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
112 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
    &cxgb_tx_reclaim_threshold, 0,
    "tx cleaning minimum threshold");
/*
 * XXX don't re-enable this until TOE stops assuming
 * we have an m_ext
 */
121 static int recycle_enable = 0;
123 extern int cxgb_use_16k_clusters;
124 extern int nmbjumbop;
125 extern int nmbjumbo9;
126 extern int nmbjumbo16;
130 #define SGE_RX_SM_BUF_SIZE 1536
131 #define SGE_RX_DROP_THRES 16
132 #define SGE_RX_COPY_THRES 128
135 * Period of the Tx buffer reclaim timer. This timer does not need to run
136 * frequently as Tx buffers are usually reclaimed by new Tx packets.
138 #define TX_RECLAIM_PERIOD (hz >> 1)
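/* hz >> 1 callout ticks, i.e. the reclaim timer fires roughly twice a second. */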
141 * Values for sge_txq.flags
144 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
145 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
149 uint64_t flit[TX_DESC_FLITS];
159 struct rsp_desc { /* response queue descriptor */
160 struct rss_header rss_hdr;
163 uint8_t imm_data[47];
167 #define RX_SW_DESC_MAP_CREATED (1 << 0)
168 #define TX_SW_DESC_MAP_CREATED (1 << 1)
169 #define RX_SW_DESC_INUSE (1 << 3)
170 #define TX_SW_DESC_MAPPED (1 << 4)
172 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
173 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
174 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
175 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
177 struct tx_sw_desc { /* SW state per Tx descriptor */
183 struct rx_sw_desc { /* SW state per Rx descriptor */
196 struct refill_fl_cb_arg {
198 bus_dma_segment_t seg;
204 * Maps a number of flits to the number of Tx descriptors that can hold them.
207 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
209 * HW allows up to 4 descriptors to be combined into a WR.
211 static uint8_t flit_desc_map[] = {
213 #if SGE_NUM_GENBITS == 1
214 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
215 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
216 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
217 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
218 #elif SGE_NUM_GENBITS == 2
219 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
220 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
221 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
222 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
224 # error "SGE_NUM_GENBITS must be 1 or 2"
228 #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED)
229 #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock)
230 #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock)
231 #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock)
232 #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
233 #define TXQ_RING_NEEDS_ENQUEUE(qs) \
234 drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
235 #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
236 #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \
237 drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
238 #define TXQ_RING_DEQUEUE(qs) \
239 drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
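/*
 * Illustrative sketch (not part of the driver) of how the lock and ring
 * macros above are typically paired around queue-set access:
 *
 *	TXQ_LOCK(qs);
 *	if (!TXQ_RING_EMPTY(qs)) {
 *		struct mbuf *m = TXQ_RING_DEQUEUE(qs);
 *		... hand m to t3_encap() ...
 *	}
 *	TXQ_UNLOCK(qs);
 */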
243 static void sge_timer_cb(void *arg);
244 static void sge_timer_reclaim(void *arg, int ncount);
245 static void sge_txq_reclaim_handler(void *arg, int ncount);
246 static void cxgb_start_locked(struct sge_qset *qs);
249 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we do now when determining the need for coalescing
253 static __inline uint64_t
254 check_pkt_coalesce(struct sge_qset *qs)
260 if (__predict_false(cxgb_tx_coalesce_force))
262 txq = &qs->txq[TXQ_ETH];
263 sc = qs->port->adapter;
264 fill = &sc->tunq_fill[qs->idx];
266 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
267 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
268 if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	 * If the hardware transmit queue has more than
	 * cxgb_tx_coalesce_enable_start (default: 1/2 of the ring) entries
	 * in use we mark it as coalescing - we drop back from coalescing
	 * when we go below cxgb_tx_coalesce_enable_stop (default: 1/4) and
	 * there are no packets enqueued; this provides us with some degree
	 * of hysteresis
276 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
277 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
279 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
282 return (sc->tunq_coalesce);
287 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
301 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
310 struct coalesce_info {
316 coalesce_check(struct mbuf *m, void *arg)
318 struct coalesce_info *ci = arg;
319 int *count = &ci->count;
320 int *nbytes = &ci->nbytes;
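	/*
	 * Coalescing limits as encoded in the check below: at most 7
	 * packets per batch work request (each batch entry takes two
	 * flits, plus one header flit) and at most ~10500 coalesced
	 * payload bytes.
	 */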
322 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
323 (*count < 7) && (m->m_next == NULL))) {
332 cxgb_dequeue(struct sge_qset *qs)
334 struct mbuf *m, *m_head, *m_tail;
335 struct coalesce_info ci;
338 if (check_pkt_coalesce(qs) == 0)
339 return TXQ_RING_DEQUEUE(qs);
341 m_head = m_tail = NULL;
342 ci.count = ci.nbytes = 0;
344 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
345 if (m_head == NULL) {
347 } else if (m != NULL) {
348 m_tail->m_nextpkt = m;
		panic("trying to coalesce %d packets into one WR", ci.count);
358 * reclaim_completed_tx - reclaims completed Tx descriptors
359 * @adapter: the adapter
360 * @q: the Tx queue to reclaim completed descriptors from
362 * Reclaims Tx descriptors that the SGE has indicated it has processed,
 *	and frees the associated buffers if possible.  Called with the Tx
 *	queue locked.
367 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
369 struct sge_txq *q = &qs->txq[queue];
370 int reclaim = desc_reclaimable(q);
372 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
373 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
374 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
376 if (reclaim < reclaim_min)
379 mtx_assert(&qs->lock, MA_OWNED);
381 t3_free_tx_desc(qs, reclaim, queue);
382 q->cleaned += reclaim;
383 q->in_use -= reclaim;
385 if (isset(&qs->txq_stopped, TXQ_ETH))
386 clrbit(&qs->txq_stopped, TXQ_ETH);
392 * should_restart_tx - are there enough resources to restart a Tx queue?
395 * Checks if there are enough descriptors to restart a suspended Tx queue.
398 should_restart_tx(const struct sge_txq *q)
400 unsigned int r = q->processed - q->cleaned;
402 return q->in_use - r < (q->size >> 1);
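/*
 * Example: for a 1024-entry queue a suspended Tx queue is restartable
 * once fewer than 512 descriptors remain outstanding, where descriptors
 * already processed by the SGE but not yet cleaned do not count.
 */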
406 * t3_sge_init - initialize SGE
408 * @p: the SGE parameters
410 * Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queue sets here; instead the driver
412 * top-level must request those individually. We also do not enable DMA
413 * here, that should be done after the queues have been set up.
416 t3_sge_init(adapter_t *adap, struct sge_params *p)
420 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
422 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
423 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
424 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
425 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
426 #if SGE_NUM_GENBITS == 1
427 ctrl |= F_EGRGENCTRL;
429 if (adap->params.rev > 0) {
430 if (!(adap->flags & (USING_MSIX | USING_MSI)))
431 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
433 t3_write_reg(adap, A_SG_CONTROL, ctrl);
434 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
435 V_LORCQDRBTHRSH(512));
436 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
437 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
438 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
439 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
440 adap->params.rev < T3_REV_C ? 1000 : 500);
441 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
442 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
443 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
444 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
445 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
450 * sgl_len - calculates the size of an SGL of the given capacity
451 * @n: the number of SGL entries
453 * Calculates the number of flits needed for a scatter/gather list that
454 * can hold the given number of entries.
456 static __inline unsigned int
457 sgl_len(unsigned int n)
459 return ((3 * n) / 2 + (n & 1));
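/*
 * Each SGL entry occupies a flit and a half (an 8-byte address plus a
 * 4-byte length), and an odd entry count is padded up to a whole flit:
 * sgl_len(1) == 2, sgl_len(2) == 3, sgl_len(3) == 5, sgl_len(4) == 6.
 */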
463 * get_imm_packet - return the next ingress packet buffer from a response
464 * @resp: the response descriptor containing the packet data
466 * Return a packet containing the immediate data of the given response.
469 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
472 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
473 m->m_ext.ext_buf = NULL;
474 m->m_ext.ext_type = 0;
475 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
479 static __inline u_int
480 flits_to_desc(u_int n)
482 return (flit_desc_map[n]);
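/*
 * Per the table above (for SGE_NUM_GENBITS == 2): up to 14 flits fit in
 * one descriptor, 15-28 flits need two, and so on up to the 4-descriptor
 * work-request limit.
 */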
485 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
486 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
487 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
488 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
490 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
491 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
495 * t3_sge_err_intr_handler - SGE async event interrupt handler
496 * @adapter: the adapter
498 * Interrupt handler for SGE asynchronous (non-data) events.
501 t3_sge_err_intr_handler(adapter_t *adapter)
503 unsigned int v, status;
505 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
506 if (status & SGE_PARERR)
507 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
508 status & SGE_PARERR);
509 if (status & SGE_FRAMINGERR)
510 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
511 status & SGE_FRAMINGERR);
512 if (status & F_RSPQCREDITOVERFOW)
513 CH_ALERT(adapter, "SGE response queue credit overflow\n");
515 if (status & F_RSPQDISABLED) {
516 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
519 "packet delivered to disabled response queue (0x%x)\n",
520 (v >> S_RSPQ0DISABLED) & 0xff);
523 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
524 if (status & SGE_FATALERR)
525 t3_fatal_err(adapter);
529 t3_sge_prep(adapter_t *adap, struct sge_params *p)
531 int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
533 nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
534 nqsets *= adap->params.nports;
536 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
	while (!powerof2(fl_q_size))
		fl_q_size--;
541 use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
544 #if __FreeBSD_version >= 700111
546 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
547 jumbo_buf_size = MJUM16BYTES;
549 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
550 jumbo_buf_size = MJUM9BYTES;
553 jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
554 jumbo_buf_size = MJUMPAGESIZE;
	while (!powerof2(jumbo_q_size))
		jumbo_q_size--;
559 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
560 device_printf(adap->dev,
561 "Insufficient clusters and/or jumbo buffers.\n");
563 p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
565 for (i = 0; i < SGE_QSETS; ++i) {
566 struct qset_params *q = p->qset + i;
568 if (adap->params.nports > 2) {
569 q->coalesce_usecs = 50;
572 q->coalesce_usecs = 10;
574 q->coalesce_usecs = 5;
578 q->rspq_size = RSPQ_Q_SIZE;
579 q->fl_size = fl_q_size;
580 q->jumbo_size = jumbo_q_size;
581 q->jumbo_buf_size = jumbo_buf_size;
582 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
583 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
584 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
590 t3_sge_alloc(adapter_t *sc)
593 /* The parent tag. */
594 if (bus_dma_tag_create( NULL, /* parent */
595 1, 0, /* algnmnt, boundary */
596 BUS_SPACE_MAXADDR, /* lowaddr */
597 BUS_SPACE_MAXADDR, /* highaddr */
598 NULL, NULL, /* filter, filterarg */
599 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
600 BUS_SPACE_UNRESTRICTED, /* nsegments */
601 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
603 NULL, NULL, /* lock, lockarg */
605 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
610 * DMA tag for normal sized RX frames
612 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
613 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
614 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
615 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
620 * DMA tag for jumbo sized RX frames.
622 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
623 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
624 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
625 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
630 * DMA tag for TX frames.
632 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
633 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
634 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
635 NULL, NULL, &sc->tx_dmat)) {
636 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
644 t3_sge_free(struct adapter * sc)
647 if (sc->tx_dmat != NULL)
648 bus_dma_tag_destroy(sc->tx_dmat);
650 if (sc->rx_jumbo_dmat != NULL)
651 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
653 if (sc->rx_dmat != NULL)
654 bus_dma_tag_destroy(sc->rx_dmat);
656 if (sc->parent_dmat != NULL)
657 bus_dma_tag_destroy(sc->parent_dmat);
663 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
666 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
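	/*
	 * holdoff_tmr is presumably in 100ns ticks: t3_sge_init() programs
	 * A_SG_TIMER_TICK to core_ticks_per_usec(adap) / 10, i.e. one tick
	 * per 0.1us, hence coalesce_usecs is multiplied by 10 here.
	 */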
667 qs->rspq.polling = 0 /* p->polling */;
670 #if !defined(__i386__) && !defined(__amd64__)
672 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
674 struct refill_fl_cb_arg *cb_arg = arg;
676 cb_arg->error = error;
677 cb_arg->seg = segs[0];
683 * refill_fl - refill an SGE free-buffer list
684 * @sc: the controller softc
685 * @q: the free-list to refill
686 * @n: the number of new buffers to allocate
688 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
689 * The caller must assure that @n does not exceed the queue's capacity.
692 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
694 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
695 struct rx_desc *d = &q->desc[q->pidx];
696 struct refill_fl_cb_arg cb_arg;
	 * We only allocate a cluster; mbuf allocation happens after rx.
706 if (q->zone == zone_pack) {
707 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
709 cl = m->m_ext.ext_buf;
711 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
713 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
714 uma_zfree(q->zone, cl);
718 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
719 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
720 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
721 uma_zfree(q->zone, cl);
724 sd->flags |= RX_SW_DESC_MAP_CREATED;
726 #if !defined(__i386__) && !defined(__amd64__)
727 err = bus_dmamap_load(q->entry_tag, sd->map,
728 cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
730 if (err != 0 || cb_arg.error) {
731 if (q->zone == zone_pack)
732 uma_zfree(q->zone, cl);
737 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
739 sd->flags |= RX_SW_DESC_INUSE;
742 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
743 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
744 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
745 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
750 if (++q->pidx == q->size) {
761 if (q->db_pending >= 32) {
763 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
769 * free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
771 * @q: the SGE free list to clean up
773 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
774 * this queue should be stopped before calling this function.
777 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
779 u_int cidx = q->cidx;
781 while (q->credits--) {
782 struct rx_sw_desc *d = &q->sdesc[cidx];
784 if (d->flags & RX_SW_DESC_INUSE) {
785 bus_dmamap_unload(q->entry_tag, d->map);
786 bus_dmamap_destroy(q->entry_tag, d->map);
787 if (q->zone == zone_pack) {
788 m_init(d->m, zone_pack, MCLBYTES,
789 M_NOWAIT, MT_DATA, M_EXT);
790 uma_zfree(zone_pack, d->m);
792 m_init(d->m, zone_mbuf, MLEN,
793 M_NOWAIT, MT_DATA, 0);
794 uma_zfree(zone_mbuf, d->m);
795 uma_zfree(q->zone, d->rxsd_cl);
801 if (++cidx == q->size)
807 __refill_fl(adapter_t *adap, struct sge_fl *fl)
809 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
813 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
815 uint32_t reclaimable = fl->size - fl->credits;
818 refill_fl(adap, fl, min(max, reclaimable));
822 * recycle_rx_buf - recycle a receive buffer
823 * @adapter: the adapter
824 * @q: the SGE free list
825 * @idx: index of buffer to recycle
827 * Recycles the specified buffer on the given free list by adding it at
828 * the next available slot on the list.
831 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
833 struct rx_desc *from = &q->desc[idx];
834 struct rx_desc *to = &q->desc[q->pidx];
836 q->sdesc[q->pidx] = q->sdesc[idx];
	to->addr_lo = from->addr_lo;	/* already big endian */
	to->addr_hi = from->addr_hi;	/* likewise */
	wmb();	/* necessary? */
840 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
841 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
844 if (++q->pidx == q->size) {
848 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
852 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
857 *addr = segs[0].ds_addr;
861 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
862 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
863 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
865 size_t len = nelem * elem_size;
870 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
871 BUS_SPACE_MAXADDR_32BIT,
872 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
873 len, 0, NULL, NULL, tag)) != 0) {
874 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
878 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
880 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
884 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
889 len = nelem * sw_size;
890 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
893 if (parent_entry_tag == NULL)
896 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
897 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
898 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
899 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
900 NULL, NULL, entry_tag)) != 0) {
901 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
908 sge_slow_intr_handler(void *arg, int ncount)
912 t3_slow_intr_handler(sc);
916 * sge_timer_cb - perform periodic maintenance of an SGE qset
917 * @data: the SGE queue set to maintain
919 * Runs periodically from a timer to perform maintenance of an SGE queue
920 * set. It performs two tasks:
922 * a) Cleans up any completed Tx descriptors that may still be pending.
923 * Normal descriptor cleanup happens when new packets are added to a Tx
924 * queue so this timer is relatively infrequent and does any cleanup only
925 * if the Tx queue has not seen any new packets in a while. We make a
926 * best effort attempt to reclaim descriptors, in that we don't wait
927 * around if we cannot get a queue's lock (which most likely is because
928 * someone else is queueing new packets and so will also handle the clean
929 * up). Since control queues use immediate data exclusively we don't
930 * bother cleaning them up here.
932 * b) Replenishes Rx queues that have run out due to memory shortage.
933 * Normally new Rx buffers are added when existing ones are consumed but
934 * when out of memory a queue can become empty. We try to add only a few
935 * buffers here, the queue will be replenished fully as these new buffers
936 * are used up if memory shortage has subsided.
 *	c) Return coalesced response queue credits in case a response queue is
 *	starved.
941 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
942 * fifo overflows and the FW doesn't implement any recovery scheme yet.
945 sge_timer_cb(void *arg)
948 if ((sc->flags & USING_MSIX) == 0) {
950 struct port_info *pi;
954 int reclaim_ofl, refill_rx;
956 if (sc->open_device_map == 0)
959 for (i = 0; i < sc->params.nports; i++) {
961 for (j = 0; j < pi->nqsets; j++) {
962 qs = &sc->sge.qs[pi->first_qset + j];
964 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
965 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
966 (qs->fl[1].credits < qs->fl[1].size));
967 if (reclaim_ofl || refill_rx) {
968 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
975 if (sc->params.nports > 2) {
978 for_each_port(sc, i) {
979 struct port_info *pi = &sc->port[i];
981 t3_write_reg(sc, A_SG_KDOORBELL,
983 (FW_TUNNEL_SGEEC_START + pi->first_qset));
986 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
987 sc->open_device_map != 0)
988 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c.
997 t3_sge_init_adapter(adapter_t *sc)
999 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
1000 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1001 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
1006 t3_sge_reset_adapter(adapter_t *sc)
1008 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1013 t3_sge_init_port(struct port_info *pi)
1015 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
1020 * refill_rspq - replenish an SGE response queue
1021 * @adapter: the adapter
1022 * @q: the response queue to replenish
1023 * @credits: how many new responses to make available
1025 * Replenishes a response queue by making the supplied number of responses
1028 static __inline void
1029 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1032 /* mbufs are allocated on demand when a rspq entry is processed. */
1033 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1034 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1038 sge_txq_reclaim_handler(void *arg, int ncount)
1040 struct sge_qset *qs = arg;
	for (i = 0; i < SGE_TXQ_PER_SET; i++)
1044 reclaim_completed_tx(qs, 16, i);
1048 sge_timer_reclaim(void *arg, int ncount)
1050 struct port_info *pi = arg;
1051 int i, nqsets = pi->nqsets;
1052 adapter_t *sc = pi->adapter;
1053 struct sge_qset *qs;
1056 KASSERT((sc->flags & USING_MSIX) == 0,
1057 ("can't call timer reclaim for msi-x"));
1059 for (i = 0; i < nqsets; i++) {
1060 qs = &sc->sge.qs[pi->first_qset + i];
1062 reclaim_completed_tx(qs, 16, TXQ_OFLD);
1063 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
1064 &sc->sge.qs[0].rspq.lock;
1066 if (mtx_trylock(lock)) {
1067 /* XXX currently assume that we are *NOT* polling */
1068 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
1070 if (qs->fl[0].credits < qs->fl[0].size - 16)
1071 __refill_fl(sc, &qs->fl[0]);
1072 if (qs->fl[1].credits < qs->fl[1].size - 16)
1073 __refill_fl(sc, &qs->fl[1]);
1075 if (status & (1 << qs->rspq.cntxt_id)) {
1076 if (qs->rspq.credits) {
1077 refill_rspq(sc, &qs->rspq, 1);
1079 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1080 1 << qs->rspq.cntxt_id);
1089 * init_qset_cntxt - initialize an SGE queue set context info
1090 * @qs: the queue set
1091 * @id: the queue set id
1093 * Initializes the TIDs and context ids for the queues of a queue set.
1096 init_qset_cntxt(struct sge_qset *qs, u_int id)
1099 qs->rspq.cntxt_id = id;
1100 qs->fl[0].cntxt_id = 2 * id;
1101 qs->fl[1].cntxt_id = 2 * id + 1;
1102 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
1103 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
1104 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
1105 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
1106 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
1108 mbufq_init(&qs->txq[TXQ_ETH].sendq);
1109 mbufq_init(&qs->txq[TXQ_OFLD].sendq);
1110 mbufq_init(&qs->txq[TXQ_CTRL].sendq);
1115 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
1117 txq->in_use += ndesc;
	 * XXX we don't handle stopping of the queue; presumably start
	 * handles this when we bump against the end
1122 txqs->gen = txq->gen;
1123 txq->unacked += ndesc;
1124 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
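	/*
	 * Bit 5 of "unacked" (value 32) is shifted into the WR_COMPL
	 * position, so a completion is requested once every 32 WRs.
	 */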
1126 txqs->pidx = txq->pidx;
1129 if (((txqs->pidx > txq->cidx) &&
1130 (txq->pidx < txqs->pidx) &&
1131 (txq->pidx >= txq->cidx)) ||
1132 ((txqs->pidx < txq->cidx) &&
	    (txq->pidx >= txq->cidx)) ||
1134 ((txqs->pidx < txq->cidx) &&
1135 (txq->cidx < txqs->pidx)))
1136 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
1137 txqs->pidx, txq->pidx, txq->cidx);
1139 if (txq->pidx >= txq->size) {
1140 txq->pidx -= txq->size;
1147 * calc_tx_descs - calculate the number of Tx descriptors for a packet
1148 * @m: the packet mbufs
1149 * @nsegs: the number of segments
1151 * Returns the number of Tx descriptors needed for the given Ethernet
1152 * packet. Ethernet packets require addition of WR and CPL headers.
1154 static __inline unsigned int
1155 calc_tx_descs(const struct mbuf *m, int nsegs)
1159 if (m->m_pkthdr.len <= PIO_LEN)
1162 flits = sgl_len(nsegs) + 2;
1163 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1166 return flits_to_desc(flits);
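/*
 * Example: a non-TSO packet with 3 DMA segments that does not fit in
 * PIO_LEN needs sgl_len(3) + 2 == 7 flits, which flits_to_desc() maps
 * to a single descriptor; the CSUM_TSO case adds an extra flit for the
 * larger LSO CPL header.
 */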
1170 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
1171 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
1174 int err, pktlen, pass = 0;
1175 bus_dma_tag_t tag = txq->entry_tag;
1180 pktlen = m0->m_pkthdr.len;
1181 #if defined(__i386__) || defined(__amd64__)
1182 if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
1186 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
1191 if (err == EFBIG && pass == 0) {
1193 /* Too many segments, try to defrag */
1194 m0 = m_defrag(m0, M_DONTWAIT);
1202 } else if (err == ENOMEM) {
1206 printf("map failure err=%d pktlen=%d\n", err, pktlen);
1212 #if !defined(__i386__) && !defined(__amd64__)
1213 bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
1215 txsd->flags |= TX_SW_DESC_MAPPED;
1221 * make_sgl - populate a scatter/gather list for a packet
1222 * @sgp: the SGL to populate
1223 * @segs: the packet dma segments
1224 * @nsegs: the number of segments
1226 * Generates a scatter/gather list for the buffers that make up a packet
1227 * and returns the SGL size in 8-byte words. The caller must size the SGL
1230 static __inline void
1231 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1235 for (idx = 0, i = 0; i < nsegs; i++) {
1237 * firmware doesn't like empty segments
1239 if (segs[i].ds_len == 0)
1244 sgp->len[idx] = htobe32(segs[i].ds_len);
1245 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1256 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1257 * @adap: the adapter
 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 *	where the HW may go to sleep just after we checked; in that case
 *	the interrupt handler will detect the outstanding TX packet
 *	and ring the doorbell for us.
1265 * When GTS is disabled we unconditionally ring the doorbell.
1267 static __inline void
1268 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
1271 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1272 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1273 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1275 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1278 t3_write_reg(adap, A_SG_KDOORBELL,
1279 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1282 if (mustring || ++q->db_pending >= 32) {
1283 wmb(); /* write descriptors before telling HW */
1284 t3_write_reg(adap, A_SG_KDOORBELL,
1285 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1291 static __inline void
1292 wr_gen2(struct tx_desc *d, unsigned int gen)
1294 #if SGE_NUM_GENBITS == 2
1295 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
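	/*
	 * The generation value is written into the last flit of the
	 * descriptor; presumably the SGE uses it to recognize when a
	 * descriptor has been completely written by software.
	 */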
1300 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1301 * @ndesc: number of Tx descriptors spanned by the SGL
1302 * @txd: first Tx descriptor to be written
1303 * @txqs: txq state (generation and producer index)
1304 * @txq: the SGE Tx queue
1306 * @flits: number of flits to the start of the SGL in the first descriptor
1307 * @sgl_flits: the SGL size in flits
1308 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1309 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1311 * Write a work request header and an associated SGL. If the SGL is
1312 * small enough to fit into one Tx descriptor it has already been written
1313 * and we just need to write the WR header. Otherwise we distribute the
1314 * SGL across the number of descriptors it spans.
1317 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1318 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1319 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1322 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1323 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1325 if (__predict_true(ndesc == 1)) {
1326 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1327 V_WR_SGLSFLT(flits)) | wr_hi,
1328 htonl(V_WR_LEN(flits + sgl_flits) |
1329 V_WR_GEN(txqs->gen)) | wr_lo);
1331 wr_gen2(txd, txqs->gen);
1334 unsigned int ogen = txqs->gen;
1335 const uint64_t *fp = (const uint64_t *)sgl;
1336 struct work_request_hdr *wp = wrp;
1338 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1339 V_WR_SGLSFLT(flits)) | wr_hi;
1342 unsigned int avail = WR_FLITS - flits;
1344 if (avail > sgl_flits)
1346 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1355 if (++txqs->pidx == txq->size) {
1363 * when the head of the mbuf chain
1364 * is freed all clusters will be freed
1367 wrp = (struct work_request_hdr *)txd;
1368 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
1369 V_WR_SGLSFLT(1)) | wr_hi;
1370 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
1372 V_WR_GEN(txqs->gen)) | wr_lo;
1373 wr_gen2(txd, txqs->gen);
1376 wrp->wrh_hi |= htonl(F_WR_EOP);
1378 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1379 wr_gen2((struct tx_desc *)wp, ogen);
1383 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
1384 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
#define GET_VTAG(cntrl, m) \
do { \
	if ((m)->m_flags & M_VLANTAG) \
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
} while (0)
1393 t3_encap(struct sge_qset *qs, struct mbuf **m)
1397 struct sge_txq *txq;
1398 struct txq_state txqs;
1399 struct port_info *pi;
1400 unsigned int ndesc, flits, cntrl, mlen;
1401 int err, nsegs, tso_info = 0;
1403 struct work_request_hdr *wrp;
1404 struct tx_sw_desc *txsd;
1405 struct sg_ent *sgp, *sgl;
1406 uint32_t wr_hi, wr_lo, sgl_flits;
1407 bus_dma_segment_t segs[TX_MAX_SEGS];
1409 struct tx_desc *txd;
1413 txq = &qs->txq[TXQ_ETH];
1414 txd = &txq->desc[txq->pidx];
1415 txsd = &txq->sdesc[txq->pidx];
1421 mtx_assert(&qs->lock, MA_OWNED);
1422 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1423 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
1425 if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
1426 m0->m_pkthdr.csum_flags & (CSUM_TSO))
1427 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1429 if (m0->m_nextpkt != NULL) {
1430 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
1434 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
1435 &m0, segs, &nsegs))) {
1437 printf("failed ... err=%d\n", err);
1440 mlen = m0->m_pkthdr.len;
1441 ndesc = calc_tx_descs(m0, nsegs);
1443 txq_prod(txq, ndesc, &txqs);
1445 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
1448 if (m0->m_nextpkt != NULL) {
1449 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
			panic("trying to coalesce %d packets into one WR", nsegs);
1454 txq->txq_coalesced += nsegs;
1455 wrp = (struct work_request_hdr *)txd;
1456 flits = nsegs*2 + 1;
1458 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
1459 struct cpl_tx_pkt_batch_entry *cbe;
1461 uint32_t *hflit = (uint32_t *)&flit;
1462 int cflags = m0->m_pkthdr.csum_flags;
1464 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1465 GET_VTAG(cntrl, m0);
1466 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1467 if (__predict_false(!(cflags & CSUM_IP)))
1468 cntrl |= F_TXPKT_IPCSUM_DIS;
1469 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
1470 cntrl |= F_TXPKT_L4CSUM_DIS;
1472 hflit[0] = htonl(cntrl);
1473 hflit[1] = htonl(segs[i].ds_len | 0x80000000);
1474 flit |= htobe64(1 << 24);
1475 cbe = &cpl_batch->pkt_entry[i];
1476 cbe->cntrl = hflit[0];
1477 cbe->len = hflit[1];
1478 cbe->addr = htobe64(segs[i].ds_addr);
1481 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1482 V_WR_SGLSFLT(flits)) |
1483 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1484 wr_lo = htonl(V_WR_LEN(flits) |
1485 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1486 set_wr_hdr(wrp, wr_hi, wr_lo);
1488 ETHER_BPF_MTAP(pi->ifp, m0);
1489 wr_gen2(txd, txqs.gen);
1490 check_ring_tx_db(sc, txq, 0);
1492 } else if (tso_info) {
1494 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1495 struct ether_header *eh;
1500 GET_VTAG(cntrl, m0);
1501 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1502 hdr->cntrl = htonl(cntrl);
1503 hdr->len = htonl(mlen | 0x80000000);
1505 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
1506 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1507 m0, mlen, m0->m_pkthdr.tso_segsz,
1508 m0->m_pkthdr.csum_flags, m0->m_flags);
1509 panic("tx tso packet too small");
1512 /* Make sure that ether, ip, tcp headers are all in m0 */
1513 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1514 m0 = m_pullup(m0, TCPPKTHDRSIZE);
1515 if (__predict_false(m0 == NULL)) {
1516 /* XXX panic probably an overreaction */
1517 panic("couldn't fit header into mbuf");
1521 eh = mtod(m0, struct ether_header *);
1522 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1523 eth_type = CPL_ETH_II_VLAN;
1524 ip = (struct ip *)((struct ether_vlan_header *)eh + 1);
1526 eth_type = CPL_ETH_II;
1527 ip = (struct ip *)(eh + 1);
1529 tcp = (struct tcphdr *)(ip + 1);
1531 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1532 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1533 V_LSO_TCPHDR_WORDS(tcp->th_off);
1534 hdr->lso_info = htonl(tso_info);
1536 if (__predict_false(mlen <= PIO_LEN)) {
			 * pkt is not undersized but still fits in PIO_LEN;
			 * this indicates a TSO bug at the higher levels.
1542 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1543 flits = (mlen + 7) / 8 + 3;
1544 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1545 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1546 F_WR_SOP | F_WR_EOP | txqs.compl);
1547 wr_lo = htonl(V_WR_LEN(flits) |
1548 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1549 set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
1551 ETHER_BPF_MTAP(pi->ifp, m0);
1552 wr_gen2(txd, txqs.gen);
1553 check_ring_tx_db(sc, txq, 0);
1559 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1561 GET_VTAG(cntrl, m0);
1562 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1563 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1564 cntrl |= F_TXPKT_IPCSUM_DIS;
1565 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1566 cntrl |= F_TXPKT_L4CSUM_DIS;
1567 cpl->cntrl = htonl(cntrl);
1568 cpl->len = htonl(mlen | 0x80000000);
1570 if (mlen <= PIO_LEN) {
1572 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1573 flits = (mlen + 7) / 8 + 2;
1575 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1576 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1577 F_WR_SOP | F_WR_EOP | txqs.compl);
1578 wr_lo = htonl(V_WR_LEN(flits) |
1579 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1580 set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
1582 ETHER_BPF_MTAP(pi->ifp, m0);
1583 wr_gen2(txd, txqs.gen);
1584 check_ring_tx_db(sc, txq, 0);
1590 wrp = (struct work_request_hdr *)txd;
1591 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1592 make_sgl(sgp, segs, nsegs);
1594 sgl_flits = sgl_len(nsegs);
1596 ETHER_BPF_MTAP(pi->ifp, m0);
1598 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
1599 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1600 wr_lo = htonl(V_WR_TID(txq->token));
1601 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
1602 sgl_flits, wr_hi, wr_lo);
1603 check_ring_tx_db(sc, txq, 0);
1609 cxgb_tx_watchdog(void *arg)
1611 struct sge_qset *qs = arg;
1612 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1614 if (qs->coalescing != 0 &&
1615 (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
1618 else if (qs->coalescing == 0 &&
1619 (txq->in_use >= cxgb_tx_coalesce_enable_start))
1621 if (TXQ_TRYLOCK(qs)) {
1622 qs->qs_flags |= QS_FLUSHING;
1623 cxgb_start_locked(qs);
1624 qs->qs_flags &= ~QS_FLUSHING;
1627 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
1628 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
1629 qs, txq->txq_watchdog.c_cpu);
1633 cxgb_tx_timeout(void *arg)
1635 struct sge_qset *qs = arg;
1636 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1638 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
1640 if (TXQ_TRYLOCK(qs)) {
1641 qs->qs_flags |= QS_TIMEOUT;
1642 cxgb_start_locked(qs);
1643 qs->qs_flags &= ~QS_TIMEOUT;
1649 cxgb_start_locked(struct sge_qset *qs)
1651 struct mbuf *m_head = NULL;
1652 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1653 struct port_info *pi = qs->port;
1654 struct ifnet *ifp = pi->ifp;
1656 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
1657 reclaim_completed_tx(qs, 0, TXQ_ETH);
1659 if (!pi->link_config.link_ok) {
1663 TXQ_LOCK_ASSERT(qs);
1664 while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
1665 pi->link_config.link_ok) {
1666 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1668 if (txq->size - txq->in_use <= TX_MAX_DESC)
1671 if ((m_head = cxgb_dequeue(qs)) == NULL)
		 * Encapsulation can modify our pointer, and/or make it
		 * NULL on failure.  In that event, we can't requeue.
1677 if (t3_encap(qs, &m_head) || m_head == NULL)
1683 if (txq->db_pending)
1684 check_ring_tx_db(pi->adapter, txq, 1);
1686 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
1687 pi->link_config.link_ok)
1688 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1689 qs, txq->txq_timer.c_cpu);
1695 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
1697 struct port_info *pi = qs->port;
1698 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1699 struct buf_ring *br = txq->txq_mr;
1702 avail = txq->size - txq->in_use;
1703 TXQ_LOCK_ASSERT(qs);
1706 * We can only do a direct transmit if the following are true:
1707 * - we aren't coalescing (ring < 3/4 full)
1708 * - the link is up -- checked in caller
1709 * - there are no packets enqueued already
1710 * - there is space in hardware transmit queue
1712 if (check_pkt_coalesce(qs) == 0 &&
1713 !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
1714 if (t3_encap(qs, &m)) {
1716 (error = drbr_enqueue(ifp, br, m)) != 0)
1719 if (txq->db_pending)
1720 check_ring_tx_db(pi->adapter, txq, 1);
1723 * We've bypassed the buf ring so we need to update
1724 * the stats directly
1726 txq->txq_direct_packets++;
1727 txq->txq_direct_bytes += m->m_pkthdr.len;
1729 } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
1732 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1733 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
1734 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
1735 cxgb_start_locked(qs);
1736 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
1737 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1738 qs, txq->txq_timer.c_cpu);
1743 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
1745 struct sge_qset *qs;
1746 struct port_info *pi = ifp->if_softc;
1747 int error, qidx = pi->first_qset;
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    !pi->link_config.link_ok) {
1755 if (m->m_flags & M_FLOWID)
1756 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
1758 qs = &pi->adapter->sge.qs[qidx];
1760 if (TXQ_TRYLOCK(qs)) {
1762 error = cxgb_transmit_locked(ifp, qs, m);
1765 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
1769 cxgb_start(struct ifnet *ifp)
1771 struct port_info *pi = ifp->if_softc;
1772 struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];
1774 if (!pi->link_config.link_ok)
1778 cxgb_start_locked(qs);
1783 cxgb_qflush(struct ifnet *ifp)
1786 * flush any enqueued mbufs in the buf_rings
1787 * and in the transmit queues
1794 * write_imm - write a packet into a Tx descriptor as immediate data
1795 * @d: the Tx descriptor to write
1797 * @len: the length of packet data to write as immediate data
1798 * @gen: the generation bit value to write
1800 * Writes a packet as immediate data into a Tx descriptor. The packet
1801 * contains a work request at its beginning. We must write the packet
 *	carefully so the SGE doesn't accidentally read it before it's
 *	written in its entirety.
1805 static __inline void
1806 write_imm(struct tx_desc *d, struct mbuf *m,
1807 unsigned int len, unsigned int gen)
1809 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1810 struct work_request_hdr *to = (struct work_request_hdr *)d;
1811 uint32_t wr_hi, wr_lo;
1814 panic("len too big %d\n", len);
1815 if (len < sizeof(*from))
1816 panic("len too small %d", len);
1818 memcpy(&to[1], &from[1], len - sizeof(*from));
1819 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
1820 V_WR_BCNTLFLT(len & 7));
1821 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
1822 V_WR_LEN((len + 7) / 8));
1823 set_wr_hdr(to, wr_hi, wr_lo);
	 * This check is a hack; we should really fix the logic so
	 * that this can't happen
1831 if (m->m_type != MT_DONTFREE)
1837 * check_desc_avail - check descriptor availability on a send queue
1838 * @adap: the adapter
1840 * @m: the packet needing the descriptors
1841 * @ndesc: the number of Tx descriptors needed
1842 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1844 * Checks if the requested number of Tx descriptors is available on an
1845 * SGE send queue. If the queue is already suspended or not enough
1846 * descriptors are available the packet is queued for later transmission.
1847 * Must be called with the Tx queue locked.
1849 * Returns 0 if enough descriptors are available, 1 if there aren't
1850 * enough descriptors and the packet has been queued, and 2 if the caller
1851 * needs to retry because there weren't enough descriptors at the
1852 * beginning of the call but some freed up in the mean time.
1855 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1856 struct mbuf *m, unsigned int ndesc,
	 * XXX We currently only use this for checking the control queue;
	 * the control queue is only used for binding qsets, which happens
	 * at init time, so we are guaranteed enough descriptors
1864 if (__predict_false(!mbufq_empty(&q->sendq))) {
1865 addq_exit: mbufq_tail(&q->sendq, m);
1868 if (__predict_false(q->size - q->in_use < ndesc)) {
1870 struct sge_qset *qs = txq_to_qset(q, qid);
1872 setbit(&qs->txq_stopped, qid);
1873 if (should_restart_tx(q) &&
1874 test_and_clear_bit(qid, &qs->txq_stopped))
1885 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1886 * @q: the SGE control Tx queue
1888 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1889 * that send only immediate data (presently just the control queues) and
1890 * thus do not have any mbufs
1892 static __inline void
1893 reclaim_completed_tx_imm(struct sge_txq *q)
1895 unsigned int reclaim = q->processed - q->cleaned;
1897 q->in_use -= reclaim;
1898 q->cleaned += reclaim;
1902 immediate(const struct mbuf *m)
	return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
1908 * ctrl_xmit - send a packet through an SGE control Tx queue
1909 * @adap: the adapter
1910 * @q: the control queue
1913 * Send a packet through an SGE control Tx queue. Packets sent through
1914 * a control queue must fit entirely as immediate data in a single Tx
1915 * descriptor and have no page fragments.
1918 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1921 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1922 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1924 if (__predict_false(!immediate(m))) {
1929 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1930 wrp->wrh_lo = htonl(V_WR_TID(q->token));
1933 again: reclaim_completed_tx_imm(q);
1935 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1936 if (__predict_false(ret)) {
1943 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1946 if (++q->pidx >= q->size) {
1952 t3_write_reg(adap, A_SG_KDOORBELL,
1953 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1959 * restart_ctrlq - restart a suspended control queue
 *	@qs: the queue set containing the control queue
1962 * Resumes transmission on a suspended Tx control queue.
1965 restart_ctrlq(void *data, int npending)
1968 struct sge_qset *qs = (struct sge_qset *)data;
1969 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1970 adapter_t *adap = qs->port->adapter;
1973 again: reclaim_completed_tx_imm(q);
1975 while (q->in_use < q->size &&
1976 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1978 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1980 if (++q->pidx >= q->size) {
1986 if (!mbufq_empty(&q->sendq)) {
1987 setbit(&qs->txq_stopped, TXQ_CTRL);
1989 if (should_restart_tx(q) &&
1990 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1995 t3_write_reg(adap, A_SG_KDOORBELL,
1996 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2001 * Send a management message through control queue 0
2004 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
2006 return ctrl_xmit(adap, &adap->sge.qs[0], m);
2010 * free_qset - free the resources of an SGE queue set
2011 * @sc: the controller owning the queue set
2014 * Release the HW and SW resources associated with an SGE queue set, such
2015 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
2016 * queue set must be quiesced prior to calling this.
2019 t3_free_qset(adapter_t *sc, struct sge_qset *q)
2023 reclaim_completed_tx(q, 0, TXQ_ETH);
2024 if (q->txq[TXQ_ETH].txq_mr != NULL)
2025 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
2026 if (q->txq[TXQ_ETH].txq_ifq != NULL) {
2027 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
2028 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
2031 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2032 if (q->fl[i].desc) {
2033 mtx_lock_spin(&sc->sge.reg_lock);
2034 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
2035 mtx_unlock_spin(&sc->sge.reg_lock);
2036 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
2037 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
2039 bus_dma_tag_destroy(q->fl[i].desc_tag);
2040 bus_dma_tag_destroy(q->fl[i].entry_tag);
2042 if (q->fl[i].sdesc) {
2043 free_rx_bufs(sc, &q->fl[i]);
2044 free(q->fl[i].sdesc, M_DEVBUF);
2048 mtx_unlock(&q->lock);
2049 MTX_DESTROY(&q->lock);
2050 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2051 if (q->txq[i].desc) {
2052 mtx_lock_spin(&sc->sge.reg_lock);
2053 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2054 mtx_unlock_spin(&sc->sge.reg_lock);
2055 bus_dmamap_unload(q->txq[i].desc_tag,
2056 q->txq[i].desc_map);
2057 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2058 q->txq[i].desc_map);
2059 bus_dma_tag_destroy(q->txq[i].desc_tag);
2060 bus_dma_tag_destroy(q->txq[i].entry_tag);
2062 if (q->txq[i].sdesc) {
2063 free(q->txq[i].sdesc, M_DEVBUF);
2068 mtx_lock_spin(&sc->sge.reg_lock);
2069 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2070 mtx_unlock_spin(&sc->sge.reg_lock);
2072 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2073 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2075 bus_dma_tag_destroy(q->rspq.desc_tag);
2076 MTX_DESTROY(&q->rspq.lock);
2080 tcp_lro_free(&q->lro.ctrl);
2083 bzero(q, sizeof(*q));
2087 * t3_free_sge_resources - free SGE resources
2088 * @sc: the adapter softc
2090 * Frees resources used by the SGE queue sets.
2093 t3_free_sge_resources(adapter_t *sc)
	for (nqsets = i = 0; i < sc->params.nports; i++)
2098 nqsets += sc->port[i].nqsets;
2100 for (i = 0; i < nqsets; ++i) {
2101 TXQ_LOCK(&sc->sge.qs[i]);
2102 t3_free_qset(sc, &sc->sge.qs[i]);
2108 * t3_sge_start - enable SGE
2109 * @sc: the controller softc
2111 * Enables the SGE for DMAs. This is the last step in starting packet
2115 t3_sge_start(adapter_t *sc)
2117 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2121 * t3_sge_stop - disable SGE operation
 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
2125 * from error interrupts) or from normal process context. In the latter
2126 * case it also disables any pending queue restart tasklets. Note that
2127 * if it is called in interrupt context it cannot disable the restart
 *	tasklets as it cannot wait; however, the tasklets will have no effect
2129 * since the doorbells are disabled and the driver will call this again
2130 * later from process context, at which time the tasklets will be stopped
2131 * if they are still running.
2134 t3_sge_stop(adapter_t *sc)
2138 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
	for (nqsets = i = 0; i < sc->params.nports; i++)
2144 nqsets += sc->port[i].nqsets;
2150 for (i = 0; i < nqsets; ++i) {
2151 struct sge_qset *qs = &sc->sge.qs[i];
2153 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2154 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2160 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
 *	@qs: the queue set containing the Tx queue
 *	@reclaimable: the number of descriptors to reclaim
 *	@queue: the index of the Tx queue within the set
2167 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2168 * Tx buffers. Called with the Tx queue lock held.
 *	Returns the number of buffers reclaimed.
2173 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2175 struct tx_sw_desc *txsd;
2176 unsigned int cidx, mask;
2177 struct sge_txq *q = &qs->txq[queue];
2180 T3_TRACE2(sc->tb[q->cntxt_id & 7],
2181 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2185 txsd = &q->sdesc[cidx];
2187 mtx_assert(&qs->lock, MA_OWNED);
2188 while (reclaimable--) {
2189 prefetch(q->sdesc[(cidx + 1) & mask].m);
2190 prefetch(q->sdesc[(cidx + 2) & mask].m);
2192 if (txsd->m != NULL) {
2193 if (txsd->flags & TX_SW_DESC_MAPPED) {
2194 bus_dmamap_unload(q->entry_tag, txsd->map);
2195 txsd->flags &= ~TX_SW_DESC_MAPPED;
2197 m_freem_list(txsd->m);
2203 if (++cidx == q->size) {
2213 * is_new_response - check if a response is newly written
2214 * @r: the response descriptor
2215 * @q: the response queue
2217 * Returns true if a response descriptor contains a yet unprocessed
2221 is_new_response(const struct rsp_desc *r,
2222 const struct sge_rspq *q)
2224 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
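/*
 * The queue's generation bit flips each time the response ring wraps,
 * so an entry whose F_RSPD_GEN2 bit matches q->gen was written after
 * the most recent wrap and has not been processed yet.
 */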
2227 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2228 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2229 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2230 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2231 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2233 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2234 #define NOMEM_INTR_DELAY 2500
2237 * write_ofld_wr - write an offload work request
2238 * @adap: the adapter
2239 * @m: the packet to send
2241 * @pidx: index of the first Tx descriptor to write
2242 * @gen: the generation value to use
2243 * @ndesc: number of descriptors the packet will occupy
2245 * Write an offload work request to send the supplied packet. The packet
2246 * data already carry the work request with most fields populated.
2249 write_ofld_wr(adapter_t *adap, struct mbuf *m,
2250 struct sge_txq *q, unsigned int pidx,
2251 unsigned int gen, unsigned int ndesc,
2252 bus_dma_segment_t *segs, unsigned int nsegs)
2254 unsigned int sgl_flits, flits;
2255 struct work_request_hdr *from;
2256 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
2257 struct tx_desc *d = &q->desc[pidx];
2258 struct txq_state txqs;
2260 if (immediate(m) && nsegs == 0) {
2261 write_imm(d, m, m->m_len, gen);
2265 /* Only TX_DATA builds SGLs */
2266 from = mtod(m, struct work_request_hdr *);
2267 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
2269 flits = m->m_len / 8;
2270 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
2272 make_sgl(sgp, segs, nsegs);
2273 sgl_flits = sgl_len(nsegs);
2279 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
2280 from->wrh_hi, from->wrh_lo);
2284 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
2287 * Returns the number of Tx descriptors needed for the given offload
2288 * packet. These packets are already fully constructed.
2290 static __inline unsigned int
2291 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
2293 unsigned int flits, cnt = 0;
2296 if (m->m_len <= WR_LEN && nsegs == 0)
2297 return (1); /* packet fits as immediate data */
2300 * This needs to be re-visited for TOE
2306 flits = m->m_len / 8;
2308 ndescs = flits_to_desc(flits + sgl_len(cnt));
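	/*
	 * Example: 40 bytes of pre-built WR/CPL headers give 5 flits;
	 * with 2 SGL entries (sgl_len(2) == 3) the total of 8 flits
	 * still maps to a single descriptor.
	 */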
2314 * ofld_xmit - send a packet through an offload queue
2315 * @adap: the adapter
2316 * @q: the Tx offload queue
2319 * Send an offload packet through an SGE offload queue.
2322 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2326 unsigned int pidx, gen;
2327 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2328 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2329 struct tx_sw_desc *stx;
2331 nsegs = m_get_sgllen(m);
2332 vsegs = m_get_sgl(m);
2333 ndesc = calc_tx_descs_ofld(m, nsegs);
2334 busdma_map_sgl(vsegs, segs, nsegs);
2336 stx = &q->sdesc[q->pidx];
2339 again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
2340 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2341 if (__predict_false(ret)) {
2343 printf("no ofld desc avail\n");
2345 m_set_priority(m, ndesc); /* save for restart */
2356 if (q->pidx >= q->size) {
2361 T3_TRACE5(adap->tb[q->cntxt_id & 7],
2362 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2363 ndesc, pidx, m->m_pkthdr.len, m->m_len, nsegs);
2368 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2369 check_ring_tx_db(adap, q, 1);
2374 * restart_offloadq - restart a suspended offload queue
2375 @qs: the queue set containing the offload queue
2377 * Resumes transmission on a suspended Tx offload queue.
2380 restart_offloadq(void *data, int npending)
2383 struct sge_qset *qs = data;
2384 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2385 adapter_t *adap = qs->port->adapter;
2386 bus_dma_segment_t segs[TX_MAX_SEGS];
2387 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2391 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
2393 while ((m = mbufq_peek(&q->sendq)) != NULL) {
2394 unsigned int gen, pidx;
2395 unsigned int ndesc = m_get_priority(m);
2397 if (__predict_false(q->size - q->in_use < ndesc)) {
2398 setbit(&qs->txq_stopped, TXQ_OFLD);
2399 if (should_restart_tx(q) &&
2400 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2410 if (q->pidx >= q->size) {
2415 (void)mbufq_dequeue(&q->sendq);
2416 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2418 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2422 set_bit(TXQ_RUNNING, &q->flags);
2423 set_bit(TXQ_LAST_PKT_DB, &q->flags);
2427 t3_write_reg(adap, A_SG_KDOORBELL,
2428 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2432 * queue_set - return the queue set a packet should use
2435 * Maps a packet to the SGE queue set it should use. The desired queue
2436 * set is carried in bits 1-3 in the packet's priority.
2439 queue_set(const struct mbuf *m)
2441 return m_get_priority(m) >> 1;
2445 * is_ctrl_pkt - return whether an offload packet is a control packet
2448 * Determines whether an offload packet should use an OFLD or a CTRL
2449 * Tx queue. This is indicated by bit 0 in the packet's priority.
2452 is_ctrl_pkt(const struct mbuf *m)
2454 return m_get_priority(m) & 1;
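/*
 * A standalone sketch (not driver code) of the priority encoding used by
 * queue_set() and is_ctrl_pkt() above; "prio" stands in for the value
 * returned by m_get_priority(m).
 */
static __inline unsigned int
example_prio_qset(unsigned int prio)
{
	return (prio >> 1);	/* bits 1-3: queue set index */
}

static __inline int
example_prio_is_ctrl(unsigned int prio)
{
	return (prio & 1);	/* bit 0: control vs. regular offload */
}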
2458 * t3_offload_tx - send an offload packet
2459 * @tdev: the offload device to send to
2462 * Sends an offload packet. We use the packet priority to select the
2463 * appropriate Tx queue as follows: bit 0 indicates whether the packet
2464 * should be sent as regular or control, bits 1-3 select the queue set.
2467 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2469 adapter_t *adap = tdev2adap(tdev);
2470 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2472 if (__predict_false(is_ctrl_pkt(m)))
2473 return ctrl_xmit(adap, qs, m);
2475 return ofld_xmit(adap, qs, m);
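/*
 * For example, a packet with priority 0x5 (binary 101) has bit 0 set and
 * bits 1-3 equal to 2, so it is sent via ctrl_xmit() on queue set 2.
 */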
2479 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2480 * @tdev: the offload device that will be receiving the packets
2481 * @q: the SGE response queue that assembled the bundle
2482 * @m: the partial bundle
2483 * @n: the number of packets in the bundle
2485 * Delivers a (partial) bundle of Rx offload packets to an offload device.
2487 static __inline void
2488 deliver_partial_bundle(struct t3cdev *tdev,
2489 struct sge_rspq *q,
2490 struct mbuf *mbufs[], int n)
2493 q->offload_bundles++;
2494 cxgb_ofld_recv(tdev, mbufs, n);
2499 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2500 struct mbuf *m, struct mbuf *rx_gather[],
2501 unsigned int gather_idx)
2505 m->m_pkthdr.header = mtod(m, void *);
2506 rx_gather[gather_idx++] = m;
2507 if (gather_idx == RX_BUNDLE_SIZE) {
2508 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2509 gather_idx = 0;
2510 rq->offload_bundles++;
2512 return (gather_idx);
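/*
 * Sketch of the gather-and-flush pattern implemented by rx_offload() and
 * deliver_partial_bundle() above; deliver() and next_offload_packet() are
 * hypothetical stand-ins for cxgb_ofld_recv() and the response loop:
 *
 *	struct mbuf *bundle[RX_BUNDLE_SIZE];
 *	int n = 0;
 *
 *	while ((m = next_offload_packet()) != NULL) {
 *		bundle[n++] = m;
 *		if (n == RX_BUNDLE_SIZE) {
 *			deliver(bundle, n);	// full bundle: flush now
 *			n = 0;
 *		}
 *	}
 *	if (n > 0)
 *		deliver(bundle, n);		// flush the partial bundle
 */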
2516 restart_tx(struct sge_qset *qs)
2518 struct adapter *sc = qs->port->adapter;
2521 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2522 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2523 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2524 qs->txq[TXQ_OFLD].restarts++;
2525 DPRINTF("restarting TXQ_OFLD\n");
2526 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2528 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2529 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2530 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2531 qs->txq[TXQ_CTRL].in_use);
2533 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2534 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2535 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2536 qs->txq[TXQ_CTRL].restarts++;
2537 DPRINTF("restarting TXQ_CTRL\n");
2538 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2543 * t3_sge_alloc_qset - initialize an SGE queue set
2544 * @sc: the controller softc
2545 * @id: the queue set id
2546 * @nports: how many Ethernet ports will be using this queue set
2547 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2548 * @p: configuration parameters for this queue set
2549 * @ntxq: number of Tx queues for the queue set
2550 * @pi: port info for queue set
2552 * Allocate resources and initialize an SGE queue set. A queue set
2553 * comprises a response queue, two Rx free-buffer queues, and up to 3
2554 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2555 * queue, offload queue, and control queue.
2558 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2559 const struct qset_params *p, int ntxq, struct port_info *pi)
2561 struct sge_qset *q = &sc->sge.qs[id];
2564 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2567 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2568 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
2569 device_printf(sc->dev, "failed to allocate mbuf ring\n");
2572 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
2573 M_NOWAIT | M_ZERO)) == NULL) {
2574 device_printf(sc->dev, "failed to allocate ifq\n");
2577 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);
2578 callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
2579 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
2580 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
2581 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
2583 init_qset_cntxt(q, id);
2585 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2586 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2587 &q->fl[0].desc, &q->fl[0].sdesc,
2588 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2589 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2590 printf("error %d from alloc ring fl0\n", ret);
2594 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2595 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2596 &q->fl[1].desc, &q->fl[1].sdesc,
2597 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2598 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2599 printf("error %d from alloc ring fl1\n", ret);
2603 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2604 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2605 &q->rspq.desc_tag, &q->rspq.desc_map,
2606 NULL, NULL)) != 0) {
2607 printf("error %d from alloc ring rspq\n", ret);
2611 for (i = 0; i < ntxq; ++i) {
2612 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2614 if ((ret = alloc_ring(sc, p->txq_size[i],
2615 sizeof(struct tx_desc), sz,
2616 &q->txq[i].phys_addr, &q->txq[i].desc,
2617 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2618 &q->txq[i].desc_map,
2619 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2620 printf("error %d from alloc ring tx %i\n", ret, i);
2623 mbufq_init(&q->txq[i].sendq);
2625 q->txq[i].size = p->txq_size[i];
2628 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2629 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2630 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2631 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2633 q->fl[0].gen = q->fl[1].gen = 1;
2634 q->fl[0].size = p->fl_size;
2635 q->fl[1].size = p->jumbo_size;
2639 q->rspq.size = p->rspq_size;
2641 q->txq[TXQ_ETH].stop_thres = nports *
2642 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2644 q->fl[0].buf_size = MCLBYTES;
2645 q->fl[0].zone = zone_pack;
2646 q->fl[0].type = EXT_PACKET;
2648 if (p->jumbo_buf_size == MJUM16BYTES) {
2649 q->fl[1].zone = zone_jumbo16;
2650 q->fl[1].type = EXT_JUMBO16;
2651 } else if (p->jumbo_buf_size == MJUM9BYTES) {
2652 q->fl[1].zone = zone_jumbo9;
2653 q->fl[1].type = EXT_JUMBO9;
2654 } else if (p->jumbo_buf_size == MJUMPAGESIZE) {
2655 q->fl[1].zone = zone_jumbop;
2656 q->fl[1].type = EXT_JUMBOP;
2658 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
2662 q->fl[1].buf_size = p->jumbo_buf_size;
2664 /* Allocate and set up the lro_ctrl structure */
2665 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2667 ret = tcp_lro_init(&q->lro.ctrl);
2669 printf("error %d from tcp_lro_init\n", ret);
2673 q->lro.ctrl.ifp = pi->ifp;
2675 mtx_lock_spin(&sc->sge.reg_lock);
2676 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2677 q->rspq.phys_addr, q->rspq.size,
2678 q->fl[0].buf_size, 1, 0);
2680 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2684 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2685 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2686 q->fl[i].phys_addr, q->fl[i].size,
2687 q->fl[i].buf_size, p->cong_thres, 1,
2690 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2695 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2696 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2697 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2700 printf("error %d from t3_sge_init_ecntxt\n", ret);
2705 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2706 USE_GTS, SGE_CNTXT_OFLD, id,
2707 q->txq[TXQ_OFLD].phys_addr,
2708 q->txq[TXQ_OFLD].size, 0, 1, 0);
2710 printf("error %d from t3_sge_init_ecntxt\n", ret);
2716 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2718 q->txq[TXQ_CTRL].phys_addr,
2719 q->txq[TXQ_CTRL].size,
2720 q->txq[TXQ_CTRL].token, 1, 0);
2722 printf("error %d from t3_sge_init_ecntxt\n", ret);
2727 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2728 device_get_unit(sc->dev), irq_vec_idx);
2729 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2731 mtx_unlock_spin(&sc->sge.reg_lock);
2732 t3_update_qset_coalesce(q, p);
2735 refill_fl(sc, &q->fl[0], q->fl[0].size);
2736 refill_fl(sc, &q->fl[1], q->fl[1].size);
2737 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2739 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2740 V_NEWTIMER(q->rspq.holdoff_tmr));
2745 mtx_unlock_spin(&sc->sge.reg_lock);
2748 t3_free_qset(sc, q);
2754 * Remove the CPL_RX_PKT header from the mbuf, leaving a regular mbuf that
2755 * holds plain Ethernet data. Hardware-verified checksums and any VLAN tag
2756 * are also accounted for here.
2759 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2761 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2762 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2763 struct ifnet *ifp = pi->ifp;
2765 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2767 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2768 cpl->csum_valid && cpl->csum == 0xffff) {
2770 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2771 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2772 m->m_pkthdr.csum_data = 0xffff;
2775 if (cpl->vlan_valid) {
2776 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2777 m->m_flags |= M_VLANTAG;
2780 m->m_pkthdr.rcvif = ifp;
2781 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2783 * adjust after conversion to mbuf chain
2785 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2786 m->m_len -= (sizeof(*cpl) + ethpad);
2787 m->m_data += (sizeof(*cpl) + ethpad);
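/*
 * For a single-mbuf chain the three adjustments above are roughly what
 * m_adj(m, sizeof(*cpl) + ethpad) would do: advance the data pointer past
 * the CPL header and padding and shrink the buffer and packet lengths to
 * match.
 */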
2791 * get_packet - return the next ingress packet buffer from a free list
2792 * @adap: the adapter that received the packet
2793 * @drop_thres: # of remaining buffers before we start dropping packets
2794 * @qs: the qset that the SGE free list holding the packet belongs to
2795 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2796 * @r: response descriptor
2798 * Get the next packet from a free list and complete setup of the
2799 mbuf. If the packet is small we make a copy and recycle the
2800 * original buffer, otherwise we use the original buffer itself. If a
2801 * positive drop threshold is supplied packets are dropped and their
2802 * buffers recycled if (a) the number of remaining buffers is under the
2803 * threshold and the packet is too big to copy, or (b) the packet should
2804 * be copied but there is no memory for the copy.
2807 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2808 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2811 unsigned int len_cq = ntohl(r->len_cq);
2812 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2813 int mask, cidx = fl->cidx;
2814 struct rx_sw_desc *sd = &fl->sdesc[cidx];
2815 uint32_t len = G_RSPD_LEN(len_cq);
2816 uint32_t flags = M_EXT;
2817 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
2822 mask = fl->size - 1;
2823 prefetch(fl->sdesc[(cidx + 1) & mask].m);
2824 prefetch(fl->sdesc[(cidx + 2) & mask].m);
2825 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
2826 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);
2829 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2831 if (recycle_enable && len <= SGE_RX_COPY_THRES &&
2832 sopeop == RSPQ_SOP_EOP) {
2833 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2835 cl = mtod(m, void *);
2836 memcpy(cl, sd->rxsd_cl, len);
2837 recycle_rx_buf(adap, fl, fl->cidx);
2838 m->m_pkthdr.len = m->m_len = len;
2840 mh->mh_head = mh->mh_tail = m;
2845 bus_dmamap_unload(fl->entry_tag, sd->map);
2849 if ((sopeop == RSPQ_SOP_EOP) ||
2850 (sopeop == RSPQ_SOP))
2851 flags |= M_PKTHDR;
2852 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
2853 if (fl->zone == zone_pack) {
2855 * restore clobbered data pointer
2857 m->m_data = m->m_ext.ext_buf;
2859 m_cljset(m, cl, fl->type);
2868 mh->mh_head = mh->mh_tail = m;
2869 m->m_pkthdr.len = len;
2874 case RSPQ_NSOP_NEOP:
2875 if (mh->mh_tail == NULL) {
2876 log(LOG_ERR, "discarding intermediate descriptor entry\n");
2880 mh->mh_tail->m_next = m;
2882 mh->mh_head->m_pkthdr.len += len;
2886 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
2888 if (++fl->cidx == fl->size)
2889 fl->cidx = 0;
2895 * handle_rsp_cntrl_info - handles control information in a response
2896 * @qs: the queue set corresponding to the response
2897 * @flags: the response control flags
2899 * Handles the control information of an SGE response, such as GTS
2900 * indications and completion credits for the queue set's Tx queues.
2901 HW coalesces credits; we don't do any extra SW coalescing.
2903 static __inline void
2904 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2906 unsigned int credits;
2909 if (flags & F_RSPD_TXQ0_GTS)
2910 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2912 credits = G_RSPD_TXQ0_CR(flags);
2914 qs->txq[TXQ_ETH].processed += credits;
2916 credits = G_RSPD_TXQ2_CR(flags);
2918 qs->txq[TXQ_CTRL].processed += credits;
2921 if (flags & F_RSPD_TXQ1_GTS)
2922 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2924 credits = G_RSPD_TXQ1_CR(flags);
2926 qs->txq[TXQ_OFLD].processed += credits;
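/*
 * Example decode: a flags word with F_RSPD_TXQ0_GTS set and a TXQ0 credit
 * field of 3 clears TXQ_RUNNING on the Ethernet queue and credits it with
 * 3 completed descriptors; the OFLD and CTRL queues are handled the same
 * way from the TXQ1 and TXQ2 fields.
 */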
2931 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2932 unsigned int sleeping)
2938 * process_responses - process responses from an SGE response queue
2939 * @adap: the adapter
2940 * @qs: the queue set to which the response queue belongs
2941 * @budget: how many responses can be processed in this round
2943 * Process responses from an SGE response queue up to the supplied budget.
2944 * Responses include received packets as well as credits and other events
2945 * for the queues that belong to the response queue's queue set.
2946 * A negative budget is effectively unlimited.
2948 * Additionally choose the interrupt holdoff time for the next interrupt
2949 * on this queue. If the system is under memory shortage use a fairly
2950 * long delay to help recovery.
2953 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2955 struct sge_rspq *rspq = &qs->rspq;
2956 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2957 int budget_left = budget;
2958 unsigned int sleeping = 0;
2959 int lro_enabled = qs->lro.enabled;
2961 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2962 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2965 static int last_holdoff = 0;
2966 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2967 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2968 last_holdoff = rspq->holdoff_tmr;
2971 rspq->next_holdoff = rspq->holdoff_tmr;
2973 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2974 int eth, eop = 0, ethpad = 0;
2975 uint32_t flags = ntohl(r->flags);
2976 uint32_t rss_csum = *(const uint32_t *)r;
2977 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2979 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2981 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2985 printf("async notification\n");
2987 if (rspq->rspq_mh.mh_head == NULL) {
2988 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2989 m = rspq->rspq_mh.mh_head;
2990 } else {
2991 m = m_gethdr(M_DONTWAIT, MT_DATA);
2996 memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2997 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2998 *mtod(m, char *) = CPL_ASYNC_NOTIF;
2999 rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
3001 rspq->async_notif++;
3003 } else if (flags & F_RSPD_IMM_DATA_VALID) {
3004 struct mbuf *m = NULL;
3006 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
3007 r->rss_hdr.opcode, rspq->cidx);
3008 if (rspq->rspq_mh.mh_head == NULL)
3009 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
3010 else
3011 m = m_gethdr(M_DONTWAIT, MT_DATA);
3013 if (rspq->rspq_mh.mh_head == NULL && m == NULL) {
3015 rspq->next_holdoff = NOMEM_INTR_DELAY;
3019 get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
3022 } else if (r->len_cq) {
3023 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
3025 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
3027 rspq->rspq_mh.mh_head->m_flags |= M_FLOWID;
3028 rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash;
3036 if (flags & RSPD_CTRL_MASK) {
3037 sleeping |= flags & RSPD_GTS_MASK;
3038 handle_rsp_cntrl_info(qs, flags);
3042 if (__predict_false(++rspq->cidx == rspq->size)) {
3043 rspq->cidx = 0;
3044 rspq->gen ^= 1;
3045 r = rspq->desc;
3046 }
3048 if (++rspq->credits >= 64) {
3049 refill_rspq(adap, rspq, rspq->credits);
3050 rspq->credits = 0;
3053 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
3057 m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
3060 ngathered = rx_offload(&adap->tdev, rspq,
3061 rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
3062 rspq->rspq_mh.mh_head = NULL;
3063 DPRINTF("received offload packet\n");
3065 } else if (eth && eop) {
3066 struct mbuf *m = rspq->rspq_mh.mh_head;
3068 t3_rx_eth(adap, rspq, m, ethpad);
3071 * The T304 sends incoming packets on any qset. If LRO
3072 is also enabled, we could end up sending the packet up
3073 * lro_ctrl->ifp's input. That is incorrect.
3075 * The mbuf's rcvif was derived from the cpl header and
3076 * is accurate. Skip LRO and just use that.
3078 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
3080 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
3082 && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
3085 /* successfully queued for LRO */
3088 * LRO not enabled, packet unsuitable for LRO,
3089 * or unable to queue. Pass it up right now via ifp->if_input.
3092 struct ifnet *ifp = m->m_pkthdr.rcvif;
3093 (*ifp->if_input)(ifp, m);
3095 rspq->rspq_mh.mh_head = NULL;
3098 __refill_fl_lt(adap, &qs->fl[0], 32);
3099 __refill_fl_lt(adap, &qs->fl[1], 32);
3103 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
3107 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
3108 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
3109 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
3110 tcp_lro_flush(lro_ctrl, queued);
3115 check_ring_db(adap, qs, sleeping);
3117 mb(); /* commit Tx queue processed updates */
3118 if (__predict_false(qs->txq_stopped > 1))
3119 restart_tx(qs);
3121 __refill_fl_lt(adap, &qs->fl[0], 512);
3122 __refill_fl_lt(adap, &qs->fl[1], 512);
3123 budget -= budget_left;
3128 * A helper function that processes responses and issues GTS.
3131 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
3134 static int last_holdoff = 0;
3136 work = process_responses(adap, rspq_to_qset(rq), -1);
3138 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3139 printf("next_holdoff=%d\n", rq->next_holdoff);
3140 last_holdoff = rq->next_holdoff;
3142 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3143 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3150 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3151 * Handles data events from SGE response queues as well as error and other
3152 * async events as they all use the same interrupt pin. We use one SGE
3153 response queue per port in this mode and protect all response queues with queue 0's lock.
3157 t3b_intr(void *data)
3160 adapter_t *adap = data;
3161 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3163 t3_write_reg(adap, A_PL_CLI, 0);
3164 map = t3_read_reg(adap, A_SG_DATA_INTR);
3169 if (__predict_false(map & F_ERRINTR))
3170 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3172 mtx_lock(&q0->lock);
3173 for_each_port(adap, i)
3175 process_responses_gts(adap, &adap->sge.qs[i].rspq);
3176 mtx_unlock(&q0->lock);
3180 * The MSI interrupt handler. This needs to handle data events from SGE
3181 * response queues as well as error and other async events as they all use
3182 * the same MSI vector. We use one SGE response queue per port in this mode
3183 * and protect all response queues with queue 0's lock.
3186 t3_intr_msi(void *data)
3188 adapter_t *adap = data;
3189 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3190 int i, new_packets = 0;
3192 mtx_lock(&q0->lock);
3194 for_each_port(adap, i)
3195 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3196 new_packets = 1;
3197 mtx_unlock(&q0->lock);
3198 if (new_packets == 0)
3199 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3203 t3_intr_msix(void *data)
3205 struct sge_qset *qs = data;
3206 adapter_t *adap = qs->port->adapter;
3207 struct sge_rspq *rspq = &qs->rspq;
3209 if (process_responses_gts(adap, rspq) == 0)
3210 rspq->unhandled_irqs++;
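/*
 * To summarize the three interrupt models above: INTx and MSI funnel all
 * response queues through one handler serialized by queue 0's lock, while
 * MSI-X gives each queue set its own vector so queue sets are serviced
 * independently.
 */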
3213 #define QDUMP_SBUF_SIZE (32 * 400)
3215 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3217 struct sge_rspq *rspq;
3218 struct sge_qset *qs;
3219 int i, err, dump_end, idx;
3220 static int multiplier = 1;
3222 struct rsp_desc *rspd;
3226 qs = rspq_to_qset(rspq);
3227 if (rspq->rspq_dump_count == 0)
3229 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3231 "dump count is too large %d\n", rspq->rspq_dump_count);
3232 rspq->rspq_dump_count = 0;
3235 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3237 "dump start of %d is greater than queue size\n",
3238 rspq->rspq_dump_start);
3239 rspq->rspq_dump_start = 0;
3242 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3246 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3248 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3249 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3250 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3251 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3252 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3254 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3255 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3257 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3258 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3259 idx = i & (RSPQ_Q_SIZE-1);
3261 rspd = &rspq->desc[idx];
3262 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3263 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3264 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3265 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3266 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3267 be32toh(rspd->len_cq), rspd->intr_gen);
3269 if (sbuf_overflowed(sb)) {
3275 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3281 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3283 struct sge_txq *txq;
3284 struct sge_qset *qs;
3285 int i, j, err, dump_end;
3286 static int multiplier = 1;
3288 struct tx_desc *txd;
3289 uint32_t *WR, wr_hi, wr_lo, gen;
3293 qs = txq_to_qset(txq, TXQ_ETH);
3294 if (txq->txq_dump_count == 0) {
3297 if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3299 "dump count is too large %d\n", txq->txq_dump_count);
3300 txq->txq_dump_count = 1;
3303 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3305 "dump start of %d is greater than queue size\n",
3306 txq->txq_dump_start);
3307 txq->txq_dump_start = 0;
3310 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3316 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3318 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3319 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3320 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3321 sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3322 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3323 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3324 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3325 txq->txq_dump_start,
3326 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3328 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3329 for (i = txq->txq_dump_start; i < dump_end; i++) {
3330 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3331 WR = (uint32_t *)txd->flit;
3332 wr_hi = ntohl(WR[0]);
3333 wr_lo = ntohl(WR[1]);
3334 gen = G_WR_GEN(wr_lo);
3336 sbuf_printf(sb, " wr_hi %08x wr_lo %08x gen %d\n",
3337 wr_hi, wr_lo, gen);
3338 for (j = 2; j < 30; j += 4)
3339 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3340 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3343 if (sbuf_overflowed(sb)) {
3349 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3355 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3357 struct sge_txq *txq;
3358 struct sge_qset *qs;
3359 int i, j, err, dump_end;
3360 static int multiplier = 1;
3362 struct tx_desc *txd;
3363 uint32_t *WR, wr_hi, wr_lo, gen;
3366 qs = txq_to_qset(txq, TXQ_CTRL);
3367 if (txq->txq_dump_count == 0) {
3370 if (txq->txq_dump_count > 256) {
3372 "dump count is too large %d\n", txq->txq_dump_count);
3373 txq->txq_dump_count = 1;
3376 if (txq->txq_dump_start > 255) {
3378 "dump start of %d is greater than queue size\n",
3379 txq->txq_dump_start);
3380 txq->txq_dump_start = 0;
3385 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3386 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3387 txq->txq_dump_start,
3388 (txq->txq_dump_start + txq->txq_dump_count) & 255);
3390 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3391 for (i = txq->txq_dump_start; i < dump_end; i++) {
3392 txd = &txq->desc[i & (255)];
3393 WR = (uint32_t *)txd->flit;
3394 wr_hi = ntohl(WR[0]);
3395 wr_lo = ntohl(WR[1]);
3396 gen = G_WR_GEN(wr_lo);
3398 sbuf_printf(sb, " wr_hi %08x wr_lo %08x gen %d\n",
3399 wr_hi, wr_lo, gen);
3400 for (j = 2; j < 30; j += 4)
3401 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3402 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3405 if (sbuf_overflowed(sb)) {
3411 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3417 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3419 adapter_t *sc = arg1;
3420 struct qset_params *qsp = &sc->params.sge.qset[0];
3422 struct sge_qset *qs;
3423 int i, j, err, nqsets = 0;
3426 if ((sc->flags & FULL_INIT_DONE) == 0)
3429 coalesce_usecs = qsp->coalesce_usecs;
3430 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3435 if (coalesce_usecs == qsp->coalesce_usecs)
3438 for (i = 0; i < sc->params.nports; i++)
3439 for (j = 0; j < sc->port[i].nqsets; j++)
3440 nqsets++;
3442 coalesce_usecs = max(1, coalesce_usecs);
3444 for (i = 0; i < nqsets; i++) {
3445 qs = &sc->sge.qs[i];
3446 qsp = &sc->params.sge.qset[i];
3447 qsp->coalesce_usecs = coalesce_usecs;
3449 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3450 &sc->sge.qs[0].rspq.lock;
3453 t3_update_qset_coalesce(qs, qsp);
3454 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3455 V_NEWTIMER(qs->rspq.holdoff_tmr));
3464 t3_add_attach_sysctls(adapter_t *sc)
3466 struct sysctl_ctx_list *ctx;
3467 struct sysctl_oid_list *children;
3469 ctx = device_get_sysctl_ctx(sc->dev);
3470 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3472 /* random information */
3473 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3475 CTLFLAG_RD, &sc->fw_version,
3476 0, "firmware version");
3477 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3479 CTLFLAG_RD, &sc->params.rev,
3481 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3483 CTLFLAG_RD, &sc->port_types,
3484 0, "type of ports");
3485 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3487 CTLFLAG_RW, &cxgb_debug,
3488 0, "enable verbose debugging output");
3489 SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3490 CTLFLAG_RD, &sc->tunq_coalesce,
3491 "#tunneled packets freed");
3492 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3494 CTLFLAG_RD, &txq_fills,
3495 0, "#times txq overrun");
3499 static const char *rspq_name = "rspq";
3500 static const char *txq_names[] =
3508 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3510 struct port_info *p = arg1;
3516 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3518 t3_mac_update_stats(&p->mac);
3521 return (sysctl_handle_quad(oidp, parg, 0, req));
3525 t3_add_configured_sysctls(adapter_t *sc)
3527 struct sysctl_ctx_list *ctx;
3528 struct sysctl_oid_list *children;
3531 ctx = device_get_sysctl_ctx(sc->dev);
3532 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3534 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3536 CTLTYPE_INT|CTLFLAG_RW, sc,
3537 0, t3_set_coalesce_usecs,
3538 "I", "interrupt coalescing timer (us)");
3540 for (i = 0; i < sc->params.nports; i++) {
3541 struct port_info *pi = &sc->port[i];
3542 struct sysctl_oid *poid;
3543 struct sysctl_oid_list *poidlist;
3544 struct mac_stats *mstats = &pi->mac.stats;
3546 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3547 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3548 pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3549 poidlist = SYSCTL_CHILDREN(poid);
3550 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3551 "nqsets", CTLFLAG_RD, &pi->nqsets,
3554 for (j = 0; j < pi->nqsets; j++) {
3555 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3556 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3557 *ctrlqpoid, *lropoid;
3558 struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3559 *txqpoidlist, *ctrlqpoidlist,
3561 struct sge_txq *txq = &qs->txq[TXQ_ETH];
3563 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3565 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3566 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3567 qspoidlist = SYSCTL_CHILDREN(qspoid);
3569 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3570 CTLFLAG_RD, &qs->fl[0].empty, 0,
3571 "freelist #0 empty");
3572 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3573 CTLFLAG_RD, &qs->fl[1].empty, 0,
3574 "freelist #1 empty");
3576 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3577 rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3578 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3580 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3581 txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3582 txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3584 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3585 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3586 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3588 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3589 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3590 lropoidlist = SYSCTL_CHILDREN(lropoid);
3592 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3593 CTLFLAG_RD, &qs->rspq.size,
3594 0, "#entries in response queue");
3595 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3596 CTLFLAG_RD, &qs->rspq.cidx,
3597 0, "consumer index");
3598 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3599 CTLFLAG_RD, &qs->rspq.credits,
3601 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
3602 CTLFLAG_RD, &qs->rspq.starved,
3603 0, "#times starved");
3604 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3605 CTLFLAG_RD, &qs->rspq.phys_addr,
3606 "physical_address_of the queue");
3607 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3608 CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3609 0, "start rspq dump entry");
3610 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3611 CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3612 0, "#rspq entries to dump");
3613 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3614 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3615 0, t3_dump_rspq, "A", "dump of the response queue");
3617 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
3618 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
3619 "#tunneled packets dropped");
3620 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3621 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3622 0, "#tunneled packets waiting to be sent");
3624 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3625 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr->br_prod,
3626 0, "#tunneled packets queue producer index");
3627 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3628 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr->br_cons,
3629 0, "#tunneled packets queue consumer index");
3631 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3632 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3633 0, "#tunneled packets processed by the card");
3634 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3635 CTLFLAG_RD, &txq->cleaned,
3636 0, "#tunneled packets cleaned");
3637 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3638 CTLFLAG_RD, &txq->in_use,
3639 0, "#tunneled packet slots in use");
3640 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3641 CTLFLAG_RD, &txq->txq_frees,
3642 "#tunneled packets freed");
3643 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3644 CTLFLAG_RD, &txq->txq_skipped,
3645 0, "#tunneled packet descriptors skipped");
3646 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3647 CTLFLAG_RD, &txq->txq_coalesced,
3648 "#tunneled packets coalesced");
3649 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3650 CTLFLAG_RD, &txq->txq_enqueued,
3651 0, "#tunneled packets enqueued to hardware");
3652 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3653 CTLFLAG_RD, &qs->txq_stopped,
3654 0, "tx queues stopped");
3655 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3656 CTLFLAG_RD, &txq->phys_addr,
3657 "physical_address_of the queue");
3658 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3659 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3660 0, "txq generation");
3661 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3662 CTLFLAG_RD, &txq->cidx,
3663 0, "hardware queue cidx");
3664 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3665 CTLFLAG_RD, &txq->pidx,
3666 0, "hardware queue pidx");
3667 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3668 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3669 0, "txq start idx for dump");
3670 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3671 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3672 0, "txq #entries to dump");
3673 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3674 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3675 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3677 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3678 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3679 0, "ctrlq start idx for dump");
3680 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3681 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3682 0, "ctrl #entries to dump");
3683 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3684 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3685 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3687 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3688 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3689 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3690 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3691 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3692 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3693 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3694 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3697 /* Now add a node for mac stats. */
3698 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3699 CTLFLAG_RD, NULL, "MAC statistics");
3700 poidlist = SYSCTL_CHILDREN(poid);
3703 * We (ab)use the length argument (arg2) to pass on the offset
3704 * of the data that we are interested in. This is only required
3705 * for the quad counters that are updated from the hardware (we
3706 * make sure that we return the latest value).
3707 * sysctl_handle_macstat first updates *all* the counters from
3708 * the hardware, and then returns the latest value of the
3709 * requested counter. Best would be to update only the
3710 * requested counter from hardware, but t3_mac_update_stats()
3711 hides all the register details and we don't want to dive into that here.
3714 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3715 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3716 sysctl_handle_macstat, "QU", 0)
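/*
 * For example, CXGB_SYSCTL_ADD_QUAD(tx_octets) expands to:
 *
 *	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, "tx_octets",
 *	    (CTLTYPE_QUAD | CTLFLAG_RD), pi,
 *	    offsetof(struct mac_stats, tx_octets),
 *	    sysctl_handle_macstat, "QU", 0);
 */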
3717 CXGB_SYSCTL_ADD_QUAD(tx_octets);
3718 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3719 CXGB_SYSCTL_ADD_QUAD(tx_frames);
3720 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3721 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3722 CXGB_SYSCTL_ADD_QUAD(tx_pause);
3723 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3724 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3725 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3726 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3727 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3728 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3729 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3730 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3731 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3732 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3733 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3734 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3735 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3736 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3737 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3738 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3739 CXGB_SYSCTL_ADD_QUAD(rx_octets);
3740 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3741 CXGB_SYSCTL_ADD_QUAD(rx_frames);
3742 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3743 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3744 CXGB_SYSCTL_ADD_QUAD(rx_pause);
3745 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3746 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3747 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3748 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3749 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3750 CXGB_SYSCTL_ADD_QUAD(rx_runt);
3751 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3752 CXGB_SYSCTL_ADD_QUAD(rx_short);
3753 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3754 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3755 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3756 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3757 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3758 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3759 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3760 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3761 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3762 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3763 #undef CXGB_SYSCTL_ADD_QUAD
3765 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3766 CTLFLAG_RD, &mstats->a, 0)
3767 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3768 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3769 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3770 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3771 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3772 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3773 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3774 CXGB_SYSCTL_ADD_ULONG(num_toggled);
3775 CXGB_SYSCTL_ADD_ULONG(num_resets);
3776 CXGB_SYSCTL_ADD_ULONG(link_faults);
3777 #undef CXGB_SYSCTL_ADD_ULONG
3782 * t3_get_desc - dump an SGE descriptor for debugging purposes
3783 * @qs: the queue set
3784 @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
3785 * @idx: the descriptor index in the queue
3786 * @data: where to dump the descriptor contents
3788 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3789 * size of the descriptor.
3792 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3793 unsigned char *data)
3799 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3800 return (EINVAL);
3801 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3802 return sizeof(struct tx_desc);
3806 if (!qs->rspq.desc || idx >= qs->rspq.size)
3807 return (EINVAL);
3808 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3809 return sizeof(struct rsp_desc);
3813 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3814 return (EINVAL);
3815 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3816 return sizeof(struct rx_desc);