1 /**************************************************************************
3 Copyright (c) 2007-2009, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
39 #include <machine/bus.h>
40 #include <machine/resource.h>
41 #include <sys/bus_dma.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/taskqueue.h>
49 #include <sys/sched.h>
52 #include <sys/syslog.h>
56 #include <netinet/in_systm.h>
57 #include <netinet/in.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
67 #include <cxgb_include.h>
71 int multiq_tx_enable = 1;
73 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
74 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
75 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
76 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
77 "size of per-queue mbuf ring");
79 static int cxgb_tx_coalesce_force = 0;
80 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
81 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
82 &cxgb_tx_coalesce_force, 0,
83 "coalesce small packets into a single work request regardless of ring state");
85 #define COALESCE_START_DEFAULT (TX_ETH_Q_SIZE>>1)
86 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
87 #define COALESCE_STOP_DEFAULT (TX_ETH_Q_SIZE>>2)
88 #define COALESCE_STOP_MIN (TX_ETH_Q_SIZE>>5)
89 #define TX_RECLAIM_DEFAULT (TX_ETH_Q_SIZE>>5)
90 #define TX_RECLAIM_MAX (TX_ETH_Q_SIZE>>2)
91 #define TX_RECLAIM_MIN (TX_ETH_Q_SIZE>>6)
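/*
 * For reference, assuming the usual TX_ETH_Q_SIZE of 1024 these work out to:
 * coalescing starts at 512 in-use descriptors (capped at 896) and stops at
 * 256 (floor 32); the reclaim threshold defaults to 32 (clamped to 16-256).
 */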
94 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
95 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
96 &cxgb_tx_coalesce_enable_start);
97 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
98 &cxgb_tx_coalesce_enable_start, 0,
99 "coalesce enable threshold");
100 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
102 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
103 &cxgb_tx_coalesce_enable_stop, 0,
104 "coalesce disable threshold");
105 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
106 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
107 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
108 &cxgb_tx_reclaim_threshold, 0,
109 "tx cleaning minimum threshold");
112 * XXX don't re-enable this until TOE stops assuming
115 static int recycle_enable = 0;
116 int cxgb_ext_freed = 0;
117 int cxgb_ext_inited = 0;
119 int jumbo_q_size = 0;
121 extern int cxgb_use_16k_clusters;
122 extern int nmbjumbo4;
123 extern int nmbjumbo9;
124 extern int nmbjumbo16;
128 #define SGE_RX_SM_BUF_SIZE 1536
129 #define SGE_RX_DROP_THRES 16
130 #define SGE_RX_COPY_THRES 128
133 * Period of the Tx buffer reclaim timer. This timer does not need to run
134 * frequently as Tx buffers are usually reclaimed by new Tx packets.
136 #define TX_RECLAIM_PERIOD (hz >> 1)
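/* (hz >> 1) ticks is half a second, independent of the kernel HZ setting. */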
139 * Values for sge_txq.flags
142 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
143 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
147 uint64_t flit[TX_DESC_FLITS];
157 struct rsp_desc { /* response queue descriptor */
158 struct rss_header rss_hdr;
161 uint8_t imm_data[47];
165 #define RX_SW_DESC_MAP_CREATED (1 << 0)
166 #define TX_SW_DESC_MAP_CREATED (1 << 1)
167 #define RX_SW_DESC_INUSE (1 << 3)
168 #define TX_SW_DESC_MAPPED (1 << 4)
170 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
171 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
172 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
173 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
175 struct tx_sw_desc { /* SW state per Tx descriptor */
181 struct rx_sw_desc { /* SW state per Rx descriptor */
194 struct refill_fl_cb_arg {
196 bus_dma_segment_t seg;
202 * Maps a number of flits to the number of Tx descriptors that can hold them.
205 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
207 * HW allows up to 4 descriptors to be combined into a WR.
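/*
 * Worked example (assuming WR_FLITS is 15 when SGE_NUM_GENBITS == 2, since
 * wr_gen2() below reserves the last flit of each descriptor for the second
 * generation bit): a 20-flit request needs 1 + (20 - 2)/14 = 2 descriptors,
 * matching flit_desc_map[20] in the table below.
 */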
209 static uint8_t flit_desc_map[] = {
211 #if SGE_NUM_GENBITS == 1
212 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
213 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
214 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
215 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
216 #elif SGE_NUM_GENBITS == 2
217 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
218 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
219 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
220 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
222 # error "SGE_NUM_GENBITS must be 1 or 2"
226 #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED)
227 #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock)
228 #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock)
229 #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock)
230 #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
231 #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
232 #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \
233 drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
234 #define TXQ_RING_DEQUEUE(qs) \
235 drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
239 static void sge_timer_cb(void *arg);
240 static void sge_timer_reclaim(void *arg, int ncount);
241 static void sge_txq_reclaim_handler(void *arg, int ncount);
242 static void cxgb_start_locked(struct sge_qset *qs);
245 * XXX need to cope with bursty scheduling by looking at a wider
246 * window than we are now for determining the need for coalescing
249 static __inline uint64_t
250 check_pkt_coalesce(struct sge_qset *qs)
256 if (__predict_false(cxgb_tx_coalesce_force))
258 txq = &qs->txq[TXQ_ETH];
259 sc = qs->port->adapter;
260 fill = &sc->tunq_fill[qs->idx];
262 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
263 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
264 if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
265 cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
267 * if the hardware transmit queue is more than 1/8 full
268 * we mark it as coalescing - we drop back from coalescing
269 * when we go below 1/32 full and there are no packets enqueued,
270 * this provides us with some degree of hysteresis
272 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
273 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
275 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
278 return (sc->tunq_coalesce);
283 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
286 #if _BYTE_ORDER == _LITTLE_ENDIAN
288 wr_hilo |= (((uint64_t)wr_lo)<<32);
291 wr_hilo |= (((uint64_t)wr_hi)<<32);
293 wrp->wrh_hilo = wr_hilo;
297 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
306 struct coalesce_info {
312 coalesce_check(struct mbuf *m, void *arg)
314 struct coalesce_info *ci = arg;
315 int *count = &ci->count;
316 int *nbytes = &ci->nbytes;
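/*
 * A coalesced work request (the m_nextpkt path in t3_encap()) spends one
 * flit on the WR header plus two flits per packet and is written as a single
 * Tx descriptor, which appears to be where the limit of 7 single-segment
 * packets comes from; 10500 bytes is roughly seven full-sized Ethernet frames.
 */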
318 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
319 (*count < 7) && (m->m_next == NULL))) {
328 cxgb_dequeue(struct sge_qset *qs)
330 struct mbuf *m, *m_head, *m_tail;
331 struct coalesce_info ci;
334 if (check_pkt_coalesce(qs) == 0)
335 return TXQ_RING_DEQUEUE(qs);
337 m_head = m_tail = NULL;
338 ci.count = ci.nbytes = 0;
340 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
341 if (m_head == NULL) {
343 } else if (m != NULL) {
344 m_tail->m_nextpkt = m;
349 panic("trying to coalesce %d packets in to one WR", ci.count);
354 * reclaim_completed_tx - reclaims completed Tx descriptors
355 * @qs: the queue set that owns the Tx queue
356 * @reclaim_min: do nothing unless at least this many descriptors are reclaimable
357 * @queue: the Tx queue within the set to reclaim completed descriptors from
358 * Reclaims Tx descriptors that the SGE has indicated it has processed,
359 * and frees the associated buffers if possible. Called with the Tx
363 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
365 struct sge_txq *q = &qs->txq[queue];
366 int reclaim = desc_reclaimable(q);
368 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
369 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
370 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
372 if (reclaim < reclaim_min)
375 mtx_assert(&qs->lock, MA_OWNED);
377 t3_free_tx_desc(qs, reclaim, queue);
378 q->cleaned += reclaim;
379 q->in_use -= reclaim;
381 if (isset(&qs->txq_stopped, TXQ_ETH))
382 clrbit(&qs->txq_stopped, TXQ_ETH);
388 * should_restart_tx - are there enough resources to restart a Tx queue?
391 * Checks if there are enough descriptors to restart a suspended Tx queue.
394 should_restart_tx(const struct sge_txq *q)
396 unsigned int r = q->processed - q->cleaned;
398 return q->in_use - r < (q->size >> 1);
402 * t3_sge_init - initialize SGE
404 * @p: the SGE parameters
406 * Performs SGE initialization needed every time after a chip reset.
407 * We do not initialize any of the queue sets here, instead the driver
408 * top-level must request those individually. We also do not enable DMA
409 * here, that should be done after the queues have been set up.
412 t3_sge_init(adapter_t *adap, struct sge_params *p)
416 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
418 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
419 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
420 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
421 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
422 #if SGE_NUM_GENBITS == 1
423 ctrl |= F_EGRGENCTRL;
425 if (adap->params.rev > 0) {
426 if (!(adap->flags & (USING_MSIX | USING_MSI)))
427 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
429 t3_write_reg(adap, A_SG_CONTROL, ctrl);
430 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
431 V_LORCQDRBTHRSH(512));
432 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
433 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
434 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
435 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
436 adap->params.rev < T3_REV_C ? 1000 : 500);
437 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
438 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
439 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
440 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
441 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
446 * sgl_len - calculates the size of an SGL of the given capacity
447 * @n: the number of SGL entries
449 * Calculates the number of flits needed for a scatter/gather list that
450 * can hold the given number of entries.
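/*
 * Each SGL entry is a 4-byte length plus an 8-byte address; two entries pack
 * into three flits, so even n costs 3n/2 flits and an odd trailing entry adds
 * one more (e.g. n = 5 gives 3*5/2 + 1 = 8 flits).
 */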
452 static __inline unsigned int
453 sgl_len(unsigned int n)
455 return ((3 * n) / 2 + (n & 1));
459 * get_imm_packet - return the next ingress packet buffer from a response
460 * @resp: the response descriptor containing the packet data
462 * Return a packet containing the immediate data of the given response.
465 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
468 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
469 m->m_ext.ext_buf = NULL;
470 m->m_ext.ext_type = 0;
471 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
475 static __inline u_int
476 flits_to_desc(u_int n)
478 return (flit_desc_map[n]);
481 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
482 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
483 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
484 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
486 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
487 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
491 * t3_sge_err_intr_handler - SGE async event interrupt handler
492 * @adapter: the adapter
494 * Interrupt handler for SGE asynchronous (non-data) events.
497 t3_sge_err_intr_handler(adapter_t *adapter)
499 unsigned int v, status;
501 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
502 if (status & SGE_PARERR)
503 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
504 status & SGE_PARERR);
505 if (status & SGE_FRAMINGERR)
506 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
507 status & SGE_FRAMINGERR);
508 if (status & F_RSPQCREDITOVERFOW)
509 CH_ALERT(adapter, "SGE response queue credit overflow\n");
511 if (status & F_RSPQDISABLED) {
512 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
515 "packet delivered to disabled response queue (0x%x)\n",
516 (v >> S_RSPQ0DISABLED) & 0xff);
519 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
520 if (status & SGE_FATALERR)
521 t3_fatal_err(adapter);
525 t3_sge_prep(adapter_t *adap, struct sge_params *p)
529 nqsets = min(SGE_QSETS, mp_ncpus*4);
531 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
533 while (!powerof2(fl_q_size))
535 #if __FreeBSD_version >= 700111
536 if (cxgb_use_16k_clusters)
537 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
539 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
541 jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
543 while (!powerof2(jumbo_q_size))
546 /* XXX Does ETHER_ALIGN need to be accounted for here? */
547 p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
549 for (i = 0; i < SGE_QSETS; ++i) {
550 struct qset_params *q = p->qset + i;
552 if (adap->params.nports > 2) {
553 q->coalesce_usecs = 50;
556 q->coalesce_usecs = 10;
558 q->coalesce_usecs = 5;
562 q->rspq_size = RSPQ_Q_SIZE;
563 q->fl_size = fl_q_size;
564 q->jumbo_size = jumbo_q_size;
565 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
566 q->txq_size[TXQ_OFLD] = 1024;
567 q->txq_size[TXQ_CTRL] = 256;
573 t3_sge_alloc(adapter_t *sc)
576 /* The parent tag. */
577 if (bus_dma_tag_create( NULL, /* parent */
578 1, 0, /* algnmnt, boundary */
579 BUS_SPACE_MAXADDR, /* lowaddr */
580 BUS_SPACE_MAXADDR, /* highaddr */
581 NULL, NULL, /* filter, filterarg */
582 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
583 BUS_SPACE_UNRESTRICTED, /* nsegments */
584 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
586 NULL, NULL, /* lock, lockarg */
588 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
593 * DMA tag for normal sized RX frames
595 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
596 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
597 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
598 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
603 * DMA tag for jumbo sized RX frames.
605 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
606 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
607 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
608 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
613 * DMA tag for TX frames.
615 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
616 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
617 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
618 NULL, NULL, &sc->tx_dmat)) {
619 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
627 t3_sge_free(struct adapter * sc)
630 if (sc->tx_dmat != NULL)
631 bus_dma_tag_destroy(sc->tx_dmat);
633 if (sc->rx_jumbo_dmat != NULL)
634 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
636 if (sc->rx_dmat != NULL)
637 bus_dma_tag_destroy(sc->rx_dmat);
639 if (sc->parent_dmat != NULL)
640 bus_dma_tag_destroy(sc->parent_dmat);
646 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
649 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
650 qs->rspq.polling = 0 /* p->polling */;
653 #if !defined(__i386__) && !defined(__amd64__)
655 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
657 struct refill_fl_cb_arg *cb_arg = arg;
659 cb_arg->error = error;
660 cb_arg->seg = segs[0];
666 * refill_fl - refill an SGE free-buffer list
667 * @sc: the controller softc
668 * @q: the free-list to refill
669 * @n: the number of new buffers to allocate
671 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
672 * The caller must assure that @n does not exceed the queue's capacity.
675 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
677 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
678 struct rx_desc *d = &q->desc[q->pidx];
679 struct refill_fl_cb_arg cb_arg;
687 * We only allocate a cluster here; mbuf allocation happens after rx
689 if (q->zone == zone_pack) {
690 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
692 cl = m->m_ext.ext_buf;
694 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
696 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
697 uma_zfree(q->zone, cl);
701 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
702 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
703 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
704 uma_zfree(q->zone, cl);
707 sd->flags |= RX_SW_DESC_MAP_CREATED;
709 #if !defined(__i386__) && !defined(__amd64__)
710 err = bus_dmamap_load(q->entry_tag, sd->map,
711 cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
713 if (err != 0 || cb_arg.error) {
714 if (q->zone == zone_pack)
715 uma_zfree(q->zone, cl);
720 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
722 sd->flags |= RX_SW_DESC_INUSE;
725 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
726 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
727 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
728 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
733 if (++q->pidx == q->size) {
745 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
750 * free_rx_bufs - free the Rx buffers on an SGE free list
751 * @sc: the controller softc
752 * @q: the SGE free list to clean up
754 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
755 * this queue should be stopped before calling this function.
758 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
760 u_int cidx = q->cidx;
762 while (q->credits--) {
763 struct rx_sw_desc *d = &q->sdesc[cidx];
765 if (d->flags & RX_SW_DESC_INUSE) {
766 bus_dmamap_unload(q->entry_tag, d->map);
767 bus_dmamap_destroy(q->entry_tag, d->map);
768 if (q->zone == zone_pack) {
769 m_init(d->m, zone_pack, MCLBYTES,
770 M_NOWAIT, MT_DATA, M_EXT);
771 uma_zfree(zone_pack, d->m);
773 m_init(d->m, zone_mbuf, MLEN,
774 M_NOWAIT, MT_DATA, 0);
775 uma_zfree(zone_mbuf, d->m);
776 uma_zfree(q->zone, d->rxsd_cl);
782 if (++cidx == q->size)
788 __refill_fl(adapter_t *adap, struct sge_fl *fl)
790 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
794 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
796 if ((fl->size - fl->credits) < max)
797 refill_fl(adap, fl, min(max, fl->size - fl->credits));
801 * recycle_rx_buf - recycle a receive buffer
802 * @adapter: the adapter
803 * @q: the SGE free list
804 * @idx: index of buffer to recycle
806 * Recycles the specified buffer on the given free list by adding it at
807 * the next available slot on the list.
810 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
812 struct rx_desc *from = &q->desc[idx];
813 struct rx_desc *to = &q->desc[q->pidx];
815 q->sdesc[q->pidx] = q->sdesc[idx];
816 to->addr_lo = from->addr_lo; /* already big endian */
817 to->addr_hi = from->addr_hi; /* likewise */
818 wmb(); /* necessary ? */
819 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
820 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
823 if (++q->pidx == q->size) {
827 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
831 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
836 *addr = segs[0].ds_addr;
840 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
841 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
842 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
844 size_t len = nelem * elem_size;
849 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
850 BUS_SPACE_MAXADDR_32BIT,
851 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
852 len, 0, NULL, NULL, tag)) != 0) {
853 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
857 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
859 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
863 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
868 len = nelem * sw_size;
869 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
872 if (parent_entry_tag == NULL)
875 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
876 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
877 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
878 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
879 NULL, NULL, entry_tag)) != 0) {
880 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
887 sge_slow_intr_handler(void *arg, int ncount)
891 t3_slow_intr_handler(sc);
895 * sge_timer_cb - perform periodic maintenance of an SGE qset
896 * @data: the SGE queue set to maintain
898 * Runs periodically from a timer to perform maintenance of an SGE queue
899 * set. It performs the following tasks:
901 * a) Cleans up any completed Tx descriptors that may still be pending.
902 * Normal descriptor cleanup happens when new packets are added to a Tx
903 * queue so this timer is relatively infrequent and does any cleanup only
904 * if the Tx queue has not seen any new packets in a while. We make a
905 * best effort attempt to reclaim descriptors, in that we don't wait
906 * around if we cannot get a queue's lock (which most likely is because
907 * someone else is queueing new packets and so will also handle the clean
908 * up). Since control queues use immediate data exclusively we don't
909 * bother cleaning them up here.
911 * b) Replenishes Rx queues that have run out due to memory shortage.
912 * Normally new Rx buffers are added when existing ones are consumed but
913 * when out of memory a queue can become empty. We try to add only a few
914 * buffers here, the queue will be replenished fully as these new buffers
915 * are used up if memory shortage has subsided.
917 * c) Return coalesced response queue credits in case a response queue is
920 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
921 * fifo overflows and the FW doesn't implement any recovery scheme yet.
924 sge_timer_cb(void *arg)
927 if ((sc->flags & USING_MSIX) == 0) {
929 struct port_info *pi;
933 int reclaim_ofl, refill_rx;
935 if (sc->open_device_map == 0)
938 for (i = 0; i < sc->params.nports; i++) {
940 for (j = 0; j < pi->nqsets; j++) {
941 qs = &sc->sge.qs[pi->first_qset + j];
943 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
944 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
945 (qs->fl[1].credits < qs->fl[1].size));
946 if (reclaim_ofl || refill_rx) {
947 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
954 if (sc->params.nports > 2) {
957 for_each_port(sc, i) {
958 struct port_info *pi = &sc->port[i];
960 t3_write_reg(sc, A_SG_KDOORBELL,
962 (FW_TUNNEL_SGEEC_START + pi->first_qset));
965 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
966 sc->open_device_map != 0)
967 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
971 * This is meant to be a catch-all function to keep sge state private
976 t3_sge_init_adapter(adapter_t *sc)
978 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
979 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
980 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
985 t3_sge_reset_adapter(adapter_t *sc)
987 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
992 t3_sge_init_port(struct port_info *pi)
994 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
999 * refill_rspq - replenish an SGE response queue
1000 * @adapter: the adapter
1001 * @q: the response queue to replenish
1002 * @credits: how many new responses to make available
1004 * Replenishes a response queue by making the supplied number of responses
1007 static __inline void
1008 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1011 /* mbufs are allocated on demand when a rspq entry is processed. */
1012 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1013 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1017 sge_txq_reclaim_handler(void *arg, int ncount)
1019 struct sge_qset *qs = arg;
1022 for (i = 0; i < 3; i++)
1023 reclaim_completed_tx(qs, 16, i);
1027 sge_timer_reclaim(void *arg, int ncount)
1029 struct port_info *pi = arg;
1030 int i, nqsets = pi->nqsets;
1031 adapter_t *sc = pi->adapter;
1032 struct sge_qset *qs;
1035 KASSERT((sc->flags & USING_MSIX) == 0,
1036 ("can't call timer reclaim for msi-x"));
1038 for (i = 0; i < nqsets; i++) {
1039 qs = &sc->sge.qs[pi->first_qset + i];
1041 reclaim_completed_tx(qs, 16, TXQ_OFLD);
1042 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
1043 &sc->sge.qs[0].rspq.lock;
1045 if (mtx_trylock(lock)) {
1046 /* XXX currently assume that we are *NOT* polling */
1047 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
1049 if (qs->fl[0].credits < qs->fl[0].size - 16)
1050 __refill_fl(sc, &qs->fl[0]);
1051 if (qs->fl[1].credits < qs->fl[1].size - 16)
1052 __refill_fl(sc, &qs->fl[1]);
1054 if (status & (1 << qs->rspq.cntxt_id)) {
1055 if (qs->rspq.credits) {
1056 refill_rspq(sc, &qs->rspq, 1);
1058 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1059 1 << qs->rspq.cntxt_id);
1068 * init_qset_cntxt - initialize an SGE queue set context info
1069 * @qs: the queue set
1070 * @id: the queue set id
1072 * Initializes the TIDs and context ids for the queues of a queue set.
1075 init_qset_cntxt(struct sge_qset *qs, u_int id)
1078 qs->rspq.cntxt_id = id;
1079 qs->fl[0].cntxt_id = 2 * id;
1080 qs->fl[1].cntxt_id = 2 * id + 1;
1081 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
1082 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
1083 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
1084 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
1085 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
1087 mbufq_init(&qs->txq[TXQ_ETH].sendq);
1088 mbufq_init(&qs->txq[TXQ_OFLD].sendq);
1089 mbufq_init(&qs->txq[TXQ_CTRL].sendq);
1094 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
1096 txq->in_use += ndesc;
1098 * XXX we don't handle stopping of queue
1099 * presumably start handles this when we bump against the end
1101 txqs->gen = txq->gen;
1102 txq->unacked += ndesc;
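/*
 * Ask the SGE for a completion periodically instead of per packet: bit 5 of
 * the running unacked descriptor count is shifted up to the WR_COMPL bit
 * position, so the completion-request bit is set in alternating windows of
 * 32 descriptors.
 */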
1103 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
1105 txqs->pidx = txq->pidx;
1108 if (((txqs->pidx > txq->cidx) &&
1109 (txq->pidx < txqs->pidx) &&
1110 (txq->pidx >= txq->cidx)) ||
1111 ((txqs->pidx < txq->cidx) &&
1112 (txq->pidx >= txq->cidx)) ||
1113 ((txqs->pidx < txq->cidx) &&
1114 (txq->cidx < txqs->pidx)))
1115 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
1116 txqs->pidx, txq->pidx, txq->cidx);
1118 if (txq->pidx >= txq->size) {
1119 txq->pidx -= txq->size;
1126 * calc_tx_descs - calculate the number of Tx descriptors for a packet
1127 * @m: the packet mbufs
1128 * @nsegs: the number of segments
1130 * Returns the number of Tx descriptors needed for the given Ethernet
1131 * packet. Ethernet packets require addition of WR and CPL headers.
1133 static __inline unsigned int
1134 calc_tx_descs(const struct mbuf *m, int nsegs)
1138 if (m->m_pkthdr.len <= PIO_LEN)
1141 flits = sgl_len(nsegs) + 2;
1142 #ifdef TSO_SUPPORTED
1143 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1146 return flits_to_desc(flits);
1150 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
1151 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
1154 int err, pktlen, pass = 0;
1155 bus_dma_tag_t tag = txq->entry_tag;
1160 pktlen = m0->m_pkthdr.len;
1161 #if defined(__i386__) || defined(__amd64__)
1162 if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
1166 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
1171 if (err == EFBIG && pass == 0) {
1173 /* Too many segments, try to defrag */
1174 m0 = m_defrag(m0, M_DONTWAIT);
1182 } else if (err == ENOMEM) {
1186 printf("map failure err=%d pktlen=%d\n", err, pktlen);
1192 #if !defined(__i386__) && !defined(__amd64__)
1193 bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
1195 txsd->flags |= TX_SW_DESC_MAPPED;
1201 * make_sgl - populate a scatter/gather list for a packet
1202 * @sgp: the SGL to populate
1203 * @segs: the packet dma segments
1204 * @nsegs: the number of segments
1206 * Generates a scatter/gather list for the buffers that make up a packet
1207 * and returns the SGL size in 8-byte words. The caller must size the SGL
1210 static __inline void
1211 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1215 for (idx = 0, i = 0; i < nsegs; i++) {
1217 * firmware doesn't like empty segments
1219 if (segs[i].ds_len == 0)
1224 sgp->len[idx] = htobe32(segs[i].ds_len);
1225 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1236 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1237 * @adap: the adapter
1240 * Ring the doorbell if a Tx queue is asleep. There is a natural race,
1241 * where the HW is going to sleep just after we checked, however,
1242 * then the interrupt handler will detect the outstanding TX packet
1243 * and ring the doorbell for us.
1245 * When GTS is disabled we unconditionally ring the doorbell.
1247 static __inline void
1248 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1251 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1252 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1253 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1255 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1258 t3_write_reg(adap, A_SG_KDOORBELL,
1259 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1262 wmb(); /* write descriptors before telling HW */
1263 t3_write_reg(adap, A_SG_KDOORBELL,
1264 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1268 static __inline void
1269 wr_gen2(struct tx_desc *d, unsigned int gen)
1271 #if SGE_NUM_GENBITS == 2
1272 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1277 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1278 * @ndesc: number of Tx descriptors spanned by the SGL
1279 * @txd: first Tx descriptor to be written
1280 * @txqs: txq state (generation and producer index)
1281 * @txq: the SGE Tx queue
1283 * @flits: number of flits to the start of the SGL in the first descriptor
1284 * @sgl_flits: the SGL size in flits
1285 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1286 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1288 * Write a work request header and an associated SGL. If the SGL is
1289 * small enough to fit into one Tx descriptor it has already been written
1290 * and we just need to write the WR header. Otherwise we distribute the
1291 * SGL across the number of descriptors it spans.
1294 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1295 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1296 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1299 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1300 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1302 if (__predict_true(ndesc == 1)) {
1303 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1304 V_WR_SGLSFLT(flits)) | wr_hi,
1305 htonl(V_WR_LEN(flits + sgl_flits) |
1306 V_WR_GEN(txqs->gen)) | wr_lo);
1308 wr_gen2(txd, txqs->gen);
1311 unsigned int ogen = txqs->gen;
1312 const uint64_t *fp = (const uint64_t *)sgl;
1313 struct work_request_hdr *wp = wrp;
1315 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1316 V_WR_SGLSFLT(flits)) | wr_hi;
1319 unsigned int avail = WR_FLITS - flits;
1321 if (avail > sgl_flits)
1323 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1332 if (++txqs->pidx == txq->size) {
1340 * when the head of the mbuf chain
1341 * is freed all clusters will be freed
1344 wrp = (struct work_request_hdr *)txd;
1345 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
1346 V_WR_SGLSFLT(1)) | wr_hi;
1347 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
1349 V_WR_GEN(txqs->gen)) | wr_lo;
1350 wr_gen2(txd, txqs->gen);
1353 wrp->wrh_hi |= htonl(F_WR_EOP);
1355 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1356 wr_gen2((struct tx_desc *)wp, ogen);
1360 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1361 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
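/* i.e. 14 + 4 + 20 + 20 = 58 bytes for a VLAN-tagged frame with minimal IP and TCP headers */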
1363 #ifdef VLAN_SUPPORTED
1364 #define GET_VTAG(cntrl, m) \
1366 if ((m)->m_flags & M_VLANTAG) \
1367 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1371 #define GET_VTAG(cntrl, m)
1375 t3_encap(struct sge_qset *qs, struct mbuf **m)
1379 struct sge_txq *txq;
1380 struct txq_state txqs;
1381 struct port_info *pi;
1382 unsigned int ndesc, flits, cntrl, mlen;
1383 int err, nsegs, tso_info = 0;
1385 struct work_request_hdr *wrp;
1386 struct tx_sw_desc *txsd;
1387 struct sg_ent *sgp, *sgl;
1388 uint32_t wr_hi, wr_lo, sgl_flits;
1389 bus_dma_segment_t segs[TX_MAX_SEGS];
1391 struct tx_desc *txd;
1395 txq = &qs->txq[TXQ_ETH];
1396 txd = &txq->desc[txq->pidx];
1397 txsd = &txq->sdesc[txq->pidx];
1403 DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
1404 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
1406 mtx_assert(&qs->lock, MA_OWNED);
1407 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1408 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
1410 #ifdef VLAN_SUPPORTED
1411 if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
1412 m0->m_pkthdr.csum_flags & (CSUM_TSO))
1413 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1415 if (m0->m_nextpkt != NULL) {
1416 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
1420 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
1421 &m0, segs, &nsegs))) {
1423 printf("failed ... err=%d\n", err);
1426 mlen = m0->m_pkthdr.len;
1427 ndesc = calc_tx_descs(m0, nsegs);
1429 txq_prod(txq, ndesc, &txqs);
1431 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
1434 if (m0->m_nextpkt != NULL) {
1435 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1439 panic("trying to coalesce %d packets in to one WR", nsegs);
1440 txq->txq_coalesced += nsegs;
1441 wrp = (struct work_request_hdr *)txd;
1442 flits = nsegs*2 + 1;
1444 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
1445 struct cpl_tx_pkt_batch_entry *cbe;
1447 uint32_t *hflit = (uint32_t *)&flit;
1448 int cflags = m0->m_pkthdr.csum_flags;
1450 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1451 GET_VTAG(cntrl, m0);
1452 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1453 if (__predict_false(!(cflags & CSUM_IP)))
1454 cntrl |= F_TXPKT_IPCSUM_DIS;
1455 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
1456 cntrl |= F_TXPKT_L4CSUM_DIS;
1458 hflit[0] = htonl(cntrl);
1459 hflit[1] = htonl(segs[i].ds_len | 0x80000000);
1460 flit |= htobe64(1 << 24);
1461 cbe = &cpl_batch->pkt_entry[i];
1462 cbe->cntrl = hflit[0];
1463 cbe->len = hflit[1];
1464 cbe->addr = htobe64(segs[i].ds_addr);
1467 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1468 V_WR_SGLSFLT(flits)) |
1469 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1470 wr_lo = htonl(V_WR_LEN(flits) |
1471 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1472 set_wr_hdr(wrp, wr_hi, wr_lo);
1474 wr_gen2(txd, txqs.gen);
1475 check_ring_tx_db(sc, txq);
1477 } else if (tso_info) {
1478 int min_size = TCPPKTHDRSIZE, eth_type, tagged;
1479 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1485 GET_VTAG(cntrl, m0);
1486 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1487 hdr->cntrl = htonl(cntrl);
1488 hdr->len = htonl(mlen | 0x80000000);
1490 DPRINTF("tso buf len=%d\n", mlen);
1492 tagged = m0->m_flags & M_VLANTAG;
1494 min_size -= ETHER_VLAN_ENCAP_LEN;
1496 if (__predict_false(mlen < min_size)) {
1497 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1498 m0, mlen, m0->m_pkthdr.tso_segsz,
1499 m0->m_pkthdr.csum_flags, m0->m_flags);
1500 panic("tx tso packet too small");
1503 /* Make sure that ether, ip, tcp headers are all in m0 */
1504 if (__predict_false(m0->m_len < min_size)) {
1505 m0 = m_pullup(m0, min_size);
1506 if (__predict_false(m0 == NULL)) {
1507 /* XXX panic probably an overreaction */
1508 panic("couldn't fit header into mbuf");
1511 pkthdr = m0->m_data;
1514 eth_type = CPL_ETH_II_VLAN;
1515 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1516 ETHER_VLAN_ENCAP_LEN);
1518 eth_type = CPL_ETH_II;
1519 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1521 tcp = (struct tcphdr *)((uint8_t *)ip +
1524 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1525 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1526 V_LSO_TCPHDR_WORDS(tcp->th_off);
1527 hdr->lso_info = htonl(tso_info);
1529 if (__predict_false(mlen <= PIO_LEN)) {
1530 /* pkt not undersized but fits in PIO_LEN
1531 * Indicates a TSO bug at the higher levels.
1534 DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1535 m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
1537 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1538 flits = (mlen + 7) / 8 + 3;
1539 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1540 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1541 F_WR_SOP | F_WR_EOP | txqs.compl);
1542 wr_lo = htonl(V_WR_LEN(flits) |
1543 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1544 set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
1546 wr_gen2(txd, txqs.gen);
1547 check_ring_tx_db(sc, txq);
1552 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1554 GET_VTAG(cntrl, m0);
1555 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1556 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1557 cntrl |= F_TXPKT_IPCSUM_DIS;
1558 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1559 cntrl |= F_TXPKT_L4CSUM_DIS;
1560 cpl->cntrl = htonl(cntrl);
1561 cpl->len = htonl(mlen | 0x80000000);
1563 if (mlen <= PIO_LEN) {
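/*
 * PIO path: the frame is small enough to be copied straight into the
 * descriptor behind the headers.  Two flits hold the WR header and the
 * CPL_TX_PKT header (the LSO path above needs three because of the larger
 * CPL), and mlen is rounded up to whole 8-byte flits.
 */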
1565 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1566 flits = (mlen + 7) / 8 + 2;
1568 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1569 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1570 F_WR_SOP | F_WR_EOP | txqs.compl);
1571 wr_lo = htonl(V_WR_LEN(flits) |
1572 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1573 set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
1575 wr_gen2(txd, txqs.gen);
1576 check_ring_tx_db(sc, txq);
1581 wrp = (struct work_request_hdr *)txd;
1582 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1583 make_sgl(sgp, segs, nsegs);
1585 sgl_flits = sgl_len(nsegs);
1587 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
1588 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1589 wr_lo = htonl(V_WR_TID(txq->token));
1590 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
1591 sgl_flits, wr_hi, wr_lo);
1592 check_ring_tx_db(pi->adapter, txq);
1598 cxgb_tx_watchdog(void *arg)
1600 struct sge_qset *qs = arg;
1601 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1603 if (qs->coalescing != 0 &&
1604 (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
1607 else if (qs->coalescing == 0 &&
1608 (txq->in_use >= cxgb_tx_coalesce_enable_start))
1610 if (TXQ_TRYLOCK(qs)) {
1611 qs->qs_flags |= QS_FLUSHING;
1612 cxgb_start_locked(qs);
1613 qs->qs_flags &= ~QS_FLUSHING;
1616 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
1617 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
1618 qs, txq->txq_watchdog.c_cpu);
1622 cxgb_tx_timeout(void *arg)
1624 struct sge_qset *qs = arg;
1625 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1627 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
1629 if (TXQ_TRYLOCK(qs)) {
1630 qs->qs_flags |= QS_TIMEOUT;
1631 cxgb_start_locked(qs);
1632 qs->qs_flags &= ~QS_TIMEOUT;
1638 cxgb_start_locked(struct sge_qset *qs)
1640 struct mbuf *m_head = NULL;
1641 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1643 int in_use_init = txq->in_use;
1644 struct port_info *pi = qs->port;
1645 struct ifnet *ifp = pi->ifp;
1646 avail = txq->size - txq->in_use - 4;
1647 txmax = min(TX_START_MAX_DESC, avail);
1649 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
1650 reclaim_completed_tx(qs, 0, TXQ_ETH);
1652 if (!pi->link_config.link_ok) {
1656 TXQ_LOCK_ASSERT(qs);
1657 while ((txq->in_use - in_use_init < txmax) &&
1658 !TXQ_RING_EMPTY(qs) &&
1659 (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
1660 pi->link_config.link_ok) {
1661 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1663 if ((m_head = cxgb_dequeue(qs)) == NULL)
1666 * Encapsulation can modify our pointer, and or make it
1667 * NULL on failure. In that event, we can't requeue.
1669 if (t3_encap(qs, &m_head) || m_head == NULL)
1672 /* Send a copy of the frame to the BPF listener */
1673 ETHER_BPF_MTAP(ifp, m_head);
1676 * We sent via PIO, no longer need a copy
1678 if (m_head->m_nextpkt == NULL &&
1679 m_head->m_pkthdr.len <= PIO_LEN)
1684 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
1685 pi->link_config.link_ok)
1686 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1687 qs, txq->txq_timer.c_cpu);
1693 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
1695 struct port_info *pi = qs->port;
1696 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1697 struct buf_ring *br = txq->txq_mr;
1700 avail = txq->size - txq->in_use;
1701 TXQ_LOCK_ASSERT(qs);
1704 * We can only do a direct transmit if the following are true:
1705 * - we aren't coalescing (ring < 3/4 full)
1706 * - the link is up -- checked in caller
1707 * - there are no packets enqueued already
1708 * - there is space in hardware transmit queue
1710 if (check_pkt_coalesce(qs) == 0 &&
1711 TXQ_RING_EMPTY(qs) && avail > 4) {
1712 if (t3_encap(qs, &m)) {
1714 (error = drbr_enqueue(ifp, br, m)) != 0)
1718 * We've bypassed the buf ring so we need to update
1719 * the stats directly
1721 txq->txq_direct_packets++;
1722 txq->txq_direct_bytes += m->m_pkthdr.len;
1724 * Send a copy of the frame to the BPF
1725 * listener and set the watchdog on.
1727 ETHER_BPF_MTAP(ifp, m);
1729 * We sent via PIO, no longer need a copy
1731 if (m->m_pkthdr.len <= PIO_LEN)
1735 } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
1738 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1739 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
1740 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
1741 cxgb_start_locked(qs);
1742 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
1743 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1744 qs, txq->txq_timer.c_cpu);
1749 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
1751 struct sge_qset *qs;
1752 struct port_info *pi = ifp->if_softc;
1753 int error, qidx = pi->first_qset;
1755 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
1756 ||(!pi->link_config.link_ok)) {
1761 if (m->m_flags & M_FLOWID)
1762 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
1764 qs = &pi->adapter->sge.qs[qidx];
1766 if (TXQ_TRYLOCK(qs)) {
1768 error = cxgb_transmit_locked(ifp, qs, m);
1771 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
1775 cxgb_start(struct ifnet *ifp)
1777 struct port_info *pi = ifp->if_softc;
1778 struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];
1780 if (!pi->link_config.link_ok)
1784 cxgb_start_locked(qs);
1789 cxgb_qflush(struct ifnet *ifp)
1792 * flush any enqueued mbufs in the buf_rings
1793 * and in the transmit queues
1800 * write_imm - write a packet into a Tx descriptor as immediate data
1801 * @d: the Tx descriptor to write
1803 * @len: the length of packet data to write as immediate data
1804 * @gen: the generation bit value to write
1806 * Writes a packet as immediate data into a Tx descriptor. The packet
1807 * contains a work request at its beginning. We must write the packet
1808 * carefully so the SGE doesn't read accidentally before it's written in
1811 static __inline void
1812 write_imm(struct tx_desc *d, struct mbuf *m,
1813 unsigned int len, unsigned int gen)
1815 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1816 struct work_request_hdr *to = (struct work_request_hdr *)d;
1817 uint32_t wr_hi, wr_lo;
1820 panic("len too big %d\n", len);
1821 if (len < sizeof(*from))
1822 panic("len too small %d", len);
1824 memcpy(&to[1], &from[1], len - sizeof(*from));
1825 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
1826 V_WR_BCNTLFLT(len & 7));
1827 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
1828 V_WR_LEN((len + 7) / 8));
1829 set_wr_hdr(to, wr_hi, wr_lo);
1834 * This check is a hack; we should really fix the logic so
1835 * that this can't happen
1837 if (m->m_type != MT_DONTFREE)
1843 * check_desc_avail - check descriptor availability on a send queue
1844 * @adap: the adapter
1846 * @m: the packet needing the descriptors
1847 * @ndesc: the number of Tx descriptors needed
1848 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1850 * Checks if the requested number of Tx descriptors is available on an
1851 * SGE send queue. If the queue is already suspended or not enough
1852 * descriptors are available the packet is queued for later transmission.
1853 * Must be called with the Tx queue locked.
1855 * Returns 0 if enough descriptors are available, 1 if there aren't
1856 * enough descriptors and the packet has been queued, and 2 if the caller
1857 * needs to retry because there weren't enough descriptors at the
1858 * beginning of the call but some freed up in the mean time.
1861 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1862 struct mbuf *m, unsigned int ndesc,
1866 * XXX We currently only use this for checking the control queue
1867 * the control queue is only used for binding qsets which happens
1868 * at init time so we are guaranteed enough descriptors
1870 if (__predict_false(!mbufq_empty(&q->sendq))) {
1871 addq_exit: mbufq_tail(&q->sendq, m);
1874 if (__predict_false(q->size - q->in_use < ndesc)) {
1876 struct sge_qset *qs = txq_to_qset(q, qid);
1878 setbit(&qs->txq_stopped, qid);
1879 if (should_restart_tx(q) &&
1880 test_and_clear_bit(qid, &qs->txq_stopped))
1891 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1892 * @q: the SGE control Tx queue
1894 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1895 * that send only immediate data (presently just the control queues) and
1896 * thus do not have any mbufs
1898 static __inline void
1899 reclaim_completed_tx_imm(struct sge_txq *q)
1901 unsigned int reclaim = q->processed - q->cleaned;
1903 q->in_use -= reclaim;
1904 q->cleaned += reclaim;
1908 immediate(const struct mbuf *m)
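/*
 * A work request is "immediate" when the whole thing, WR header included,
 * fits in WR_LEN bytes; write_imm() then copies it directly into a Tx
 * descriptor, so no SGL or DMA mapping is needed.
 */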
1910 return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
1914 * ctrl_xmit - send a packet through an SGE control Tx queue
1915 * @adap: the adapter
1916 * @qs: the queue set containing the control queue
1919 * Send a packet through an SGE control Tx queue. Packets sent through
1920 * a control queue must fit entirely as immediate data in a single Tx
1921 * descriptor and have no page fragments.
1924 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1927 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1928 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1930 if (__predict_false(!immediate(m))) {
1935 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1936 wrp->wrh_lo = htonl(V_WR_TID(q->token));
1939 again: reclaim_completed_tx_imm(q);
1941 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1942 if (__predict_false(ret)) {
1949 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1952 if (++q->pidx >= q->size) {
1958 t3_write_reg(adap, A_SG_KDOORBELL,
1959 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1965 * restart_ctrlq - restart a suspended control queue
1966 * @qs: the queue set containing the control queue
1968 * Resumes transmission on a suspended Tx control queue.
1971 restart_ctrlq(void *data, int npending)
1974 struct sge_qset *qs = (struct sge_qset *)data;
1975 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1976 adapter_t *adap = qs->port->adapter;
1979 again: reclaim_completed_tx_imm(q);
1981 while (q->in_use < q->size &&
1982 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1984 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1986 if (++q->pidx >= q->size) {
1992 if (!mbufq_empty(&q->sendq)) {
1993 setbit(&qs->txq_stopped, TXQ_CTRL);
1995 if (should_restart_tx(q) &&
1996 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
2001 t3_write_reg(adap, A_SG_KDOORBELL,
2002 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2007 * Send a management message through control queue 0
2010 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
2012 return ctrl_xmit(adap, &adap->sge.qs[0], m);
2016 * free_qset - free the resources of an SGE queue set
2017 * @sc: the controller owning the queue set
2020 * Release the HW and SW resources associated with an SGE queue set, such
2021 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
2022 * queue set must be quiesced prior to calling this.
2025 t3_free_qset(adapter_t *sc, struct sge_qset *q)
2029 reclaim_completed_tx(q, 0, TXQ_ETH);
2030 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2031 if (q->txq[i].txq_mr != NULL)
2032 buf_ring_free(q->txq[i].txq_mr, M_DEVBUF);
2033 if (q->txq[i].txq_ifq != NULL) {
2034 ifq_delete(q->txq[i].txq_ifq);
2035 free(q->txq[i].txq_ifq, M_DEVBUF);
2039 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2040 if (q->fl[i].desc) {
2041 mtx_lock_spin(&sc->sge.reg_lock);
2042 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
2043 mtx_unlock_spin(&sc->sge.reg_lock);
2044 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
2045 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
2047 bus_dma_tag_destroy(q->fl[i].desc_tag);
2048 bus_dma_tag_destroy(q->fl[i].entry_tag);
2050 if (q->fl[i].sdesc) {
2051 free_rx_bufs(sc, &q->fl[i]);
2052 free(q->fl[i].sdesc, M_DEVBUF);
2056 mtx_unlock(&q->lock);
2057 MTX_DESTROY(&q->lock);
2058 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2059 if (q->txq[i].desc) {
2060 mtx_lock_spin(&sc->sge.reg_lock);
2061 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2062 mtx_unlock_spin(&sc->sge.reg_lock);
2063 bus_dmamap_unload(q->txq[i].desc_tag,
2064 q->txq[i].desc_map);
2065 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2066 q->txq[i].desc_map);
2067 bus_dma_tag_destroy(q->txq[i].desc_tag);
2068 bus_dma_tag_destroy(q->txq[i].entry_tag);
2070 if (q->txq[i].sdesc) {
2071 free(q->txq[i].sdesc, M_DEVBUF);
2076 mtx_lock_spin(&sc->sge.reg_lock);
2077 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2078 mtx_unlock_spin(&sc->sge.reg_lock);
2080 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2081 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2083 bus_dma_tag_destroy(q->rspq.desc_tag);
2084 MTX_DESTROY(&q->rspq.lock);
2087 #ifdef LRO_SUPPORTED
2088 tcp_lro_free(&q->lro.ctrl);
2091 bzero(q, sizeof(*q));
2095 * t3_free_sge_resources - free SGE resources
2096 * @sc: the adapter softc
2098 * Frees resources used by the SGE queue sets.
2101 t3_free_sge_resources(adapter_t *sc)
2105 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2106 nqsets += sc->port[i].nqsets;
2108 for (i = 0; i < nqsets; ++i) {
2109 TXQ_LOCK(&sc->sge.qs[i]);
2110 t3_free_qset(sc, &sc->sge.qs[i]);
2116 * t3_sge_start - enable SGE
2117 * @sc: the controller softc
2119 * Enables the SGE for DMAs. This is the last step in starting packet
2123 t3_sge_start(adapter_t *sc)
2125 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2129 * t3_sge_stop - disable SGE operation
2132 * Disables the DMA engine. This can be called in emergencies (e.g.,
2133 * from error interrupts) or from normal process context. In the latter
2134 * case it also disables any pending queue restart tasklets. Note that
2135 * if it is called in interrupt context it cannot disable the restart
2136 * tasklets as it cannot wait, however the tasklets will have no effect
2137 * since the doorbells are disabled and the driver will call this again
2138 * later from process context, at which time the tasklets will be stopped
2139 * if they are still running.
2142 t3_sge_stop(adapter_t *sc)
2146 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2151 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2152 nqsets += sc->port[i].nqsets;
2158 for (i = 0; i < nqsets; ++i) {
2159 struct sge_qset *qs = &sc->sge.qs[i];
2161 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2162 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2168 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
2169 * @qs: the queue set that owns the Tx queue
2170 * @reclaimable: the number of descriptors to reclaim
2171 * @queue: the Tx queue within the set (TXQ_ETH, TXQ_OFLD, or TXQ_CTRL)
2175 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2176 * Tx buffers. Called with the Tx queue lock held.
2178 * Returns the number of buffers reclaimed
2181 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2183 struct tx_sw_desc *txsd;
2184 unsigned int cidx, mask;
2185 struct sge_txq *q = &qs->txq[queue];
2188 T3_TRACE2(sc->tb[q->cntxt_id & 7],
2189 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2193 txsd = &q->sdesc[cidx];
2195 mtx_assert(&qs->lock, MA_OWNED);
2196 while (reclaimable--) {
2197 prefetch(q->sdesc[(cidx + 1) & mask].m);
2198 prefetch(q->sdesc[(cidx + 2) & mask].m);
2200 if (txsd->m != NULL) {
2201 if (txsd->flags & TX_SW_DESC_MAPPED) {
2202 bus_dmamap_unload(q->entry_tag, txsd->map);
2203 txsd->flags &= ~TX_SW_DESC_MAPPED;
2205 m_freem_list(txsd->m);
2211 if (++cidx == q->size) {
2221 * is_new_response - check if a response is newly written
2222 * @r: the response descriptor
2223 * @q: the response queue
2225 * Returns true if a response descriptor contains a yet unprocessed
2229 is_new_response(const struct rsp_desc *r,
2230 const struct sge_rspq *q)
2232 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
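/*
 * The driver toggles q->gen each time its consumer index wraps the response
 * ring, and the SGE stamps each response it posts with the generation bit of
 * the current pass, so a matching bit means the entry was written since we
 * last visited this slot rather than being stale data from the previous lap.
 */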
2235 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2236 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2237 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2238 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2239 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2241 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2242 #define NOMEM_INTR_DELAY 2500
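/* 2500 * 0.1us = 250us. */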
2245 * write_ofld_wr - write an offload work request
2246 * @adap: the adapter
2247 * @m: the packet to send
2249 * @pidx: index of the first Tx descriptor to write
2250 * @gen: the generation value to use
2251 * @ndesc: number of descriptors the packet will occupy
2253 * Write an offload work request to send the supplied packet. The packet
2254 * data already carry the work request with most fields populated.
2257 write_ofld_wr(adapter_t *adap, struct mbuf *m,
2258 struct sge_txq *q, unsigned int pidx,
2259 unsigned int gen, unsigned int ndesc,
2260 bus_dma_segment_t *segs, unsigned int nsegs)
2262 unsigned int sgl_flits, flits;
2263 struct work_request_hdr *from;
2264 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
2265 struct tx_desc *d = &q->desc[pidx];
2266 struct txq_state txqs;
2268 if (immediate(m) && nsegs == 0) {
2269 write_imm(d, m, m->m_len, gen);
2273 /* Only TX_DATA builds SGLs */
2274 from = mtod(m, struct work_request_hdr *);
2275 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
2277 flits = m->m_len / 8;
2278 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
2280 make_sgl(sgp, segs, nsegs);
2281 sgl_flits = sgl_len(nsegs);
2287 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
2288 from->wrh_hi, from->wrh_lo);
2292 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
2295 * Returns the number of Tx descriptors needed for the given offload
2296 * packet. These packets are already fully constructed.
2298 static __inline unsigned int
2299 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
2301 unsigned int flits, cnt = 0;
2304 if (m->m_len <= WR_LEN && nsegs == 0)
2305 return (1); /* packet fits as immediate data */
2308 * This needs to be re-visited for TOE
2314 flits = m->m_len / 8;
2316 ndescs = flits_to_desc(flits + sgl_len(cnt));
2322 * ofld_xmit - send a packet through an offload queue
2323 * @adap: the adapter
2324 * @qs: the queue set containing the Tx offload queue
2327 * Send an offload packet through an SGE offload queue.
2330 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2334 unsigned int pidx, gen;
2335 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2336 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2337 struct tx_sw_desc *stx;
2339 nsegs = m_get_sgllen(m);
2340 vsegs = m_get_sgl(m);
2341 ndesc = calc_tx_descs_ofld(m, nsegs);
2342 busdma_map_sgl(vsegs, segs, nsegs);
2344 stx = &q->sdesc[q->pidx];
2347 again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
2348 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2349 if (__predict_false(ret)) {
2351 printf("no ofld desc avail\n");
2353 m_set_priority(m, ndesc); /* save for restart */
2364 if (q->pidx >= q->size) {
2369 T3_TRACE5(adap->tb[q->cntxt_id & 7],
2370 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2371 ndesc, pidx, m->m_pkthdr.len, m->m_len, nsegs);
2376 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2377 check_ring_tx_db(adap, q);
2382 * restart_offloadq - restart a suspended offload queue
2383 * @qs: the queue set containing the offload queue
2385 * Resumes transmission on a suspended Tx offload queue.
2388 restart_offloadq(void *data, int npending)
2391 struct sge_qset *qs = data;
2392 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2393 adapter_t *adap = qs->port->adapter;
2394 bus_dma_segment_t segs[TX_MAX_SEGS];
2395 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2399 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
2401 while ((m = mbufq_peek(&q->sendq)) != NULL) {
2402 unsigned int gen, pidx;
2403 unsigned int ndesc = m_get_priority(m);
2405 if (__predict_false(q->size - q->in_use < ndesc)) {
2406 setbit(&qs->txq_stopped, TXQ_OFLD);
2407 if (should_restart_tx(q) &&
2408 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2418 if (q->pidx >= q->size) {
2423 (void)mbufq_dequeue(&q->sendq);
2424 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2426 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2430 set_bit(TXQ_RUNNING, &q->flags);
2431 set_bit(TXQ_LAST_PKT_DB, &q->flags);
2435 t3_write_reg(adap, A_SG_KDOORBELL,
2436 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2440 * queue_set - return the queue set a packet should use
2443 * Maps a packet to the SGE queue set it should use. The desired queue
2444 * set is carried in bits 1-3 of the packet's priority.
2447 queue_set(const struct mbuf *m)
2449 return m_get_priority(m) >> 1;
2453 * is_ctrl_pkt - return whether an offload packet is a control packet
2456 * Determines whether an offload packet should use an OFLD or a CTRL
2457 * Tx queue. This is indicated by bit 0 in the packet's priority.
2460 is_ctrl_pkt(const struct mbuf *m)
2462 return m_get_priority(m) & 1;
2466 * t3_offload_tx - send an offload packet
2467 * @tdev: the offload device to send to
2470 * Sends an offload packet. We use the packet priority to select the
2471 * appropriate Tx queue as follows: bit 0 indicates whether the packet
2472 * should be sent as regular or control, bits 1-3 select the queue set.
2475 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2477 adapter_t *adap = tdev2adap(tdev);
2478 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2480 if (__predict_false(is_ctrl_pkt(m)))
2481 return ctrl_xmit(adap, qs, m);
2483 return ofld_xmit(adap, qs, m);
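/*
 * A minimal illustration of the priority encoding described above
 * (hypothetical value): a priority of 0x5 (binary 101) has bit 0 set and
 * bits 1-3 equal to 2, so the packet is sent on qset 2's control queue:
 *
 *	is_ctrl_pkt(m)  ->  0x5 & 1   == 1
 *	queue_set(m)    ->  0x5 >> 1  == 2
 */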
2487 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2488 * @tdev: the offload device that will be receiving the packets
2489 * @q: the SGE response queue that assembled the bundle
2490 * @m: the partial bundle
2491 * @n: the number of packets in the bundle
2493 * Delivers a (partial) bundle of Rx offload packets to an offload device.
2495 static __inline void
2496 deliver_partial_bundle(struct t3cdev *tdev,
2498 struct mbuf *mbufs[], int n)
2501 q->offload_bundles++;
2502 cxgb_ofld_recv(tdev, mbufs, n);
2507 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2508 struct mbuf *m, struct mbuf *rx_gather[],
2509 unsigned int gather_idx)
2513 m->m_pkthdr.header = mtod(m, void *);
2514 rx_gather[gather_idx++] = m;
2515 if (gather_idx == RX_BUNDLE_SIZE) {
2516 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2518 rq->offload_bundles++;
2520 return (gather_idx);
2524 restart_tx(struct sge_qset *qs)
2526 struct adapter *sc = qs->port->adapter;
2529 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2530 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2531 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2532 qs->txq[TXQ_OFLD].restarts++;
2533 DPRINTF("restarting TXQ_OFLD\n");
2534 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2536 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2537 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2538 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2539 qs->txq[TXQ_CTRL].in_use);
2541 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2542 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2543 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2544 qs->txq[TXQ_CTRL].restarts++;
2545 DPRINTF("restarting TXQ_CTRL\n");
2546 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
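/*
 * A short sketch of the stop/resume handshake used here (illustrative only):
 * the transmit path marks a queue stopped when it runs out of descriptors,
 * and the queue is resumed once the hardware has returned enough credits,
 * roughly
 *
 *	setbit(&qs->txq_stopped, TXQ_OFLD);
 *	if (should_restart_tx(q) &&
 *	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 *		taskqueue_enqueue(sc->tq, &q->qresume_task);
 */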
2551 * t3_sge_alloc_qset - initialize an SGE queue set
2552 * @sc: the controller softc
2553 * @id: the queue set id
2554 * @nports: how many Ethernet ports will be using this queue set
2555 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2556 * @p: configuration parameters for this queue set
2557 * @ntxq: number of Tx queues for the queue set
2558 * @pi: port info for queue set
2560 * Allocate resources and initialize an SGE queue set. A queue set
2561 * comprises a response queue, two Rx free-buffer queues, and up to 3
2562 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2563 * queue, offload queue, and control queue.
2566 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2567 const struct qset_params *p, int ntxq, struct port_info *pi)
2569 struct sge_qset *q = &sc->sge.qs[id];
2572 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2575 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2577 if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2578 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
2579 device_printf(sc->dev, "failed to allocate mbuf ring\n");
2582 if ((q->txq[i].txq_ifq =
2583 malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO))
2585 device_printf(sc->dev, "failed to allocate ifq\n");
2588 ifq_init(q->txq[i].txq_ifq, pi->ifp);
2589 callout_init(&q->txq[i].txq_timer, 1);
2590 callout_init(&q->txq[i].txq_watchdog, 1);
2591 q->txq[i].txq_timer.c_cpu = id % mp_ncpus;
2592 q->txq[i].txq_watchdog.c_cpu = id % mp_ncpus;
2594 init_qset_cntxt(q, id);
2596 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2597 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2598 &q->fl[0].desc, &q->fl[0].sdesc,
2599 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2600 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2601 printf("error %d from alloc ring fl0\n", ret);
2605 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2606 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2607 &q->fl[1].desc, &q->fl[1].sdesc,
2608 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2609 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2610 printf("error %d from alloc ring fl1\n", ret);
2614 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2615 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2616 &q->rspq.desc_tag, &q->rspq.desc_map,
2617 NULL, NULL)) != 0) {
2618 printf("error %d from alloc ring rspq\n", ret);
2622 for (i = 0; i < ntxq; ++i) {
2623 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2625 if ((ret = alloc_ring(sc, p->txq_size[i],
2626 sizeof(struct tx_desc), sz,
2627 &q->txq[i].phys_addr, &q->txq[i].desc,
2628 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2629 &q->txq[i].desc_map,
2630 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2631 printf("error %d from alloc ring tx %i\n", ret, i);
2634 mbufq_init(&q->txq[i].sendq);
2636 q->txq[i].size = p->txq_size[i];
2639 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2640 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2641 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2642 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2644 q->fl[0].gen = q->fl[1].gen = 1;
2645 q->fl[0].size = p->fl_size;
2646 q->fl[1].size = p->jumbo_size;
2650 q->rspq.size = p->rspq_size;
2652 q->txq[TXQ_ETH].stop_thres = nports *
2653 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2655 q->fl[0].buf_size = MCLBYTES;
2656 q->fl[0].zone = zone_pack;
2657 q->fl[0].type = EXT_PACKET;
2658 #if __FreeBSD_version > 800000
2659 if (cxgb_use_16k_clusters) {
2660 q->fl[1].buf_size = MJUM16BYTES;
2661 q->fl[1].zone = zone_jumbo16;
2662 q->fl[1].type = EXT_JUMBO16;
2664 q->fl[1].buf_size = MJUM9BYTES;
2665 q->fl[1].zone = zone_jumbo9;
2666 q->fl[1].type = EXT_JUMBO9;
2669 q->fl[1].buf_size = MJUMPAGESIZE;
2670 q->fl[1].zone = zone_jumbop;
2671 q->fl[1].type = EXT_JUMBOP;
2674 #ifdef LRO_SUPPORTED
2675 /* Allocate and setup the lro_ctrl structure */
2676 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2677 ret = tcp_lro_init(&q->lro.ctrl);
2679 printf("error %d from tcp_lro_init\n", ret);
2682 q->lro.ctrl.ifp = pi->ifp;
2685 mtx_lock_spin(&sc->sge.reg_lock);
2686 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2687 q->rspq.phys_addr, q->rspq.size,
2688 q->fl[0].buf_size, 1, 0);
2690 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2694 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2695 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2696 q->fl[i].phys_addr, q->fl[i].size,
2697 q->fl[i].buf_size, p->cong_thres, 1,
2700 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2705 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2706 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2707 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2710 printf("error %d from t3_sge_init_ecntxt\n", ret);
2715 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2716 USE_GTS, SGE_CNTXT_OFLD, id,
2717 q->txq[TXQ_OFLD].phys_addr,
2718 q->txq[TXQ_OFLD].size, 0, 1, 0);
2720 printf("error %d from t3_sge_init_ecntxt\n", ret);
2726 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2728 q->txq[TXQ_CTRL].phys_addr,
2729 q->txq[TXQ_CTRL].size,
2730 q->txq[TXQ_CTRL].token, 1, 0);
2732 printf("error %d from t3_sge_init_ecntxt\n", ret);
2737 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2738 device_get_unit(sc->dev), irq_vec_idx);
2739 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2741 mtx_unlock_spin(&sc->sge.reg_lock);
2742 t3_update_qset_coalesce(q, p);
2745 refill_fl(sc, &q->fl[0], q->fl[0].size);
2746 refill_fl(sc, &q->fl[1], q->fl[1].size);
2747 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2749 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2750 V_NEWTIMER(q->rspq.holdoff_tmr));
2755 mtx_unlock_spin(&sc->sge.reg_lock);
2758 t3_free_qset(sc, q);
2764 * Remove the CPL_RX_PKT header from the mbuf, leaving a regular mbuf that
2765 * starts with the Ethernet frame. The hardware checksum results and any
2766 * VLAN tag are also recorded in the mbuf packet header here.
2769 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2771 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2772 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2773 struct ifnet *ifp = pi->ifp;
2775 DPRINTF("rx_eth m=%p m->m_data=%p cpl->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2777 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2778 cpl->csum_valid && cpl->csum == 0xffff) {
2779 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
2780 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2781 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2782 m->m_pkthdr.csum_data = 0xffff;
2785 * XXX need to add VLAN support for 6.x
2787 #ifdef VLAN_SUPPORTED
2788 if (__predict_false(cpl->vlan_valid)) {
2789 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2790 m->m_flags |= M_VLANTAG;
2794 m->m_pkthdr.rcvif = ifp;
2795 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2797 * adjust after conversion to mbuf chain
2799 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2800 m->m_len -= (sizeof(*cpl) + ethpad);
2801 m->m_data += (sizeof(*cpl) + ethpad);
2805 * get_packet - return the next ingress packet buffer from a free list
2806 * @adap: the adapter that received the packet
2807 * @drop_thres: # of remaining buffers before we start dropping packets
2808 * @qs: the qset that the SGE free list holding the packet belongs to
2809 * @mh: the mbuf header, which holds pointers to the head and tail of the mbuf chain
2810 * @r: response descriptor
2812 * Get the next packet from a free list and complete setup of the
2813 * mbuf. If the packet is small we make a copy and recycle the
2814 * original buffer, otherwise we use the original buffer itself. If a
2815 * positive drop threshold is supplied packets are dropped and their
2816 * buffers recycled if (a) the number of remaining buffers is under the
2817 * threshold and the packet is too big to copy, or (b) the packet should
2818 * be copied but there is no memory for the copy.
2821 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2822 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2825 unsigned int len_cq = ntohl(r->len_cq);
2826 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2827 int mask, cidx = fl->cidx;
2828 struct rx_sw_desc *sd = &fl->sdesc[cidx];
2829 uint32_t len = G_RSPD_LEN(len_cq);
2830 uint32_t flags = M_EXT;
2831 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
2836 mask = fl->size - 1;
2837 prefetch(fl->sdesc[(cidx + 1) & mask].m);
2838 prefetch(fl->sdesc[(cidx + 2) & mask].m);
2839 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
2840 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);
2843 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2845 if (recycle_enable && len <= SGE_RX_COPY_THRES &&
2846 sopeop == RSPQ_SOP_EOP) {
2847 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2849 cl = mtod(m, void *);
2850 memcpy(cl, sd->rxsd_cl, len);
2851 recycle_rx_buf(adap, fl, fl->cidx);
2852 m->m_pkthdr.len = m->m_len = len;
2854 mh->mh_head = mh->mh_tail = m;
2859 bus_dmamap_unload(fl->entry_tag, sd->map);
2863 if ((sopeop == RSPQ_SOP_EOP) ||
2864 (sopeop == RSPQ_SOP))
2866 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
2867 if (fl->zone == zone_pack) {
2869 * restore clobbered data pointer
2871 m->m_data = m->m_ext.ext_buf;
2873 m_cljset(m, cl, fl->type);
2882 mh->mh_head = mh->mh_tail = m;
2883 m->m_pkthdr.len = len;
2888 case RSPQ_NSOP_NEOP:
2889 if (mh->mh_tail == NULL) {
2890 log(LOG_ERR, "discarding intermediate descriptor entry\n");
2894 mh->mh_tail->m_next = m;
2896 mh->mh_head->m_pkthdr.len += len;
2900 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
2902 if (++fl->cidx == fl->size)
2909 * handle_rsp_cntrl_info - handles control information in a response
2910 * @qs: the queue set corresponding to the response
2911 * @flags: the response control flags
2913 * Handles the control information of an SGE response, such as GTS
2914 * indications and completion credits for the queue set's Tx queues.
2915 * HW coalesces credits; we don't do any extra SW coalescing.
2917 static __inline void
2918 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2920 unsigned int credits;
2923 if (flags & F_RSPD_TXQ0_GTS)
2924 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2926 credits = G_RSPD_TXQ0_CR(flags);
2928 qs->txq[TXQ_ETH].processed += credits;
2930 credits = G_RSPD_TXQ2_CR(flags);
2932 qs->txq[TXQ_CTRL].processed += credits;
2935 if (flags & F_RSPD_TXQ1_GTS)
2936 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2938 credits = G_RSPD_TXQ1_CR(flags);
2940 qs->txq[TXQ_OFLD].processed += credits;
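/*
 * Field mapping illustration (no new behaviour): TXQ0 credits/GTS refer to
 * the Ethernet queue, TXQ1 to the offload queue, and TXQ2 to the control
 * queue. For example, a response with G_RSPD_TXQ0_CR(flags) == 8 returns 8
 * completed descriptors to txq[TXQ_ETH], while a set GTS bit is accumulated
 * by the caller and later handled in check_ring_db().
 */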
2945 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2946 unsigned int sleeping)
2952 * process_responses - process responses from an SGE response queue
2953 * @adap: the adapter
2954 * @qs: the queue set to which the response queue belongs
2955 * @budget: how many responses can be processed in this round
2957 * Process responses from an SGE response queue up to the supplied budget.
2958 * Responses include received packets as well as credits and other events
2959 * for the queues that belong to the response queue's queue set.
2960 * A negative budget is effectively unlimited.
2962 * Additionally choose the interrupt holdoff time for the next interrupt
2963 * on this queue. If the system is short of memory, use a fairly long
2964 * delay to help recovery.
2967 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2969 struct sge_rspq *rspq = &qs->rspq;
2970 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2971 int budget_left = budget;
2972 unsigned int sleeping = 0;
2973 #ifdef LRO_SUPPORTED
2974 int lro_enabled = qs->lro.enabled;
2976 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2978 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2981 static int last_holdoff = 0;
2982 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2983 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2984 last_holdoff = rspq->holdoff_tmr;
2987 rspq->next_holdoff = rspq->holdoff_tmr;
2989 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2990 int eth, eop = 0, ethpad = 0;
2991 uint32_t flags = ntohl(r->flags);
2992 uint32_t rss_csum = *(const uint32_t *)r;
2993 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2995 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2997 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
3001 printf("async notification\n");
3003 if (rspq->rspq_mh.mh_head == NULL) {
3004 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
3005 m = rspq->rspq_mh.mh_head;
3007 m = m_gethdr(M_DONTWAIT, MT_DATA);
3012 memcpy(mtod(m, char *), r, AN_PKT_SIZE);
3013 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
3014 *mtod(m, char *) = CPL_ASYNC_NOTIF;
3015 rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
3017 rspq->async_notif++;
3019 } else if (flags & F_RSPD_IMM_DATA_VALID) {
3020 struct mbuf *m = NULL;
3022 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
3023 r->rss_hdr.opcode, rspq->cidx);
3024 if (rspq->rspq_mh.mh_head == NULL)
3025 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
3027 m = m_gethdr(M_DONTWAIT, MT_DATA);
3029 if (rspq->rspq_mh.mh_head == NULL && m == NULL) {
3031 rspq->next_holdoff = NOMEM_INTR_DELAY;
3035 get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
3038 } else if (r->len_cq) {
3039 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
3041 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
3043 rspq->rspq_mh.mh_head->m_flags |= M_FLOWID;
3044 rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash;
3052 if (flags & RSPD_CTRL_MASK) {
3053 sleeping |= flags & RSPD_GTS_MASK;
3054 handle_rsp_cntrl_info(qs, flags);
3058 if (__predict_false(++rspq->cidx == rspq->size)) {
3064 if (++rspq->credits >= (rspq->size / 4)) {
3065 refill_rspq(adap, rspq, rspq->credits);
3069 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
3073 m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
3076 ngathered = rx_offload(&adap->tdev, rspq,
3077 rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
3078 rspq->rspq_mh.mh_head = NULL;
3079 DPRINTF("received offload packet\n");
3081 } else if (eth && eop) {
3082 struct mbuf *m = rspq->rspq_mh.mh_head;
3084 t3_rx_eth(adap, rspq, m, ethpad);
3086 #ifdef LRO_SUPPORTED
3088 * The T304 sends incoming packets on any qset. If LRO
3089 * is also enabled, we could end up sending the packet up
3090 * lro_ctrl->ifp's input path. That would be incorrect.
3092 * The mbuf's rcvif was derived from the cpl header and
3093 * is accurate. Skip LRO and just use that.
3095 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
3097 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro &&
3098 (tcp_lro_rx(lro_ctrl, m, 0) == 0)) {
3099 /* successfully queued for LRO */
3104 * LRO not enabled, packet unsuitable for LRO,
3105 * or unable to queue. Pass it up right now in
3108 struct ifnet *ifp = m->m_pkthdr.rcvif;
3109 (*ifp->if_input)(ifp, m);
3111 rspq->rspq_mh.mh_head = NULL;
3114 __refill_fl_lt(adap, &qs->fl[0], 32);
3115 __refill_fl_lt(adap, &qs->fl[1], 32);
3119 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
3121 #ifdef LRO_SUPPORTED
3123 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
3124 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
3125 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
3126 tcp_lro_flush(lro_ctrl, queued);
3131 check_ring_db(adap, qs, sleeping);
3133 mb(); /* commit Tx queue processed updates */
3134 if (__predict_false(qs->txq_stopped > 1))
3137 __refill_fl_lt(adap, &qs->fl[0], 512);
3138 __refill_fl_lt(adap, &qs->fl[1], 512);
3139 budget -= budget_left;
3144 * A helper function that processes responses and issues GTS.
3147 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
3150 static int last_holdoff = 0;
3152 work = process_responses(adap, rspq_to_qset(rq), -1);
3154 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3155 printf("next_holdoff=%d\n", rq->next_holdoff);
3156 last_holdoff = rq->next_holdoff;
3158 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3159 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3166 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3167 * Handles data events from SGE response queues as well as error and other
3168 * async events as they all use the same interrupt pin. We use one SGE
3169 * response queue per port in this mode and protect all response queues with
3170 * queue 0's lock.
3173 t3b_intr(void *data)
3176 adapter_t *adap = data;
3177 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3179 t3_write_reg(adap, A_PL_CLI, 0);
3180 map = t3_read_reg(adap, A_SG_DATA_INTR);
3185 if (__predict_false(map & F_ERRINTR))
3186 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3188 mtx_lock(&q0->lock);
3189 for_each_port(adap, i)
3191 process_responses_gts(adap, &adap->sge.qs[i].rspq);
3192 mtx_unlock(&q0->lock);
3196 * The MSI interrupt handler. This needs to handle data events from SGE
3197 * response queues as well as error and other async events as they all use
3198 * the same MSI vector. We use one SGE response queue per port in this mode
3199 * and protect all response queues with queue 0's lock.
3202 t3_intr_msi(void *data)
3204 adapter_t *adap = data;
3205 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3206 int i, new_packets = 0;
3208 mtx_lock(&q0->lock);
3210 for_each_port(adap, i)
3211 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3213 mtx_unlock(&q0->lock);
3214 if (new_packets == 0)
3215 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3219 t3_intr_msix(void *data)
3221 struct sge_qset *qs = data;
3222 adapter_t *adap = qs->port->adapter;
3223 struct sge_rspq *rspq = &qs->rspq;
3225 if (process_responses_gts(adap, rspq) == 0)
3226 rspq->unhandled_irqs++;
3229 #define QDUMP_SBUF_SIZE (32 * 400)
3231 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3233 struct sge_rspq *rspq;
3234 struct sge_qset *qs;
3235 int i, err, dump_end, idx;
3236 static int multiplier = 1;
3238 struct rsp_desc *rspd;
3242 qs = rspq_to_qset(rspq);
3243 if (rspq->rspq_dump_count == 0)
3245 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3247 "dump count is too large %d\n", rspq->rspq_dump_count);
3248 rspq->rspq_dump_count = 0;
3251 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3253 "dump start of %d is greater than queue size\n",
3254 rspq->rspq_dump_start);
3255 rspq->rspq_dump_start = 0;
3258 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3262 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3264 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3265 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3266 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3267 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3268 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3270 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3271 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3273 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3274 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3275 idx = i & (RSPQ_Q_SIZE-1);
3277 rspd = &rspq->desc[idx];
3278 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3279 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3280 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3281 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3282 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3283 be32toh(rspd->len_cq), rspd->intr_gen);
3285 if (sbuf_overflowed(sb)) {
3291 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
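/*
 * Example use of this handler from userland (sketch only; the exact sysctl
 * path depends on how the controller attached, e.g. dev.cxgbc.0):
 *
 *	# sysctl dev.cxgbc.0.port0.qs0.rspq.dump_start=0
 *	# sysctl dev.cxgbc.0.port0.qs0.rspq.dump_count=32
 *	# sysctl dev.cxgbc.0.port0.qs0.rspq.qdump
 *
 * dump_start/dump_count select the window of descriptors and qdump returns
 * the formatted contents built above.
 */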
3297 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3299 struct sge_txq *txq;
3300 struct sge_qset *qs;
3301 int i, j, err, dump_end;
3302 static int multiplier = 1;
3304 struct tx_desc *txd;
3305 uint32_t *WR, wr_hi, wr_lo, gen;
3309 qs = txq_to_qset(txq, TXQ_ETH);
3310 if (txq->txq_dump_count == 0) {
3313 if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3315 "dump count is too large %d\n", txq->txq_dump_count);
3316 txq->txq_dump_count = 1;
3319 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3321 "dump start of %d is greater than queue size\n",
3322 txq->txq_dump_start);
3323 txq->txq_dump_start = 0;
3326 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3332 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3334 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3335 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3336 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3337 sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3338 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3339 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3340 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3341 txq->txq_dump_start,
3342 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3344 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3345 for (i = txq->txq_dump_start; i < dump_end; i++) {
3346 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3347 WR = (uint32_t *)txd->flit;
3348 wr_hi = ntohl(WR[0]);
3349 wr_lo = ntohl(WR[1]);
3350 gen = G_WR_GEN(wr_lo);
3352 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3354 for (j = 2; j < 30; j += 4)
3355 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3356 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3359 if (sbuf_overflowed(sb)) {
3365 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3371 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3373 struct sge_txq *txq;
3374 struct sge_qset *qs;
3375 int i, j, err, dump_end;
3376 static int multiplier = 1;
3378 struct tx_desc *txd;
3379 uint32_t *WR, wr_hi, wr_lo, gen;
3382 qs = txq_to_qset(txq, TXQ_CTRL);
3383 if (txq->txq_dump_count == 0) {
3386 if (txq->txq_dump_count > 256) {
3388 "dump count is too large %d\n", txq->txq_dump_count);
3389 txq->txq_dump_count = 1;
3392 if (txq->txq_dump_start > 255) {
3394 "dump start of %d is greater than queue size\n",
3395 txq->txq_dump_start);
3396 txq->txq_dump_start = 0;
3401 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3402 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3403 txq->txq_dump_start,
3404 (txq->txq_dump_start + txq->txq_dump_count) & 255);
3406 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3407 for (i = txq->txq_dump_start; i < dump_end; i++) {
3408 txd = &txq->desc[i & (255)];
3409 WR = (uint32_t *)txd->flit;
3410 wr_hi = ntohl(WR[0]);
3411 wr_lo = ntohl(WR[1]);
3412 gen = G_WR_GEN(wr_lo);
3414 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3416 for (j = 2; j < 30; j += 4)
3417 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3418 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3421 if (sbuf_overflowed(sb)) {
3427 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3433 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3435 adapter_t *sc = arg1;
3436 struct qset_params *qsp = &sc->params.sge.qset[0];
3438 struct sge_qset *qs;
3439 int i, j, err, nqsets = 0;
3442 if ((sc->flags & FULL_INIT_DONE) == 0)
3445 coalesce_usecs = qsp->coalesce_usecs;
3446 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3451 if (coalesce_usecs == qsp->coalesce_usecs)
3454 for (i = 0; i < sc->params.nports; i++)
3455 for (j = 0; j < sc->port[i].nqsets; j++)
3458 coalesce_usecs = max(1, coalesce_usecs);
3460 for (i = 0; i < nqsets; i++) {
3461 qs = &sc->sge.qs[i];
3462 qsp = &sc->params.sge.qset[i];
3463 qsp->coalesce_usecs = coalesce_usecs;
3465 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3466 &sc->sge.qs[0].rspq.lock;
3469 t3_update_qset_coalesce(qs, qsp);
3470 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3471 V_NEWTIMER(qs->rspq.holdoff_tmr));
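/*
 * Example of driving the handler above through its sysctl node (sketch only;
 * the node name is whatever t3_add_configured_sysctls() registers, assumed
 * here to be "intr_coal", and the device path is assumed to be dev.cxgbc.0):
 *
 *	# sysctl dev.cxgbc.0.intr_coal=10
 *
 * which reprograms the holdoff timer of every configured response queue to
 * roughly 10 us.
 */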
3480 t3_add_attach_sysctls(adapter_t *sc)
3482 struct sysctl_ctx_list *ctx;
3483 struct sysctl_oid_list *children;
3485 ctx = device_get_sysctl_ctx(sc->dev);
3486 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3488 /* random information */
3489 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3491 CTLFLAG_RD, &sc->fw_version,
3492 0, "firmware version");
3493 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3495 CTLFLAG_RD, &sc->params.rev,
3497 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3499 CTLFLAG_RD, &sc->port_types,
3500 0, "type of ports");
3501 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3503 CTLFLAG_RW, &cxgb_debug,
3504 0, "enable verbose debugging output");
3505 SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3506 CTLFLAG_RD, &sc->tunq_coalesce,
3507 "#tunneled packets freed");
3508 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3510 CTLFLAG_RD, &txq_fills,
3511 0, "#times txq overrun");
3515 static const char *rspq_name = "rspq";
3516 static const char *txq_names[] =
3524 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3526 struct port_info *p = arg1;
3532 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3534 t3_mac_update_stats(&p->mac);
3537 return (sysctl_handle_quad(oidp, parg, 0, req));
3541 t3_add_configured_sysctls(adapter_t *sc)
3543 struct sysctl_ctx_list *ctx;
3544 struct sysctl_oid_list *children;
3547 ctx = device_get_sysctl_ctx(sc->dev);
3548 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3552 CTLTYPE_INT|CTLFLAG_RW, sc,
3553 0, t3_set_coalesce_usecs,
3554 "I", "interrupt coalescing timer (us)");
3556 for (i = 0; i < sc->params.nports; i++) {
3557 struct port_info *pi = &sc->port[i];
3558 struct sysctl_oid *poid;
3559 struct sysctl_oid_list *poidlist;
3560 struct mac_stats *mstats = &pi->mac.stats;
3562 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3563 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3564 pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3565 poidlist = SYSCTL_CHILDREN(poid);
3566 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3567 "nqsets", CTLFLAG_RD, &pi->nqsets,
3570 for (j = 0; j < pi->nqsets; j++) {
3571 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3572 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3573 *ctrlqpoid, *lropoid;
3574 struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3575 *txqpoidlist, *ctrlqpoidlist,
3577 struct sge_txq *txq = &qs->txq[TXQ_ETH];
3579 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3581 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3582 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3583 qspoidlist = SYSCTL_CHILDREN(qspoid);
3585 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3586 CTLFLAG_RD, &qs->fl[0].empty, 0,
3587 "freelist #0 empty");
3588 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3589 CTLFLAG_RD, &qs->fl[1].empty, 0,
3590 "freelist #1 empty");
3592 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3593 rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3594 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3596 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3597 txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3598 txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3600 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3601 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3602 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3604 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3605 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3606 lropoidlist = SYSCTL_CHILDREN(lropoid);
3608 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3609 CTLFLAG_RD, &qs->rspq.size,
3610 0, "#entries in response queue");
3611 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3612 CTLFLAG_RD, &qs->rspq.cidx,
3613 0, "consumer index");
3614 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3615 CTLFLAG_RD, &qs->rspq.credits,
3617 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3618 CTLFLAG_RD, &qs->rspq.phys_addr,
3619 "physical address of the queue");
3620 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3621 CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3622 0, "start rspq dump entry");
3623 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3624 CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3625 0, "#rspq entries to dump");
3626 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3627 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3628 0, t3_dump_rspq, "A", "dump of the response queue");
3631 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3632 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3633 0, "#tunneled packets dropped");
3634 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3635 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3636 0, "#tunneled packets waiting to be sent");
3638 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3639 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3640 0, "#tunneled packets queue producer index");
3641 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3642 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3643 0, "#tunneled packets queue consumer index");
3645 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3646 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3647 0, "#tunneled packets processed by the card");
3648 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3649 CTLFLAG_RD, &txq->cleaned,
3650 0, "#tunneled packets cleaned");
3651 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3652 CTLFLAG_RD, &txq->in_use,
3653 0, "#tunneled packet slots in use");
3654 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3655 CTLFLAG_RD, &txq->txq_frees,
3656 "#tunneled packets freed");
3657 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3658 CTLFLAG_RD, &txq->txq_skipped,
3659 0, "#tunneled packet descriptors skipped");
3660 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3661 CTLFLAG_RD, &txq->txq_coalesced,
3662 "#tunneled packets coalesced");
3663 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3664 CTLFLAG_RD, &txq->txq_enqueued,
3665 0, "#tunneled packets enqueued to hardware");
3666 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3667 CTLFLAG_RD, &qs->txq_stopped,
3668 0, "tx queues stopped");
3669 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3670 CTLFLAG_RD, &txq->phys_addr,
3671 "physical address of the queue");
3672 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3673 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3674 0, "txq generation");
3675 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3676 CTLFLAG_RD, &txq->cidx,
3677 0, "hardware queue cidx");
3678 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3679 CTLFLAG_RD, &txq->pidx,
3680 0, "hardware queue pidx");
3681 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3682 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3683 0, "txq start idx for dump");
3684 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3685 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3686 0, "txq #entries to dump");
3687 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3688 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3689 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3691 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3692 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3693 0, "ctrlq start idx for dump");
3694 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3695 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3696 0, "ctrl #entries to dump");
3697 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3698 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3699 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3701 #ifdef LRO_SUPPORTED
3702 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3703 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3704 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3705 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3706 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3707 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3708 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3709 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3713 /* Now add a node for mac stats. */
3714 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3715 CTLFLAG_RD, NULL, "MAC statistics");
3716 poidlist = SYSCTL_CHILDREN(poid);
3719 * We (ab)use the length argument (arg2) to pass on the offset
3720 * of the data that we are interested in. This is only required
3721 * for the quad counters that are updated from the hardware (we
3722 * make sure that we return the latest value).
3723 * sysctl_handle_macstat first updates *all* the counters from
3724 * the hardware, and then returns the latest value of the
3725 * requested counter. Best would be to update only the
3726 * requested counter from hardware, but t3_mac_update_stats()
3727 * hides all the register details and we don't want to dive into
3730 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3731 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3732 sysctl_handle_macstat, "QU", 0)
3733 CXGB_SYSCTL_ADD_QUAD(tx_octets);
3734 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3735 CXGB_SYSCTL_ADD_QUAD(tx_frames);
3736 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3737 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3738 CXGB_SYSCTL_ADD_QUAD(tx_pause);
3739 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3740 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3741 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3742 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3743 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3744 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3745 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3746 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3747 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3748 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3749 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3750 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3751 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3752 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3753 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3754 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3755 CXGB_SYSCTL_ADD_QUAD(rx_octets);
3756 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3757 CXGB_SYSCTL_ADD_QUAD(rx_frames);
3758 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3759 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3760 CXGB_SYSCTL_ADD_QUAD(rx_pause);
3761 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3762 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3763 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3764 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3765 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3766 CXGB_SYSCTL_ADD_QUAD(rx_runt);
3767 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3768 CXGB_SYSCTL_ADD_QUAD(rx_short);
3769 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3770 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3771 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3772 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3773 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3774 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3775 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3776 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3777 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3778 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3779 #undef CXGB_SYSCTL_ADD_QUAD
3781 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3782 CTLFLAG_RD, &mstats->a, 0)
3783 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3784 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3785 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3786 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3787 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3788 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3789 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3790 CXGB_SYSCTL_ADD_ULONG(num_toggled);
3791 CXGB_SYSCTL_ADD_ULONG(num_resets);
3792 CXGB_SYSCTL_ADD_ULONG(link_faults);
3793 #undef CXGB_SYSCTL_ADD_ULONG
3798 * t3_get_desc - dump an SGE descriptor for debugging purposes
3799 * @qs: the queue set
3800 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3801 * @idx: the descriptor index in the queue
3802 * @data: where to dump the descriptor contents
3804 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3805 * size of the descriptor.
3808 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3809 unsigned char *data)
3815 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3817 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3818 return sizeof(struct tx_desc);
3822 if (!qs->rspq.desc || idx >= qs->rspq.size)
3824 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3825 return sizeof(struct rsp_desc);
3829 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3831 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3832 return sizeof(struct rx_desc);
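/*
 * Example call (sketch only): dump the fifth descriptor of qset 0's offload
 * Tx queue into a caller-supplied buffer sized for that descriptor type:
 *
 *	unsigned char buf[sizeof(struct tx_desc)];
 *	int len = t3_get_desc(&sc->sge.qs[0], TXQ_OFLD, 5, buf);
 */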