1 /**************************************************************************
3 Copyright (c) 2007-2009, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
51 #include <sys/sched.h>
53 #include <sys/systm.h>
54 #include <sys/syslog.h>
55 #include <sys/socket.h>
58 #include <net/ethernet.h>
60 #include <net/if_vlan_var.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/in.h>
64 #include <netinet/ip.h>
65 #include <netinet/tcp.h>
67 #include <dev/pci/pcireg.h>
68 #include <dev/pci/pcivar.h>
73 #include <cxgb_include.h>
77 int multiq_tx_enable = 1;
79 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
80 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
81 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
82 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
83 "size of per-queue mbuf ring");
85 static int cxgb_tx_coalesce_force = 0;
86 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
87 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
88 &cxgb_tx_coalesce_force, 0,
89 "coalesce small packets into a single work request regardless of ring state");
/*
 * Tx coalescing and reclaim thresholds, each expressed as a fraction of the
 * Ethernet Tx queue size (TX_ETH_Q_SIZE).
 *
 * Every replacement list is fully parenthesized: ">>" binds more loosely
 * than "+", "-" and "*", so an unparenthesized "TX_ETH_Q_SIZE>>1" would be
 * mis-evaluated as soon as the macro is used inside a larger expression.
 */
#define COALESCE_START_DEFAULT	(TX_ETH_Q_SIZE >> 1)
#define COALESCE_START_MAX	(TX_ETH_Q_SIZE - (TX_ETH_Q_SIZE >> 3))
#define COALESCE_STOP_DEFAULT	(TX_ETH_Q_SIZE >> 2)
#define COALESCE_STOP_MIN	(TX_ETH_Q_SIZE >> 5)
#define TX_RECLAIM_DEFAULT	(TX_ETH_Q_SIZE >> 5)
#define TX_RECLAIM_MAX		(TX_ETH_Q_SIZE >> 2)
#define TX_RECLAIM_MIN		(TX_ETH_Q_SIZE >> 6)
100 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
102 &cxgb_tx_coalesce_enable_start);
103 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
104 &cxgb_tx_coalesce_enable_start, 0,
105 "coalesce enable threshold");
106 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
107 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
108 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
109 &cxgb_tx_coalesce_enable_stop, 0,
110 "coalesce disable threshold");
111 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
112 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
113 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
114 &cxgb_tx_reclaim_threshold, 0,
115 "tx cleaning minimum threshold");
118 * XXX don't re-enable this until TOE stops assuming
121 static int recycle_enable = 0;
123 extern int cxgb_use_16k_clusters;
124 extern int nmbjumbop;
125 extern int nmbjumbo9;
126 extern int nmbjumbo16;
130 #define SGE_RX_SM_BUF_SIZE 1536
131 #define SGE_RX_DROP_THRES 16
132 #define SGE_RX_COPY_THRES 128
135 * Period of the Tx buffer reclaim timer. This timer does not need to run
136 * frequently as Tx buffers are usually reclaimed by new Tx packets.
138 #define TX_RECLAIM_PERIOD (hz >> 1)
141 * Values for sge_txq.flags
144 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
145 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
149 uint64_t flit[TX_DESC_FLITS];
159 struct rsp_desc { /* response queue descriptor */
160 struct rss_header rss_hdr;
163 uint8_t imm_data[47];
167 #define RX_SW_DESC_MAP_CREATED (1 << 0)
168 #define TX_SW_DESC_MAP_CREATED (1 << 1)
169 #define RX_SW_DESC_INUSE (1 << 3)
170 #define TX_SW_DESC_MAPPED (1 << 4)
172 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
173 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
174 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
175 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
177 struct tx_sw_desc { /* SW state per Tx descriptor */
183 struct rx_sw_desc { /* SW state per Rx descriptor */
196 struct refill_fl_cb_arg {
198 bus_dma_segment_t seg;
204 * Maps a number of flits to the number of Tx descriptors that can hold them.
207 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
209 * HW allows up to 4 descriptors to be combined into a WR.
211 static uint8_t flit_desc_map[] = {
213 #if SGE_NUM_GENBITS == 1
214 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
215 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
216 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
217 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
218 #elif SGE_NUM_GENBITS == 2
219 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
220 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
221 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
222 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
224 # error "SGE_NUM_GENBITS must be 1 or 2"
228 #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED)
229 #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock)
230 #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock)
231 #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock)
232 #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
233 #define TXQ_RING_NEEDS_ENQUEUE(qs) \
234 drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
235 #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
236 #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \
237 drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
238 #define TXQ_RING_DEQUEUE(qs) \
239 drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
243 static void sge_timer_cb(void *arg);
244 static void sge_timer_reclaim(void *arg, int ncount);
245 static void sge_txq_reclaim_handler(void *arg, int ncount);
246 static void cxgb_start_locked(struct sge_qset *qs);
249 * XXX need to cope with bursty scheduling by looking at a wider
250 * window than we are now for determining the need for coalescing
253 static __inline uint64_t
254 check_pkt_coalesce(struct sge_qset *qs)
260 if (__predict_false(cxgb_tx_coalesce_force))
262 txq = &qs->txq[TXQ_ETH];
263 sc = qs->port->adapter;
264 fill = &sc->tunq_fill[qs->idx];
266 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
267 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
268 if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
269 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN;
271 * if the hardware transmit queue is more than 1/8 full
272 * we mark it as coalescing - we drop back from coalescing
273 * when we go below 1/32 full and there are no packets enqueued,
274 * this provides us with some degree of hysteresis
276 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
277 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
279 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
282 return (sc->tunq_coalesce);
287 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
290 #if _BYTE_ORDER == _LITTLE_ENDIAN
292 wr_hilo |= (((uint64_t)wr_lo)<<32);
295 wr_hilo |= (((uint64_t)wr_hi)<<32);
297 wrp->wrh_hilo = wr_hilo;
301 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
310 struct coalesce_info {
316 coalesce_check(struct mbuf *m, void *arg)
318 struct coalesce_info *ci = arg;
319 int *count = &ci->count;
320 int *nbytes = &ci->nbytes;
322 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
323 (*count < 7) && (m->m_next == NULL))) {
332 cxgb_dequeue(struct sge_qset *qs)
334 struct mbuf *m, *m_head, *m_tail;
335 struct coalesce_info ci;
338 if (check_pkt_coalesce(qs) == 0)
339 return TXQ_RING_DEQUEUE(qs);
341 m_head = m_tail = NULL;
342 ci.count = ci.nbytes = 0;
344 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
345 if (m_head == NULL) {
347 } else if (m != NULL) {
348 m_tail->m_nextpkt = m;
353 panic("trying to coalesce %d packets in to one WR", ci.count);
358 * reclaim_completed_tx - reclaims completed Tx descriptors
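 *	@qs: the queue set that owns the Tx queue
 *	@reclaim_min: threshold that the number of reclaimable descriptors is compared against
 *	@queue: index (into the queue set's txq array) of the Tx queue to reclaim completed descriptors from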
362 * Reclaims Tx descriptors that the SGE has indicated it has processed,
363 * and frees the associated buffers if possible. Called with the Tx
367 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
369 struct sge_txq *q = &qs->txq[queue];
370 int reclaim = desc_reclaimable(q);
372 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
373 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
374 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
376 if (reclaim < reclaim_min)
379 mtx_assert(&qs->lock, MA_OWNED);
381 t3_free_tx_desc(qs, reclaim, queue);
382 q->cleaned += reclaim;
383 q->in_use -= reclaim;
385 if (isset(&qs->txq_stopped, TXQ_ETH))
386 clrbit(&qs->txq_stopped, TXQ_ETH);
392 * should_restart_tx - are there enough resources to restart a Tx queue?
395 * Checks if there are enough descriptors to restart a suspended Tx queue.
398 should_restart_tx(const struct sge_txq *q)
400 unsigned int r = q->processed - q->cleaned;
402 return q->in_use - r < (q->size >> 1);
406 * t3_sge_init - initialize SGE
408 * @p: the SGE parameters
410 * Performs SGE initialization needed every time after a chip reset.
411 * We do not initialize any of the queue sets here, instead the driver
412 * top-level must request those individually. We also do not enable DMA
413 * here, that should be done after the queues have been set up.
416 t3_sge_init(adapter_t *adap, struct sge_params *p)
420 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
422 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
423 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
424 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
425 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
426 #if SGE_NUM_GENBITS == 1
427 ctrl |= F_EGRGENCTRL;
429 if (adap->params.rev > 0) {
430 if (!(adap->flags & (USING_MSIX | USING_MSI)))
431 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
433 t3_write_reg(adap, A_SG_CONTROL, ctrl);
434 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
435 V_LORCQDRBTHRSH(512));
436 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
437 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
438 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
439 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
440 adap->params.rev < T3_REV_C ? 1000 : 500);
441 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
442 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
443 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
444 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
445 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits needed for a scatter/gather list that
 *	can hold the given number of entries: every pair of entries takes
 *	three flits, and a trailing unpaired entry takes two.
 *
 *	Returns the SGL size in flits (0 for an empty list).
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	return ((3 * n) / 2 + (n & 1));
}
463 * get_imm_packet - return the next ingress packet buffer from a response
464 * @resp: the response descriptor containing the packet data
466 * Return a packet containing the immediate data of the given response.
469 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
472 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
473 m->m_ext.ext_buf = NULL;
474 m->m_ext.ext_type = 0;
475 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
479 static __inline u_int
480 flits_to_desc(u_int n)
482 return (flit_desc_map[n]);
485 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
486 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
487 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
488 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
490 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
491 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
495 * t3_sge_err_intr_handler - SGE async event interrupt handler
496 * @adapter: the adapter
498 * Interrupt handler for SGE asynchronous (non-data) events.
501 t3_sge_err_intr_handler(adapter_t *adapter)
503 unsigned int v, status;
505 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
506 if (status & SGE_PARERR)
507 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
508 status & SGE_PARERR);
509 if (status & SGE_FRAMINGERR)
510 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
511 status & SGE_FRAMINGERR);
512 if (status & F_RSPQCREDITOVERFOW)
513 CH_ALERT(adapter, "SGE response queue credit overflow\n");
515 if (status & F_RSPQDISABLED) {
516 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
519 "packet delivered to disabled response queue (0x%x)\n",
520 (v >> S_RSPQ0DISABLED) & 0xff);
523 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
524 if (status & SGE_FATALERR)
525 t3_fatal_err(adapter);
529 t3_sge_prep(adapter_t *adap, struct sge_params *p)
531 int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
533 nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
534 nqsets *= adap->params.nports;
536 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
538 while (!powerof2(fl_q_size))
541 use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
544 #if __FreeBSD_version >= 700111
546 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
547 jumbo_buf_size = MJUM16BYTES;
549 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
550 jumbo_buf_size = MJUM9BYTES;
553 jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
554 jumbo_buf_size = MJUMPAGESIZE;
556 while (!powerof2(jumbo_q_size))
559 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
560 device_printf(adap->dev,
561 "Insufficient clusters and/or jumbo buffers.\n");
563 p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
565 for (i = 0; i < SGE_QSETS; ++i) {
566 struct qset_params *q = p->qset + i;
568 if (adap->params.nports > 2) {
569 q->coalesce_usecs = 50;
572 q->coalesce_usecs = 10;
574 q->coalesce_usecs = 5;
578 q->rspq_size = RSPQ_Q_SIZE;
579 q->fl_size = fl_q_size;
580 q->jumbo_size = jumbo_q_size;
581 q->jumbo_buf_size = jumbo_buf_size;
582 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
583 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
584 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
590 t3_sge_alloc(adapter_t *sc)
593 /* The parent tag. */
594 if (bus_dma_tag_create( NULL, /* parent */
595 1, 0, /* algnmnt, boundary */
596 BUS_SPACE_MAXADDR, /* lowaddr */
597 BUS_SPACE_MAXADDR, /* highaddr */
598 NULL, NULL, /* filter, filterarg */
599 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
600 BUS_SPACE_UNRESTRICTED, /* nsegments */
601 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
603 NULL, NULL, /* lock, lockarg */
605 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
610 * DMA tag for normal sized RX frames
612 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
613 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
614 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
615 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
620 * DMA tag for jumbo sized RX frames.
622 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
623 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
624 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
625 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
630 * DMA tag for TX frames.
632 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
633 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
634 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
635 NULL, NULL, &sc->tx_dmat)) {
636 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
644 t3_sge_free(struct adapter * sc)
647 if (sc->tx_dmat != NULL)
648 bus_dma_tag_destroy(sc->tx_dmat);
650 if (sc->rx_jumbo_dmat != NULL)
651 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
653 if (sc->rx_dmat != NULL)
654 bus_dma_tag_destroy(sc->rx_dmat);
656 if (sc->parent_dmat != NULL)
657 bus_dma_tag_destroy(sc->parent_dmat);
663 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
666 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
667 qs->rspq.polling = 0 /* p->polling */;
670 #if !defined(__i386__) && !defined(__amd64__)
672 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
674 struct refill_fl_cb_arg *cb_arg = arg;
676 cb_arg->error = error;
677 cb_arg->seg = segs[0];
683 * refill_fl - refill an SGE free-buffer list
684 * @sc: the controller softc
685 * @q: the free-list to refill
686 * @n: the number of new buffers to allocate
688 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
689 * The caller must assure that @n does not exceed the queue's capacity.
692 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
694 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
695 struct rx_desc *d = &q->desc[q->pidx];
696 struct refill_fl_cb_arg cb_arg;
704 * We only allocate a cluster, mbuf allocation happens after rx
706 if (q->zone == zone_pack) {
707 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
709 cl = m->m_ext.ext_buf;
711 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
713 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
714 uma_zfree(q->zone, cl);
718 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
719 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
720 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
721 uma_zfree(q->zone, cl);
724 sd->flags |= RX_SW_DESC_MAP_CREATED;
726 #if !defined(__i386__) && !defined(__amd64__)
727 err = bus_dmamap_load(q->entry_tag, sd->map,
728 cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
730 if (err != 0 || cb_arg.error) {
731 if (q->zone == zone_pack)
732 uma_zfree(q->zone, cl);
737 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
739 sd->flags |= RX_SW_DESC_INUSE;
742 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
743 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
744 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
745 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
750 if (++q->pidx == q->size) {
761 if (q->db_pending >= 32) {
763 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
769 * free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
771 * @q: the SGE free list to clean up
773 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
774 * this queue should be stopped before calling this function.
777 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
779 u_int cidx = q->cidx;
781 while (q->credits--) {
782 struct rx_sw_desc *d = &q->sdesc[cidx];
784 if (d->flags & RX_SW_DESC_INUSE) {
785 bus_dmamap_unload(q->entry_tag, d->map);
786 bus_dmamap_destroy(q->entry_tag, d->map);
787 if (q->zone == zone_pack) {
788 m_init(d->m, zone_pack, MCLBYTES,
789 M_NOWAIT, MT_DATA, M_EXT);
790 uma_zfree(zone_pack, d->m);
792 m_init(d->m, zone_mbuf, MLEN,
793 M_NOWAIT, MT_DATA, 0);
794 uma_zfree(zone_mbuf, d->m);
795 uma_zfree(q->zone, d->rxsd_cl);
801 if (++cidx == q->size)
807 __refill_fl(adapter_t *adap, struct sge_fl *fl)
809 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
813 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
815 uint32_t reclaimable = fl->size - fl->credits;
818 refill_fl(adap, fl, min(max, reclaimable));
822 * recycle_rx_buf - recycle a receive buffer
823 * @adapter: the adapter
824 * @q: the SGE free list
825 * @idx: index of buffer to recycle
827 * Recycles the specified buffer on the given free list by adding it at
828 * the next available slot on the list.
831 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
833 struct rx_desc *from = &q->desc[idx];
834 struct rx_desc *to = &q->desc[q->pidx];
836 q->sdesc[q->pidx] = q->sdesc[idx];
837 to->addr_lo = from->addr_lo; // already big endian
838 to->addr_hi = from->addr_hi; // likewise
839 wmb(); /* necessary ? */
840 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
841 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
844 if (++q->pidx == q->size) {
848 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
852 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
857 *addr = segs[0].ds_addr;
861 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
862 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
863 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
865 size_t len = nelem * elem_size;
870 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
871 BUS_SPACE_MAXADDR_32BIT,
872 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
873 len, 0, NULL, NULL, tag)) != 0) {
874 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
878 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
880 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
884 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
889 len = nelem * sw_size;
890 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
893 if (parent_entry_tag == NULL)
896 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
897 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
898 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
899 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
900 NULL, NULL, entry_tag)) != 0) {
901 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
908 sge_slow_intr_handler(void *arg, int ncount)
912 t3_slow_intr_handler(sc);
913 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
914 (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
918 * sge_timer_cb - perform periodic maintenance of an SGE qset
919 * @data: the SGE queue set to maintain
921 * Runs periodically from a timer to perform maintenance of an SGE queue
922 * set. It performs two tasks:
924 * a) Cleans up any completed Tx descriptors that may still be pending.
925 * Normal descriptor cleanup happens when new packets are added to a Tx
926 * queue so this timer is relatively infrequent and does any cleanup only
927 * if the Tx queue has not seen any new packets in a while. We make a
928 * best effort attempt to reclaim descriptors, in that we don't wait
929 * around if we cannot get a queue's lock (which most likely is because
930 * someone else is queueing new packets and so will also handle the clean
931 * up). Since control queues use immediate data exclusively we don't
932 * bother cleaning them up here.
934 * b) Replenishes Rx queues that have run out due to memory shortage.
935 * Normally new Rx buffers are added when existing ones are consumed but
936 * when out of memory a queue can become empty. We try to add only a few
937 * buffers here, the queue will be replenished fully as these new buffers
938 * are used up if memory shortage has subsided.
940 * c) Return coalesced response queue credits in case a response queue is
943 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
944 * fifo overflows and the FW doesn't implement any recovery scheme yet.
947 sge_timer_cb(void *arg)
950 if ((sc->flags & USING_MSIX) == 0) {
952 struct port_info *pi;
956 int reclaim_ofl, refill_rx;
958 if (sc->open_device_map == 0)
961 for (i = 0; i < sc->params.nports; i++) {
963 for (j = 0; j < pi->nqsets; j++) {
964 qs = &sc->sge.qs[pi->first_qset + j];
966 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
967 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
968 (qs->fl[1].credits < qs->fl[1].size));
969 if (reclaim_ofl || refill_rx) {
970 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
977 if (sc->params.nports > 2) {
980 for_each_port(sc, i) {
981 struct port_info *pi = &sc->port[i];
983 t3_write_reg(sc, A_SG_KDOORBELL,
985 (FW_TUNNEL_SGEEC_START + pi->first_qset));
988 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
989 sc->open_device_map != 0)
990 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
994 * This is meant to be a catch-all function to keep sge state private
999 t3_sge_init_adapter(adapter_t *sc)
1001 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
1002 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1003 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
1008 t3_sge_reset_adapter(adapter_t *sc)
1010 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1015 t3_sge_init_port(struct port_info *pi)
1017 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
1022 * refill_rspq - replenish an SGE response queue
1023 * @adapter: the adapter
1024 * @q: the response queue to replenish
1025 * @credits: how many new responses to make available
1027 * Replenishes a response queue by making the supplied number of responses
1030 static __inline void
1031 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1034 /* mbufs are allocated on demand when a rspq entry is processed. */
1035 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1036 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1040 sge_txq_reclaim_handler(void *arg, int ncount)
1042 struct sge_qset *qs = arg;
1045 for (i = 0; i < 3; i++)
1046 reclaim_completed_tx(qs, 16, i);
1050 sge_timer_reclaim(void *arg, int ncount)
1052 struct port_info *pi = arg;
1053 int i, nqsets = pi->nqsets;
1054 adapter_t *sc = pi->adapter;
1055 struct sge_qset *qs;
1058 KASSERT((sc->flags & USING_MSIX) == 0,
1059 ("can't call timer reclaim for msi-x"));
1061 for (i = 0; i < nqsets; i++) {
1062 qs = &sc->sge.qs[pi->first_qset + i];
1064 reclaim_completed_tx(qs, 16, TXQ_OFLD);
1065 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
1066 &sc->sge.qs[0].rspq.lock;
1068 if (mtx_trylock(lock)) {
1069 /* XXX currently assume that we are *NOT* polling */
1070 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
1072 if (qs->fl[0].credits < qs->fl[0].size - 16)
1073 __refill_fl(sc, &qs->fl[0]);
1074 if (qs->fl[1].credits < qs->fl[1].size - 16)
1075 __refill_fl(sc, &qs->fl[1]);
1077 if (status & (1 << qs->rspq.cntxt_id)) {
1078 if (qs->rspq.credits) {
1079 refill_rspq(sc, &qs->rspq, 1);
1081 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1082 1 << qs->rspq.cntxt_id);
1091 * init_qset_cntxt - initialize an SGE queue set context info
1092 * @qs: the queue set
1093 * @id: the queue set id
1095 * Initializes the TIDs and context ids for the queues of a queue set.
1098 init_qset_cntxt(struct sge_qset *qs, u_int id)
1101 qs->rspq.cntxt_id = id;
1102 qs->fl[0].cntxt_id = 2 * id;
1103 qs->fl[1].cntxt_id = 2 * id + 1;
1104 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
1105 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
1106 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
1107 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
1108 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
1110 mbufq_init(&qs->txq[TXQ_ETH].sendq);
1111 mbufq_init(&qs->txq[TXQ_OFLD].sendq);
1112 mbufq_init(&qs->txq[TXQ_CTRL].sendq);
1117 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
1119 txq->in_use += ndesc;
1121 * XXX we don't handle stopping of queue
1122 * presumably start handles this when we bump against the end
1124 txqs->gen = txq->gen;
1125 txq->unacked += ndesc;
1126 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
1128 txqs->pidx = txq->pidx;
1131 if (((txqs->pidx > txq->cidx) &&
1132 (txq->pidx < txqs->pidx) &&
1133 (txq->pidx >= txq->cidx)) ||
1134 ((txqs->pidx < txq->cidx) &&
1135 (txq->pidx >= txq-> cidx)) ||
1136 ((txqs->pidx < txq->cidx) &&
1137 (txq->cidx < txqs->pidx)))
1138 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
1139 txqs->pidx, txq->pidx, txq->cidx);
1141 if (txq->pidx >= txq->size) {
1142 txq->pidx -= txq->size;
1149 * calc_tx_descs - calculate the number of Tx descriptors for a packet
1150 * @m: the packet mbufs
1151 * @nsegs: the number of segments
1153 * Returns the number of Tx descriptors needed for the given Ethernet
1154 * packet. Ethernet packets require addition of WR and CPL headers.
1156 static __inline unsigned int
1157 calc_tx_descs(const struct mbuf *m, int nsegs)
1161 if (m->m_pkthdr.len <= PIO_LEN)
1164 flits = sgl_len(nsegs) + 2;
1165 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1168 return flits_to_desc(flits);
1172 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
1173 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
1176 int err, pktlen, pass = 0;
1177 bus_dma_tag_t tag = txq->entry_tag;
1182 pktlen = m0->m_pkthdr.len;
1183 #if defined(__i386__) || defined(__amd64__)
1184 if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
1188 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
1193 if (err == EFBIG && pass == 0) {
1195 /* Too many segments, try to defrag */
1196 m0 = m_defrag(m0, M_DONTWAIT);
1204 } else if (err == ENOMEM) {
1208 printf("map failure err=%d pktlen=%d\n", err, pktlen);
1214 #if !defined(__i386__) && !defined(__amd64__)
1215 bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
1217 txsd->flags |= TX_SW_DESC_MAPPED;
1223 * make_sgl - populate a scatter/gather list for a packet
1224 * @sgp: the SGL to populate
1225 * @segs: the packet dma segments
1226 * @nsegs: the number of segments
 *	Generates a scatter/gather list for the buffers that make up a packet.
 *	Nothing is returned (the function is void); the caller must size the
 *	SGL beforehand.
1232 static __inline void
1233 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1237 for (idx = 0, i = 0; i < nsegs; i++) {
1239 * firmware doesn't like empty segments
1241 if (segs[i].ds_len == 0)
1246 sgp->len[idx] = htobe32(segs[i].ds_len);
1247 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1258 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1259 * @adap: the adapter
1262 * Ring the doorbell if a Tx queue is asleep. There is a natural race,
1263 * where the HW is going to sleep just after we checked, however,
1264 * then the interrupt handler will detect the outstanding TX packet
1265 * and ring the doorbell for us.
1267 * When GTS is disabled we unconditionally ring the doorbell.
1269 static __inline void
1270 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
1273 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1274 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1275 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1277 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1280 t3_write_reg(adap, A_SG_KDOORBELL,
1281 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1284 if (mustring || ++q->db_pending >= 32) {
1285 wmb(); /* write descriptors before telling HW */
1286 t3_write_reg(adap, A_SG_KDOORBELL,
1287 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
/*
 * Store the generation value @gen, converted to big endian, in the last
 * flit of Tx descriptor @d.
 *
 * The store is only compiled in when SGE_NUM_GENBITS is 2; in any other
 * configuration this function is a no-op.
 */
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}
1302 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1303 * @ndesc: number of Tx descriptors spanned by the SGL
1304 * @txd: first Tx descriptor to be written
1305 * @txqs: txq state (generation and producer index)
1306 * @txq: the SGE Tx queue
1308 * @flits: number of flits to the start of the SGL in the first descriptor
1309 * @sgl_flits: the SGL size in flits
1310 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1311 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1313 * Write a work request header and an associated SGL. If the SGL is
1314 * small enough to fit into one Tx descriptor it has already been written
1315 * and we just need to write the WR header. Otherwise we distribute the
1316 * SGL across the number of descriptors it spans.
/*
 * write_wr_hdr_sgl: write the work-request (WR) header for a WR occupying
 * ndesc descriptors starting at txd.
 *  - ndesc == 1: one header carrying both F_WR_SOP and F_WR_EOP is written
 *    through set_wr_hdr(), then wr_gen2() stamps the descriptor.
 *  - otherwise: the SGL, viewed as an array of 64-bit flits, is copied into
 *    the descriptors with memcpy(); txqs->pidx is incremented and compared
 *    against txq->size as descriptors are consumed; F_WR_EOP is OR-ed into
 *    the current header after the copy; and the first descriptor's wrh_lo is
 *    written afterwards using the generation saved in ogen.
 * wr_hi and wr_lo are OR-ed into the headers without conversion, so they
 * must already be big-endian.
 * NOTE(review): the loop construct and several short statements are not
 * visible here.  Writing the first descriptor's wrh_lo after the others is
 * presumably meant to keep the hardware from seeing a partial WR -- TODO
 * confirm.
 */
1319 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1320 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1321 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1324 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1325 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1327 if (__predict_true(ndesc == 1)) {
1328 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1329 V_WR_SGLSFLT(flits)) | wr_hi,
1330 htonl(V_WR_LEN(flits + sgl_flits) |
1331 V_WR_GEN(txqs->gen)) | wr_lo);
1333 wr_gen2(txd, txqs->gen);
1336 unsigned int ogen = txqs->gen;
1337 const uint64_t *fp = (const uint64_t *)sgl;
1338 struct work_request_hdr *wp = wrp;
1340 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1341 V_WR_SGLSFLT(flits)) | wr_hi;
1344 unsigned int avail = WR_FLITS - flits;
1346 if (avail > sgl_flits)
1348 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1357 if (++txqs->pidx == txq->size) {
1365 * when the head of the mbuf chain
1366 * is freed all clusters will be freed
1369 wrp = (struct work_request_hdr *)txd;
1370 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
1371 V_WR_SGLSFLT(1)) | wr_hi;
1372 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
1374 V_WR_GEN(txqs->gen)) | wr_lo;
1375 wr_gen2(txd, txqs->gen);
1378 wrp->wrh_hi |= htonl(F_WR_EOP);
1380 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1381 wr_gen2((struct tx_desc *)wp, ogen);
/*
 * Smallest frame the TSO path accepts: an Ethernet header plus 20 bytes
 * each for the IP and TCP headers.
 */
1385 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
1386 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
/*
 * GET_VTAG: if mbuf m carries M_VLANTAG, OR the VLAN-valid flag and the
 * tag from m_pkthdr.ether_vtag into cntrl.  cntrl is modified in place, so
 * it must be an lvalue.
 * NOTE(review): the macro's wrapper lines are not visible here.
 */
1388 #define GET_VTAG(cntrl, m) \
1390 if ((m)->m_flags & M_VLANTAG) \
1391 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
/*
 * t3_encap: build a Tx work request on the TXQ_ETH queue of qs for the
 * packet(s) referenced by *m.  qs->lock must be held (mtx_assert below).
 *
 * Visible paths:
 *  - m0->m_nextpkt != NULL: the packet chain is mapped with
 *    busdma_map_sg_vec() and sent as one cpl_tx_pkt_batch WR, with one
 *    pkt_entry per DMA segment.
 *  - tso_info != 0: a cpl_tx_pkt_lso header is built and the Ethernet, IP
 *    and TCP headers are inspected to fill in lso_info.
 *  - otherwise: a plain cpl_tx_pkt header is built; IP and L4 checksum
 *    offload are disabled when the matching CSUM_* flags are absent.
 * In the last two paths a packet with mlen <= PIO_LEN is copied into the
 * descriptor with m_copydata(); larger packets get an SGL built by
 * make_sgl() and written out by write_wr_hdr_sgl().
 *
 * NOTE(review): the return statements, the initialisation of m0, pi, sc and
 * sgl, and several short lines are not visible here.  Callers in this file
 * treat a non-zero return value as failure.
 */
1395 t3_encap(struct sge_qset *qs, struct mbuf **m)
1399 struct sge_txq *txq;
1400 struct txq_state txqs;
1401 struct port_info *pi;
1402 unsigned int ndesc, flits, cntrl, mlen;
1403 int err, nsegs, tso_info = 0;
1405 struct work_request_hdr *wrp;
1406 struct tx_sw_desc *txsd;
1407 struct sg_ent *sgp, *sgl;
1408 uint32_t wr_hi, wr_lo, sgl_flits;
1409 bus_dma_segment_t segs[TX_MAX_SEGS];
1411 struct tx_desc *txd;
1415 txq = &qs->txq[TXQ_ETH];
1416 txd = &txq->desc[txq->pidx];
1417 txsd = &txq->sdesc[txq->pidx];
1423 mtx_assert(&qs->lock, MA_OWNED);
1424 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1425 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
/*
 * TSO is only considered for a single (non-batched) packet that spans more
 * than one mbuf and has CSUM_TSO set.
 */
1427 if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
1428 m0->m_pkthdr.csum_flags & (CSUM_TSO))
1429 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1431 if (m0->m_nextpkt != NULL) {
1432 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
1436 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
1437 &m0, segs, &nsegs))) {
1439 printf("failed ... err=%d\n", err);
1442 mlen = m0->m_pkthdr.len;
1443 ndesc = calc_tx_descs(m0, nsegs);
1445 txq_prod(txq, ndesc, &txqs);
1447 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
1450 if (m0->m_nextpkt != NULL) {
1451 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1455 panic("trying to coalesce %d packets in to one WR", nsegs);
1456 txq->txq_coalesced += nsegs;
1457 wrp = (struct work_request_hdr *)txd;
1458 flits = nsegs*2 + 1;
1460 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
1461 struct cpl_tx_pkt_batch_entry *cbe;
1463 uint32_t *hflit = (uint32_t *)&flit;
1464 int cflags = m0->m_pkthdr.csum_flags;
1466 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1467 GET_VTAG(cntrl, m0);
1468 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1469 if (__predict_false(!(cflags & CSUM_IP)))
1470 cntrl |= F_TXPKT_IPCSUM_DIS;
1471 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
1472 cntrl |= F_TXPKT_L4CSUM_DIS;
1474 hflit[0] = htonl(cntrl);
1475 hflit[1] = htonl(segs[i].ds_len | 0x80000000);
1476 flit |= htobe64(1 << 24);
1477 cbe = &cpl_batch->pkt_entry[i];
1478 cbe->cntrl = hflit[0];
1479 cbe->len = hflit[1];
1480 cbe->addr = htobe64(segs[i].ds_addr);
1483 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1484 V_WR_SGLSFLT(flits)) |
1485 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1486 wr_lo = htonl(V_WR_LEN(flits) |
1487 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1488 set_wr_hdr(wrp, wr_hi, wr_lo);
1490 ETHER_BPF_MTAP(pi->ifp, m0);
1491 wr_gen2(txd, txqs.gen);
1492 check_ring_tx_db(sc, txq, 0);
1494 } else if (tso_info) {
1496 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1497 struct ether_header *eh;
1502 GET_VTAG(cntrl, m0);
1503 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1504 hdr->cntrl = htonl(cntrl);
1505 hdr->len = htonl(mlen | 0x80000000);
1507 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
1508 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1509 m0, mlen, m0->m_pkthdr.tso_segsz,
1510 m0->m_pkthdr.csum_flags, m0->m_flags);
1511 panic("tx tso packet too small");
/*
 * NOTE(review): m_pullup() may hand back a different mbuf, and it runs
 * after the DMA mapping above.  Whether *m and the mapping are refreshed
 * afterwards is not visible here -- TODO confirm.
 */
1514 /* Make sure that ether, ip, tcp headers are all in m0 */
1515 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1516 m0 = m_pullup(m0, TCPPKTHDRSIZE);
1517 if (__predict_false(m0 == NULL)) {
1518 /* XXX panic probably an overreaction */
1519 panic("couldn't fit header into mbuf");
/*
 * Locate the IP header: it follows the VLAN header when the frame's
 * ether_type is ETHERTYPE_VLAN, the plain Ethernet header otherwise.
 */
1523 eh = mtod(m0, struct ether_header *);
1524 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1525 eth_type = CPL_ETH_II_VLAN;
1526 ip = (struct ip *)((struct ether_vlan_header *)eh + 1);
1528 eth_type = CPL_ETH_II;
1529 ip = (struct ip *)(eh + 1);
/*
 * NOTE(review): (ip + 1) places the TCP header directly after a
 * sizeof(struct ip) header, i.e. it assumes no IP options, although
 * ip->ip_hl is what gets passed to the hardware below -- TODO confirm.
 */
1531 tcp = (struct tcphdr *)(ip + 1);
1533 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1534 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1535 V_LSO_TCPHDR_WORDS(tcp->th_off);
1536 hdr->lso_info = htonl(tso_info);
1538 if (__predict_false(mlen <= PIO_LEN)) {
1540 * pkt not undersized but fits in PIO_LEN
1541 * Indicates a TSO bug at the higher levels.
1544 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1545 flits = (mlen + 7) / 8 + 3;
1546 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1547 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1548 F_WR_SOP | F_WR_EOP | txqs.compl);
1549 wr_lo = htonl(V_WR_LEN(flits) |
1550 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1551 set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
1553 ETHER_BPF_MTAP(pi->ifp, m0);
1554 wr_gen2(txd, txqs.gen);
1555 check_ring_tx_db(sc, txq, 0);
1561 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1563 GET_VTAG(cntrl, m0);
1564 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1565 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1566 cntrl |= F_TXPKT_IPCSUM_DIS;
1567 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1568 cntrl |= F_TXPKT_L4CSUM_DIS;
1569 cpl->cntrl = htonl(cntrl);
1570 cpl->len = htonl(mlen | 0x80000000);
1572 if (mlen <= PIO_LEN) {
1574 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1575 flits = (mlen + 7) / 8 + 2;
1577 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1578 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1579 F_WR_SOP | F_WR_EOP | txqs.compl);
1580 wr_lo = htonl(V_WR_LEN(flits) |
1581 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1582 set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
1584 ETHER_BPF_MTAP(pi->ifp, m0);
1585 wr_gen2(txd, txqs.gen);
1586 check_ring_tx_db(sc, txq, 0);
/*
 * SGL path: with a single descriptor the SGL is built in place right after
 * the header flits; otherwise it is built in the buffer sgl points at and
 * copied out by write_wr_hdr_sgl().
 */
1592 wrp = (struct work_request_hdr *)txd;
1593 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1594 make_sgl(sgp, segs, nsegs);
1596 sgl_flits = sgl_len(nsegs);
1598 ETHER_BPF_MTAP(pi->ifp, m0);
1600 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
1601 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1602 wr_lo = htonl(V_WR_TID(txq->token));
1603 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
1604 sgl_flits, wr_hi, wr_lo);
1605 check_ring_tx_db(sc, txq, 0);
/*
 * cxgb_tx_watchdog: callout handler for the TXQ_ETH watchdog of the queue
 * set passed in arg.  It tests qs->coalescing together with txq->in_use
 * against the cxgb_tx_coalesce_enable_stop and _start thresholds; then, if
 * the Tx lock can be taken without blocking, it runs cxgb_start_locked()
 * with QS_FLUSHING set in qs_flags.  While the interface is
 * IFF_DRV_RUNNING the callout re-arms itself for hz/4 ticks on the same
 * CPU.
 * NOTE(review): the statements that change qs->coalescing and the unlock
 * are not visible here.
 */
1611 cxgb_tx_watchdog(void *arg)
1613 struct sge_qset *qs = arg;
1614 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1616 if (qs->coalescing != 0 &&
1617 (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
1620 else if (qs->coalescing == 0 &&
1621 (txq->in_use >= cxgb_tx_coalesce_enable_start))
1623 if (TXQ_TRYLOCK(qs)) {
1624 qs->qs_flags |= QS_FLUSHING;
1625 cxgb_start_locked(qs);
1626 qs->qs_flags &= ~QS_FLUSHING;
1629 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
1630 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
1631 qs, txq->txq_watchdog.c_cpu);
/*
 * cxgb_tx_timeout: callout handler for txq_timer.  If the Tx lock can be
 * taken without blocking, it runs cxgb_start_locked() with QS_TIMEOUT set
 * in qs_flags and clears the flag afterwards.
 * NOTE(review): the action taken when coalescing is off and at least one
 * eighth of the ring (txq->size >> 3) is in use is not visible here.
 */
1635 cxgb_tx_timeout(void *arg)
1637 struct sge_qset *qs = arg;
1638 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1640 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
1642 if (TXQ_TRYLOCK(qs)) {
1643 qs->qs_flags |= QS_TIMEOUT;
1644 cxgb_start_locked(qs);
1645 qs->qs_flags &= ~QS_TIMEOUT;
/*
 * cxgb_start_locked: move packets queued for qs into the TXQ_ETH hardware
 * queue.  The Tx lock must be held (TXQ_LOCK_ASSERT).
 * When QS_FLUSHING or QS_TIMEOUT is set, completed descriptors are
 * reclaimed with a threshold of 0 first.  The main loop runs while the
 * ring is non-empty, the interface is IFF_DRV_RUNNING and the link is up:
 * it reclaims completed descriptors, tests whether no more than
 * TX_MAX_DESC descriptors remain free, dequeues a packet and passes it to
 * t3_encap().  Afterwards the doorbell is rung if db_pending is non-zero,
 * and txq_timer is armed for one tick if packets remain, the timer is not
 * already pending and the link is up.
 * NOTE(review): the loop-exit statements are not visible here.
 */
1651 cxgb_start_locked(struct sge_qset *qs)
1653 struct mbuf *m_head = NULL;
1654 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1655 struct port_info *pi = qs->port;
1656 struct ifnet *ifp = pi->ifp;
1658 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
1659 reclaim_completed_tx(qs, 0, TXQ_ETH);
1661 if (!pi->link_config.link_ok) {
1665 TXQ_LOCK_ASSERT(qs);
1666 while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
1667 pi->link_config.link_ok) {
1668 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1670 if (txq->size - txq->in_use <= TX_MAX_DESC)
1673 if ((m_head = cxgb_dequeue(qs)) == NULL)
1676 * Encapsulation can modify our pointer, and or make it
1677 * NULL on failure. In that event, we can't requeue.
1679 if (t3_encap(qs, &m_head) || m_head == NULL)
1685 if (txq->db_pending)
1686 check_ring_tx_db(pi->adapter, txq, 1);
1688 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
1689 pi->link_config.link_ok)
1690 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1691 qs, txq->txq_timer.c_cpu);
/*
 * cxgb_transmit_locked: transmit or queue mbuf m on the TXQ_ETH queue of
 * qs.  The Tx lock must be held (TXQ_LOCK_ASSERT).
 * When check_pkt_coalesce() returns 0, TXQ_RING_NEEDS_ENQUEUE() is false
 * and more than TX_MAX_DESC descriptors are free, the packet is handed
 * straight to t3_encap() and the direct-transmit counters are updated;
 * otherwise it is placed on the buf_ring with drbr_enqueue().  Afterwards
 * completed descriptors are reclaimed and either cxgb_start_locked() is
 * run or the txq_timer callout is armed for one tick.
 * NOTE(review): the return statements and parts of the error paths are not
 * visible here.
 */
1697 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
1699 struct port_info *pi = qs->port;
1700 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1701 struct buf_ring *br = txq->txq_mr;
1704 avail = txq->size - txq->in_use;
1705 TXQ_LOCK_ASSERT(qs);
1708 * We can only do a direct transmit if the following are true:
1709 * - we aren't coalescing (ring < 3/4 full)
1710 * - the link is up -- checked in caller
1711 * - there are no packets enqueued already
1712 * - there is space in hardware transmit queue
1714 if (check_pkt_coalesce(qs) == 0 &&
1715 !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
1716 if (t3_encap(qs, &m)) {
1718 (error = drbr_enqueue(ifp, br, m)) != 0)
1721 if (txq->db_pending)
1722 check_ring_tx_db(pi->adapter, txq, 1);
1725 * We've bypassed the buf ring so we need to update
1726 * the stats directly
1728 txq->txq_direct_packets++;
1729 txq->txq_direct_bytes += m->m_pkthdr.len;
1731 } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
1734 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1735 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
1736 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
1737 cxgb_start_locked(qs);
1738 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
1739 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1740 qs, txq->txq_timer.c_cpu);
/*
 * cxgb_transmit: hand mbuf m to the port behind ifp for transmission.
 * The interface must be IFF_DRV_RUNNING and the link up; the handling of
 * the opposite case is not visible here.  The queue set defaults to
 * pi->first_qset; for mbufs flagged M_FLOWID it is chosen as
 * flowid % nqsets + first_qset.  If the Tx lock can be taken without
 * blocking the packet goes through cxgb_transmit_locked(); otherwise it is
 * only queued on that queue set's buf_ring with drbr_enqueue().
 */
1745 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
1747 struct sge_qset *qs;
1748 struct port_info *pi = ifp->if_softc;
1749 int error, qidx = pi->first_qset;
1751 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
1752 ||(!pi->link_config.link_ok)) {
1757 if (m->m_flags & M_FLOWID)
1758 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
1760 qs = &pi->adapter->sge.qs[qidx];
1762 if (TXQ_TRYLOCK(qs)) {
1764 error = cxgb_transmit_locked(ifp, qs, m);
1767 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
/*
 * cxgb_start: start transmission on the port's first queue set by calling
 * cxgb_start_locked().  The link state is tested first.
 * NOTE(review): the early-exit and locking lines are not visible here.
 */
1771 cxgb_start(struct ifnet *ifp)
1773 struct port_info *pi = ifp->if_softc;
1774 struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];
1776 if (!pi->link_config.link_ok)
1780 cxgb_start_locked(qs);
/*
 * cxgb_qflush: per its own comment, meant to flush mbufs queued in the
 * buf_rings and in the transmit queues.
 * NOTE(review): no statements implementing that are visible here -- TODO
 * confirm whether this is a stub.
 */
1785 cxgb_qflush(struct ifnet *ifp)
1788 * flush any enqueued mbufs in the buf_rings
1789 * and in the transmit queues
1796 * write_imm - write a packet into a Tx descriptor as immediate data
1797 * @d: the Tx descriptor to write
1799 * @len: the length of packet data to write as immediate data
1800 * @gen: the generation bit value to write
1802 * Writes a packet as immediate data into a Tx descriptor. The packet
1803 * contains a work request at its beginning. We must write the packet
1804 * carefully so the SGE doesn't read accidentally before it's written in
/*
 * write_imm: copy the packet held in mbuf m into descriptor d as immediate
 * data.  The mbuf begins with a work_request_hdr.  The payload after that
 * header is copied first with memcpy(); the header is then composed from
 * the mbuf's wrh_hi/wrh_lo OR-ed with SOP, EOP, the byte count of the last
 * flit (len & 7), the generation and the length in flits ((len + 7) / 8),
 * and written with set_wr_hdr().
 * The function panics when len is smaller than the header; a second panic
 * covers an oversized len.
 * NOTE(review): the oversize condition and the action taken when m_type is
 * not MT_DONTFREE are not visible here.
 */
1807 static __inline void
1808 write_imm(struct tx_desc *d, struct mbuf *m,
1809 unsigned int len, unsigned int gen)
1811 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1812 struct work_request_hdr *to = (struct work_request_hdr *)d;
1813 uint32_t wr_hi, wr_lo;
1816 panic("len too big %d\n", len);
1817 if (len < sizeof(*from))
1818 panic("len too small %d", len);
1820 memcpy(&to[1], &from[1], len - sizeof(*from));
1821 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
1822 V_WR_BCNTLFLT(len & 7));
1823 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
1824 V_WR_LEN((len + 7) / 8));
1825 set_wr_hdr(to, wr_hi, wr_lo);
1830 * This check is a hack we should really fix the logic so
1831 * that this can't happen
1833 if (m->m_type != MT_DONTFREE)
1839 * check_desc_avail - check descriptor availability on a send queue
1840 * @adap: the adapter
1842 * @m: the packet needing the descriptors
1843 * @ndesc: the number of Tx descriptors needed
1844 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1846 * Checks if the requested number of Tx descriptors is available on an
1847 * SGE send queue. If the queue is already suspended or not enough
1848 * descriptors are available the packet is queued for later transmission.
1849 * Must be called with the Tx queue locked.
1851 * Returns 0 if enough descriptors are available, 1 if there aren't
1852 * enough descriptors and the packet has been queued, and 2 if the caller
1853 * needs to retry because there weren't enough descriptors at the
1854 * beginning of the call but some freed up in the mean time.
/*
 * check_desc_avail: decide whether ndesc descriptors can be used on q now.
 * If packets are already waiting on q->sendq, m is appended behind them
 * (label addq_exit).  If fewer than ndesc descriptors are free
 * (q->size - q->in_use), the queue's bit in qs->txq_stopped is set and the
 * code then re-tests should_restart_tx() and test_and_clear_bit() on that
 * bit.
 * NOTE(review): the return statements and the last parameter (used as qid
 * in the body) are not visible here.
 */
1857 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1858 struct mbuf *m, unsigned int ndesc,
1862 * XXX We currently only use this for checking the control queue
1863 * the control queue is only used for binding qsets which happens
1864 * at init time so we are guaranteed enough descriptors
1866 if (__predict_false(!mbufq_empty(&q->sendq))) {
1867 addq_exit: mbufq_tail(&q->sendq, m);
1870 if (__predict_false(q->size - q->in_use < ndesc)) {
1872 struct sge_qset *qs = txq_to_qset(q, qid);
1874 setbit(&qs->txq_stopped, qid);
1875 if (should_restart_tx(q) &&
1876 test_and_clear_bit(qid, &qs->txq_stopped))
1887 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1888 * @q: the SGE control Tx queue
1890 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1891 * that send only immediate data (presently just the control queues) and
1892 * thus do not have any mbufs
/*
 * reclaim_completed_tx_imm: account for completed descriptors on q.  The
 * count is q->processed - q->cleaned; it is subtracted from q->in_use and
 * added to q->cleaned.  No buffers are touched in the visible code.
 */
1894 static __inline void
1895 reclaim_completed_tx_imm(struct sge_txq *q)
1897 unsigned int reclaim = q->processed - q->cleaned;
1899 q->in_use -= reclaim;
1900 q->cleaned += reclaim;
/*
 * immediate: non-zero when both the first mbuf's length (m_len) and the
 * whole packet's length (m_pkthdr.len) are at most WR_LEN.
 */
1904 immediate(const struct mbuf *m)
1906 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
1910 * ctrl_xmit - send a packet through an SGE control Tx queue
1911 * @adap: the adapter
1912 * @qs: the queue set whose control Tx queue is used
1915 * Send a packet through an SGE control Tx queue. Packets sent through
1916 * a control queue must fit entirely as immediate data in a single Tx
1917 * descriptor and have no page fragments.
/*
 * ctrl_xmit: send m on the TXQ_CTRL queue of qs as immediate data.
 * Packets that fail immediate() take a separate path.  The WR header at
 * the front of the mbuf gets F_WR_SOP | F_WR_EOP OR-ed into wrh_hi and the
 * queue token stored in wrh_lo.  After reclaiming completed descriptors
 * and calling check_desc_avail() for one descriptor, the packet is written
 * at q->pidx with write_imm(), pidx is incremented and compared against
 * q->size, and the doorbell register is written.
 * NOTE(review): locking, the non-immediate path, the handling of a
 * non-zero check_desc_avail() result and the return value are not visible
 * here.
 */
1920 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1923 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1924 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1926 if (__predict_false(!immediate(m))) {
1931 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1932 wrp->wrh_lo = htonl(V_WR_TID(q->token));
1935 again: reclaim_completed_tx_imm(q);
1937 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1938 if (__predict_false(ret)) {
1945 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1948 if (++q->pidx >= q->size) {
1954 t3_write_reg(adap, A_SG_KDOORBELL,
1955 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1961 * restart_ctrlq - restart a suspended control queue
1962 * @qs: the queue set containing the control queue
1964 * Resumes transmission on a suspended Tx control queue.
/*
 * restart_ctrlq: task handler (installed as the TXQ_CTRL qresume_task)
 * that resumes a stopped control queue.  data is the queue set.
 * After reclaiming completed descriptors, packets are taken from q->sendq
 * and written with write_imm() for as long as q->in_use < q->size.  If
 * packets remain queued, the TXQ_CTRL bit in txq_stopped is set again and
 * should_restart_tx() is re-tested.  The doorbell register is written at
 * the end.
 * NOTE(review): locking, generation/pidx wrap handling and the target of
 * the "again" label's jump are not visible here.
 */
1967 restart_ctrlq(void *data, int npending)
1970 struct sge_qset *qs = (struct sge_qset *)data;
1971 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1972 adapter_t *adap = qs->port->adapter;
1975 again: reclaim_completed_tx_imm(q);
1977 while (q->in_use < q->size &&
1978 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1980 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1982 if (++q->pidx >= q->size) {
1988 if (!mbufq_empty(&q->sendq)) {
1989 setbit(&qs->txq_stopped, TXQ_CTRL);
1991 if (should_restart_tx(q) &&
1992 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1997 t3_write_reg(adap, A_SG_KDOORBELL,
1998 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2003 * Send a management message through control queue 0
/*
 * t3_mgmt_tx: pass m to ctrl_xmit() on queue set 0 of adap and return its
 * result.
 */
2006 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
2008 return ctrl_xmit(adap, &adap->sge.qs[0], m);
2012 * t3_free_qset - free the resources of an SGE queue set
2013 * @sc: the controller owning the queue set
2016 * Release the HW and SW resources associated with an SGE queue set, such
2017 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
2018 * queue set must be quiesced prior to calling this.
/*
 * t3_free_qset: tear down queue set q of adapter sc.
 * Order of the visible steps:
 *  1. reclaim completed TXQ_ETH descriptors, free the buf_ring and the ifq;
 *  2. for each free list: disable it in hardware under sge.reg_lock, then
 *     release its descriptor DMA memory and tags, its Rx buffers and its
 *     software descriptors;
 *  3. unlock and destroy q->lock;
 *  4. for each Tx queue: disable its egress context under sge.reg_lock,
 *     then release its descriptor DMA memory, tags and software
 *     descriptors;
 *  5. disable the response-queue context and release its DMA memory, tag
 *     and lock;
 *  6. free the LRO state and zero the whole structure.
 * The function calls mtx_unlock(&q->lock) without a matching lock of its
 * own, so it must be entered with that lock held.
 * NOTE(review): some guarding conditions and argument lines are not visible
 * here.
 */
2021 t3_free_qset(adapter_t *sc, struct sge_qset *q)
2025 reclaim_completed_tx(q, 0, TXQ_ETH);
2026 if (q->txq[TXQ_ETH].txq_mr != NULL)
2027 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
2028 if (q->txq[TXQ_ETH].txq_ifq != NULL) {
2029 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
2030 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
2033 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2034 if (q->fl[i].desc) {
2035 mtx_lock_spin(&sc->sge.reg_lock);
2036 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
2037 mtx_unlock_spin(&sc->sge.reg_lock);
2038 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
2039 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
2041 bus_dma_tag_destroy(q->fl[i].desc_tag);
2042 bus_dma_tag_destroy(q->fl[i].entry_tag);
2044 if (q->fl[i].sdesc) {
2045 free_rx_bufs(sc, &q->fl[i]);
2046 free(q->fl[i].sdesc, M_DEVBUF);
2050 mtx_unlock(&q->lock);
2051 MTX_DESTROY(&q->lock);
2052 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2053 if (q->txq[i].desc) {
2054 mtx_lock_spin(&sc->sge.reg_lock);
2055 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2056 mtx_unlock_spin(&sc->sge.reg_lock);
2057 bus_dmamap_unload(q->txq[i].desc_tag,
2058 q->txq[i].desc_map);
2059 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2060 q->txq[i].desc_map);
2061 bus_dma_tag_destroy(q->txq[i].desc_tag);
2062 bus_dma_tag_destroy(q->txq[i].entry_tag);
2064 if (q->txq[i].sdesc) {
2065 free(q->txq[i].sdesc, M_DEVBUF);
2070 mtx_lock_spin(&sc->sge.reg_lock);
2071 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2072 mtx_unlock_spin(&sc->sge.reg_lock);
2074 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2075 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2077 bus_dma_tag_destroy(q->rspq.desc_tag);
2078 MTX_DESTROY(&q->rspq.lock);
2082 tcp_lro_free(&q->lro.ctrl);
2085 bzero(q, sizeof(*q));
2089 * t3_free_sge_resources - free SGE resources
2090 * @sc: the adapter softc
2092 * Frees resources used by the SGE queue sets.
/*
 * t3_free_sge_resources: add up nqsets over all ports of sc, then for each
 * queue set take TXQ_LOCK and call t3_free_qset().
 * No unlock is visible in this loop; t3_free_qset() itself calls
 * mtx_unlock(&q->lock).
 * NOTE(review): presumably TXQ_LOCK takes that same lock -- TODO confirm.
 */
2095 t3_free_sge_resources(adapter_t *sc)
2099 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2100 nqsets += sc->port[i].nqsets;
2102 for (i = 0; i < nqsets; ++i) {
2103 TXQ_LOCK(&sc->sge.qs[i]);
2104 t3_free_qset(sc, &sc->sge.qs[i]);
2110 * t3_sge_start - enable SGE
2111 * @sc: the controller softc
2113 * Enables the SGE for DMAs. This is the last step in starting packet
/*
 * t3_sge_start: set the F_GLOBALENABLE field of the A_SG_CONTROL register.
 */
2117 t3_sge_start(adapter_t *sc)
2119 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2123 * t3_sge_stop - disable SGE operation
2126 * Disables the DMA engine. This can be called in emergencies (e.g.,
2127 * from error interrupts) or from normal process context. In the latter
2128 * case it also disables any pending queue restart tasklets. Note that
2129 * if it is called in interrupt context it cannot disable the restart
2130 * tasklets as it cannot wait, however the tasklets will have no effect
2131 * since the doorbells are disabled and the driver will call this again
2132 * later from process context, at which time the tasklets will be stopped
2133 * if they are still running.
/*
 * t3_sge_stop: clear F_GLOBALENABLE in A_SG_CONTROL, then, for every queue
 * set counted across the adapter's ports, wait with taskqueue_drain() for
 * the TXQ_OFLD and TXQ_CTRL qresume tasks to finish.
 * NOTE(review): any condition guarding the drain loop is not visible here.
 */
2136 t3_sge_stop(adapter_t *sc)
2140 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2145 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2146 nqsets += sc->port[i].nqsets;
2152 for (i = 0; i < nqsets; ++i) {
2153 struct sge_qset *qs = &sc->sge.qs[i];
2155 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2156 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2162 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
2163 * @qs: the queue set that owns the Tx queue
2164 * @reclaimable: the number of descriptors to reclaim
2165 * @queue: index of the Tx queue within the queue set to reclaim from
2169 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2170 * Tx buffers. Called with the Tx queue lock held.
2172 * Returns the number of buffers reclaimed
/*
 * t3_free_tx_desc: release up to reclaimable software descriptors of Tx
 * queue qs->txq[queue].  qs->lock must be held (mtx_assert below).
 * For each descriptor that holds an mbuf, the DMA map is unloaded when
 * TX_SW_DESC_MAPPED is set (and the flag cleared) and the mbuf list is
 * freed with m_freem_list().  The next two software descriptors' mbufs are
 * prefetched on every iteration.
 * NOTE(review): the initialisation of cidx and mask, the wrap handling
 * after ++cidx == q->size and the final write-back of cidx are not visible
 * here.
 */
2175 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2177 struct tx_sw_desc *txsd;
2178 unsigned int cidx, mask;
2179 struct sge_txq *q = &qs->txq[queue];
2182 T3_TRACE2(sc->tb[q->cntxt_id & 7],
2183 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2187 txsd = &q->sdesc[cidx];
2189 mtx_assert(&qs->lock, MA_OWNED);
2190 while (reclaimable--) {
2191 prefetch(q->sdesc[(cidx + 1) & mask].m);
2192 prefetch(q->sdesc[(cidx + 2) & mask].m);
2194 if (txsd->m != NULL) {
2195 if (txsd->flags & TX_SW_DESC_MAPPED) {
2196 bus_dmamap_unload(q->entry_tag, txsd->map);
2197 txsd->flags &= ~TX_SW_DESC_MAPPED;
2199 m_freem_list(txsd->m);
2205 if (++cidx == q->size) {
2215 * is_new_response - check if a response is newly written
2216 * @r: the response descriptor
2217 * @q: the response queue
2219 * Returns true if a response descriptor contains a yet unprocessed
/*
 * is_new_response: compare the F_RSPD_GEN2 bit of the descriptor's
 * intr_gen field with the queue's current generation q->gen; the result is
 * non-zero when they are equal.
 */
2223 is_new_response(const struct rsp_desc *r,
2224 const struct sge_rspq *q)
2226 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
/* GTS flags of Tx queues 0 and 1 in a response descriptor. */
2229 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
/*
 * The GTS flags plus the full-width CR fields of Tx queues 0, 1 and 2.
 * NOTE(review): "CR" presumably stands for credit return -- TODO confirm.
 */
2230 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2231 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2232 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2233 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2235 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2236 #define NOMEM_INTR_DELAY 2500
2239 * write_ofld_wr - write an offload work request
2240 * @adap: the adapter
2241 * @m: the packet to send
2243 * @pidx: index of the first Tx descriptor to write
2244 * @gen: the generation value to use
2245 * @ndesc: number of descriptors the packet will occupy
2247 * Write an offload work request to send the supplied packet. The packet
2248 * data already carry the work request with most fields populated.
/*
 * write_ofld_wr: write the offload work request carried in mbuf m into the
 * descriptor(s) of q starting at index pidx.
 * A packet that satisfies immediate() and has no DMA segments is written
 * with write_imm().  Otherwise the part of the first mbuf that follows the
 * WR header is copied into the descriptor, an SGL for segs is built either
 * in place (ndesc == 1) or in the local sgl array, and
 * write_wr_hdr_sgl() finishes the WR using the wrh_hi/wrh_lo values already
 * present in the mbuf.
 * NOTE(review): the initialisation of txqs and the early return after
 * write_imm() are not visible here.
 */
2251 write_ofld_wr(adapter_t *adap, struct mbuf *m,
2252 struct sge_txq *q, unsigned int pidx,
2253 unsigned int gen, unsigned int ndesc,
2254 bus_dma_segment_t *segs, unsigned int nsegs)
2256 unsigned int sgl_flits, flits;
2257 struct work_request_hdr *from;
2258 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
2259 struct tx_desc *d = &q->desc[pidx];
2260 struct txq_state txqs;
2262 if (immediate(m) && nsegs == 0) {
2263 write_imm(d, m, m->m_len, gen);
2267 /* Only TX_DATA builds SGLs */
2268 from = mtod(m, struct work_request_hdr *);
2269 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
2271 flits = m->m_len / 8;
2272 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
2274 make_sgl(sgp, segs, nsegs);
2275 sgl_flits = sgl_len(nsegs);
2281 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
2282 from->wrh_hi, from->wrh_lo);
2286 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
2289 * Returns the number of Tx descriptors needed for the given offload
2290 * packet. These packets are already fully constructed.
/*
 * calc_tx_descs_ofld: number of Tx descriptors needed for offload packet
 * m.  A packet whose first mbuf is at most WR_LEN bytes and that has no
 * DMA segments needs exactly one descriptor.  Otherwise the count is
 * derived with flits_to_desc() from the header flits (m_len / 8) plus the
 * SGL length for cnt entries.
 * NOTE(review): how cnt is set and the final return are not visible here.
 */
2292 static __inline unsigned int
2293 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
2295 unsigned int flits, cnt = 0;
2298 if (m->m_len <= WR_LEN && nsegs == 0)
2299 return (1); /* packet fits as immediate data */
2302 * This needs to be re-visited for TOE
2308 flits = m->m_len / 8;
2310 ndescs = flits_to_desc(flits + sgl_len(cnt));
2316 * ofld_xmit - send a packet through an offload queue
2317 * @adap: the adapter
2318 * @qs: the queue set whose Tx offload queue is used
2321 * Send an offload packet through an SGE offload queue.
/*
 * ofld_xmit: send offload packet m on the TXQ_OFLD queue of qs.
 * The SGL attached to the mbuf (m_get_sgllen/m_get_sgl) is converted to
 * DMA segments with busdma_map_sgl() and the descriptor count is computed
 * with calc_tx_descs_ofld().  After reclaiming completed descriptors and
 * calling check_desc_avail(), the WR is written with write_ofld_wr() and
 * the doorbell is rung unconditionally (mustring = 1).  When
 * check_desc_avail() reports a problem, ndesc is saved in the mbuf
 * priority for the restart handler.
 * NOTE(review): locking, the pidx/gen bookkeeping and the return value are
 * not visible here.  The T3_TRACE5 arguments refer to skb, which is not
 * declared in the visible code; presumably the trace is compiled out --
 * TODO confirm.
 */
2324 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2328 unsigned int pidx, gen;
2329 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2330 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2331 struct tx_sw_desc *stx;
2333 nsegs = m_get_sgllen(m);
2334 vsegs = m_get_sgl(m);
2335 ndesc = calc_tx_descs_ofld(m, nsegs);
2336 busdma_map_sgl(vsegs, segs, nsegs);
2338 stx = &q->sdesc[q->pidx];
2341 again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
2342 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2343 if (__predict_false(ret)) {
2345 printf("no ofld desc avail\n");
2347 m_set_priority(m, ndesc); /* save for restart */
2358 if (q->pidx >= q->size) {
2363 T3_TRACE5(adap->tb[q->cntxt_id & 7],
2364 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2365 ndesc, pidx, skb->len, skb->len - skb->data_len,
2366 skb_shinfo(skb)->nr_frags);
2370 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2371 check_ring_tx_db(adap, q, 1);
2376 * restart_offloadq - restart a suspended offload queue
2377 * @qs: the queue set containing the offload queue
2379 * Resumes transmission on a suspended Tx offload queue.
/*
 * restart_offloadq: task handler (installed as the TXQ_OFLD qresume_task)
 * that resumes a stopped offload queue.  data is the queue set.
 * After reclaiming completed descriptors it peeks at each packet waiting
 * on q->sendq; the descriptor count saved in the mbuf priority is compared
 * with the free space (q->size - q->in_use).  If space is short, the
 * TXQ_OFLD stopped bit is set again and should_restart_tx() is re-tested.
 * Otherwise the packet is dequeued, mapped with busdma_map_mbufs() and
 * written with write_ofld_wr().  At the end TXQ_RUNNING and
 * TXQ_LAST_PKT_DB are set and the doorbell register is written.
 * NOTE(review): locking, the pidx/gen bookkeeping and the loop exits are
 * not visible here.  stx is computed once from q->pidx at entry;
 * presumably it should track the descriptor being written -- TODO confirm.
 */
2382 restart_offloadq(void *data, int npending)
2385 struct sge_qset *qs = data;
2386 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2387 adapter_t *adap = qs->port->adapter;
2388 bus_dma_segment_t segs[TX_MAX_SEGS];
2389 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2393 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
2395 while ((m = mbufq_peek(&q->sendq)) != NULL) {
2396 unsigned int gen, pidx;
2397 unsigned int ndesc = m_get_priority(m);
2399 if (__predict_false(q->size - q->in_use < ndesc)) {
2400 setbit(&qs->txq_stopped, TXQ_OFLD);
2401 if (should_restart_tx(q) &&
2402 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2412 if (q->pidx >= q->size) {
2417 (void)mbufq_dequeue(&q->sendq);
2418 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2420 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2424 set_bit(TXQ_RUNNING, &q->flags);
2425 set_bit(TXQ_LAST_PKT_DB, &q->flags);
2429 t3_write_reg(adap, A_SG_KDOORBELL,
2430 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2434 * queue_set - return the queue set a packet should use
2437 * Maps a packet to the SGE queue set it should use. The desired queue
2438 * set is carried in bits 1-3 in the packet's priority.
/*
 * queue_set: return the mbuf priority shifted right by one bit, i.e. with
 * bit 0 dropped.
 */
2441 queue_set(const struct mbuf *m)
2443 return m_get_priority(m) >> 1;
2447 * is_ctrl_pkt - return whether an offload packet is a control packet
2450 * Determines whether an offload packet should use an OFLD or a CTRL
2451 * Tx queue. This is indicated by bit 0 in the packet's priority.
/*
 * is_ctrl_pkt: return bit 0 of the mbuf priority.
 */
2454 is_ctrl_pkt(const struct mbuf *m)
2456 return m_get_priority(m) & 1;
2460 * t3_offload_tx - send an offload packet
2461 * @tdev: the offload device to send to
2464 * Sends an offload packet. We use the packet priority to select the
2465 * appropriate Tx queue as follows: bit 0 indicates whether the packet
2466 * should be sent as regular or control, bits 1-3 select the queue set.
/*
 * t3_offload_tx: pick the queue set with queue_set(m) and send m through
 * ctrl_xmit() when is_ctrl_pkt(m) is non-zero, through ofld_xmit()
 * otherwise.  The chosen function's result is returned.
 * NOTE(review): the queue-set index taken from the packet priority is used
 * without a visible range check.
 */
2469 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2471 adapter_t *adap = tdev2adap(tdev);
2472 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2474 if (__predict_false(is_ctrl_pkt(m)))
2475 return ctrl_xmit(adap, qs, m);
2477 return ofld_xmit(adap, qs, m);
2481 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2482 * @tdev: the offload device that will be receiving the packets
2483 * @q: the SGE response queue that assembled the bundle
2484 * @mbufs: the partial bundle
2485 * @n: the number of packets in the bundle
2487 * Delivers a (partial) bundle of Rx offload packets to an offload device.
/*
 * deliver_partial_bundle: count one more bundle in q->offload_bundles and
 * pass the n mbufs in mbufs[] to cxgb_ofld_recv().
 * NOTE(review): the parameter line declaring q and any guard on n are not
 * visible here.
 */
2489 static __inline void
2490 deliver_partial_bundle(struct t3cdev *tdev,
2492 struct mbuf *mbufs[], int n)
2495 q->offload_bundles++;
2496 cxgb_ofld_recv(tdev, mbufs, n);
/*
 * rx_offload: add received offload packet m to the rx_gather array at
 * gather_idx, after pointing m_pkthdr.header at the start of the mbuf
 * data.  When the array reaches RX_BUNDLE_SIZE entries the whole bundle is
 * handed to cxgb_ofld_recv() and rq->offload_bundles is incremented.
 * Returns the updated gather index.
 * NOTE(review): presumably the index is reset after a full bundle is
 * delivered; that line is not visible here -- TODO confirm.
 */
2501 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2502 struct mbuf *m, struct mbuf *rx_gather[],
2503 unsigned int gather_idx)
2507 m->m_pkthdr.header = mtod(m, void *);
2508 rx_gather[gather_idx++] = m;
2509 if (gather_idx == RX_BUNDLE_SIZE) {
2510 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2512 rq->offload_bundles++;
2514 return (gather_idx);
/*
 * restart_tx: for the TXQ_OFLD and TXQ_CTRL queues of qs in turn, if the
 * queue's bit is set in qs->txq_stopped, should_restart_tx() agrees and
 * the bit can be atomically cleared, bump the queue's restarts counter and
 * enqueue its qresume_task on the adapter taskqueue.
 */
2518 restart_tx(struct sge_qset *qs)
2520 struct adapter *sc = qs->port->adapter;
2523 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2524 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2525 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2526 qs->txq[TXQ_OFLD].restarts++;
2527 DPRINTF("restarting TXQ_OFLD\n");
2528 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2530 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2531 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2532 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2533 qs->txq[TXQ_CTRL].in_use);
2535 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2536 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2537 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2538 qs->txq[TXQ_CTRL].restarts++;
2539 DPRINTF("restarting TXQ_CTRL\n");
2540 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2545 * t3_sge_alloc_qset - initialize an SGE queue set
2546 * @sc: the controller softc
2547 * @id: the queue set id
2548 * @nports: how many Ethernet ports will be using this queue set
2549 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2550 * @p: configuration parameters for this queue set
2551 * @ntxq: number of Tx queues for the queue set
2552 * @pi: port info for queue set
2554 * Allocate resources and initialize an SGE queue set. A queue set
2555 * comprises a response queue, two Rx free-buffer queues, and up to 3
2556 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2557 * queue, offload queue, and control queue.
/*
 * Set up queue set `id` of adapter `sc`: allocate the software and DMA rings,
 * initialise per-queue software state and LRO, program the SGE hardware
 * contexts, then pre-fill the free lists and the response queue.
 *
 * NOTE(review): short lines (braces, local declarations, goto/return
 * statements, some conditions) are absent from this copy of the function, so
 * the control flow between the statements shown below should be confirmed
 * against a complete copy before relying on these notes.
 */
2560 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2561 const struct qset_params *p, int ntxq, struct port_info *pi)
2563 struct sge_qset *q = &sc->sge.qs[id];
/* Per-qset mutex; its name comes from q->namebuf. */
2566 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
/*
 * Software ring for the Ethernet Tx queue; q->lock is handed to
 * buf_ring_alloc().
 * NOTE(review): the allocation uses M_WAITOK, so the NULL check is
 * presumably never taken -- confirm.
 */
2569 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2570 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
2571 device_printf(sc->dev, "failed to allocate mbuf ring\n");
/* The ifaltq is allocated zeroed and without sleeping (M_NOWAIT | M_ZERO). */
2574 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
2575 M_NOWAIT | M_ZERO)) == NULL) {
2576 device_printf(sc->dev, "failed to allocate ifq\n");
2579 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);
/* Tx timer and watchdog callouts; each is assigned CPU (id % mp_ncpus). */
2580 callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
2581 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
2582 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
2583 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
2585 init_qset_cntxt(q, id);
/* Free list 0: p->fl_size descriptors, buffers mapped through sc->rx_dmat. */
2587 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2588 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2589 &q->fl[0].desc, &q->fl[0].sdesc,
2590 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2591 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2592 printf("error %d from alloc ring fl0\n", ret);
/* Free list 1: p->jumbo_size descriptors, mapped through sc->rx_jumbo_dmat. */
2596 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2597 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2598 &q->fl[1].desc, &q->fl[1].sdesc,
2599 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2600 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2601 printf("error %d from alloc ring fl1\n", ret);
/* Response ring: no software descriptors (size 0, NULL) and no entry tag. */
2605 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2606 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2607 &q->rspq.desc_tag, &q->rspq.desc_map,
2608 NULL, NULL)) != 0) {
2609 printf("error %d from alloc ring rspq\n", ret);
/* One Tx ring per requested queue; the control queue gets no sw descriptors. */
2613 for (i = 0; i < ntxq; ++i) {
2614 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2616 if ((ret = alloc_ring(sc, p->txq_size[i],
2617 sizeof(struct tx_desc), sz,
2618 &q->txq[i].phys_addr, &q->txq[i].desc,
2619 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2620 &q->txq[i].desc_map,
2621 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2622 printf("error %d from alloc ring tx %i\n", ret, i);
2625 mbufq_init(&q->txq[i].sendq);
2627 q->txq[i].size = p->txq_size[i];
/* Deferred work: queue-resume tasks (offload, control) and reclaim tasks. */
2630 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2631 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2632 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2633 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
/* Both free lists start with generation 1. */
2635 q->fl[0].gen = q->fl[1].gen = 1;
2636 q->fl[0].size = p->fl_size;
2637 q->fl[1].size = p->jumbo_size;
2641 q->rspq.size = p->rspq_size;
/* Stop threshold: descriptors for one maximally fragmented packet, per port. */
2643 q->txq[TXQ_ETH].stop_thres = nports *
2644 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
/* Free list 0 always hands out MCLBYTES buffers from zone_pack. */
2646 q->fl[0].buf_size = MCLBYTES;
2647 q->fl[0].zone = zone_pack;
2648 q->fl[0].type = EXT_PACKET;
/*
 * Free list 1 picks its zone and external-buffer type from
 * p->jumbo_buf_size; any value other than the three handled here trips the
 * KASSERT.
 */
2650 if (p->jumbo_buf_size == MJUM16BYTES) {
2651 q->fl[1].zone = zone_jumbo16;
2652 q->fl[1].type = EXT_JUMBO16;
2653 } else if (p->jumbo_buf_size == MJUM9BYTES) {
2654 q->fl[1].zone = zone_jumbo9;
2655 q->fl[1].type = EXT_JUMBO9;
2656 } else if (p->jumbo_buf_size == MJUMPAGESIZE) {
2657 q->fl[1].zone = zone_jumbop;
2658 q->fl[1].type = EXT_JUMBOP;
2660 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
2664 q->fl[1].buf_size = p->jumbo_buf_size;
2666 /* Allocate and setup the lro_ctrl structure */
/* LRO is enabled for this qset exactly when the ifnet has IFCAP_LRO set. */
2667 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2669 ret = tcp_lro_init(&q->lro.ctrl);
2671 printf("error %d from tcp_lro_init\n", ret);
2675 q->lro.ctrl.ifp = pi->ifp;
/*
 * Hardware contexts are programmed with sc->sge.reg_lock (a spin mutex)
 * held.  Each helper's return value is negated before it is stored in ret.
 */
2677 mtx_lock_spin(&sc->sge.reg_lock);
2678 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2679 q->rspq.phys_addr, q->rspq.size,
2680 q->fl[0].buf_size, 1, 0);
2682 printf("error %d from t3_sge_init_rspcntxt\n", ret);
/* One free-list context per Rx free list in the set. */
2686 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2687 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2688 q->fl[i].phys_addr, q->fl[i].size,
2689 q->fl[i].buf_size, p->cong_thres, 1,
2692 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
/* Egress context for the Ethernet Tx queue. */
2697 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2698 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2699 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2702 printf("error %d from t3_sge_init_ecntxt\n", ret);
/* Egress context for the offload Tx queue. */
2707 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2708 USE_GTS, SGE_CNTXT_OFLD, id,
2709 q->txq[TXQ_OFLD].phys_addr,
2710 q->txq[TXQ_OFLD].size, 0, 1, 0);
2712 printf("error %d from t3_sge_init_ecntxt\n", ret);
/* Egress context for the control Tx queue. */
2718 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2720 q->txq[TXQ_CTRL].phys_addr,
2721 q->txq[TXQ_CTRL].size,
2722 q->txq[TXQ_CTRL].token, 1, 0);
2724 printf("error %d from t3_sge_init_ecntxt\n", ret);
/*
 * The response-queue mutex is named after the device unit and IRQ vector.
 * NOTE(review): this MTX_INIT sits between the mtx_lock_spin() above and
 * the mtx_unlock_spin() below, i.e. apparently with the spin lock held --
 * confirm that is intended.
 */
2729 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2730 device_get_unit(sc->dev), irq_vec_idx);
2731 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2733 mtx_unlock_spin(&sc->sge.reg_lock);
2734 t3_update_qset_coalesce(q, p);
/* Fill both free lists completely and return rspq.size - 1 response credits. */
2737 refill_fl(sc, &q->fl[0], q->fl[0].size);
2738 refill_fl(sc, &q->fl[1], q->fl[1].size);
2739 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
/* Arm the response queue's holdoff timer through the GTS register. */
2741 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2742 V_NEWTIMER(q->rspq.holdoff_tmr));
/*
 * Presumably the error exits (labels not visible here): drop reg_lock, then
 * release everything allocated so far with t3_free_qset().
 */
2747 mtx_unlock_spin(&sc->sge.reg_lock);
2750 t3_free_qset(sc, q);
2756 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2757 * ethernet data. Hardware assistance with various checksums and any vlan tag
2758 * will also be taken into account here.
/*
 * Convert a received CPL_RX_PKT mbuf into a plain Ethernet mbuf: record the
 * hardware checksum result and VLAN tag, set the receive interface, and
 * advance the data pointer past the CPL header and `ethpad` padding bytes.
 */
2761 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
/* The CPL header sits `ethpad` bytes into the mbuf data. */
2763 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
/* cpl->iff is translated to a port index through adap->rxpkt_map. */
2764 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2765 struct ifnet *ifp = pi->ifp;
2767 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
/*
 * Trust the hardware checksum only when RXCSUM is enabled, the packet is
 * not a fragment, and the CPL reports a valid checksum of 0xffff.
 */
2769 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2770 cpl->csum_valid && cpl->csum == 0xffff) {
/*
 * NOTE(review): csum_flags is assigned twice; the second assignment is a
 * superset of the first, so the first one looks redundant.
 */
2771 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
2772 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2773 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2774 m->m_pkthdr.csum_data = 0xffff;
/* Hand a hardware-extracted VLAN tag to the stack in host byte order. */
2777 if (cpl->vlan_valid) {
2778 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2779 m->m_flags |= M_VLANTAG;
2782 m->m_pkthdr.rcvif = ifp;
2783 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2785 * adjust after conversion to mbuf chain
/* Drop the CPL header and padding from both lengths and the data pointer. */
2787 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2788 m->m_len -= (sizeof(*cpl) + ethpad);
2789 m->m_data += (sizeof(*cpl) + ethpad);
2793 * get_packet - return the next ingress packet buffer from a free list
2794 * @adap: the adapter that received the packet
2795 * @drop_thres: # of remaining buffers before we start dropping packets
2796 * @qs: the qset that the SGE free list holding the packet belongs to
2797 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2798 * @r: response descriptor
2800 * Get the next packet from a free list and complete setup of the
2801 * mbuf. If the packet is small we make a copy and recycle the
2802 * original buffer, otherwise we use the original buffer itself. If a
2803 * positive drop threshold is supplied packets are dropped and their
2804 * buffers recycled if (a) the number of remaining buffers is under the
2805 * threshold and the packet is too big to copy, or (b) the packet should
2806 * be copied but there is no memory for the copy.
/*
 * Take the buffer described by response descriptor `r` off one of the
 * qset's two free lists and attach it to the mbuf chain tracked by `mh`.
 *
 * NOTE(review): `drop_thres` is not referenced in any of the lines visible
 * here; several short lines of this function are missing from this copy.
 */
2809 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2810 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2813 unsigned int len_cq = ntohl(r->len_cq);
/* F_RSPD_FLQ in len_cq selects free list 1; otherwise free list 0 is used. */
2814 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2815 int mask, cidx = fl->cidx;
2816 struct rx_sw_desc *sd = &fl->sdesc[cidx];
2817 uint32_t len = G_RSPD_LEN(len_cq);
2818 uint32_t flags = M_EXT;
/* Start-of-packet / end-of-packet bits taken from the descriptor flags. */
2819 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
/*
 * Prefetch the mbuf and cluster of the next two software descriptors.
 * NOTE(review): the (size - 1) mask assumes fl->size is a power of two --
 * confirm.
 */
2824 mask = fl->size - 1;
2825 prefetch(fl->sdesc[(cidx + 1) & mask].m);
2826 prefetch(fl->sdesc[(cidx + 2) & mask].m);
2827 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
2828 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);
2831 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
/*
 * Copy path: a complete packet (SOP and EOP in one descriptor) no longer
 * than SGE_RX_COPY_THRES is copied into a fresh mbuf and the original
 * receive buffer is recycled, provided recycle_enable is set.
 */
2833 if (recycle_enable && len <= SGE_RX_COPY_THRES &&
2834 sopeop == RSPQ_SOP_EOP) {
2835 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2837 cl = mtod(m, void *);
2838 memcpy(cl, sd->rxsd_cl, len);
2839 recycle_rx_buf(adap, fl, fl->cidx);
2840 m->m_pkthdr.len = m->m_len = len;
2842 mh->mh_head = mh->mh_tail = m;
/* Zero-copy path: unload the DMA map and pass the buffer itself up. */
2847 bus_dmamap_unload(fl->entry_tag, sd->map);
2851 if ((sopeop == RSPQ_SOP_EOP) ||
2852 (sopeop == RSPQ_SOP))
2854 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
2855 if (fl->zone == zone_pack) {
2857 * restore clobbered data pointer
2859 m->m_data = m->m_ext.ext_buf;
2861 m_cljset(m, cl, fl->type);
2870 mh->mh_head = mh->mh_tail = m;
2871 m->m_pkthdr.len = len;
/*
 * A descriptor that neither starts nor ends a packet is appended to the
 * current chain; without a chain in progress it is logged as discarded.
 */
2876 case RSPQ_NSOP_NEOP:
2877 if (mh->mh_tail == NULL) {
2878 log(LOG_ERR, "discarding intermediate descriptor entry\n");
2882 mh->mh_tail->m_next = m;
/* The running packet length is kept in the head mbuf's pkthdr. */
2884 mh->mh_head->m_pkthdr.len += len;
2888 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
/* Advance the free list's consumer index, checking for the end of the ring. */
2890 if (++fl->cidx == fl->size)
2897 * handle_rsp_cntrl_info - handles control information in a response
2898 * @qs: the queue set corresponding to the response
2899 * @flags: the response control flags
2901 * Handles the control information of an SGE response, such as GTS
2902 * indications and completion credits for the queue set's Tx queues.
2903 * HW coalesces credits, we don't do any extra SW coalescing.
/*
 * Apply the control bits of one response: clear TXQ_RUNNING on queues for
 * which the hardware signalled GTS, and add the returned completion credits
 * to each Tx queue's `processed` counter.
 */
2905 static __inline void
2906 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2908 unsigned int credits;
/* Ethernet Tx queue (TXQ0 bits). */
2911 if (flags & F_RSPD_TXQ0_GTS)
2912 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2914 credits = G_RSPD_TXQ0_CR(flags);
2916 qs->txq[TXQ_ETH].processed += credits;
/* Control Tx queue (TXQ2 bits); only credits are handled for it here. */
2918 credits = G_RSPD_TXQ2_CR(flags);
2920 qs->txq[TXQ_CTRL].processed += credits;
/* Offload Tx queue (TXQ1 bits). */
2923 if (flags & F_RSPD_TXQ1_GTS)
2924 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2926 credits = G_RSPD_TXQ1_CR(flags);
2928 qs->txq[TXQ_OFLD].processed += credits;
/*
 * Called by process_responses() with `sleeping` set to the RSPD_GTS_MASK
 * bits accumulated from the responses it handled.
 * NOTE(review): the body is not visible in this copy -- confirm what, if
 * anything, it does with `sleeping`.
 */
2933 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2934 unsigned int sleeping)
2940 * process_responses - process responses from an SGE response queue
2941 * @adap: the adapter
2942 * @qs: the queue set to which the response queue belongs
2943 * @budget: how many responses can be processed in this round
2945 * Process responses from an SGE response queue up to the supplied budget.
2946 * Responses include received packets as well as credits and other events
2947 * for the queues that belong to the response queue's queue set.
2948 * A negative budget is effectively unlimited.
2950 * Additionally choose the interrupt holdoff time for the next interrupt
2951 * on this queue. If the system is under memory shortage use a fairly
2952 * long delay to help recovery.
/*
 * Drain the qset's response queue while budget remains.  Each response is
 * either an async notification, immediate data, or a free-list packet, and
 * may additionally carry Tx credits/GTS control bits.  Completed packets go
 * to the offload path or to the Ethernet input path (optionally via LRO).
 *
 * NOTE(review): short lines (braces, else branches, budget decrement, the
 * return statement) are missing from this copy, so branch boundaries below
 * are inferred from the visible statements only.
 */
2955 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2957 struct sge_rspq *rspq = &qs->rspq;
2958 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2959 int budget_left = budget;
2960 unsigned int sleeping = 0;
2961 int lro_enabled = qs->lro.enabled;
2963 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2964 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
/* Partially assembled packet state lives in the response queue itself. */
2966 struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
/*
 * Debug aid: report holdoff timer changes.
 * NOTE(review): last_holdoff is a function-level static, so it is shared by
 * every response queue that runs through this function.
 */
2968 static int last_holdoff = 0;
2969 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2970 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2971 last_holdoff = rspq->holdoff_tmr;
/* Default next holdoff; the out-of-memory path below overrides it. */
2974 rspq->next_holdoff = rspq->holdoff_tmr;
2976 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2977 int eth, eop = 0, ethpad = 0;
2978 uint32_t flags = ntohl(r->flags);
/* rss_csum is the first 32-bit word of the response descriptor, unswapped. */
2979 uint32_t rss_csum = *(const uint32_t *)r;
2980 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2982 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
/* Case 1: asynchronous notification -- copied into an mbuf as a CPL message. */
2984 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2988 printf("async notification\n");
2990 if (mh->mh_head == NULL) {
2991 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2994 m = m_gethdr(M_DONTWAIT, MT_DATA);
2999 memcpy(mtod(m, char *), r, AN_PKT_SIZE);
3000 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
3001 *mtod(m, char *) = CPL_ASYNC_NOTIF;
3002 rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
3004 rspq->async_notif++;
/* Case 2: the response carries the packet data inline (immediate data). */
3006 } else if (flags & F_RSPD_IMM_DATA_VALID) {
3007 struct mbuf *m = NULL;
3009 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
3010 r->rss_hdr.opcode, rspq->cidx);
3011 if (mh->mh_head == NULL)
3012 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
3014 m = m_gethdr(M_DONTWAIT, MT_DATA);
/* No mbuf available: ask for a longer interrupt holdoff to aid recovery. */
3016 if (mh->mh_head == NULL && m == NULL) {
3018 rspq->next_holdoff = NOMEM_INTR_DELAY;
3022 get_imm_packet(adap, r, mh->mh_head);
/* Case 3: the data is in a free-list buffer; get_packet() reports EOP. */
3025 } else if (r->len_cq) {
3026 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
3028 eop = get_packet(adap, drop_thresh, qs, mh, r);
/* The RSS hash is used as the flow id unless timestamps replace the hash. */
3030 if (r->rss_hdr.hash_type && !adap->timestamp)
3031 mh->mh_head->m_flags |= M_FLOWID;
3032 mh->mh_head->m_pkthdr.flowid = rss_hash;
/* Tx credits and GTS bits can ride on any response. */
3040 if (flags & RSPD_CTRL_MASK) {
3041 sleeping |= flags & RSPD_GTS_MASK;
3042 handle_rsp_cntrl_info(qs, flags);
/* Advance the consumer index, checking for the end of the ring. */
3046 if (__predict_false(++rspq->cidx == rspq->size)) {
/* Return response-queue credits to the hardware once 64 have accumulated. */
3052 if (++rspq->credits >= 64) {
3053 refill_rspq(adap, rspq, rspq->credits);
/* Offload path: stash the first descriptor word and hash, then bundle. */
3057 mh->mh_head->m_pkthdr.csum_data = rss_csum;
3061 m_set_priority(mh->mh_head, rss_hash);
3064 ngathered = rx_offload(&adap->tdev, rspq,
3065 mh->mh_head, offload_mbufs, ngathered);
3067 DPRINTF("received offload packet\n");
/* Ethernet path: only complete packets (eop) are passed up. */
3069 } else if (eth && eop) {
3070 struct mbuf *m = mh->mh_head;
3072 t3_rx_eth(adap, rspq, m, ethpad);
3075 * The T304 sends incoming packets on any qset. If LRO
3076 * is also enabled, we could end up sending packet up
3077 * lro_ctrl->ifp's input. That is incorrect.
3079 * The mbuf's rcvif was derived from the cpl header and
3080 * is accurate. Skip LRO and just use that.
3082 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
3084 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
3086 && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
3089 /* successfully queue'd for LRO */
3092 * LRO not enabled, packet unsuitable for LRO,
3093 * or unable to queue. Pass it up right now in
3096 struct ifnet *ifp = m->m_pkthdr.rcvif;
3097 (*ifp->if_input)(ifp, m);
/* Top up both free lists with a threshold/count argument of 32. */
3102 __refill_fl_lt(adap, &qs->fl[0], 32);
3103 __refill_fl_lt(adap, &qs->fl[1], 32);
/* Hand over any offload mbufs gathered but not yet delivered. */
3107 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
/* Flush every LRO entry that is still active. */
3111 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
3112 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
3113 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
3114 tcp_lro_flush(lro_ctrl, queued);
3119 check_ring_db(adap, qs, sleeping);
3121 mb(); /* commit Tx queue processed updates */
3122 if (__predict_false(qs->txq_stopped > 1))
3125 __refill_fl_lt(adap, &qs->fl[0], 512);
3126 __refill_fl_lt(adap, &qs->fl[1], 512);
/*
 * budget becomes (initial budget - budget_left); presumably the number of
 * responses consumed, which the caller treats as "work done" -- confirm.
 */
3127 budget -= budget_left;
3132 * A helper function that processes responses and issues GTS.
/*
 * Process all pending responses on `rq` (budget -1) and then write the GTS
 * register with the next holdoff timer and the new consumer index.
 */
3135 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
/* NOTE(review): static, hence shared by all response queues (debug only). */
3138 static int last_holdoff = 0;
3140 work = process_responses(adap, rspq_to_qset(rq), -1);
3142 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3143 printf("next_holdoff=%d\n", rq->next_holdoff);
3144 last_holdoff = rq->next_holdoff;
/* Tell the hardware the holdoff timer to use and how far we have consumed. */
3146 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3147 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3154 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3155 * Handles data events from SGE response queues as well as error and other
3156 * async events as they all use the same interrupt pin. We use one SGE
3157 * response queue per port in this mode and protect all response queues with
/*
 * INTx handler: `data` is the adapter.  Reads the data-interrupt map, defers
 * error interrupts to the slow-interrupt task, and processes the response
 * queues under queue 0's lock.
 */
3161 t3b_intr(void *data)
3164 adapter_t *adap = data;
3165 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
/* Write 0 to A_PL_CLI, then fetch the map of queues with pending data. */
3167 t3_write_reg(adap, A_PL_CLI, 0);
3168 map = t3_read_reg(adap, A_SG_DATA_INTR);
/*
 * Error interrupt: disable PL interrupts (the read back presumably flushes
 * the write) and let the slow-interrupt task handle it.
 */
3173 if (__predict_false(map & F_ERRINTR)) {
3174 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3175 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3176 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
/*
 * All per-port response queues are serviced while holding q0's lock.
 * NOTE(review): the per-port condition between for_each_port and the call
 * is not visible in this copy.
 */
3179 mtx_lock(&q0->lock);
3180 for_each_port(adap, i)
3182 process_responses_gts(adap, &adap->sge.qs[i].rspq);
3183 mtx_unlock(&q0->lock);
3187 * The MSI interrupt handler. This needs to handle data events from SGE
3188 * response queues as well as error and other async events as they all use
3189 * the same MSI vector. We use one SGE response queue per port in this mode
3190 * and protect all response queues with queue 0's lock.
/*
 * MSI handler: `data` is the adapter.  Services every port's response queue
 * under queue 0's lock; if none of them produced work the interrupt is
 * treated as a slow-path event.
 */
3193 t3_intr_msi(void *data)
3195 adapter_t *adap = data;
3196 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3197 int i, new_packets = 0;
3199 mtx_lock(&q0->lock);
/*
 * NOTE(review): the statement executed when process_responses_gts() returns
 * non-zero is not visible here; presumably it sets new_packets.
 */
3201 for_each_port(adap, i)
3202 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3204 mtx_unlock(&q0->lock);
/* No data work found: mask PL interrupts and schedule the slow task. */
3205 if (new_packets == 0) {
3206 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3207 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3208 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
/*
 * MSI-X handler: `data` is the queue set bound to this vector.  Services
 * that qset's response queue and counts interrupts that found no work.
 */
3213 t3_intr_msix(void *data)
3215 struct sge_qset *qs = data;
3216 adapter_t *adap = qs->port->adapter;
3217 struct sge_rspq *rspq = &qs->rspq;
/* A zero return means no responses were processed for this interrupt. */
3219 if (process_responses_gts(adap, rspq) == 0)
3220 rspq->unhandled_irqs++;
/*
 * Base size, in bytes, of the sbuf used by the queue-dump sysctl handlers;
 * they scale it by a per-handler multiplier.  The expansion is parenthesized
 * so the macro behaves as a single value in any expression.
 */
#define QDUMP_SBUF_SIZE (32 * 400)
/*
 * Sysctl handler that formats the hardware response-queue context and a
 * window of response descriptors (rspq_dump_start, rspq_dump_count) into an
 * sbuf and returns it with SYSCTL_OUT.
 *
 * NOTE(review): short lines (assignment of rspq from the handler argument,
 * returns, retry logic around sbuf overflow) are missing from this copy.
 */
3225 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3227 struct sge_rspq *rspq;
3228 struct sge_qset *qs;
3229 int i, err, dump_end, idx;
/* Scales the sbuf size; static, so it persists across calls. */
3230 static int multiplier = 1;
3232 struct rsp_desc *rspd;
3236 qs = rspq_to_qset(rspq);
3237 if (rspq->rspq_dump_count == 0)
/* Out-of-range requests are logged and the offending field is reset to 0. */
3239 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3241 "dump count is too large %d\n", rspq->rspq_dump_count);
3242 rspq->rspq_dump_count = 0;
3245 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3247 "dump start of %d is greater than queue size\n",
3248 rspq->rspq_dump_start);
3249 rspq->rspq_dump_start = 0;
/* Fetch the hardware context words for this response queue into data[]. */
3252 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3256 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
/* Decode individual fields out of the context words. */
3258 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3259 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3260 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3261 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3262 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3264 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3265 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
/* The window may wrap; indices are reduced modulo RSPQ_Q_SIZE via the mask. */
3267 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3268 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3269 idx = i & (RSPQ_Q_SIZE-1);
3271 rspd = &rspq->desc[idx];
3272 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3273 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3274 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
/*
 * NOTE(review): rss_hash_val is printed without a byte swap, unlike flags
 * and len_cq here and unlike process_responses(), which uses be32toh().
 */
3275 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3276 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3277 be32toh(rspd->len_cq), rspd->intr_gen);
3279 if (sbuf_overflowed(sb)) {
/* The terminating NUL is included in the bytes copied out. */
3285 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
/*
 * Sysctl handler that formats the Ethernet Tx queue's hardware egress
 * context and a window of Tx descriptors (txq_dump_start, txq_dump_count)
 * into an sbuf and returns it with SYSCTL_OUT.
 *
 * NOTE(review): short lines (assignment of txq from the handler argument,
 * returns, retry logic around sbuf overflow) are missing from this copy.
 */
3291 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3293 struct sge_txq *txq;
3294 struct sge_qset *qs;
3295 int i, j, err, dump_end;
/* Scales the sbuf size; static, so it persists across calls. */
3296 static int multiplier = 1;
3298 struct tx_desc *txd;
3299 uint32_t *WR, wr_hi, wr_lo, gen;
3303 qs = txq_to_qset(txq, TXQ_ETH);
3304 if (txq->txq_dump_count == 0) {
/* Out-of-range requests are logged; count falls back to 1, start to 0. */
3307 if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3309 "dump count is too large %d\n", txq->txq_dump_count);
3310 txq->txq_dump_count = 1;
3313 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3315 "dump start of %d is greater than queue size\n",
3316 txq->txq_dump_start);
3317 txq->txq_dump_start = 0;
/*
 * NOTE(review): the egress context is read using the response queue's
 * context id (qs->rspq.cntxt_id) rather than txq->cntxt_id -- confirm the
 * two ids are guaranteed to coincide for the Ethernet Tx queue.
 */
3320 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3326 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
/* Decode individual fields out of the context words. */
3328 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3329 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3330 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
/* NOTE(review): "generation%u" lacks the '=' the other fields use. */
3331 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
3332 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3333 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3334 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3335 txq->txq_dump_start,
3336 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
/* The window may wrap; indices are reduced modulo TX_ETH_Q_SIZE via the mask. */
3338 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3339 for (i = txq->txq_dump_start; i < dump_end; i++) {
3340 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
/* View the descriptor as 32-bit words; words 0 and 1 are the WR header. */
3341 WR = (uint32_t *)txd->flit;
3342 wr_hi = ntohl(WR[0]);
3343 wr_lo = ntohl(WR[1]);
3344 gen = G_WR_GEN(wr_lo);
3346 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
/* Words 2..29 are printed raw, four per line. */
3348 for (j = 2; j < 30; j += 4)
3349 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3350 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3353 if (sbuf_overflowed(sb)) {
/* The terminating NUL is included in the bytes copied out. */
3359 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
/*
 * Sysctl handler that formats a window of control Tx queue descriptors
 * (txq_dump_start, txq_dump_count) into an sbuf and returns it with
 * SYSCTL_OUT.  The queue is treated as having 256 entries.
 *
 * NOTE(review): short lines (assignment of txq from the handler argument,
 * returns, retry logic around sbuf overflow) are missing from this copy.
 */
3365 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3367 struct sge_txq *txq;
3368 struct sge_qset *qs;
3369 int i, j, err, dump_end;
/* Scales the sbuf size; static, so it persists across calls. */
3370 static int multiplier = 1;
3372 struct tx_desc *txd;
3373 uint32_t *WR, wr_hi, wr_lo, gen;
3376 qs = txq_to_qset(txq, TXQ_CTRL);
3377 if (txq->txq_dump_count == 0) {
/* Out-of-range requests are logged; count falls back to 1, start to 0. */
3380 if (txq->txq_dump_count > 256) {
3382 "dump count is too large %d\n", txq->txq_dump_count);
3383 txq->txq_dump_count = 1;
3386 if (txq->txq_dump_start > 255) {
3388 "dump start of %d is greater than queue size\n",
3389 txq->txq_dump_start);
3390 txq->txq_dump_start = 0;
3395 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3396 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3397 txq->txq_dump_start,
3398 (txq->txq_dump_start + txq->txq_dump_count) & 255);
/* The window may wrap; indices are reduced modulo 256 via the mask. */
3400 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3401 for (i = txq->txq_dump_start; i < dump_end; i++) {
3402 txd = &txq->desc[i & (255)];
/* View the descriptor as 32-bit words; words 0 and 1 are the WR header. */
3403 WR = (uint32_t *)txd->flit;
3404 wr_hi = ntohl(WR[0]);
3405 wr_lo = ntohl(WR[1]);
3406 gen = G_WR_GEN(wr_lo);
3408 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
/* Words 2..29 are printed raw, four per line. */
3410 for (j = 2; j < 30; j += 4)
3411 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3412 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3415 if (sbuf_overflowed(sb)) {
/* The terminating NUL is included in the bytes copied out. */
3421 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
/*
 * Sysctl handler for the interrupt coalescing timer.  The current value is
 * taken from queue set 0's parameters; a changed value is written to every
 * queue set's parameters and pushed to the hardware.
 *
 * NOTE(review): short lines (returns, the nqsets increment, lock/unlock
 * calls) are missing from this copy.
 */
3427 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3429 adapter_t *sc = arg1;
3430 struct qset_params *qsp = &sc->params.sge.qset[0];
3432 struct sge_qset *qs;
3433 int i, j, err, nqsets = 0;
/* Nothing is done until the adapter has completed full initialisation. */
3436 if ((sc->flags & FULL_INIT_DONE) == 0)
3439 coalesce_usecs = qsp->coalesce_usecs;
3440 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
/* Unchanged value: nothing to update. */
3445 if (coalesce_usecs == qsp->coalesce_usecs)
/* Walk every port's queue sets (presumably counting them into nqsets). */
3448 for (i = 0; i < sc->params.nports; i++)
3449 for (j = 0; j < sc->port[i].nqsets; j++)
/* Values below 1 are raised to 1. */
3452 coalesce_usecs = max(1, coalesce_usecs);
3454 for (i = 0; i < nqsets; i++) {
3455 qs = &sc->sge.qs[i];
3456 qsp = &sc->params.sge.qset[i];
3457 qsp->coalesce_usecs = coalesce_usecs;
/* With MSI-X each response queue has its own lock; otherwise queue 0's is used. */
3459 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3460 &sc->sge.qs[0].rspq.lock;
/* Recompute the holdoff timer and load it into the hardware via GTS. */
3463 t3_update_qset_coalesce(qs, qsp);
3464 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3465 V_NEWTIMER(qs->rspq.holdoff_tmr));
3473 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
3475 adapter_t *sc = arg1;
3478 if ((sc->flags & FULL_INIT_DONE) == 0)
3481 timestamp = sc->timestamp;
3482 rc = sysctl_handle_int(oidp, ×tamp, arg2, req);
3487 if (timestamp != sc->timestamp) {
3488 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
3489 timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
3490 sc->timestamp = timestamp;
/*
 * Register the adapter-level sysctls under the device's sysctl tree.
 * NOTE(review): the lines carrying the sysctl names are missing from this
 * copy for most entries; the notes below rely on the bound variables and
 * description strings only.
 */
3497 t3_add_attach_sysctls(adapter_t *sc)
3499 struct sysctl_ctx_list *ctx;
3500 struct sysctl_oid_list *children;
3502 ctx = device_get_sysctl_ctx(sc->dev);
3503 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3505 /* random information */
/* Read-only: firmware version string. */
3506 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3508 CTLFLAG_RD, &sc->fw_version,
3509 0, "firmware version");
/* Read-only: sc->params.rev. */
3510 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3512 CTLFLAG_RD, &sc->params.rev,
/* Read-only: port type string. */
3514 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3516 CTLFLAG_RD, &sc->port_types,
3517 0, "type of ports");
/* Read-write: the cxgb_debug variable (not per adapter). */
3518 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3520 CTLFLAG_RW, &cxgb_debug,
3521 0, "enable verbose debugging output");
/*
 * NOTE(review): the description "#tunneled packets freed" does not obviously
 * match a counter named tunq_coalesce -- confirm.
 */
3522 SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3523 CTLFLAG_RD, &sc->tunq_coalesce,
3524 "#tunneled packets freed");
/* Read-only: the txq_fills counter (not per adapter). */
3525 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3527 CTLFLAG_RD, &txq_fills,
3528 0, "#times txq overrun");
/* Read-only: core clock from the VPD parameters. */
3529 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3531 CTLFLAG_RD, &sc->params.vpd.cclk,
3532 0, "core clock frequency (in KHz)");
/*
 * Sysctl node names used by t3_add_configured_sysctls(): rspq_name for the
 * response-queue node, txq_names[0] for the Ethernet Tx queue node and
 * txq_names[2] for the control queue node.
 * NOTE(review): the txq_names initialiser is not visible in this copy.
 */
3536 static const char *rspq_name = "rspq";
3537 static const char *txq_names[] =
/*
 * Sysctl handler for one 64-bit MAC statistic.  arg1 is the port_info and
 * arg2 is the byte offset of the requested counter inside p->mac.stats.
 */
3545 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3547 struct port_info *p = arg1;
/* Locate the counter by adding the byte offset to the stats structure. */
3553 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
/* Refresh the MAC statistics before reporting the counter. */
3555 t3_mac_update_stats(&p->mac);
3558 return (sysctl_handle_quad(oidp, parg, 0, req));
3562 t3_add_configured_sysctls(adapter_t *sc)
3564 struct sysctl_ctx_list *ctx;
3565 struct sysctl_oid_list *children;
3568 ctx = device_get_sysctl_ctx(sc->dev);
3569 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3571 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3573 CTLTYPE_INT|CTLFLAG_RW, sc,
3574 0, t3_set_coalesce_usecs,
3575 "I", "interrupt coalescing timer (us)");
3577 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3579 CTLTYPE_INT | CTLFLAG_RW, sc,
3580 0, t3_pkt_timestamp,
3581 "I", "provide packet timestamp instead of connection hash");
3583 for (i = 0; i < sc->params.nports; i++) {
3584 struct port_info *pi = &sc->port[i];
3585 struct sysctl_oid *poid;
3586 struct sysctl_oid_list *poidlist;
3587 struct mac_stats *mstats = &pi->mac.stats;
3589 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3590 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3591 pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3592 poidlist = SYSCTL_CHILDREN(poid);
3593 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3594 "nqsets", CTLFLAG_RD, &pi->nqsets,
3597 for (j = 0; j < pi->nqsets; j++) {
3598 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3599 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3600 *ctrlqpoid, *lropoid;
3601 struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3602 *txqpoidlist, *ctrlqpoidlist,
3604 struct sge_txq *txq = &qs->txq[TXQ_ETH];
3606 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3608 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3609 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3610 qspoidlist = SYSCTL_CHILDREN(qspoid);
3612 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3613 CTLFLAG_RD, &qs->fl[0].empty, 0,
3614 "freelist #0 empty");
3615 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3616 CTLFLAG_RD, &qs->fl[1].empty, 0,
3617 "freelist #1 empty");
3619 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3620 rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3621 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3623 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3624 txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3625 txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3627 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3628 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3629 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3631 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3632 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3633 lropoidlist = SYSCTL_CHILDREN(lropoid);
3635 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3636 CTLFLAG_RD, &qs->rspq.size,
3637 0, "#entries in response queue");
3638 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3639 CTLFLAG_RD, &qs->rspq.cidx,
3640 0, "consumer index");
3641 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3642 CTLFLAG_RD, &qs->rspq.credits,
3644 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
3645 CTLFLAG_RD, &qs->rspq.starved,
3646 0, "#times starved");
3647 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3648 CTLFLAG_RD, &qs->rspq.phys_addr,
3649 "physical_address_of the queue");
3650 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3651 CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3652 0, "start rspq dump entry");
3653 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3654 CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3655 0, "#rspq entries to dump");
3656 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3657 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3658 0, t3_dump_rspq, "A", "dump of the response queue");
3660 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
3661 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
3662 "#tunneled packets dropped");
3663 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3664 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3665 0, "#tunneled packets waiting to be sent");
3667 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3668 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3669 0, "#tunneled packets queue producer index");
3670 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3671 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3672 0, "#tunneled packets queue consumer index");
3674 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3675 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3676 0, "#tunneled packets processed by the card");
3677 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3678 CTLFLAG_RD, &txq->cleaned,
3679 0, "#tunneled packets cleaned");
3680 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3681 CTLFLAG_RD, &txq->in_use,
3682 0, "#tunneled packet slots in use");
3683 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3684 CTLFLAG_RD, &txq->txq_frees,
3685 "#tunneled packets freed");
3686 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3687 CTLFLAG_RD, &txq->txq_skipped,
3688 0, "#tunneled packet descriptors skipped");
3689 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3690 CTLFLAG_RD, &txq->txq_coalesced,
3691 "#tunneled packets coalesced");
3692 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3693 CTLFLAG_RD, &txq->txq_enqueued,
3694 0, "#tunneled packets enqueued to hardware");
3695 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3696 CTLFLAG_RD, &qs->txq_stopped,
3697 0, "tx queues stopped");
3698 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3699 CTLFLAG_RD, &txq->phys_addr,
3700 "physical_address_of the queue");
3701 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3702 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3703 0, "txq generation");
3704 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3705 CTLFLAG_RD, &txq->cidx,
3706 0, "hardware queue cidx");
3707 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3708 CTLFLAG_RD, &txq->pidx,
3709 0, "hardware queue pidx");
3710 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3711 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3712 0, "txq start idx for dump");
3713 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3714 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3715 0, "txq #entries to dump");
3716 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3717 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3718 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3720 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3721 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3722 0, "ctrlq start idx for dump");
3723 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3724 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3725 0, "ctrl #entries to dump");
3726 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3727 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3728 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3730 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3731 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3732 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3733 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3734 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3735 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3736 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3737 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3740 /* Now add a node for mac stats. */
3741 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3742 CTLFLAG_RD, NULL, "MAC statistics");
3743 poidlist = SYSCTL_CHILDREN(poid);
3746 * We (ab)use the length argument (arg2) to pass on the offset
3747 * of the data that we are interested in. This is only required
3748 * for the quad counters that are updated from the hardware (we
3749 * make sure that we return the latest value).
3750 * sysctl_handle_macstat first updates *all* the counters from
3751 * the hardware, and then returns the latest value of the
3752 * requested counter. Best would be to update only the
3753 * requested counter from hardware, but t3_mac_update_stats()
3754 * hides all the register details and we don't want to dive into
3757 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3758 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3759 sysctl_handle_macstat, "QU", 0)
3760 CXGB_SYSCTL_ADD_QUAD(tx_octets);
3761 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3762 CXGB_SYSCTL_ADD_QUAD(tx_frames);
3763 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3764 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3765 CXGB_SYSCTL_ADD_QUAD(tx_pause);
3766 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3767 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3768 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3769 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3770 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3771 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3772 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3773 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3774 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3775 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3776 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3777 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3778 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3779 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3780 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3781 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3782 CXGB_SYSCTL_ADD_QUAD(rx_octets);
3783 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3784 CXGB_SYSCTL_ADD_QUAD(rx_frames);
3785 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3786 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3787 CXGB_SYSCTL_ADD_QUAD(rx_pause);
3788 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3789 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3790 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3791 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3792 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3793 CXGB_SYSCTL_ADD_QUAD(rx_runt);
3794 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3795 CXGB_SYSCTL_ADD_QUAD(rx_short);
3796 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3797 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3798 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3799 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3800 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3801 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3802 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3803 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3804 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3805 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3806 #undef CXGB_SYSCTL_ADD_QUAD
3808 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3809 CTLFLAG_RD, &mstats->a, 0)
3810 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3811 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3812 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3813 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3814 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3815 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3816 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3817 CXGB_SYSCTL_ADD_ULONG(num_toggled);
3818 CXGB_SYSCTL_ADD_ULONG(num_resets);
3819 CXGB_SYSCTL_ADD_ULONG(link_faults);
3820 #undef CXGB_SYSCTL_ADD_ULONG
3825 * t3_get_desc - dump an SGE descriptor for debugging purposes
3826 * @qs: the queue set
3827 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
3828 * @idx: the descriptor index in the queue
3829 * @data: buffer that receives the descriptor; must hold one full descriptor
3831 * Copies the HW descriptor at index @idx of the selected SGE queue into
3832 * @data. Returns the size of the copied descriptor in bytes.
3835 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3836 unsigned char *data)
3842 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3844 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3845 return sizeof(struct tx_desc);
3849 if (!qs->rspq.desc || idx >= qs->rspq.size)
3851 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3852 return sizeof(struct rsp_desc);
3856 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3858 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3859 return sizeof(struct rx_desc);