1 /**************************************************************************
3 Copyright (c) 2007-2009, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include "opt_inet6.h"
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/module.h>
42 #include <machine/bus.h>
43 #include <machine/resource.h>
44 #include <sys/bus_dma.h>
46 #include <sys/queue.h>
47 #include <sys/sysctl.h>
48 #include <sys/taskqueue.h>
52 #include <sys/sched.h>
54 #include <sys/systm.h>
55 #include <sys/syslog.h>
56 #include <sys/socket.h>
57 #include <sys/sglist.h>
60 #include <net/if_var.h>
62 #include <net/ethernet.h>
63 #include <net/if_vlan_var.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip6.h>
69 #include <netinet/tcp.h>
71 #include <dev/pci/pcireg.h>
72 #include <dev/pci/pcivar.h>
77 #include <cxgb_include.h>
81 int multiq_tx_enable = 1;
84 CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
87 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
88 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
89 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
90 "size of per-queue mbuf ring");
92 static int cxgb_tx_coalesce_force = 0;
93 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN,
94 &cxgb_tx_coalesce_force, 0,
95 "coalesce small packets into a single work request regardless of ring state");
/*
 * Bounds and defaults for the coalescing and reclaim tunables, all derived
 * from TX_ETH_Q_SIZE.  Every replacement list is fully parenthesized so the
 * macros evaluate as a single value when used inside larger expressions.
 */
#define COALESCE_START_DEFAULT	((TX_ETH_Q_SIZE) >> 1)
#define COALESCE_START_MAX	((TX_ETH_Q_SIZE) - ((TX_ETH_Q_SIZE) >> 3))
#define COALESCE_STOP_DEFAULT	((TX_ETH_Q_SIZE) >> 2)
#define COALESCE_STOP_MIN	((TX_ETH_Q_SIZE) >> 5)
#define TX_RECLAIM_DEFAULT	((TX_ETH_Q_SIZE) >> 5)
#define TX_RECLAIM_MAX		((TX_ETH_Q_SIZE) >> 2)
#define TX_RECLAIM_MIN		((TX_ETH_Q_SIZE) >> 6)
106 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
107 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN,
108 &cxgb_tx_coalesce_enable_start, 0,
109 "coalesce enable threshold");
110 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
111 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN,
112 &cxgb_tx_coalesce_enable_stop, 0,
113 "coalesce disable threshold");
114 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
115 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN,
116 &cxgb_tx_reclaim_threshold, 0,
117 "tx cleaning minimum threshold");
120 * XXX don't re-enable this until TOE stops assuming
123 static int recycle_enable = 0;
125 extern int cxgb_use_16k_clusters;
126 extern int nmbjumbop;
127 extern int nmbjumbo9;
128 extern int nmbjumbo16;
132 #define SGE_RX_SM_BUF_SIZE 1536
133 #define SGE_RX_DROP_THRES 16
134 #define SGE_RX_COPY_THRES 128
137 * Period of the Tx buffer reclaim timer. This timer does not need to run
138 * frequently as Tx buffers are usually reclaimed by new Tx packets.
140 #define TX_RECLAIM_PERIOD (hz >> 1)
143 * Values for sge_txq.flags
146 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
147 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
151 uint64_t flit[TX_DESC_FLITS];
161 struct rsp_desc { /* response queue descriptor */
162 struct rss_header rss_hdr;
165 uint8_t imm_data[47];
169 #define RX_SW_DESC_MAP_CREATED (1 << 0)
170 #define TX_SW_DESC_MAP_CREATED (1 << 1)
171 #define RX_SW_DESC_INUSE (1 << 3)
172 #define TX_SW_DESC_MAPPED (1 << 4)
174 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
175 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
176 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
177 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
179 struct tx_sw_desc { /* SW state per Tx descriptor */
185 struct rx_sw_desc { /* SW state per Rx descriptor */
198 struct refill_fl_cb_arg {
200 bus_dma_segment_t seg;
206 * Maps a number of flits to the number of Tx descriptors that can hold them.
209 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
211 * HW allows up to 4 descriptors to be combined into a WR.
213 static uint8_t flit_desc_map[] = {
215 #if SGE_NUM_GENBITS == 1
216 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
217 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
218 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
219 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
220 #elif SGE_NUM_GENBITS == 2
221 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
222 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
223 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
224 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
226 # error "SGE_NUM_GENBITS must be 1 or 2"
230 #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED)
231 #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock)
232 #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock)
233 #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock)
234 #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
235 #define TXQ_RING_NEEDS_ENQUEUE(qs) \
236 drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
237 #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
238 #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \
239 drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
240 #define TXQ_RING_DEQUEUE(qs) \
241 drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
245 static void sge_timer_cb(void *arg);
246 static void sge_timer_reclaim(void *arg, int ncount);
247 static void sge_txq_reclaim_handler(void *arg, int ncount);
248 static void cxgb_start_locked(struct sge_qset *qs);
251 * XXX need to cope with bursty scheduling by looking at a wider
252 * window than we are now for determining the need for coalescing
255 static __inline uint64_t
256 check_pkt_coalesce(struct sge_qset *qs)
262 if (__predict_false(cxgb_tx_coalesce_force))
264 txq = &qs->txq[TXQ_ETH];
265 sc = qs->port->adapter;
266 fill = &sc->tunq_fill[qs->idx];
268 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
269 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
270 if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
271 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN;
273 * if the hardware transmit queue is more than 1/8 full
274 * we mark it as coalescing - we drop back from coalescing
275 * when we go below 1/32 full and there are no packets enqueued,
276 * this provides us with some degree of hysteresis
278 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
279 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
281 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
284 return (sc->tunq_coalesce);
289 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
292 #if _BYTE_ORDER == _LITTLE_ENDIAN
294 wr_hilo |= (((uint64_t)wr_lo)<<32);
297 wr_hilo |= (((uint64_t)wr_hi)<<32);
299 wrp->wrh_hilo = wr_hilo;
303 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
312 struct coalesce_info {
318 coalesce_check(struct mbuf *m, void *arg)
320 struct coalesce_info *ci = arg;
321 int *count = &ci->count;
322 int *nbytes = &ci->nbytes;
324 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
325 (*count < 7) && (m->m_next == NULL))) {
334 cxgb_dequeue(struct sge_qset *qs)
336 struct mbuf *m, *m_head, *m_tail;
337 struct coalesce_info ci;
340 if (check_pkt_coalesce(qs) == 0)
341 return TXQ_RING_DEQUEUE(qs);
343 m_head = m_tail = NULL;
344 ci.count = ci.nbytes = 0;
346 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
347 if (m_head == NULL) {
349 } else if (m != NULL) {
350 m_tail->m_nextpkt = m;
355 panic("trying to coalesce %d packets in to one WR", ci.count);
360 * reclaim_completed_tx - reclaims completed Tx descriptors
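 *	@qs: the queue set that owns the Tx queue
 *	@reclaim_min: threshold compared against the number of reclaimable descriptors
 *	@queue: index into @qs->txq of the Tx queue to reclaim descriptors from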
364 * Reclaims Tx descriptors that the SGE has indicated it has processed,
365 * and frees the associated buffers if possible. Called with the Tx
369 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
371 struct sge_txq *q = &qs->txq[queue];
372 int reclaim = desc_reclaimable(q);
374 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
375 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
376 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
378 if (reclaim < reclaim_min)
381 mtx_assert(&qs->lock, MA_OWNED);
383 t3_free_tx_desc(qs, reclaim, queue);
384 q->cleaned += reclaim;
385 q->in_use -= reclaim;
387 if (isset(&qs->txq_stopped, TXQ_ETH))
388 clrbit(&qs->txq_stopped, TXQ_ETH);
394 * should_restart_tx - are there enough resources to restart a Tx queue?
397 * Checks if there are enough descriptors to restart a suspended Tx queue.
400 should_restart_tx(const struct sge_txq *q)
402 unsigned int r = q->processed - q->cleaned;
404 return q->in_use - r < (q->size >> 1);
408 * t3_sge_init - initialize SGE
410 * @p: the SGE parameters
412 * Performs SGE initialization needed every time after a chip reset.
413 * We do not initialize any of the queue sets here, instead the driver
414 * top-level must request those individually. We also do not enable DMA
415 * here, that should be done after the queues have been set up.
418 t3_sge_init(adapter_t *adap, struct sge_params *p)
422 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
424 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
425 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
426 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
427 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
428 #if SGE_NUM_GENBITS == 1
429 ctrl |= F_EGRGENCTRL;
431 if (adap->params.rev > 0) {
432 if (!(adap->flags & (USING_MSIX | USING_MSI)))
433 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
435 t3_write_reg(adap, A_SG_CONTROL, ctrl);
436 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
437 V_LORCQDRBTHRSH(512));
438 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
439 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
440 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
441 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
442 adap->params.rev < T3_REV_C ? 1000 : 500);
443 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
444 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
445 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
446 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
447 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
/**
 *	sgl_len - number of flits occupied by a scatter/gather list
 *	@n: the number of SGL entries
 *
 *	Every pair of entries takes three flits; a trailing unpaired entry
 *	takes two.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	unsigned int flits = (3 * n) / 2;

	/* An odd entry count needs one extra flit beyond the rounded-down 3n/2. */
	if (n & 1)
		flits++;
	return (flits);
}
465 * get_imm_packet - return the next ingress packet buffer from a response
466 * @resp: the response descriptor containing the packet data
468 * Return a packet containing the immediate data of the given response.
471 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
474 if (resp->rss_hdr.opcode == CPL_RX_DATA) {
475 const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
476 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
477 } else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
478 const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
479 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
481 m->m_len = IMMED_PKT_SIZE;
482 m->m_ext.ext_buf = NULL;
483 m->m_ext.ext_type = 0;
484 memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len);
488 static __inline u_int
489 flits_to_desc(u_int n)
491 return (flit_desc_map[n]);
494 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
495 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
496 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
497 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
499 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
500 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
504 * t3_sge_err_intr_handler - SGE async event interrupt handler
505 * @adapter: the adapter
507 * Interrupt handler for SGE asynchronous (non-data) events.
510 t3_sge_err_intr_handler(adapter_t *adapter)
512 unsigned int v, status;
514 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
515 if (status & SGE_PARERR)
516 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
517 status & SGE_PARERR);
518 if (status & SGE_FRAMINGERR)
519 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
520 status & SGE_FRAMINGERR);
521 if (status & F_RSPQCREDITOVERFOW)
522 CH_ALERT(adapter, "SGE response queue credit overflow\n");
524 if (status & F_RSPQDISABLED) {
525 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
528 "packet delivered to disabled response queue (0x%x)\n",
529 (v >> S_RSPQ0DISABLED) & 0xff);
532 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
533 if (status & SGE_FATALERR)
534 t3_fatal_err(adapter);
538 t3_sge_prep(adapter_t *adap, struct sge_params *p)
540 int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
542 nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
543 nqsets *= adap->params.nports;
545 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
547 while (!powerof2(fl_q_size))
550 use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
553 #if __FreeBSD_version >= 700111
555 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
556 jumbo_buf_size = MJUM16BYTES;
558 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
559 jumbo_buf_size = MJUM9BYTES;
562 jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
563 jumbo_buf_size = MJUMPAGESIZE;
565 while (!powerof2(jumbo_q_size))
568 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
569 device_printf(adap->dev,
570 "Insufficient clusters and/or jumbo buffers.\n");
572 p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
574 for (i = 0; i < SGE_QSETS; ++i) {
575 struct qset_params *q = p->qset + i;
577 if (adap->params.nports > 2) {
578 q->coalesce_usecs = 50;
581 q->coalesce_usecs = 10;
583 q->coalesce_usecs = 5;
587 q->rspq_size = RSPQ_Q_SIZE;
588 q->fl_size = fl_q_size;
589 q->jumbo_size = jumbo_q_size;
590 q->jumbo_buf_size = jumbo_buf_size;
591 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
592 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
593 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
599 t3_sge_alloc(adapter_t *sc)
602 /* The parent tag. */
603 if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
604 1, 0, /* algnmnt, boundary */
605 BUS_SPACE_MAXADDR, /* lowaddr */
606 BUS_SPACE_MAXADDR, /* highaddr */
607 NULL, NULL, /* filter, filterarg */
608 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
609 BUS_SPACE_UNRESTRICTED, /* nsegments */
610 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
612 NULL, NULL, /* lock, lockarg */
614 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
619 * DMA tag for normal sized RX frames
621 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
622 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
623 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
624 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
629 * DMA tag for jumbo sized RX frames.
631 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
632 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
633 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
634 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
639 * DMA tag for TX frames.
641 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
642 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
643 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
644 NULL, NULL, &sc->tx_dmat)) {
645 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
653 t3_sge_free(struct adapter * sc)
656 if (sc->tx_dmat != NULL)
657 bus_dma_tag_destroy(sc->tx_dmat);
659 if (sc->rx_jumbo_dmat != NULL)
660 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
662 if (sc->rx_dmat != NULL)
663 bus_dma_tag_destroy(sc->rx_dmat);
665 if (sc->parent_dmat != NULL)
666 bus_dma_tag_destroy(sc->parent_dmat);
672 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
675 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
676 qs->rspq.polling = 0 /* p->polling */;
679 #if !defined(__i386__) && !defined(__amd64__)
681 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
683 struct refill_fl_cb_arg *cb_arg = arg;
685 cb_arg->error = error;
686 cb_arg->seg = segs[0];
692 * refill_fl - refill an SGE free-buffer list
693 * @sc: the controller softc
694 * @q: the free-list to refill
695 * @n: the number of new buffers to allocate
697 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
698 * The caller must assure that @n does not exceed the queue's capacity.
701 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
703 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
704 struct rx_desc *d = &q->desc[q->pidx];
705 struct refill_fl_cb_arg cb_arg;
713 * We allocate an uninitialized mbuf + cluster, mbuf is
714 * initialized after rx.
716 if (q->zone == zone_pack) {
717 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
719 cl = m->m_ext.ext_buf;
721 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
723 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
724 uma_zfree(q->zone, cl);
728 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
729 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
730 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
731 uma_zfree(q->zone, cl);
734 sd->flags |= RX_SW_DESC_MAP_CREATED;
736 #if !defined(__i386__) && !defined(__amd64__)
737 err = bus_dmamap_load(q->entry_tag, sd->map,
738 cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
740 if (err != 0 || cb_arg.error) {
741 if (q->zone == zone_pack)
742 uma_zfree(q->zone, cl);
747 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
749 sd->flags |= RX_SW_DESC_INUSE;
752 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
753 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
754 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
755 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
760 if (++q->pidx == q->size) {
771 if (q->db_pending >= 32) {
773 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
779 * free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
781 * @q: the SGE free list to clean up
783 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
784 * this queue should be stopped before calling this function.
787 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
789 u_int cidx = q->cidx;
791 while (q->credits--) {
792 struct rx_sw_desc *d = &q->sdesc[cidx];
794 if (d->flags & RX_SW_DESC_INUSE) {
795 bus_dmamap_unload(q->entry_tag, d->map);
796 bus_dmamap_destroy(q->entry_tag, d->map);
797 if (q->zone == zone_pack) {
798 m_init(d->m, zone_pack, MCLBYTES,
799 M_NOWAIT, MT_DATA, M_EXT);
800 uma_zfree(zone_pack, d->m);
802 m_init(d->m, zone_mbuf, MLEN,
803 M_NOWAIT, MT_DATA, 0);
804 uma_zfree(zone_mbuf, d->m);
805 uma_zfree(q->zone, d->rxsd_cl);
811 if (++cidx == q->size)
817 __refill_fl(adapter_t *adap, struct sge_fl *fl)
819 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
823 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
825 uint32_t reclaimable = fl->size - fl->credits;
828 refill_fl(adap, fl, min(max, reclaimable));
832 * recycle_rx_buf - recycle a receive buffer
833 * @adapter: the adapter
834 * @q: the SGE free list
835 * @idx: index of buffer to recycle
837 * Recycles the specified buffer on the given free list by adding it at
838 * the next available slot on the list.
841 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
843 struct rx_desc *from = &q->desc[idx];
844 struct rx_desc *to = &q->desc[q->pidx];
846 q->sdesc[q->pidx] = q->sdesc[idx];
847 to->addr_lo = from->addr_lo; // already big endian
848 to->addr_hi = from->addr_hi; // likewise
849 wmb(); /* necessary ? */
850 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
851 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
854 if (++q->pidx == q->size) {
858 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
862 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
867 *addr = segs[0].ds_addr;
871 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
872 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
873 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
875 size_t len = nelem * elem_size;
880 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
881 BUS_SPACE_MAXADDR_32BIT,
882 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
883 len, 0, NULL, NULL, tag)) != 0) {
884 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
888 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
890 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
894 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
899 len = nelem * sw_size;
900 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
903 if (parent_entry_tag == NULL)
906 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
907 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
908 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
909 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
910 NULL, NULL, entry_tag)) != 0) {
911 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
918 sge_slow_intr_handler(void *arg, int ncount)
922 t3_slow_intr_handler(sc);
923 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
924 (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
928 * sge_timer_cb - perform periodic maintenance of an SGE qset
929 * @data: the SGE queue set to maintain
931 * Runs periodically from a timer to perform maintenance of an SGE queue
 *	set.  It performs four tasks:
934 * a) Cleans up any completed Tx descriptors that may still be pending.
935 * Normal descriptor cleanup happens when new packets are added to a Tx
936 * queue so this timer is relatively infrequent and does any cleanup only
937 * if the Tx queue has not seen any new packets in a while. We make a
938 * best effort attempt to reclaim descriptors, in that we don't wait
939 * around if we cannot get a queue's lock (which most likely is because
940 * someone else is queueing new packets and so will also handle the clean
941 * up). Since control queues use immediate data exclusively we don't
942 * bother cleaning them up here.
944 * b) Replenishes Rx queues that have run out due to memory shortage.
945 * Normally new Rx buffers are added when existing ones are consumed but
946 * when out of memory a queue can become empty. We try to add only a few
947 * buffers here, the queue will be replenished fully as these new buffers
948 * are used up if memory shortage has subsided.
950 * c) Return coalesced response queue credits in case a response queue is
953 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
954 * fifo overflows and the FW doesn't implement any recovery scheme yet.
957 sge_timer_cb(void *arg)
960 if ((sc->flags & USING_MSIX) == 0) {
962 struct port_info *pi;
966 int reclaim_ofl, refill_rx;
968 if (sc->open_device_map == 0)
971 for (i = 0; i < sc->params.nports; i++) {
973 for (j = 0; j < pi->nqsets; j++) {
974 qs = &sc->sge.qs[pi->first_qset + j];
976 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
977 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
978 (qs->fl[1].credits < qs->fl[1].size));
979 if (reclaim_ofl || refill_rx) {
980 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
987 if (sc->params.nports > 2) {
990 for_each_port(sc, i) {
991 struct port_info *pi = &sc->port[i];
993 t3_write_reg(sc, A_SG_KDOORBELL,
995 (FW_TUNNEL_SGEEC_START + pi->first_qset));
998 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
999 sc->open_device_map != 0)
1000 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1004 * This is meant to be a catch-all function to keep sge state private
1009 t3_sge_init_adapter(adapter_t *sc)
1011 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
1012 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1013 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
1018 t3_sge_reset_adapter(adapter_t *sc)
1020 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1025 t3_sge_init_port(struct port_info *pi)
1027 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
1032 * refill_rspq - replenish an SGE response queue
1033 * @adapter: the adapter
1034 * @q: the response queue to replenish
1035 * @credits: how many new responses to make available
1037 * Replenishes a response queue by making the supplied number of responses
1040 static __inline void
1041 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1044 /* mbufs are allocated on demand when a rspq entry is processed. */
1045 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1046 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1050 sge_txq_reclaim_handler(void *arg, int ncount)
1052 struct sge_qset *qs = arg;
1055 for (i = 0; i < 3; i++)
1056 reclaim_completed_tx(qs, 16, i);
1060 sge_timer_reclaim(void *arg, int ncount)
1062 struct port_info *pi = arg;
1063 int i, nqsets = pi->nqsets;
1064 adapter_t *sc = pi->adapter;
1065 struct sge_qset *qs;
1068 KASSERT((sc->flags & USING_MSIX) == 0,
1069 ("can't call timer reclaim for msi-x"));
1071 for (i = 0; i < nqsets; i++) {
1072 qs = &sc->sge.qs[pi->first_qset + i];
1074 reclaim_completed_tx(qs, 16, TXQ_OFLD);
1075 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
1076 &sc->sge.qs[0].rspq.lock;
1078 if (mtx_trylock(lock)) {
1079 /* XXX currently assume that we are *NOT* polling */
1080 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
1082 if (qs->fl[0].credits < qs->fl[0].size - 16)
1083 __refill_fl(sc, &qs->fl[0]);
1084 if (qs->fl[1].credits < qs->fl[1].size - 16)
1085 __refill_fl(sc, &qs->fl[1]);
1087 if (status & (1 << qs->rspq.cntxt_id)) {
1088 if (qs->rspq.credits) {
1089 refill_rspq(sc, &qs->rspq, 1);
1091 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1092 1 << qs->rspq.cntxt_id);
1101 * init_qset_cntxt - initialize an SGE queue set context info
1102 * @qs: the queue set
1103 * @id: the queue set id
1105 * Initializes the TIDs and context ids for the queues of a queue set.
1108 init_qset_cntxt(struct sge_qset *qs, u_int id)
1111 qs->rspq.cntxt_id = id;
1112 qs->fl[0].cntxt_id = 2 * id;
1113 qs->fl[1].cntxt_id = 2 * id + 1;
1114 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
1115 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
1116 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
1117 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
1118 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
1120 mbufq_init(&qs->txq[TXQ_ETH].sendq);
1121 mbufq_init(&qs->txq[TXQ_OFLD].sendq);
1122 mbufq_init(&qs->txq[TXQ_CTRL].sendq);
1127 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
1129 txq->in_use += ndesc;
1131 * XXX we don't handle stopping of queue
1132 * presumably start handles this when we bump against the end
1134 txqs->gen = txq->gen;
1135 txq->unacked += ndesc;
1136 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
1138 txqs->pidx = txq->pidx;
1141 if (((txqs->pidx > txq->cidx) &&
1142 (txq->pidx < txqs->pidx) &&
1143 (txq->pidx >= txq->cidx)) ||
1144 ((txqs->pidx < txq->cidx) &&
1145 (txq->pidx >= txq-> cidx)) ||
1146 ((txqs->pidx < txq->cidx) &&
1147 (txq->cidx < txqs->pidx)))
1148 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
1149 txqs->pidx, txq->pidx, txq->cidx);
1151 if (txq->pidx >= txq->size) {
1152 txq->pidx -= txq->size;
1159 * calc_tx_descs - calculate the number of Tx descriptors for a packet
1160 * @m: the packet mbufs
1161 * @nsegs: the number of segments
1163 * Returns the number of Tx descriptors needed for the given Ethernet
1164 * packet. Ethernet packets require addition of WR and CPL headers.
1166 static __inline unsigned int
1167 calc_tx_descs(const struct mbuf *m, int nsegs)
1171 if (m->m_pkthdr.len <= PIO_LEN)
1174 flits = sgl_len(nsegs) + 2;
1175 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1178 return flits_to_desc(flits);
1182 * make_sgl - populate a scatter/gather list for a packet
1183 * @sgp: the SGL to populate
1184 * @segs: the packet dma segments
1185 * @nsegs: the number of segments
1187 * Generates a scatter/gather list for the buffers that make up a packet
1188 * and returns the SGL size in 8-byte words. The caller must size the SGL
1191 static __inline void
1192 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1196 for (idx = 0, i = 0; i < nsegs; i++) {
1198 * firmware doesn't like empty segments
1200 if (segs[i].ds_len == 0)
1205 sgp->len[idx] = htobe32(segs[i].ds_len);
1206 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1217 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1218 * @adap: the adapter
1221 * Ring the doorbell if a Tx queue is asleep. There is a natural race,
1222 * where the HW is going to sleep just after we checked, however,
1223 * then the interrupt handler will detect the outstanding TX packet
1224 * and ring the doorbell for us.
1226 * When GTS is disabled we unconditionally ring the doorbell.
1228 static __inline void
1229 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
1232 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1233 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1234 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1236 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1239 t3_write_reg(adap, A_SG_KDOORBELL,
1240 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1243 if (mustring || ++q->db_pending >= 32) {
1244 wmb(); /* write descriptors before telling HW */
1245 t3_write_reg(adap, A_SG_KDOORBELL,
1246 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
/*
 * wr_gen2 - write the second generation value of a Tx descriptor
 * @d: the Tx descriptor
 * @gen: the generation value
 *
 * When SGE_NUM_GENBITS is 2 the generation, converted with htobe64(), is
 * stored in the descriptor's last flit.  For any other SGE_NUM_GENBITS the
 * function body is empty.
 */
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] =
	    htobe64(gen);
#endif
}
1261 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1262 * @ndesc: number of Tx descriptors spanned by the SGL
1263 * @txd: first Tx descriptor to be written
1264 * @txqs: txq state (generation and producer index)
1265 * @txq: the SGE Tx queue
1267 * @flits: number of flits to the start of the SGL in the first descriptor
1268 * @sgl_flits: the SGL size in flits
1269 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1270 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1272 * Write a work request header and an associated SGL. If the SGL is
1273 * small enough to fit into one Tx descriptor it has already been written
1274 * and we just need to write the WR header. Otherwise we distribute the
1275 * SGL across the number of descriptors it spans.
/*
 * Write the work request header for a request occupying ndesc Tx
 * descriptors and, when ndesc > 1, copy the SGL flits across those
 * descriptors.  wr_hi and wr_lo are OR-ed into the header words as given.
 */
1278 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1279 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1280 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1283 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1284 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
/* Single descriptor: one header carrying both SOP and EOP. */
1286 if (__predict_true(ndesc == 1)) {
1287 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1288 V_WR_SGLSFLT(flits)) | wr_hi,
1289 htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
1292 wr_gen2(txd, txqs->gen);
/*
 * Multiple descriptors: keep the starting generation (ogen) and the first
 * header (wp); the low word of that first header is filled in last, at
 * the end of this function.
 */
1295 unsigned int ogen = txqs->gen;
1296 const uint64_t *fp = (const uint64_t *)sgl;
1297 struct work_request_hdr *wp = wrp;
1299 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1300 V_WR_SGLSFLT(flits)) | wr_hi;
1303 unsigned int avail = WR_FLITS - flits;
1305 if (avail > sgl_flits)
1307 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1316 if (++txqs->pidx == txq->size) {
1324 * when the head of the mbuf chain
1325 * is freed all clusters will be freed
1328 wrp = (struct work_request_hdr *)txd;
1329 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
1330 V_WR_SGLSFLT(1)) | wr_hi;
1331 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
1333 V_WR_GEN(txqs->gen)) | wr_lo;
1334 wr_gen2(txd, txqs->gen);
/* The last header written gets the end-of-packet flag. */
1337 wrp->wrh_hi |= htonl(F_WR_EOP);
/* Complete the first descriptor using the generation saved in ogen. */
1339 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1340 wr_gen2((struct tx_desc *)wp, ogen);
/*
 * TCPPKTHDRSIZE: Ethernet header plus 20 bytes each for the IP and TCP
 * headers (see the sizeof note below).
 * GET_VTAG(cntrl, m): when mbuf m has M_VLANTAG set, OR the VLAN-valid
 * flag and the tag from m_pkthdr.ether_vtag into cntrl.
 */
1344 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
1345 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
1347 #define GET_VTAG(cntrl, m) \
1349 if ((m)->m_flags & M_VLANTAG) \
1350 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
/*
 * Build the Tx work request(s) for the packet (or m_nextpkt-linked batch
 * of packets) referenced by *m on the Ethernet Tx queue of qs and ring
 * the doorbell through check_ring_tx_db().  Asserts that qs->lock is
 * owned.  Three encodings are visible below: a coalesced batch
 * (cpl_tx_pkt_batch), a TSO packet (cpl_tx_pkt_lso) and a plain packet
 * (cpl_tx_pkt).  Packets with mlen <= PIO_LEN are copied into the
 * descriptor as immediate data; others are described by an SGL.
 * NOTE(review): the return statements are not visible in this listing;
 * callers in this file treat a non-zero result as failure - confirm.
 */
1354 t3_encap(struct sge_qset *qs, struct mbuf **m)
1358 struct sge_txq *txq;
1359 struct txq_state txqs;
1360 struct port_info *pi;
1361 unsigned int ndesc, flits, cntrl, mlen;
1362 int err, nsegs, tso_info = 0;
1364 struct work_request_hdr *wrp;
1365 struct tx_sw_desc *txsd;
1366 struct sg_ent *sgp, *sgl;
1367 uint32_t wr_hi, wr_lo, sgl_flits;
1368 bus_dma_segment_t segs[TX_MAX_SEGS];
1370 struct tx_desc *txd;
/* Work on the Ethernet Tx queue at its current producer index. */
1374 txq = &qs->txq[TXQ_ETH];
1375 txd = &txq->desc[txq->pidx];
1376 txsd = &txq->sdesc[txq->pidx];
1382 mtx_assert(&qs->lock, MA_OWNED);
1383 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1384 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
/* TSO is only set up for a single packet made of more than one mbuf. */
1386 if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
1387 m0->m_pkthdr.csum_flags & (CSUM_TSO))
1388 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
/*
 * DMA-map the data: a batch goes through busdma_map_sg_vec(), anything
 * else through busdma_map_sg_collapse(), which receives &m0 and so may
 * replace the mbuf pointer.
 */
1390 if (m0->m_nextpkt != NULL) {
1391 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
1395 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
1396 &m0, segs, &nsegs))) {
1398 printf("failed ... err=%d\n", err);
/* Reserve ndesc descriptors; txqs receives the queue state to use. */
1401 mlen = m0->m_pkthdr.len;
1402 ndesc = calc_tx_descs(m0, nsegs);
1404 txq_prod(txq, ndesc, &txqs);
1406 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
/*
 * Batch path: one cpl_tx_pkt_batch entry per segment; flits is two per
 * entry plus one.
 */
1409 if (m0->m_nextpkt != NULL) {
1410 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1414 panic("trying to coalesce %d packets in to one WR", nsegs);
1415 txq->txq_coalesced += nsegs;
1416 wrp = (struct work_request_hdr *)txd;
1417 flits = nsegs*2 + 1;
1419 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
1420 struct cpl_tx_pkt_batch_entry *cbe;
1422 uint32_t *hflit = (uint32_t *)&flit;
1423 int cflags = m0->m_pkthdr.csum_flags;
1425 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1426 GET_VTAG(cntrl, m0);
1427 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1428 if (__predict_false(!(cflags & CSUM_IP)))
1429 cntrl |= F_TXPKT_IPCSUM_DIS;
1430 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
1431 CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
1432 cntrl |= F_TXPKT_L4CSUM_DIS;
1434 hflit[0] = htonl(cntrl);
1435 hflit[1] = htonl(segs[i].ds_len | 0x80000000);
1436 flit |= htobe64(1 << 24);
1437 cbe = &cpl_batch->pkt_entry[i];
1438 cbe->cntrl = hflit[0];
1439 cbe->len = hflit[1];
1440 cbe->addr = htobe64(segs[i].ds_addr);
1443 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1444 V_WR_SGLSFLT(flits)) |
1445 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1446 wr_lo = htonl(V_WR_LEN(flits) |
1447 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1448 set_wr_hdr(wrp, wr_hi, wr_lo);
1450 ETHER_BPF_MTAP(pi->ifp, m0);
1451 wr_gen2(txd, txqs.gen);
1452 check_ring_tx_db(sc, txq, 0);
/* TSO path: fill a cpl_tx_pkt_lso header from the packet headers. */
1454 } else if (tso_info) {
1456 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1457 struct ether_header *eh;
1462 GET_VTAG(cntrl, m0);
1463 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1464 hdr->cntrl = htonl(cntrl);
1465 hdr->len = htonl(mlen | 0x80000000);
1467 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
1468 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
1469 m0, mlen, m0->m_pkthdr.tso_segsz,
1470 (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
1471 panic("tx tso packet too small");
1474 /* Make sure that ether, ip, tcp headers are all in m0 */
1475 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1476 m0 = m_pullup(m0, TCPPKTHDRSIZE);
1477 if (__predict_false(m0 == NULL)) {
1478 /* XXX panic probably an overreaction */
1479 panic("couldn't fit header into mbuf");
1483 eh = mtod(m0, struct ether_header *);
1484 eth_type = eh->ether_type;
1485 if (eth_type == htons(ETHERTYPE_VLAN)) {
1486 struct ether_vlan_header *evh = (void *)eh;
1488 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
1490 eth_type = evh->evl_proto;
1492 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
1496 if (eth_type == htons(ETHERTYPE_IP)) {
1497 struct ip *ip = l3hdr;
1499 tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
1500 tcp = (struct tcphdr *)(ip + 1);
1501 } else if (eth_type == htons(ETHERTYPE_IPV6)) {
1502 struct ip6_hdr *ip6 = l3hdr;
1504 KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
1505 ("%s: CSUM_TSO with ip6_nxt %d",
1506 __func__, ip6->ip6_nxt));
1508 tso_info |= F_LSO_IPV6;
1509 tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
1510 tcp = (struct tcphdr *)(ip6 + 1);
1512 panic("%s: CSUM_TSO but neither ip nor ip6", __func__);
1514 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
1515 hdr->lso_info = htonl(tso_info);
/* Small TSO packet: copy the data in after the three header flits. */
1517 if (__predict_false(mlen <= PIO_LEN)) {
1519 * pkt not undersized but fits in PIO_LEN
1520 * Indicates a TSO bug at the higher levels.
1523 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1524 flits = (mlen + 7) / 8 + 3;
1525 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1526 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1527 F_WR_SOP | F_WR_EOP | txqs.compl);
1528 wr_lo = htonl(V_WR_LEN(flits) |
1529 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1530 set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
1532 ETHER_BPF_MTAP(pi->ifp, m0);
1533 wr_gen2(txd, txqs.gen);
1534 check_ring_tx_db(sc, txq, 0);
/* Plain packet path: cpl_tx_pkt header with checksum-disable flags. */
1540 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1542 GET_VTAG(cntrl, m0);
1543 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1544 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1545 cntrl |= F_TXPKT_IPCSUM_DIS;
1546 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
1547 CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
1548 cntrl |= F_TXPKT_L4CSUM_DIS;
1549 cpl->cntrl = htonl(cntrl);
1550 cpl->len = htonl(mlen | 0x80000000);
/* Small packet: copy the data in after the two header flits. */
1552 if (mlen <= PIO_LEN) {
1554 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1555 flits = (mlen + 7) / 8 + 2;
1557 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1558 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1559 F_WR_SOP | F_WR_EOP | txqs.compl);
1560 wr_lo = htonl(V_WR_LEN(flits) |
1561 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1562 set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
1564 ETHER_BPF_MTAP(pi->ifp, m0);
1565 wr_gen2(txd, txqs.gen);
1566 check_ring_tx_db(sc, txq, 0);
/*
 * SGL path: with one descriptor the SGL is built in place after the
 * header flits, otherwise in the separate sgl buffer, and then handed
 * to write_wr_hdr_sgl().
 */
1572 wrp = (struct work_request_hdr *)txd;
1573 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1574 make_sgl(sgp, segs, nsegs);
1576 sgl_flits = sgl_len(nsegs);
1578 ETHER_BPF_MTAP(pi->ifp, m0);
1580 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
1581 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1582 wr_lo = htonl(V_WR_TID(txq->token));
1583 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
1584 sgl_flits, wr_hi, wr_lo);
1585 check_ring_tx_db(sc, txq, 0);
/*
 * Periodic callout for the Ethernet Tx queue of the qset passed in arg.
 * It compares txq->in_use against the coalescing stop/start thresholds,
 * tries to take the Tx queue lock to run cxgb_start_locked() with
 * QS_FLUSHING set, and re-arms itself every hz/4 ticks while the
 * interface has IFF_DRV_RUNNING set.
 * NOTE(review): the statements executed when a threshold test succeeds
 * are not visible in this listing.
 */
1591 cxgb_tx_watchdog(void *arg)
1593 struct sge_qset *qs = arg;
1594 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1596 if (qs->coalescing != 0 &&
1597 (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
1600 else if (qs->coalescing == 0 &&
1601 (txq->in_use >= cxgb_tx_coalesce_enable_start))
/* Best effort only: nothing is flushed when the lock is contended. */
1603 if (TXQ_TRYLOCK(qs)) {
1604 qs->qs_flags |= QS_FLUSHING;
1605 cxgb_start_locked(qs);
1606 qs->qs_flags &= ~QS_FLUSHING;
/* Re-arm on the CPU recorded in the callout. */
1609 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
1610 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
1611 qs, txq->txq_watchdog.c_cpu);
/*
 * Callout handler for txq_timer on the qset passed in arg.  It tests
 * whether the queue is not coalescing while at least one eighth of the
 * ring (size >> 3) is in use, then tries to take the Tx queue lock and
 * runs cxgb_start_locked() with QS_TIMEOUT set for the duration.
 * NOTE(review): the statement guarded by the first test is not visible
 * in this listing.
 */
1615 cxgb_tx_timeout(void *arg)
1617 struct sge_qset *qs = arg;
1618 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1620 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
1622 if (TXQ_TRYLOCK(qs)) {
1623 qs->qs_flags |= QS_TIMEOUT;
1624 cxgb_start_locked(qs);
1625 qs->qs_flags &= ~QS_TIMEOUT;
/*
 * Drain the software ring of qs into the Ethernet Tx queue.  Asserts the
 * Tx queue lock via TXQ_LOCK_ASSERT.  The loop runs while the ring is
 * non-empty, the interface is running and the link is up; each pass
 * reclaims completed descriptors, checks that more than TX_MAX_DESC
 * descriptors are free, dequeues one packet and passes it to t3_encap().
 * Afterwards a pending doorbell is forced and, if packets remain and the
 * link is up, the txq_timer callout is scheduled one tick ahead unless
 * already pending.
 * NOTE(review): the statements taken when a loop check fails are not
 * visible in this listing.
 */
1631 cxgb_start_locked(struct sge_qset *qs)
1633 struct mbuf *m_head = NULL;
1634 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1635 struct port_info *pi = qs->port;
1636 struct ifnet *ifp = pi->ifp;
/* A flush or timeout pass reclaims with a threshold of zero first. */
1638 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
1639 reclaim_completed_tx(qs, 0, TXQ_ETH);
1641 if (!pi->link_config.link_ok) {
1645 TXQ_LOCK_ASSERT(qs);
1646 while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
1647 pi->link_config.link_ok) {
1648 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1650 if (txq->size - txq->in_use <= TX_MAX_DESC)
1653 if ((m_head = cxgb_dequeue(qs)) == NULL)
1656 * Encapsulation can modify our pointer, and or make it
1657 * NULL on failure. In that event, we can't requeue.
1659 if (t3_encap(qs, &m_head) || m_head == NULL)
1665 if (txq->db_pending)
1666 check_ring_tx_db(pi->adapter, txq, 1);
1668 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
1669 pi->link_config.link_ok)
1670 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1671 qs, txq->txq_timer.c_cpu);
/*
 * Transmit mbuf m on qs with the Tx queue lock asserted.  When the
 * conditions listed in the comment below hold, the packet is handed
 * straight to t3_encap() and the direct-transmit counters are updated;
 * otherwise it is enqueued on the buf_ring with drbr_enqueue().  The
 * function then reclaims completed descriptors and either runs
 * cxgb_start_locked() or schedules the txq_timer callout one tick ahead
 * if packets remain queued.
 * NOTE(review): the declarations of avail and error and the return
 * statements are not visible in this listing.
 */
1677 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
1679 struct port_info *pi = qs->port;
1680 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1681 struct buf_ring *br = txq->txq_mr;
1684 avail = txq->size - txq->in_use;
1685 TXQ_LOCK_ASSERT(qs);
1688 * We can only do a direct transmit if the following are true:
1689 * - we aren't coalescing (ring < 3/4 full)
1690 * - the link is up -- checked in caller
1691 * - there are no packets enqueued already
1692 * - there is space in hardware transmit queue
1694 if (check_pkt_coalesce(qs) == 0 &&
1695 !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
1696 if (t3_encap(qs, &m)) {
1698 (error = drbr_enqueue(ifp, br, m)) != 0)
1701 if (txq->db_pending)
1702 check_ring_tx_db(pi->adapter, txq, 1);
1705 * We've bypassed the buf ring so we need to update
1706 * the stats directly
1708 txq->txq_direct_packets++;
1709 txq->txq_direct_bytes += m->m_pkthdr.len;
1711 } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
1714 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1715 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
1716 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
1717 cxgb_start_locked(qs);
1718 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
1719 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1720 qs, txq->txq_timer.c_cpu);
/*
 * Transmit entry point taking an ifnet and an mbuf.  It first tests that
 * the interface is running and the link is up, then picks a queue set:
 * the first qset of the port by default, or flowid modulo the number of
 * qsets of the port when the mbuf carries a hash type.  If the Tx queue
 * lock can be taken the packet goes through cxgb_transmit_locked();
 * otherwise it is only enqueued on the buf_ring of that qset.
 * NOTE(review): the statements executed when the interface is down and
 * the unlock/return statements are not visible in this listing.
 */
1725 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
1727 struct sge_qset *qs;
1728 struct port_info *pi = ifp->if_softc;
1729 int error, qidx = pi->first_qset;
1731 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
1732 ||(!pi->link_config.link_ok)) {
1737 /* check if flowid is set */
1738 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1739 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
1741 qs = &pi->adapter->sge.qs[qidx];
1743 if (TXQ_TRYLOCK(qs)) {
1745 error = cxgb_transmit_locked(ifp, qs, m);
1748 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
/*
 * Queue flush hook taking an ifnet.
 * NOTE(review): no statement of the body is visible in this listing; the
 * inline comment below states the intent only - confirm against the
 * full source whether anything is actually flushed.
 */
1753 cxgb_qflush(struct ifnet *ifp)
1756 * flush any enqueued mbufs in the buf_rings
1757 * and in the transmit queues
1764 * write_imm - write a packet into a Tx descriptor as immediate data
1765 * @d: the Tx descriptor to write
1767 * @len: the length of packet data to write as immediate data
1768 * @gen: the generation bit value to write
1770 * Writes a packet as immediate data into a Tx descriptor. The packet
1771 * contains a work request at its beginning. We must write the packet
1772 * carefully so the SGE doesn't read accidentally before it's written in
/*
 * Copy a work request of len bytes starting at src into Tx descriptor d
 * as immediate data.  Everything after the work request header is copied
 * first; the header itself is written last through set_wr_hdr(), with
 * SOP, EOP, the byte count of the last flit (len & 7), the generation
 * and the length in flits OR-ed into the header words taken from src.
 */
1775 static __inline void
1776 write_imm(struct tx_desc *d, caddr_t src,
1777 unsigned int len, unsigned int gen)
1779 struct work_request_hdr *from = (struct work_request_hdr *)src;
1780 struct work_request_hdr *to = (struct work_request_hdr *)d;
1781 uint32_t wr_hi, wr_lo;
/* The request must fit one descriptor and contain at least a header. */
1783 KASSERT(len <= WR_LEN && len >= sizeof(*from),
1784 ("%s: invalid len %d", __func__, len));
1786 memcpy(&to[1], &from[1], len - sizeof(*from));
1787 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
1788 V_WR_BCNTLFLT(len & 7));
1789 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
1790 set_wr_hdr(to, wr_hi, wr_lo);
1796 * check_desc_avail - check descriptor availability on a send queue
1797 * @adap: the adapter
1799 * @m: the packet needing the descriptors
1800 * @ndesc: the number of Tx descriptors needed
1801 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1803 * Checks if the requested number of Tx descriptors is available on an
1804 * SGE send queue. If the queue is already suspended or not enough
1805 * descriptors are available the packet is queued for later transmission.
1806 * Must be called with the Tx queue locked.
1808 * Returns 0 if enough descriptors are available, 1 if there aren't
1809 * enough descriptors and the packet has been queued, and 2 if the caller
1810 * needs to retry because there weren't enough descriptors at the
1811 * beginning of the call but some freed up in the mean time.
/*
 * Decide whether mbuf m can be sent on queue q right away.  If the send
 * queue already holds packets, m is appended to it (label addq_exit).
 * If fewer than ndesc descriptors are free, the queue bit qid is set in
 * txq_stopped of the owning qset and then re-tested with
 * should_restart_tx() and test_and_clear_bit().
 * NOTE(review): the last parameter declaration and all return statements
 * are not visible in this listing.
 */
1814 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1815 struct mbuf *m, unsigned int ndesc,
1819 * XXX We currently only use this for checking the control queue
1820 * the control queue is only used for binding qsets which happens
1821 * at init time so we are guaranteed enough descriptors
1823 if (__predict_false(!mbufq_empty(&q->sendq))) {
1824 addq_exit: mbufq_tail(&q->sendq, m);
/* Not enough free descriptors: mark the queue stopped, then re-check. */
1827 if (__predict_false(q->size - q->in_use < ndesc)) {
1829 struct sge_qset *qs = txq_to_qset(q, qid);
1831 setbit(&qs->txq_stopped, qid);
1832 if (should_restart_tx(q) &&
1833 test_and_clear_bit(qid, &qs->txq_stopped))
1844 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1845 * @q: the SGE control Tx queue
1847 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1848 * that send only immediate data (presently just the control queues) and
1849 * thus do not have any mbufs
/*
 * Account for descriptors completed since the last call: the difference
 * between q->processed and q->cleaned is subtracted from q->in_use and
 * added to q->cleaned.  No buffers are touched here.
 */
1851 static __inline void
1852 reclaim_completed_tx_imm(struct sge_txq *q)
1854 unsigned int reclaim = q->processed - q->cleaned;
1856 q->in_use -= reclaim;
1857 q->cleaned += reclaim;
1861 * ctrl_xmit - send a packet through an SGE control Tx queue
1862 * @adap: the adapter
1863 * @q: the control queue
1866 * Send a packet through an SGE control Tx queue. Packets sent through
1867 * a control queue must fit entirely as immediate data in a single Tx
1868 * descriptor and have no page fragments.
/*
 * Send mbuf m, which starts with a work request header, on the control
 * Tx queue of qs.  The header gets SOP and EOP set and its low word
 * replaced by the queue token.  After reclaiming completed descriptors
 * and checking availability through check_desc_avail(), the request is
 * written as immediate data at the producer index and the doorbell
 * register is written.
 * NOTE(review): locking, the handling of a non-zero check_desc_avail()
 * result and the return value are not visible in this listing.
 */
1871 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1874 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1875 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1877 KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
1879 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1880 wrp->wrh_lo = htonl(V_WR_TID(q->token));
1883 again: reclaim_completed_tx_imm(q);
1885 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1886 if (__predict_false(ret)) {
1893 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
1896 if (++q->pidx >= q->size) {
1902 t3_write_reg(adap, A_SG_KDOORBELL,
1903 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1911 * restart_ctrlq - restart a suspended control queue
1912 * @qs: the queue set containing the control queue
1914 * Resumes transmission on a suspended Tx control queue.
/*
 * Task handler with the (data, npending) signature; data is the qset.
 * It reclaims completed control-queue descriptors, then writes queued
 * packets from q->sendq as immediate data while the ring has free
 * descriptors.  If packets remain, the TXQ_CTRL bit is set in
 * txq_stopped and re-tested; finally the doorbell register is written.
 * NOTE(review): locking, mbuf freeing and the body of the wrap test are
 * not visible in this listing.
 */
1917 restart_ctrlq(void *data, int npending)
1920 struct sge_qset *qs = (struct sge_qset *)data;
1921 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1922 adapter_t *adap = qs->port->adapter;
1925 again: reclaim_completed_tx_imm(q);
1927 while (q->in_use < q->size &&
1928 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1930 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
1933 if (++q->pidx >= q->size) {
/* Packets left over: mark the queue stopped, then re-check. */
1939 if (!mbufq_empty(&q->sendq)) {
1940 setbit(&qs->txq_stopped, TXQ_CTRL);
1942 if (should_restart_tx(q) &&
1943 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1948 t3_write_reg(adap, A_SG_KDOORBELL,
1949 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1954 * Send a management message through control queue 0
/*
 * Thin wrapper: passes m to ctrl_xmit() on queue set 0 of the adapter
 * and returns its result.
 */
1957 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1959 return ctrl_xmit(adap, &adap->sge.qs[0], m);
1963 * free_qset - free the resources of an SGE queue set
1964 * @sc: the controller owning the queue set
1967 * Release the HW and SW resources associated with an SGE queue set, such
1968 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1969 * queue set must be quiesced prior to calling this.
/*
 * Tear down queue set q of controller sc.  In order: reclaim completed
 * Ethernet Tx descriptors, free the buf_ring and the ifq, disable and
 * free each free-list ring and its buffers, unlock and destroy q->lock,
 * disable and free each Tx ring, disable and free the response queue,
 * release the LRO state and finally zero the whole structure.
 * Because q->lock is unlocked here before being destroyed, the caller is
 * expected to hold it on entry.
 * NOTE(review): some enclosing conditionals (for example around the
 * response queue teardown) are not visible in this listing.
 */
1972 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1976 reclaim_completed_tx(q, 0, TXQ_ETH);
1977 if (q->txq[TXQ_ETH].txq_mr != NULL)
1978 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
1979 if (q->txq[TXQ_ETH].txq_ifq != NULL) {
1980 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
1981 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
/* Free lists: disable the HW context under reg_lock, then free DMA. */
1984 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1985 if (q->fl[i].desc) {
1986 mtx_lock_spin(&sc->sge.reg_lock);
1987 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1988 mtx_unlock_spin(&sc->sge.reg_lock);
1989 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1990 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1992 bus_dma_tag_destroy(q->fl[i].desc_tag);
1993 bus_dma_tag_destroy(q->fl[i].entry_tag);
1995 if (q->fl[i].sdesc) {
1996 free_rx_bufs(sc, &q->fl[i]);
1997 free(q->fl[i].sdesc, M_DEVBUF);
2001 mtx_unlock(&q->lock);
2002 MTX_DESTROY(&q->lock);
/* Tx queues: disable the egress context, then free DMA and sw state. */
2003 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2004 if (q->txq[i].desc) {
2005 mtx_lock_spin(&sc->sge.reg_lock);
2006 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2007 mtx_unlock_spin(&sc->sge.reg_lock);
2008 bus_dmamap_unload(q->txq[i].desc_tag,
2009 q->txq[i].desc_map);
2010 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2011 q->txq[i].desc_map);
2012 bus_dma_tag_destroy(q->txq[i].desc_tag);
2013 bus_dma_tag_destroy(q->txq[i].entry_tag);
2015 if (q->txq[i].sdesc) {
2016 free(q->txq[i].sdesc, M_DEVBUF);
/* Response queue. */
2021 mtx_lock_spin(&sc->sge.reg_lock);
2022 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2023 mtx_unlock_spin(&sc->sge.reg_lock);
2025 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2026 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2028 bus_dma_tag_destroy(q->rspq.desc_tag);
2029 MTX_DESTROY(&q->rspq.lock);
2032 #if defined(INET6) || defined(INET)
2033 tcp_lro_free(&q->lro.ctrl);
2036 bzero(q, sizeof(*q));
2040 * t3_free_sge_resources - free SGE resources
2041 * @sc: the adapter softc
2043 * Frees resources used by the SGE queue sets.
/*
 * Free the first nqsets queue sets of sc.  Each one is locked with
 * TXQ_LOCK before being passed to t3_free_qset(), which releases and
 * destroys that lock itself.
 */
2046 t3_free_sge_resources(adapter_t *sc, int nqsets)
2050 for (i = 0; i < nqsets; ++i) {
2051 TXQ_LOCK(&sc->sge.qs[i]);
2052 t3_free_qset(sc, &sc->sge.qs[i]);
2057 * t3_sge_start - enable SGE
2058 * @sc: the controller softc
2060 * Enables the SGE for DMAs. This is the last step in starting packet
/*
 * Set the F_GLOBALENABLE field in the A_SG_CONTROL register of sc.
 */
2064 t3_sge_start(adapter_t *sc)
2066 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2070 * t3_sge_stop - disable SGE operation
2073 * Disables the DMA engine. This can be called in emergencies (e.g.,
2074 * from error interrupts) or from normal process context. In the latter
2075 * case it also disables any pending queue restart tasklets. Note that
2076 * if it is called in interrupt context it cannot disable the restart
2077 * tasklets as it cannot wait, however the tasklets will have no effect
2078 * since the doorbells are disabled and the driver will call this again
2079 * later from process context, at which time the tasklets will be stopped
2080 * if they are still running.
/*
 * Clear F_GLOBALENABLE in A_SG_CONTROL, then count the queue sets over
 * all ports and drain the offload and control queue resume tasks of each
 * queue set from the adapter taskqueue.
 * NOTE(review): any condition guarding the drain loop is not visible in
 * this listing.
 */
2083 t3_sge_stop(adapter_t *sc)
2087 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2092 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2093 nqsets += sc->port[i].nqsets;
2099 for (i = 0; i < nqsets; ++i) {
2100 struct sge_qset *qs = &sc->sge.qs[i];
2102 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2103 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2109 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
2110 * @adapter: the adapter
2111 * @q: the Tx queue to reclaim descriptors from
2112 * @reclaimable: the number of descriptors to reclaim
2113 * @m_vec_size: maximum number of buffers to reclaim
2114 * @desc_reclaimed: returns the number of descriptors reclaimed
2116 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2117 * Tx buffers. Called with the Tx queue lock held.
2119 * Returns the number of buffers reclaimed
/*
 * Walk reclaimable software descriptors of the given Tx queue of qs,
 * starting at cidx.  For each descriptor that holds an mbuf, the DMA map
 * is unloaded if TX_SW_DESC_MAPPED is set and the mbuf list is freed
 * with m_freem_list().  The mbufs of the next two descriptors are
 * prefetched on every pass.  Asserts that qs->lock is owned.
 * NOTE(review): the initialisation of cidx and mask and the wrap
 * handling body are not visible in this listing.
 */
2122 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2124 struct tx_sw_desc *txsd;
2125 unsigned int cidx, mask;
2126 struct sge_txq *q = &qs->txq[queue];
2129 T3_TRACE2(sc->tb[q->cntxt_id & 7],
2130 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2134 txsd = &q->sdesc[cidx];
2136 mtx_assert(&qs->lock, MA_OWNED);
2137 while (reclaimable--) {
2138 prefetch(q->sdesc[(cidx + 1) & mask].m);
2139 prefetch(q->sdesc[(cidx + 2) & mask].m);
2141 if (txsd->m != NULL) {
2142 if (txsd->flags & TX_SW_DESC_MAPPED) {
2143 bus_dmamap_unload(q->entry_tag, txsd->map);
2144 txsd->flags &= ~TX_SW_DESC_MAPPED;
2146 m_freem_list(txsd->m);
2152 if (++cidx == q->size) {
2162 * is_new_response - check if a response is newly written
2163 * @r: the response descriptor
2164 * @q: the response queue
2166 * Returns true if a response descriptor contains a yet unprocessed
/*
 * Compare the F_RSPD_GEN2 bit of the descriptor intr_gen field with the
 * current generation of the response queue; the result is non-zero when
 * they are equal.
 */
2170 is_new_response(const struct rsp_desc *r,
2171 const struct sge_rspq *q)
2173 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
/* GTS flag bits of Tx queues 0 and 1 in a response descriptor. */
2176 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
/* The GTS bits above plus the full credit-return fields of TXQ0..TXQ2. */
2177 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2178 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2179 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2180 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2182 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2183 #define NOMEM_INTR_DELAY 2500
2187 * write_ofld_wr - write an offload work request
2188 * @adap: the adapter
2189 * @m: the packet to send
2191 * @pidx: index of the first Tx descriptor to write
2192 * @gen: the generation value to use
2193 * @ndesc: number of descriptors the packet will occupy
2195 * Write an offload work request to send the supplied packet. The packet
2196 * data already carry the work request with most fields populated.
/*
 * Write the offload work request carried by mbuf m into queue q starting
 * at descriptor pidx.  The mbuf begins with an ofld_hdr followed by the
 * work request.  Without F_HDR_SGL in the header flags the request is
 * written as immediate data through write_imm().  Otherwise the request
 * body is copied into the descriptor, an SGL is built from the segments
 * of sgl (in place for one descriptor, in t3sgl otherwise) and the
 * header is written by write_wr_hdr_sgl().
 * NOTE(review): the declaration and origin of sgl, the setup of txqs and
 * flits, and the mbuf free call are not visible in this listing.
 */
2199 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
2200 unsigned int pidx, unsigned int gen, unsigned int ndesc)
2202 unsigned int sgl_flits, flits;
2203 int i, idx, nsegs, wrlen;
2204 struct work_request_hdr *from;
2205 struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
2206 struct tx_desc *d = &q->desc[pidx];
2207 struct txq_state txqs;
2208 struct sglist_seg *segs;
2209 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2212 from = (void *)(oh + 1); /* Start of WR within mbuf */
2213 wrlen = m->m_len - sizeof(*oh);
2215 if (!(oh->flags & F_HDR_SGL)) {
2216 write_imm(d, (caddr_t)from, wrlen, gen);
2219 * mbuf with "real" immediate tx data will be enqueue_wr'd by
2220 * t3_push_frames and freed in wr_ack. Others, like those sent
2221 * down by close_conn, t3_send_reset, etc. should be freed here.
2223 if (!(oh->flags & F_HDR_DF))
2228 memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
2232 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
2234 nsegs = sgl->sg_nseg;
2235 segs = sgl->sg_segs;
2236 for (idx = 0, i = 0; i < nsegs; i++) {
2237 KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
2240 sgp->len[idx] = htobe32(segs[i].ss_len);
2241 sgp->addr[idx] = htobe64(segs[i].ss_paddr);
2249 sgl_flits = sgl_len(nsegs);
2254 write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
2255 from->wrh_hi, from->wrh_lo);
2259 * ofld_xmit - send a packet through an offload queue
2260 * @adap: the adapter
2261 * @q: the Tx offload queue
2264 * Send an offload packet through an SGE offload queue.
/*
 * Send mbuf m on the offload Tx queue of qs.  The descriptor count comes
 * from the ofld_hdr flags via G_HDR_NDESC.  After reclaiming completed
 * descriptors (threshold 16) and checking availability through
 * check_desc_avail(), the request is written by write_ofld_wr() and the
 * doorbell is rung with mustring set.
 * NOTE(review): locking, the handling of a non-zero check_desc_avail()
 * result, the producer index update and the return value are not
 * visible in this listing.
 */
2267 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2271 unsigned int pidx, gen;
2272 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2273 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2275 ndesc = G_HDR_NDESC(oh->flags);
2278 again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
2279 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2280 if (__predict_false(ret)) {
2292 if (q->pidx >= q->size) {
2297 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
2298 check_ring_tx_db(adap, q, 1);
2305 * restart_offloadq - restart a suspended offload queue
2306 * @qs: the queue set containing the offload queue
2308 * Resumes transmission on a suspended Tx offload queue.
/*
 * Task handler with the (data, npending) signature; data is the qset.
 * After reclaiming completed offload descriptors it peeks at each packet
 * on q->sendq.  When fewer than its required descriptors are free, the
 * TXQ_OFLD bit is set in txq_stopped and re-tested.  Otherwise the packet
 * is dequeued and written by write_ofld_wr().  Finally TXQ_RUNNING and
 * TXQ_LAST_PKT_DB are set and the doorbell register is written.
 * NOTE(review): locking, the producer index update and the statements
 * following the stopped-queue test are not visible in this listing.
 */
2311 restart_offloadq(void *data, int npending)
2314 struct sge_qset *qs = data;
2315 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2316 adapter_t *adap = qs->port->adapter;
2320 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
2322 while ((m = mbufq_peek(&q->sendq)) != NULL) {
2323 unsigned int gen, pidx;
2324 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2325 unsigned int ndesc = G_HDR_NDESC(oh->flags);
2327 if (__predict_false(q->size - q->in_use < ndesc)) {
2328 setbit(&qs->txq_stopped, TXQ_OFLD);
2329 if (should_restart_tx(q) &&
2330 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2340 if (q->pidx >= q->size) {
/* The packet was only peeked at above; remove it now. */
2345 (void)mbufq_dequeue(&q->sendq);
2347 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
2351 set_bit(TXQ_RUNNING, &q->flags);
2352 set_bit(TXQ_LAST_PKT_DB, &q->flags);
2356 t3_write_reg(adap, A_SG_KDOORBELL,
2357 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2361 * t3_offload_tx - send an offload packet
2364 * Sends an offload packet. We use the packet priority to select the
2365 * appropriate Tx queue as follows: bit 0 indicates whether the packet
2366 * should be sent as regular or control, bits 1-3 select the queue set.
/*
 * Dispatch an offload mbuf.  The queue set index is taken from the
 * ofld_hdr flags via G_HDR_QSET.  With F_HDR_CTRL set the ofld_hdr is
 * trimmed off and the packet goes to ctrl_xmit(); otherwise it goes to
 * ofld_xmit() with the header still in place.  Returns the result of
 * whichever function was called.
 */
2369 t3_offload_tx(struct adapter *sc, struct mbuf *m)
2371 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2372 struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
2374 if (oh->flags & F_HDR_CTRL) {
2375 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
2376 return (ctrl_xmit(sc, qs, m));
2378 return (ofld_xmit(sc, qs, m));
/*
 * For the offload and control Tx queues of qs: when the queue is marked
 * stopped, should_restart_tx() agrees and the stopped bit is cleared by
 * this caller, bump the restarts counter and enqueue the resume task of
 * that queue on the adapter taskqueue.
 */
2383 restart_tx(struct sge_qset *qs)
2385 struct adapter *sc = qs->port->adapter;
2387 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2388 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2389 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2390 qs->txq[TXQ_OFLD].restarts++;
2391 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2394 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2395 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2396 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2397 qs->txq[TXQ_CTRL].restarts++;
2398 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2403 * t3_sge_alloc_qset - initialize an SGE queue set
2404 * @sc: the controller softc
2405 * @id: the queue set id
2406 * @nports: how many Ethernet ports will be using this queue set
2407 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2408 * @p: configuration parameters for this queue set
2409 * @ntxq: number of Tx queues for the queue set
2410 * @pi: port info for queue set
2412 * Allocate resources and initialize an SGE queue set. A queue set
2413 * comprises a response queue, two Rx free-buffer queues, and up to 3
2414 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2415 * queue, offload queue, and control queue.
/*
 * Allocate and initialise queue set number id of controller sc.
 * Visible steps, in order: create the qset lock, the Ethernet buf_ring,
 * the ifq and the two Tx callouts; allocate the two free-list rings, the
 * response ring and ntxq Tx rings; set up the resume and reclaim tasks;
 * fill in sizes, buffer zones and LRO state; program the response,
 * free-list and egress contexts under sge.reg_lock; refill the free
 * lists and the response queue; write A_SG_GTS.  The error path visible
 * at the end releases reg_lock and calls t3_free_qset().
 * NOTE(review): goto targets, labels, the conditions around the OFLD and
 * CTRL egress contexts and the return statements are not visible in this
 * listing.
 */
2418 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2419 const struct qset_params *p, int ntxq, struct port_info *pi)
2421 struct sge_qset *q = &sc->sge.qs[id];
2424 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2428 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2429 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
2430 device_printf(sc->dev, "failed to allocate mbuf ring\n");
2433 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
2434 M_NOWAIT | M_ZERO)) == NULL) {
2435 device_printf(sc->dev, "failed to allocate ifq\n");
2438 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);
/* Both callouts are assigned the CPU id modulo the CPU count. */
2439 callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
2440 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
2441 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
2442 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
2444 init_qset_cntxt(q, id);
/* Descriptor rings: free list 0, free list 1 (jumbo), response queue. */
2446 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2447 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2448 &q->fl[0].desc, &q->fl[0].sdesc,
2449 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2450 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2451 printf("error %d from alloc ring fl0\n", ret);
2455 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2456 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2457 &q->fl[1].desc, &q->fl[1].sdesc,
2458 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2459 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2460 printf("error %d from alloc ring fl1\n", ret);
2464 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2465 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2466 &q->rspq.desc_tag, &q->rspq.desc_map,
2467 NULL, NULL)) != 0) {
2468 printf("error %d from alloc ring rspq\n", ret);
2472 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2473 device_get_unit(sc->dev), irq_vec_idx);
2474 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
/* Tx rings: the control queue is allocated with a zero sw desc size. */
2476 for (i = 0; i < ntxq; ++i) {
2477 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2479 if ((ret = alloc_ring(sc, p->txq_size[i],
2480 sizeof(struct tx_desc), sz,
2481 &q->txq[i].phys_addr, &q->txq[i].desc,
2482 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2483 &q->txq[i].desc_map,
2484 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2485 printf("error %d from alloc ring tx %i\n", ret, i);
2488 mbufq_init(&q->txq[i].sendq);
2490 q->txq[i].size = p->txq_size[i];
2494 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2496 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2497 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2498 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2500 q->fl[0].gen = q->fl[1].gen = 1;
2501 q->fl[0].size = p->fl_size;
2502 q->fl[1].size = p->jumbo_size;
2506 q->rspq.size = p->rspq_size;
2508 q->txq[TXQ_ETH].stop_thres = nports *
2509 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
/* Free list 0 uses MCLBYTES buffers; free list 1 follows jumbo_buf_size. */
2511 q->fl[0].buf_size = MCLBYTES;
2512 q->fl[0].zone = zone_pack;
2513 q->fl[0].type = EXT_PACKET;
2515 if (p->jumbo_buf_size == MJUM16BYTES) {
2516 q->fl[1].zone = zone_jumbo16;
2517 q->fl[1].type = EXT_JUMBO16;
2518 } else if (p->jumbo_buf_size == MJUM9BYTES) {
2519 q->fl[1].zone = zone_jumbo9;
2520 q->fl[1].type = EXT_JUMBO9;
2521 } else if (p->jumbo_buf_size == MJUMPAGESIZE) {
2522 q->fl[1].zone = zone_jumbop;
2523 q->fl[1].type = EXT_JUMBOP;
2525 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
2529 q->fl[1].buf_size = p->jumbo_buf_size;
2531 /* Allocate and setup the lro_ctrl structure */
2532 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2533 #if defined(INET6) || defined(INET)
2534 ret = tcp_lro_init(&q->lro.ctrl);
2536 printf("error %d from tcp_lro_init\n", ret);
2540 q->lro.ctrl.ifp = pi->ifp;
2542 mtx_lock_spin(&sc->sge.reg_lock);
2543 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2544 q->rspq.phys_addr, q->rspq.size,
2545 q->fl[0].buf_size, 1, 0);
2547 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2551 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2552 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2553 q->fl[i].phys_addr, q->fl[i].size,
2554 q->fl[i].buf_size, p->cong_thres, 1,
2557 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2562 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2563 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2564 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2567 printf("error %d from t3_sge_init_ecntxt\n", ret);
2572 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2573 USE_GTS, SGE_CNTXT_OFLD, id,
2574 q->txq[TXQ_OFLD].phys_addr,
2575 q->txq[TXQ_OFLD].size, 0, 1, 0);
2577 printf("error %d from t3_sge_init_ecntxt\n", ret);
2583 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2585 q->txq[TXQ_CTRL].phys_addr,
2586 q->txq[TXQ_CTRL].size,
2587 q->txq[TXQ_CTRL].token, 1, 0);
2589 printf("error %d from t3_sge_init_ecntxt\n", ret);
2594 mtx_unlock_spin(&sc->sge.reg_lock);
2595 t3_update_qset_coalesce(q, p);
2597 refill_fl(sc, &q->fl[0], q->fl[0].size);
2598 refill_fl(sc, &q->fl[1], q->fl[1].size);
2599 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2601 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2602 V_NEWTIMER(q->rspq.holdoff_tmr));
2607 mtx_unlock_spin(&sc->sge.reg_lock);
2610 t3_free_qset(sc, q);
2616 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2617 * ethernet data. Hardware assistance with various checksums and any vlan tag
2618 * will also be taken into account here.
/*
 * t3_rx_eth: finish receive-side setup of an Ethernet packet held in mbuf m.
 * A struct cpl_rx_pkt header is read at offset ethpad from the start of the
 * mbuf data.  Its interface index, VLAN fields and checksum fields are used to
 * fill in the mbuf packet header, and the header plus pad are then stripped
 * from the front of the mbuf.
 */
2621 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
2623 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
/* Map the interface index carried in the CPL header to the receiving port. */
2624 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2625 struct ifnet *ifp = pi->ifp;
/* Record a hardware-reported VLAN tag, converted to host byte order. */
2627 if (cpl->vlan_valid) {
2628 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2629 m->m_flags |= M_VLANTAG;
2632 m->m_pkthdr.rcvif = ifp;
2634 * adjust after conversion to mbuf chain
/* Drop sizeof(*cpl) + ethpad bytes from both lengths and advance m_data. */
2636 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2637 m->m_len -= (sizeof(*cpl) + ethpad);
2638 m->m_data += (sizeof(*cpl) + ethpad);
/*
 * Checksum flags are only set for packets that are not fragments, that the
 * CPL header marks csum_valid, and whose reported checksum is 0xffff.
 */
2640 if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
2641 struct ether_header *eh = mtod(m, void *);
/* For a VLAN-encapsulated frame the protocol is taken from the VLAN header. */
2644 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2645 struct ether_vlan_header *evh = mtod(m, void *);
2647 eh_type = evh->evl_proto;
2649 eh_type = eh->ether_type;
/* IPv4 needs IFCAP_RXCSUM enabled; IPv6 needs IFCAP_RXCSUM_IPV6 enabled. */
2651 if (ifp->if_capenable & IFCAP_RXCSUM &&
2652 eh_type == htons(ETHERTYPE_IP)) {
2653 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
2654 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2655 m->m_pkthdr.csum_data = 0xffff;
2656 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
2657 eh_type == htons(ETHERTYPE_IPV6)) {
2658 m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
2660 m->m_pkthdr.csum_data = 0xffff;
2666 * get_packet - return the next ingress packet buffer from a free list
2667 * @adap: the adapter that received the packet
2668 * @drop_thres: # of remaining buffers before we start dropping packets
2669 * @qs: the qset that the SGE free list holding the packet belongs to
2670 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2671 * @r: response descriptor
2673 * Get the next packet from a free list and complete setup of the
2674 * mbuf. If the packet is small we make a copy and recycle the
2675 * original buffer, otherwise we use the original buffer itself. If a
2676 * positive drop threshold is supplied packets are dropped and their
2677 * buffers recycled if (a) the number of remaining buffers is under the
2678 * threshold and the packet is too big to copy, or (b) the packet should
2679 * be copied but there is no memory for the copy.
/*
 * get_packet: take the buffer at the consumer index of one of qs's two free
 * lists and attach it to the mbuf chain tracked by mh.  The free list is
 * chosen by the F_RSPD_FLQ bit of the response descriptor's len_cq word
 * (set: fl[1], clear: fl[0]).
 * NOTE(review): the return statement is not part of this listing; the caller
 * in process_responses() stores the result in a variable named eop.
 */
2682 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2683 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2686 unsigned int len_cq = ntohl(r->len_cq);
2687 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2688 int mask, cidx = fl->cidx;
2689 struct rx_sw_desc *sd = &fl->sdesc[cidx];
2690 uint32_t len = G_RSPD_LEN(len_cq);
2691 uint32_t flags = M_EXT;
2692 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
/*
 * Prefetch the mbuf and cluster of the next two software descriptors.
 * NOTE(review): masking with size - 1 assumes fl->size is a power of two.
 */
2697 mask = fl->size - 1;
2698 prefetch(fl->sdesc[(cidx + 1) & mask].m);
2699 prefetch(fl->sdesc[(cidx + 2) & mask].m);
2700 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
2701 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);
2704 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
/*
 * Copy path: with recycle_enable set, a single-descriptor packet (SOP and
 * EOP) of at most SGE_RX_COPY_THRES bytes is copied into a newly allocated
 * mbuf and the original receive buffer is recycled.
 */
2706 if (recycle_enable && len <= SGE_RX_COPY_THRES &&
2707 sopeop == RSPQ_SOP_EOP) {
2708 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
2710 cl = mtod(m, void *);
2711 memcpy(cl, sd->rxsd_cl, len);
2712 recycle_rx_buf(adap, fl, fl->cidx);
2713 m->m_pkthdr.len = m->m_len = len;
2715 mh->mh_head = mh->mh_tail = m;
/* Otherwise the receive buffer itself is used: unload its DMA map first. */
2720 bus_dmamap_unload(fl->entry_tag, sd->map);
2724 if ((sopeop == RSPQ_SOP_EOP) ||
2725 (sopeop == RSPQ_SOP))
2727 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
2728 if (fl->zone == zone_pack) {
2730 * restore clobbered data pointer
2732 m->m_data = m->m_ext.ext_buf;
2734 m_cljset(m, cl, fl->type);
2743 mh->mh_head = mh->mh_tail = m;
2744 m->m_pkthdr.len = len;
/*
 * Descriptor that is neither start nor end of packet: it is linked after the
 * current tail and its length added to the head's packet length.  Without a
 * tail an error is logged.
 */
2749 case RSPQ_NSOP_NEOP:
2750 if (mh->mh_tail == NULL) {
2751 log(LOG_ERR, "discarding intermediate descriptor entry\n");
2755 mh->mh_tail->m_next = m;
2757 mh->mh_head->m_pkthdr.len += len;
2761 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
/* Advance the free-list consumer index; the test detects the end of the ring. */
2763 if (++fl->cidx == fl->size)
2770 * handle_rsp_cntrl_info - handles control information in a response
2771 * @qs: the queue set corresponding to the response
2772 * @flags: the response control flags
2774 * Handles the control information of an SGE response, such as GTS
2775 * indications and completion credits for the queue set's Tx queues.
2776 * HW coalesces credits, we don't do any extra SW coalescing.
/*
 * handle_rsp_cntrl_info: apply the control bits of a response's flags word to
 * the three Tx queues of qs.  A GTS bit clears the TXQ_RUNNING flag bit of the
 * matching queue, and the credit fields are added to that queue's processed
 * counter.  TXQ0 maps to TXQ_ETH, TXQ1 to TXQ_OFLD and TXQ2 to TXQ_CTRL.
 */
2778 static __inline void
2779 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2781 unsigned int credits;
/* Ethernet Tx queue (TXQ0). */
2784 if (flags & F_RSPD_TXQ0_GTS)
2785 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2787 credits = G_RSPD_TXQ0_CR(flags);
2789 qs->txq[TXQ_ETH].processed += credits;
/* Control Tx queue (TXQ2); no GTS handling is visible for this queue. */
2791 credits = G_RSPD_TXQ2_CR(flags);
2793 qs->txq[TXQ_CTRL].processed += credits;
/* Offload Tx queue (TXQ1). */
2796 if (flags & F_RSPD_TXQ1_GTS)
2797 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2799 credits = G_RSPD_TXQ1_CR(flags);
2801 qs->txq[TXQ_OFLD].processed += credits;
/*
 * check_ring_db: called by process_responses() with the GTS bits it
 * accumulated in "sleeping".
 * NOTE(review): only the signature is present in this listing; the body is
 * not visible, so its behavior cannot be documented from here.
 */
2806 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2807 unsigned int sleeping)
2813 * process_responses - process responses from an SGE response queue
2814 * @adap: the adapter
2815 * @qs: the queue set to which the response queue belongs
2816 * @budget: how many responses can be processed in this round
2818 * Process responses from an SGE response queue up to the supplied budget.
2819 * Responses include received packets as well as credits and other events
2820 * for the queues that belong to the response queue's queue set.
2821 * A negative budget is effectively unlimited.
2823 * Additionally choose the interrupt holdoff time for the next interrupt
2824 * on this queue. If the system is under memory shortage use a fairly
2825 * long delay to help recovery.
/*
 * process_responses: consume entries from qs's response queue while budget
 * remains and is_new_response() reports a new entry.  Each entry is handled
 * according to its flags: asynchronous notification, immediate data, or a
 * free-list buffer.  Control flags are passed to handle_rsp_cntrl_info().
 * Completed Ethernet packets go through t3_rx_eth() and then either to LRO or
 * directly to the interface input routine.  On exit, budget has been reduced
 * by budget_left, i.e. it holds the amount of budget consumed.
 */
2828 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2830 struct sge_rspq *rspq = &qs->rspq;
2831 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2832 int budget_left = budget;
2833 unsigned int sleeping = 0;
2834 #if defined(INET6) || defined(INET)
2835 int lro_enabled = qs->lro.enabled;
2837 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2839 struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
/* Debug aid: print the holdoff timer whenever it changes while cxgb_debug is set. */
2841 static int last_holdoff = 0;
2842 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2843 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2844 last_holdoff = rspq->holdoff_tmr;
2847 rspq->next_holdoff = rspq->holdoff_tmr;
2849 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2850 int eth, eop = 0, ethpad = 0;
2851 uint32_t flags = ntohl(r->flags);
2852 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2853 uint8_t opcode = r->rss_hdr.opcode;
2855 eth = (opcode == CPL_RX_PKT);
/*
 * Asynchronous notification: AN_PKT_SIZE bytes of the response descriptor
 * are copied into an mbuf whose first byte is overwritten with
 * CPL_ASYNC_NOTIF, and the opcode is changed to match.
 */
2857 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2861 printf("async notification\n");
2863 if (mh->mh_head == NULL) {
2864 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
2867 m = m_gethdr(M_NOWAIT, MT_DATA);
2872 memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2873 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2874 *mtod(m, char *) = CPL_ASYNC_NOTIF;
2875 opcode = CPL_ASYNC_NOTIF;
2877 rspq->async_notif++;
/*
 * Immediate data: the payload is extracted into a new mbuf with
 * get_imm_packet().
 * NOTE(review): the NOMEM_INTR_DELAY assignment presumably sits on the
 * allocation-failure path; the guarding test is not visible here.
 */
2879 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2880 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
2884 rspq->next_holdoff = NOMEM_INTR_DELAY;
2888 if (mh->mh_head == NULL)
2891 mh->mh_tail->m_next = m;
2894 get_imm_packet(adap, r, m);
2895 mh->mh_head->m_pkthdr.len += m->m_len;
/*
 * Free-list buffer: SGE_RX_DROP_THRES is passed as the drop threshold for
 * Ethernet packets and 0 otherwise.  The RSS hash becomes the flow id only
 * when the descriptor has a hash type and adap->timestamp is clear.
 */
2898 } else if (r->len_cq) {
2899 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2901 eop = get_packet(adap, drop_thresh, qs, mh, r);
2903 if (r->rss_hdr.hash_type && !adap->timestamp) {
2904 M_HASHTYPE_SET(mh->mh_head, M_HASHTYPE_OPAQUE);
2905 mh->mh_head->m_pkthdr.flowid = rss_hash;
/* Accumulate GTS bits for check_ring_db() and process Tx credit updates. */
2914 if (flags & RSPD_CTRL_MASK) {
2915 sleeping |= flags & RSPD_GTS_MASK;
2916 handle_rsp_cntrl_info(qs, flags);
/* Offload traffic is dispatched through the per-opcode CPL handler table. */
2920 rspq->offload_pkts++;
2922 adap->cpl_handler[opcode](qs, r, mh->mh_head);
2924 m_freem(mh->mh_head);
2927 } else if (eth && eop) {
2928 struct mbuf *m = mh->mh_head;
2930 t3_rx_eth(adap, m, ethpad);
2933 * The T304 sends incoming packets on any qset. If LRO
2934 * is also enabled, we could end up sending packet up
2935 * lro_ctrl->ifp's input. That is incorrect.
2937 * The mbuf's rcvif was derived from the cpl header and
2938 * is accurate. Skip LRO and just use that.
2940 #if defined(INET6) || defined(INET)
2941 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
2943 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
2944 && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
2946 /* successfully queue'd for LRO */
2951 * LRO not enabled, packet unsuitable for LRO,
2952 * or unable to queue. Pass it up right now in
2955 struct ifnet *ifp = m->m_pkthdr.rcvif;
2956 (*ifp->if_input)(ifp, m);
/* Advance the response-queue consumer index; the test detects the end of the ring. */
2963 if (__predict_false(++rspq->cidx == rspq->size)) {
/* Return response-queue credits to the hardware in batches of 64. */
2969 if (++rspq->credits >= 64) {
2970 refill_rspq(adap, rspq, rspq->credits);
2973 __refill_fl_lt(adap, &qs->fl[0], 32);
2974 __refill_fl_lt(adap, &qs->fl[1], 32);
2978 #if defined(INET6) || defined(INET)
/* After the loop, flush every entry still on the LRO active list. */
2980 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
2981 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
2982 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
2983 tcp_lro_flush(lro_ctrl, queued);
2988 check_ring_db(adap, qs, sleeping);
2990 mb(); /* commit Tx queue processed updates */
2991 if (__predict_false(qs->txq_stopped > 1))
2994 __refill_fl_lt(adap, &qs->fl[0], 512);
2995 __refill_fl_lt(adap, &qs->fl[1], 512);
/* Convert the initial budget into the amount of budget consumed. */
2996 budget -= budget_left;
3001 * A helper function that processes responses and issues GTS.
/*
 * process_responses_gts: run process_responses() on rq's queue set with a
 * budget of -1, then write A_SG_GTS with the queue's context id, its
 * next_holdoff timer value and its current consumer index.
 */
3004 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
3007 static int last_holdoff = 0;
3009 work = process_responses(adap, rspq_to_qset(rq), -1);
/* Debug aid: report next_holdoff whenever it changes while cxgb_debug is set. */
3011 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3012 printf("next_holdoff=%d\n", rq->next_holdoff);
3013 last_holdoff = rq->next_holdoff;
3015 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3016 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3023 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3024 * Handles data events from SGE response queues as well as error and other
3025 * async events as they all use the same interrupt pin. We use one SGE
3026 * response queue per port in this mode and protect all response queues with queue 0's lock.
/*
 * t3b_intr: interrupt handler whose argument is the adapter.  It writes 0 to
 * A_PL_CLI, reads the A_SG_DATA_INTR map, hands error interrupts to the
 * slow-interrupt task, and runs response processing for the ports while
 * holding the lock of queue set 0's response queue.
 */
3030 t3b_intr(void *data)
3033 adapter_t *adap = data;
3034 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3036 t3_write_reg(adap, A_PL_CLI, 0);
3037 map = t3_read_reg(adap, A_SG_DATA_INTR);
/*
 * Error interrupt: write 0 to A_PL_INT_ENABLE0, read the register back, and
 * defer the work to slow_intr_task on the adapter's taskqueue.
 */
3042 if (__predict_false(map & F_ERRINTR)) {
3043 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3044 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3045 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
/* Queue 0's lock covers the per-port response processing. */
3048 mtx_lock(&q0->lock);
3049 for_each_port(adap, i)
3051 process_responses_gts(adap, &adap->sge.qs[i].rspq);
3052 mtx_unlock(&q0->lock);
3056 * The MSI interrupt handler. This needs to handle data events from SGE
3057 * response queues as well as error and other async events as they all use
3058 * the same MSI vector. We use one SGE response queue per port in this mode
3059 * and protect all response queues with queue 0's lock.
/*
 * t3_intr_msi: interrupt handler whose argument is the adapter.  Response
 * processing for each port runs under queue 0's lock.  If new_packets is
 * still 0 afterwards, the interrupt is treated as a non-data event and passed
 * to the slow-interrupt task.
 */
3062 t3_intr_msi(void *data)
3064 adapter_t *adap = data;
3065 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3066 int i, new_packets = 0;
3068 mtx_lock(&q0->lock);
3070 for_each_port(adap, i)
3071 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3073 mtx_unlock(&q0->lock);
/*
 * No data work found: write 0 to A_PL_INT_ENABLE0, read the register back,
 * and enqueue slow_intr_task.
 */
3074 if (new_packets == 0) {
3075 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3076 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3077 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
/*
 * t3_intr_msix: interrupt handler whose argument is a single queue set.  It
 * processes that queue set's response queue and counts the interrupt in
 * unhandled_irqs when process_responses_gts() returns 0.
 */
3082 t3_intr_msix(void *data)
3084 struct sge_qset *qs = data;
3085 adapter_t *adap = qs->port->adapter;
3086 struct sge_rspq *rspq = &qs->rspq;
3088 if (process_responses_gts(adap, rspq) == 0)
3089 rspq->unhandled_irqs++;
/*
 * Length passed to sbuf_new_for_sysctl() by the queue dump sysctl handlers.
 * The replacement list is parenthesized so the macro expands as a single
 * value inside any larger expression.
 */
#define QDUMP_SBUF_SIZE (32 * 400)
/*
 * t3_dump_rspq: sysctl handler that formats a textual dump of a response
 * queue into an sbuf.  The output holds fields decoded from the context words
 * read by t3_sge_read_rspq(), followed by rspq_dump_count descriptors starting
 * at index rspq_dump_start.  Descriptor indices wrap modulo RSPQ_Q_SIZE.
 */
3094 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3096 struct sge_rspq *rspq;
3097 struct sge_qset *qs;
3098 int i, err, dump_end, idx;
3100 struct rsp_desc *rspd;
3104 qs = rspq_to_qset(rspq);
/*
 * Validate the requested window.  An out-of-range count is reset to 0 and an
 * out-of-range start is reset to 0.
 */
3105 if (rspq->rspq_dump_count == 0)
3107 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3109 "dump count is too large %d\n", rspq->rspq_dump_count);
3110 rspq->rspq_dump_count = 0;
3113 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3115 "dump start of %d is greater than queue size\n",
3116 rspq->rspq_dump_start);
3117 rspq->rspq_dump_start = 0;
3120 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3123 err = sysctl_wire_old_buffer(req, 0);
3126 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
/* Decode the context words into named fields. */
3128 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3129 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3130 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3131 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3132 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3134 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3135 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
/*
 * Dump each descriptor in the window.
 * NOTE(review): flags, len_cq and cq_idx are byte-swapped before printing but
 * rss_hash_val is printed as stored.  Confirm this is intended.
 */
3137 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3138 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3139 idx = i & (RSPQ_Q_SIZE-1);
3141 rspd = &rspq->desc[idx];
3142 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3143 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3144 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3145 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3146 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3147 be32toh(rspd->len_cq), rspd->intr_gen);
3150 err = sbuf_finish(sb);
3151 /* Output a trailing NUL. */
3153 err = SYSCTL_OUT(req, "", 1);
/*
 * t3_dump_txq_eth: sysctl handler that formats a textual dump of a queue
 * set's Ethernet Tx queue into an sbuf.  The output holds fields decoded from
 * the context words read by t3_sge_read_ecntxt(), followed by txq_dump_count
 * descriptors starting at index txq_dump_start.  Descriptor indices wrap
 * modulo TX_ETH_Q_SIZE.
 */
3159 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3161 struct sge_txq *txq;
3162 struct sge_qset *qs;
3163 int i, j, err, dump_end;
3165 struct tx_desc *txd;
3166 uint32_t *WR, wr_hi, wr_lo, gen;
3170 qs = txq_to_qset(txq, TXQ_ETH);
/*
 * Validate the requested window.  An out-of-range count is reset to 1 and an
 * out-of-range start is reset to 0.
 */
3171 if (txq->txq_dump_count == 0) {
3174 if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3176 "dump count is too large %d\n", txq->txq_dump_count);
3177 txq->txq_dump_count = 1;
3180 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3182 "dump start of %d is greater than queue size\n",
3183 txq->txq_dump_start);
3184 txq->txq_dump_start = 0;
/*
 * NOTE(review): the egress context is read using the response queue's
 * context id (qs->rspq.cntxt_id), not txq->cntxt_id.  Confirm that the two
 * ids coincide for the Ethernet Tx queue.
 */
3187 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3190 err = sysctl_wire_old_buffer(req, 0);
3193 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
/*
 * Decode the context words into named fields.
 * NOTE(review): the second format string prints "generation%u" without the
 * "=" separator used by the other fields.
 */
3195 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3196 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3197 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3198 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
3199 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3200 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3201 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3202 txq->txq_dump_start,
3203 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
/*
 * For each descriptor, the first two 32-bit words are converted with ntohl()
 * and the generation is extracted from the second.  Words 2 through 29 are
 * then printed as stored, four per line.
 */
3205 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3206 for (i = txq->txq_dump_start; i < dump_end; i++) {
3207 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3208 WR = (uint32_t *)txd->flit;
3209 wr_hi = ntohl(WR[0]);
3210 wr_lo = ntohl(WR[1]);
3211 gen = G_WR_GEN(wr_lo);
3213 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3215 for (j = 2; j < 30; j += 4)
3216 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3217 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3220 err = sbuf_finish(sb);
3221 /* Output a trailing NUL. */
3223 err = SYSCTL_OUT(req, "", 1);
/*
 * t3_dump_txq_ctrl: sysctl handler that formats a textual dump of a queue
 * set's control Tx queue into an sbuf.  It prints txq_dump_count descriptors
 * starting at index txq_dump_start, with indices wrapping modulo 256.
 * NOTE(review): 256 and 255 are hard-coded here; presumably they match the
 * control queue's size.  Confirm against the queue allocation.
 */
3229 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3231 struct sge_txq *txq;
3232 struct sge_qset *qs;
3233 int i, j, err, dump_end;
3235 struct tx_desc *txd;
3236 uint32_t *WR, wr_hi, wr_lo, gen;
3239 qs = txq_to_qset(txq, TXQ_CTRL);
/*
 * Validate the requested window.  An out-of-range count is reset to 1 and an
 * out-of-range start is reset to 0.
 */
3240 if (txq->txq_dump_count == 0) {
3243 if (txq->txq_dump_count > 256) {
3245 "dump count is too large %d\n", txq->txq_dump_count);
3246 txq->txq_dump_count = 1;
3249 if (txq->txq_dump_start > 255) {
3251 "dump start of %d is greater than queue size\n",
3252 txq->txq_dump_start);
3253 txq->txq_dump_start = 0;
3257 err = sysctl_wire_old_buffer(req, 0);
3260 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3261 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3262 txq->txq_dump_start,
3263 (txq->txq_dump_start + txq->txq_dump_count) & 255);
/*
 * For each descriptor, the first two 32-bit words are converted with ntohl()
 * and the generation is extracted from the second.  Words 2 through 29 are
 * then printed as stored, four per line.
 */
3265 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3266 for (i = txq->txq_dump_start; i < dump_end; i++) {
3267 txd = &txq->desc[i & (255)];
3268 WR = (uint32_t *)txd->flit;
3269 wr_hi = ntohl(WR[0]);
3270 wr_lo = ntohl(WR[1]);
3271 gen = G_WR_GEN(wr_lo);
3273 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3275 for (j = 2; j < 30; j += 4)
3276 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3277 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3280 err = sbuf_finish(sb);
3281 /* Output a trailing NUL. */
3283 err = SYSCTL_OUT(req, "", 1);
/*
 * t3_set_coalesce_usecs: sysctl handler (arg1 is the adapter) for the
 * interrupt coalescing time.  The current value is taken from queue set 0's
 * parameters and exchanged with the caller through sysctl_handle_int().  A
 * changed value, clamped to at least 1, is stored in every queue set's
 * parameters and pushed to the hardware through A_SG_GTS.
 */
3289 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3291 adapter_t *sc = arg1;
3292 struct qset_params *qsp = &sc->params.sge.qset[0];
3294 struct sge_qset *qs;
3295 int i, j, err, nqsets = 0;
/* Guard on FULL_INIT_DONE; the guarded statement is not visible in this listing. */
3298 if ((sc->flags & FULL_INIT_DONE) == 0)
3301 coalesce_usecs = qsp->coalesce_usecs;
3302 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3307 if (coalesce_usecs == qsp->coalesce_usecs)
/*
 * Walk every queue set of every port.
 * NOTE(review): the loop body is not visible; presumably it counts queue
 * sets into nqsets.
 */
3310 for (i = 0; i < sc->params.nports; i++)
3311 for (j = 0; j < sc->port[i].nqsets; j++)
3314 coalesce_usecs = max(1, coalesce_usecs);
3316 for (i = 0; i < nqsets; i++) {
3317 qs = &sc->sge.qs[i];
3318 qsp = &sc->params.sge.qset[i];
3319 qsp->coalesce_usecs = coalesce_usecs;
/* With MSI-X each queue set uses its own response-queue lock; otherwise queue set 0's. */
3321 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3322 &sc->sge.qs[0].rspq.lock;
3325 t3_update_qset_coalesce(qs, qsp);
3326 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3327 V_NEWTIMER(qs->rspq.holdoff_tmr));
/*
 * t3_pkt_timestamp: sysctl handler (arg1 is the adapter) for the packet
 * timestamp setting.  The current sc->timestamp value is exchanged with the
 * caller through sysctl_handle_int().  When the value changes, the
 * F_ENABLERXPKTTMSTPRSS field of A_TP_PC_CONFIG2 is set or cleared to match
 * and the new value is stored in sc->timestamp.
 */
3335 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
3337 adapter_t *sc = arg1;
/* Guard on FULL_INIT_DONE; the guarded statement is not visible in this listing. */
3340 if ((sc->flags & FULL_INIT_DONE) == 0)
/*
 * sysctl_handle_int() works on a local copy, so its address is passed.  The
 * address-of operator had been corrupted into a multiplication sign by an
 * entity decode and is restored here.
 */
3343 timestamp = sc->timestamp;
3344 rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
/* Only touch the hardware when the setting actually changed. */
3349 if (timestamp != sc->timestamp) {
3350 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
3351 timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
3352 sc->timestamp = timestamp;
/*
 * t3_add_attach_sysctls: register adapter-level sysctl entries under the
 * device's sysctl tree.  The entries expose the firmware version string, the
 * hardware revision (sc->params.rev), the port types string, the cxgb_debug
 * switch (read-write), tunq_coalesce, txq_fills and the core clock frequency.
 * NOTE(review): most of the OID name strings are not visible in this listing.
 */
3359 t3_add_attach_sysctls(adapter_t *sc)
3361 struct sysctl_ctx_list *ctx;
3362 struct sysctl_oid_list *children;
3364 ctx = device_get_sysctl_ctx(sc->dev);
3365 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3367 /* random information */
3368 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3370 CTLFLAG_RD, sc->fw_version,
3371 0, "firmware version");
3372 SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3374 CTLFLAG_RD, &sc->params.rev,
3376 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3378 CTLFLAG_RD, sc->port_types,
3379 0, "type of ports");
/* The only writable entry here: toggles the driver's verbose debug output. */
3380 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3382 CTLFLAG_RW, &cxgb_debug,
3383 0, "enable verbose debugging output");
3384 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3385 CTLFLAG_RD, &sc->tunq_coalesce,
3386 "#tunneled packets freed");
3387 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3389 CTLFLAG_RD, &txq_fills,
3390 0, "#times txq overrun");
3391 SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3393 CTLFLAG_RD, &sc->params.vpd.cclk,
3394 0, "core clock frequency (in KHz)");
/*
 * Node names for the per-queue-set sysctl subtrees created in
 * t3_add_configured_sysctls().  rspq_name names the response queue node.
 * txq_names[0] and txq_names[2] name the Ethernet and control Tx queue nodes.
 * NOTE(review): the txq_names initializer is not visible in this listing.
 */
3398 static const char *rspq_name = "rspq";
3399 static const char *txq_names[] =
/*
 * sysctl_handle_macstat: sysctl handler for one 64-bit MAC statistics
 * counter.  arg1 is the port and arg2 is the byte offset of the counter
 * within p->mac.stats.  The port's statistics are refreshed with
 * cxgb_refresh_stats() before the counter is handed to sysctl_handle_64().
 */
3407 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3409 struct port_info *p = arg1;
3415 cxgb_refresh_stats(p);
/* Locate the requested counter by its byte offset inside the stats structure. */
3416 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3418 return (sysctl_handle_64(oidp, parg, 0, req));
/*
 * t3_add_configured_sysctls: register the sysctl entries that depend on the
 * adapter's configuration.  Two adapter-level procedure nodes are added,
 * handled by t3_set_coalesce_usecs() and t3_pkt_timestamp().  A "port%d" node
 * is then created per port and a "qs%d" node per queue set of that port.
 * Each queue-set node has subtrees for the response queue, the Ethernet Tx
 * queue, the control Tx queue and LRO statistics.  Each port also gets a
 * "mac_stats" node holding the MAC counters.
 */
3422 t3_add_configured_sysctls(adapter_t *sc)
3424 struct sysctl_ctx_list *ctx;
3425 struct sysctl_oid_list *children;
3428 ctx = device_get_sysctl_ctx(sc->dev);
3429 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
/* Adapter-wide tunables; both handlers receive the adapter as arg1. */
3431 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3433 CTLTYPE_INT|CTLFLAG_RW, sc,
3434 0, t3_set_coalesce_usecs,
3435 "I", "interrupt coalescing timer (us)");
3437 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3439 CTLTYPE_INT | CTLFLAG_RW, sc,
3440 0, t3_pkt_timestamp,
3441 "I", "provide packet timestamp instead of connection hash");
/* One "port%d" node per port; the name is kept in the port's own namebuf. */
3443 for (i = 0; i < sc->params.nports; i++) {
3444 struct port_info *pi = &sc->port[i];
3445 struct sysctl_oid *poid;
3446 struct sysctl_oid_list *poidlist;
3447 struct mac_stats *mstats = &pi->mac.stats;
3449 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3450 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3451 pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3452 poidlist = SYSCTL_CHILDREN(poid);
3453 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
3454 "nqsets", CTLFLAG_RD, &pi->nqsets,
/*
 * One "qs%d" node per queue set of this port.  The queue set is found at
 * index first_qset + j, and the node is named by j, the index within the port.
 */
3457 for (j = 0; j < pi->nqsets; j++) {
3458 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3459 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3460 *ctrlqpoid, *lropoid;
3461 struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3462 *txqpoidlist, *ctrlqpoidlist,
3464 struct sge_txq *txq = &qs->txq[TXQ_ETH];
3466 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3468 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3469 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3470 qspoidlist = SYSCTL_CHILDREN(qspoid);
3472 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3473 CTLFLAG_RD, &qs->fl[0].empty, 0,
3474 "freelist #0 empty");
3475 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3476 CTLFLAG_RD, &qs->fl[1].empty, 0,
3477 "freelist #1 empty");
/* Create the four per-queue-set subtrees before populating them. */
3479 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3480 rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3481 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3483 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3484 txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3485 txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3487 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3488 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3489 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3491 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3492 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3493 lropoidlist = SYSCTL_CHILDREN(lropoid);
/*
 * Response queue subtree.  dump_start and dump_count are writable and select
 * the window that the read-only "qdump" node prints through t3_dump_rspq().
 */
3495 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3496 CTLFLAG_RD, &qs->rspq.size,
3497 0, "#entries in response queue");
3498 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3499 CTLFLAG_RD, &qs->rspq.cidx,
3500 0, "consumer index");
3501 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3502 CTLFLAG_RD, &qs->rspq.credits,
3504 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
3505 CTLFLAG_RD, &qs->rspq.starved,
3506 0, "#times starved");
3507 SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3508 CTLFLAG_RD, &qs->rspq.phys_addr,
3509 "physical_address_of the queue");
3510 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3511 CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3512 0, "start rspq dump entry");
3513 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3514 CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3515 0, "#rspq entries to dump");
3516 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3517 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3518 0, t3_dump_rspq, "A", "dump of the response queue");
/*
 * Ethernet Tx queue subtree.
 * NOTE(review): "dropped" reaches txq_mr with "->" while queue_pidx and
 * queue_cidx use "."; the two forms cannot both compile.  Presumably the
 * queue_pidx/queue_cidx entries are compiled out by preprocessor lines that
 * are not visible in this listing.  Confirm.
 */
3520 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
3521 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
3522 "#tunneled packets dropped");
3523 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3524 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3525 0, "#tunneled packets waiting to be sent");
3527 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3528 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3529 0, "#tunneled packets queue producer index");
3530 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3531 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3532 0, "#tunneled packets queue consumer index");
3534 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
3535 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3536 0, "#tunneled packets processed by the card");
3537 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3538 CTLFLAG_RD, &txq->cleaned,
3539 0, "#tunneled packets cleaned");
3540 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3541 CTLFLAG_RD, &txq->in_use,
3542 0, "#tunneled packet slots in use");
3543 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
3544 CTLFLAG_RD, &txq->txq_frees,
3545 "#tunneled packets freed");
3546 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3547 CTLFLAG_RD, &txq->txq_skipped,
3548 0, "#tunneled packet descriptors skipped");
3549 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3550 CTLFLAG_RD, &txq->txq_coalesced,
3551 "#tunneled packets coalesced");
3552 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3553 CTLFLAG_RD, &txq->txq_enqueued,
3554 0, "#tunneled packets enqueued to hardware");
3555 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3556 CTLFLAG_RD, &qs->txq_stopped,
3557 0, "tx queues stopped");
3558 SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3559 CTLFLAG_RD, &txq->phys_addr,
3560 "physical_address_of the queue");
3561 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3562 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3563 0, "txq generation");
3564 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3565 CTLFLAG_RD, &txq->cidx,
3566 0, "hardware queue cidx");
3567 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3568 CTLFLAG_RD, &txq->pidx,
3569 0, "hardware queue pidx");
3570 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3571 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3572 0, "txq start idx for dump");
3573 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3574 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3575 0, "txq #entries to dump");
3576 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3577 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3578 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
/* Control Tx queue subtree: dump window plus the dump node itself. */
3580 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3581 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3582 0, "ctrlq start idx for dump");
3583 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3584 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3585 0, "ctrl #entries to dump");
3586 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3587 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3588 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
/* LRO subtree: read-only counters taken from the queue set's lro_ctrl. */
3590 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3591 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3592 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3593 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3594 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3595 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3596 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3597 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
/*
 * MAC statistics for this port.  The 64-bit counters go through
 * sysctl_handle_macstat() with the counter's offsetof() value as arg2.  The
 * unsigned long counters further down are exported directly from mstats.
 * poidlist is reassigned to the new node's children from here on.
 */
3600 /* Now add a node for mac stats. */
3601 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3602 CTLFLAG_RD, NULL, "MAC statistics");
3603 poidlist = SYSCTL_CHILDREN(poid);
3606 * We (ab)use the length argument (arg2) to pass on the offset
3607 * of the data that we are interested in. This is only required
3608 * for the quad counters that are updated from the hardware (we
3609 * make sure that we return the latest value).
3610 * sysctl_handle_macstat first updates *all* the counters from
3611 * the hardware, and then returns the latest value of the
3612 * requested counter. Best would be to update only the
3613 * requested counter from hardware, but t3_mac_update_stats()
3614 * hides all the register details and we don't want to dive into
3617 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3618 (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3619 sysctl_handle_macstat, "QU", 0)
3620 CXGB_SYSCTL_ADD_QUAD(tx_octets);
3621 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3622 CXGB_SYSCTL_ADD_QUAD(tx_frames);
3623 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3624 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3625 CXGB_SYSCTL_ADD_QUAD(tx_pause);
3626 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3627 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3628 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3629 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3630 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3631 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3632 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3633 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3634 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3635 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3636 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3637 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3638 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3639 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3640 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3641 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3642 CXGB_SYSCTL_ADD_QUAD(rx_octets);
3643 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3644 CXGB_SYSCTL_ADD_QUAD(rx_frames);
3645 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3646 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3647 CXGB_SYSCTL_ADD_QUAD(rx_pause);
3648 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3649 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3650 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3651 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3652 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3653 CXGB_SYSCTL_ADD_QUAD(rx_runt);
3654 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3655 CXGB_SYSCTL_ADD_QUAD(rx_short);
3656 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3657 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3658 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3659 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3660 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3661 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3662 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3663 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3664 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3665 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3666 #undef CXGB_SYSCTL_ADD_QUAD
3668 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3669 CTLFLAG_RD, &mstats->a, 0)
3670 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3671 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3672 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3673 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3674 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3675 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3676 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3677 CXGB_SYSCTL_ADD_ULONG(num_toggled);
3678 CXGB_SYSCTL_ADD_ULONG(num_resets);
3679 CXGB_SYSCTL_ADD_ULONG(link_faults);
3680 #undef CXGB_SYSCTL_ADD_ULONG
3685 * t3_get_desc - dump an SGE descriptor for debugging purposes
3686 * @qs: the queue set
3687 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3688 * @idx: the descriptor index in the queue
3689 * @data: where to dump the descriptor contents
3691 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3692 * size of the descriptor.
/*
 * t3_get_desc: copy one hardware descriptor of queue set qs into data and
 * return the descriptor's size.  The three visible sections handle a Tx queue
 * (struct tx_desc), the response queue (struct rsp_desc) and a free list
 * (struct rx_desc).  Each section first checks that the ring is allocated and
 * that idx is below the ring size.
 * NOTE(review): the tests selecting a section by qnum, the failure returns and
 * any adjustment of qnum before it indexes fl[] are not visible in this
 * listing.
 */
3695 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3696 unsigned char *data)
/* Tx queue descriptor. */
3702 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3704 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3705 return sizeof(struct tx_desc);
/* Response queue descriptor. */
3709 if (!qs->rspq.desc || idx >= qs->rspq.size)
3711 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3712 return sizeof(struct rsp_desc);
/* Free-list (Rx) descriptor. */
3716 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3718 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3719 return sizeof(struct rx_desc);