/**************************************************************************
SPDX-License-Identifier: BSD-2-Clause-FreeBSD

Copyright (c) 2007-2009, Chelsio Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.

2. Neither the name of the Chelsio Corporation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <sys/sched.h>

#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/socket.h>
#include <sys/sglist.h>

#include <net/if_var.h>

#include <net/ethernet.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <cxgb_include.h>
int multiq_tx_enable = 1;

CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);

extern struct sysctl_oid_list sysctl__hw_cxgb_children;
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
    "size of per-queue mbuf ring");

static int cxgb_tx_coalesce_force = 0;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN,
    &cxgb_tx_coalesce_force, 0,
    "coalesce small packets into a single work request regardless of ring state");
#define	COALESCE_START_DEFAULT	(TX_ETH_Q_SIZE >> 1)
#define	COALESCE_START_MAX	(TX_ETH_Q_SIZE - (TX_ETH_Q_SIZE >> 3))
#define	COALESCE_STOP_DEFAULT	(TX_ETH_Q_SIZE >> 2)
#define	COALESCE_STOP_MIN	(TX_ETH_Q_SIZE >> 5)
#define	TX_RECLAIM_DEFAULT	(TX_ETH_Q_SIZE >> 5)
#define	TX_RECLAIM_MAX		(TX_ETH_Q_SIZE >> 2)
#define	TX_RECLAIM_MIN		(TX_ETH_Q_SIZE >> 6)
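
/*
 * For reference (a sketch assuming the stock TX_ETH_Q_SIZE of 1024
 * descriptors): coalescing starts at 512 in-use descriptors (tunable
 * capped at 896) and stops below 256 (floor 32); reclaim defaults to
 * batches of 32 descriptors, clamped to the range [16, 256].
 */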
static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN,
    &cxgb_tx_coalesce_enable_start, 0,
    "coalesce enable threshold");
static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN,
    &cxgb_tx_coalesce_enable_stop, 0,
    "coalesce disable threshold");
static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN,
    &cxgb_tx_reclaim_threshold, 0,
    "tx cleaning minimum threshold");
/*
 * XXX don't re-enable this until TOE stops assuming
 * we have an m_ext
 */
static int recycle_enable = 0;

extern int cxgb_use_16k_clusters;
extern int nmbjumbop;
extern int nmbjumbo9;
extern int nmbjumbo16;

#define SGE_RX_SM_BUF_SIZE	1536
#define SGE_RX_DROP_THRES	16
#define SGE_RX_COPY_THRES	128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD	(hz >> 1)
/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB = 1 << 1,	/* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {			/* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP		G_RSPD_SOP_EOP(0)
#define RSPQ_EOP		G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP		G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP		G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {		/* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {		/* SW state per Rx descriptor */
	caddr_t		rxsd_cl;
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct refill_fl_cb_arg {
	int               error;
	bus_dma_segment_t seg;
	int               nseg;
};
/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 *   desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};
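
/*
 * With SGE_NUM_GENBITS == 2 the last flit of every descriptor is reserved
 * for the second generation bit (see wr_gen2() below), which is why each
 * row of the 2-genbit table holds one entry fewer than in the 1-genbit
 * case.
 */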
#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)
#define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_NEEDS_ENQUEUE(qs)					\
	drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)				\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs) \
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);

/*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we are now for determining the need for coalescing
 */
static __inline uint64_t
check_pkt_coalesce(struct sge_qset *qs)
{
	struct adapter *sc;
	struct sge_txq *txq;
	uint8_t *fill;

	if (__predict_false(cxgb_tx_coalesce_force))
		return (1);
	txq = &qs->txq[TXQ_ETH];
	sc = qs->port->adapter;
	fill = &sc->tunq_fill[qs->idx];

	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * Once the hardware transmit queue has at least
	 * cxgb_tx_coalesce_enable_start descriptors in use we mark it as
	 * coalescing; we drop back out of coalescing when in-use falls to
	 * cxgb_tx_coalesce_enable_stop or below and no packets are
	 * enqueued.  This provides some degree of hysteresis.
	 */
	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
		*fill = 0;
	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
		*fill = 1;

	return (sc->tunq_coalesce);
}
#if defined(__LP64__)
static __inline void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	uint64_t wr_hilo;
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
}
#else
static __inline void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif
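
/*
 * Publishing the header as one 64-bit store keeps the SGE from ever
 * observing a half-written WR header.  The byte-order selection above
 * only ensures that wr_hi still lands in the first four bytes of the
 * descriptor, exactly where the two ordered 32-bit stores of the
 * fallback version place it.
 */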
struct coalesce_info {
	int count;
	int nbytes;
};

static int
coalesce_check(struct mbuf *m, void *arg)
{
	struct coalesce_info *ci = arg;
	int *count = &ci->count;
	int *nbytes = &ci->nbytes;

	if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
	    (*count < 7) && (m->m_next == NULL))) {
		*count += 1;
		*nbytes += m->m_len;
		return (1);
	}
	return (0);
}
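
/*
 * These limits mean at most seven single-mbuf packets, 10500 bytes in
 * all (seven 1500-byte frames), are coalesced into one batched work
 * request; see the cpl_tx_pkt_batch path in t3_encap().
 */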
static struct mbuf *
cxgb_dequeue(struct sge_qset *qs)
{
	struct mbuf *m, *m_head, *m_tail;
	struct coalesce_info ci;

	if (check_pkt_coalesce(qs) == 0)
		return TXQ_RING_DEQUEUE(qs);

	m_head = m_tail = NULL;
	ci.count = ci.nbytes = 0;
	do {
		m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
		if (m_head == NULL) {
			m_tail = m_head = m;
		} else if (m != NULL) {
			m_tail->m_nextpkt = m;
			m_tail = m;
		}
	} while (m != NULL);
	if (ci.count > 7)
		panic("trying to coalesce %d packets into one WR", ci.count);
	return (m_head);
}
/**
 *	reclaim_completed_tx - reclaims completed Tx descriptors
 *	@qs: the queue set containing the Tx queue
 *	@reclaim_min: don't bother reclaiming fewer than this many descriptors
 *	@queue: the Tx queue index (TXQ_ETH, TXQ_OFLD, or TXQ_CTRL)
 *
 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
 *	and frees the associated buffers if possible.  Called with the Tx
 *	queue's lock held.
 */
static int
reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
{
	struct sge_txq *q = &qs->txq[queue];
	int reclaim = desc_reclaimable(q);

	if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
	    (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
		cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;

	if (reclaim < reclaim_min)
		return (0);

	mtx_assert(&qs->lock, MA_OWNED);
	if (reclaim > 0) {
		t3_free_tx_desc(qs, reclaim, queue);
		q->cleaned += reclaim;
		q->in_use -= reclaim;
	}
	if (isset(&qs->txq_stopped, TXQ_ETH))
		clrbit(&qs->txq_stopped, TXQ_ETH);

	return (reclaim);
}

#ifdef NETDUMP
static int
cxgb_netdump_poll_tx(struct sge_qset *qs)
{

	return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH));
}
#endif
/**
 *	should_restart_tx - are there enough resources to restart a Tx queue?
 *	@q: the Tx queue
 *
 *	Checks if there are enough descriptors to restart a suspended Tx queue.
 */
static __inline int
should_restart_tx(const struct sge_txq *q)
{
	unsigned int r = q->processed - q->cleaned;

	return q->in_use - r < (q->size >> 1);
}
/**
 *	t3_sge_init - initialize SGE
 *	@adap: the adapter
 *	@p: the SGE parameters
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queue sets here, instead the driver
 *	top-level must request those individually.  We also do not enable DMA
 *	here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
	unsigned int ctrl, ups;

	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
	ctrl |= F_EGRGENCTRL;
#endif
	if (adap->params.rev > 0) {
		if (!(adap->flags & (USING_MSIX | USING_MSI)))
			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
	}
	t3_write_reg(adap, A_SG_CONTROL, ctrl);
	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
		     V_LORCQDRBTHRSH(512));
	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
		     adap->params.rev < T3_REV_C ? 1000 : 500);
	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}
/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits needed for a scatter/gather list that
 *	can hold the given number of entries.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	return ((3 * n) / 2 + (n & 1));
}
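
/*
 * Each struct sg_ent packs two {len, addr} pairs into three flits, so a
 * full pair costs 3 flits and a trailing odd entry costs 2:
 * sgl_len(1) = 2, sgl_len(2) = 3, sgl_len(3) = 5, sgl_len(4) = 6.
 */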
/**
 *	get_imm_packet - return the next ingress packet buffer from a response
 *	@resp: the response descriptor containing the packet data
 *
 *	Return a packet containing the immediate data of the given response.
 */
static int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
{

	if (resp->rss_hdr.opcode == CPL_RX_DATA) {
		const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
		m->m_len = sizeof(*cpl) + ntohs(cpl->len);
	} else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
		const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
		m->m_len = sizeof(*cpl) + ntohs(cpl->len);
	} else
		m->m_len = IMMED_PKT_SIZE;
	m->m_ext.ext_buf = NULL;
	m->m_ext.ext_type = 0;
	memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len);
	return (0);
}
static __inline u_int
flits_to_desc(u_int n)
{
	return (flit_desc_map[n]);
}

#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
		    F_HIRCQPARITYERROR)
#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
		      F_RSPQDISABLED)
/**
 *	t3_sge_err_intr_handler - SGE async event interrupt handler
 *	@adapter: the adapter
 *
 *	Interrupt handler for SGE asynchronous (non-data) events.
 */
void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int v, status;

	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
	if (status & SGE_PARERR)
		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
			 status & SGE_PARERR);
	if (status & SGE_FRAMINGERR)
		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
			 status & SGE_FRAMINGERR);
	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
			 "packet delivered to disabled response queue (0x%x)\n",
			 (v >> S_RSPQ0DISABLED) & 0xff);
	}

	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & SGE_FATALERR)
		t3_fatal_err(adapter);
}
void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
	int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;

	nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
	nqsets *= adap->params.nports;

	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);

	while (!powerof2(fl_q_size))
		fl_q_size--;

	use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
	    is_offload(adap);

#if __FreeBSD_version >= 700111
	if (use_16k) {
		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
		jumbo_buf_size = MJUM16BYTES;
	} else {
		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
		jumbo_buf_size = MJUM9BYTES;
	}
#else
	jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
	jumbo_buf_size = MJUMPAGESIZE;
#endif
	while (!powerof2(jumbo_q_size))
		jumbo_q_size--;

	if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
		device_printf(adap->dev,
		    "Insufficient clusters and/or jumbo buffers.\n");

	p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		if (adap->params.nports > 2) {
			q->coalesce_usecs = 50;
		} else {
#ifdef INVARIANTS
			q->coalesce_usecs = 10;
#else
			q->coalesce_usecs = 5;
#endif
		}

		q->rspq_size = RSPQ_Q_SIZE;
		q->fl_size = fl_q_size;
		q->jumbo_size = jumbo_q_size;
		q->jumbo_buf_size = jumbo_buf_size;
		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
		q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
		q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
	}
}
int
t3_sge_alloc(adapter_t *sc)
{

	/* The parent tag. */
	if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
				1, 0,			/* algnmnt, boundary */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
				BUS_SPACE_UNRESTRICTED, /* nsegments */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
				0,			/* flags */
				NULL, NULL,		/* lock, lockarg */
				&sc->parent_dmat)) {
		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for normal sized RX frames
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for jumbo sized RX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for TX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}
void
t3_sge_free(struct adapter * sc)
{

	if (sc->tx_dmat != NULL)
		bus_dma_tag_destroy(sc->tx_dmat);

	if (sc->rx_jumbo_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);

	if (sc->rx_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_dmat);

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);
}

void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}
#if !defined(__i386__) && !defined(__amd64__)
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;
}
#endif
/**
 *	refill_fl - refill an SGE free-buffer list
 *	@sc: the controller softc
 *	@q: the free-list to refill
 *	@n: the number of new buffers to allocate
 *
 *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
 *	The caller must assure that @n does not exceed the queue's capacity.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	struct mbuf *m;
	caddr_t cl;
	int err;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We allocate an uninitialized mbuf + cluster, mbuf is
		 * initialized after rx.
		 */
		if (q->zone == zone_pack) {
			if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
				break;
			cl = m->m_ext.ext_buf;
		} else {
			if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
				break;
			if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
				uma_zfree(q->zone, cl);
				break;
			}
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
#if !defined(__i386__) && !defined(__amd64__)
		err = bus_dmamap_load(q->entry_tag, sd->map,
		    cl, q->buf_size, refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			if (q->zone != zone_pack)
				uma_zfree(q->zone, cl);
			m_free(m);
			goto done;
		}
#else
		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
#endif
		sd->flags |= RX_SW_DESC_INUSE;
		sd->rxsd_cl = cl;
		sd->m = m;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
		q->db_pending++;
	}

done:
	if (q->db_pending >= 32) {
		q->db_pending = 0;
		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
	}
}
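
/*
 * Free-list doorbells are batched: the new producer index is only pushed
 * to the hardware once at least 32 buffers are pending, keeping the
 * KDOORBELL register write off the per-buffer path.
 */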
/**
 *	free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
 *	@q: the SGE free list to clean up
 *
 *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 *	this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			if (q->zone == zone_pack) {
				m_init(d->m, M_NOWAIT, MT_DATA, M_EXT);
				uma_zfree(zone_pack, d->m);
			} else {
				m_init(d->m, M_NOWAIT, MT_DATA, 0);
				uma_zfree(zone_mbuf, d->m);
				uma_zfree(q->zone, d->rxsd_cl);
			}
		}

		d->rxsd_cl = NULL;
		d->m = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

static __inline void
__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
{
	uint32_t reclaimable = fl->size - fl->credits;

	if (reclaimable > 0)
		refill_fl(adap, fl, min(max, reclaimable));
}
/**
 *	recycle_rx_buf - recycle a receive buffer
 *	@adapter: the adapter
 *	@q: the SGE free list
 *	@idx: index of buffer to recycle
 *
 *	Recycles the specified buffer on the given free list by adding it at
 *	the next available slot on the list.
 */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
	struct rx_desc *from = &q->desc[idx];
	struct rx_desc *to   = &q->desc[q->pidx];

	q->sdesc[q->pidx] = q->sdesc[idx];
	to->addr_lo = from->addr_lo;        // already big endian
	to->addr_hi = from->addr_hi;        // likewise
	wmb();	/* necessary ? */
	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
	q->credits++;

	if (++q->pidx == q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}
static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	bus_addr_t *addr;

	if (error)
		return;

	KASSERT(nsegs == 1, ("Too many segments returned!"));
	addr = arg;
	*addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
				      BUS_SPACE_MAXADDR_32BIT,
				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
				      len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
				    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
				      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
				      NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}

	return (0);
}
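
/*
 * alloc_ring() hands back two parallel arrays: the DMA-visible descriptor
 * ring itself (returned through @desc, with its bus address in @phys) and
 * an optional host-only shadow ring of @sw_size-byte entries (@sdesc) in
 * which the driver tracks the mbufs and DMA maps for each descriptor.
 */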
static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
	t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
}
/**
 *	sge_timer_cb - perform periodic maintenance of an SGE qset
 *	@data: the SGE queue set to maintain
 *
 *	Runs periodically from a timer to perform maintenance of an SGE queue
 *	set.  It performs the following tasks:
 *
 *	a) Cleans up any completed Tx descriptors that may still be pending.
 *	Normal descriptor cleanup happens when new packets are added to a Tx
 *	queue so this timer is relatively infrequent and does any cleanup only
 *	if the Tx queue has not seen any new packets in a while.  We make a
 *	best effort attempt to reclaim descriptors, in that we don't wait
 *	around if we cannot get a queue's lock (which most likely is because
 *	someone else is queueing new packets and so will also handle the clean
 *	up).  Since control queues use immediate data exclusively we don't
 *	bother cleaning them up here.
 *
 *	b) Replenishes Rx queues that have run out due to memory shortage.
 *	Normally new Rx buffers are added when existing ones are consumed but
 *	when out of memory a queue can become empty.  We try to add only a few
 *	buffers here, the queue will be replenished fully as these new buffers
 *	are used up if memory shortage has subsided.
 *
 *	c) Return coalesced response queue credits in case a response queue is
 *	starved.
 *
 *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
 *	fifo overflows and the FW doesn't implement any recovery scheme yet.
 */
static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;

	if ((sc->flags & USING_MSIX) == 0) {
		struct port_info *pi;
		struct sge_qset *qs;
		struct sge_txq  *txq;
		int i, j;
		int reclaim_ofl, refill_rx;

		if (sc->open_device_map == 0)
			return;

		for (i = 0; i < sc->params.nports; i++) {
			pi = &sc->port[i];
			for (j = 0; j < pi->nqsets; j++) {
				qs = &sc->sge.qs[pi->first_qset + j];
				txq = &qs->txq[0];
				reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
				refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
				    (qs->fl[1].credits < qs->fl[1].size));
				if (reclaim_ofl || refill_rx) {
					taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
					break;
				}
			}
		}
	}

	if (sc->params.nports > 2) {
		int i;

		for_each_port(sc, i) {
			struct port_info *pi = &sc->port[i];

			t3_write_reg(sc, A_SG_KDOORBELL,
				     F_SELEGRCNTX |
				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
		}
	}
	if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
	    sc->open_device_map != 0)
		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}
/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c
 */
int
t3_sge_init_adapter(adapter_t *sc)
{
	callout_init(&sc->sge_timer_ch, 1);
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
	return (0);
}

int
t3_sge_reset_adapter(adapter_t *sc)
{
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	return (0);
}

int
t3_sge_init_port(struct port_info *pi)
{
	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
	return (0);
}
/**
 *	refill_rspq - replenish an SGE response queue
 *	@adapter: the adapter
 *	@q: the response queue to replenish
 *	@credits: how many new responses to make available
 *
 *	Replenishes a response queue by making the supplied number of responses
 *	available to HW.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

	/* mbufs are allocated on demand when a rspq entry is processed. */
	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}
static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_qset *qs = arg;
	int i;

	for (i = 0; i < 3; i++)
		reclaim_completed_tx(qs, 16, i);
}
static void
sge_timer_reclaim(void *arg, int ncount)
{
	struct port_info *pi = arg;
	int i, nqsets = pi->nqsets;
	adapter_t *sc = pi->adapter;
	struct sge_qset *qs;
	struct mtx *lock;

	KASSERT((sc->flags & USING_MSIX) == 0,
	    ("can't call timer reclaim for msi-x"));

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[pi->first_qset + i];

		reclaim_completed_tx(qs, 16, TXQ_OFLD);
		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
			    &sc->sge.qs[0].rspq.lock;

		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			if (status & (1 << qs->rspq.cntxt_id)) {
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}
/**
 *	init_qset_cntxt - initialize an SGE queue set context info
 *	@qs: the queue set
 *	@id: the queue set id
 *
 *	Initializes the TIDs and context ids for the queues of a queue set.
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{

	qs->rspq.cntxt_id = id;
	qs->fl[0].cntxt_id = 2 * id;
	qs->fl[1].cntxt_id = 2 * id + 1;
	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;

	/* XXX: a sane limit is needed instead of INT_MAX */
	mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX);
	mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX);
	mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX);
}
static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
	txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of queue
	 * presumably start handles this when we bump against the end
	 */
	txqs->gen = txq->gen;
	txq->unacked += ndesc;
	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
	txq->unacked &= 31;
	txqs->pidx = txq->pidx;
	txq->pidx += ndesc;
#ifdef INVARIANTS
	if (((txqs->pidx > txq->cidx) &&
		(txq->pidx < txqs->pidx) &&
		(txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
		(txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
		(txq->cidx < txqs->pidx)))
		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
		      txqs->pidx, txq->pidx, txq->cidx);
#endif
	if (txq->pidx >= txq->size) {
		txq->pidx -= txq->size;
		txq->gen ^= 1;
	}
}
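
/*
 * The unacked arithmetic above rate-limits completion requests: unacked
 * accumulates issued descriptors, and once the running count crosses 32
 * its bit 5 is shifted into the WR_COMPL position and the counter wraps
 * (unacked &= 31), so roughly one WR in every 32 descriptors asks the
 * SGE for a Tx completion.
 */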
/**
 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
 *	@m: the packet mbufs
 *	@nsegs: the number of segments
 *
 *	Returns the number of Tx descriptors needed for the given Ethernet
 *	packet.  Ethernet packets require addition of WR and CPL headers.
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	if (m->m_pkthdr.len <= PIO_LEN)
		return 1;

	flits = sgl_len(nsegs) + 2;
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		flits++;

	return flits_to_desc(flits);
}
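
/*
 * Example: a TSO packet mapped into 6 DMA segments needs sgl_len(6) = 9
 * flits of SGL plus 2 flits of WR/CPL header plus 1 flit of LSO header,
 * i.e. 12 flits, which flit_desc_map[] resolves to a single descriptor.
 */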
/**
 *	make_sgl - populate a scatter/gather list for a packet
 *	@sgp: the SGL to populate
 *	@segs: the packet dma segments
 *	@nsegs: the number of segments
 *
 *	Generates a scatter/gather list for the buffers that make up a packet
 *	and returns the SGL size in 8-byte words.  The caller must size the SGL
 *	appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++) {
		/*
		 * firmware doesn't like empty segments
		 */
		if (segs[i].ds_len == 0)
			continue;
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
		idx ^= 1;
	}

	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}
}
/**
 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 *	@adap: the adapter
 *	@q: the Tx queue
 *	@mustring: 1 if the doorbell must be rung
 *
 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 *	where the HW is going to sleep just after we checked, however,
 *	then the interrupt handler will detect the outstanding TX packet
 *	and ring the doorbell for us.
 *
 *	When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
			  q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	if (mustring || ++q->db_pending >= 32) {
		wmb();            /* write descriptors before telling HW */
		t3_write_reg(adap, A_SG_KDOORBELL,
			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
		q->db_pending = 0;
	}
#endif
}
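
/*
 * With GTS compiled out, Tx doorbells are batched 32 deep via db_pending;
 * callers that must flush immediately, e.g. cxgb_start_locked() at the
 * end of a burst, pass mustring == 1.
 */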
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}
/**
 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *	@ndesc: number of Tx descriptors spanned by the SGL
 *	@txd: first Tx descriptor to be written
 *	@txqs: txq state (generation and producer index)
 *	@txq: the SGE Tx queue
 *	@sgl: the SGL
 *	@flits: number of flits to the start of the SGL in the first descriptor
 *	@sgl_flits: the SGL size in flits
 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *	Write a work request header and an associated SGL.  If the SGL is
 *	small enough to fit into one Tx descriptor it has already been written
 *	and we just need to write the WR header.  Otherwise we distribute the
 *	SGL across the number of descriptors it spans.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi,
		    htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
		    wr_lo);

		wr_gen2(txd, txqs->gen);

	} else {
		unsigned int ogen = txqs->gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			txsd++;
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			wrp = (struct work_request_hdr *)txd;
			wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
				    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		wrp->wrh_hi |= htonl(F_WR_EOP);
		wmb();
		wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}
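
/*
 * Note the ordering in the multi-descriptor case above: every descriptor
 * after the first is stamped with the current generation as it is filled,
 * but the first descriptor's wrh_lo, which carries the generation bit the
 * SGE polls, is deliberately written last, after wmb(), so the hardware
 * cannot begin fetching the WR until all of its descriptors are in memory.
 */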
/* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)

#define GET_VTAG(cntrl, m) \
do { \
	if ((m)->m_flags & M_VLANTAG)					            \
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
} while (0)
static int
t3_encap(struct sge_qset *qs, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_txq *txq;
	struct txq_state txqs;
	struct port_info *pi;
	unsigned int ndesc, flits, cntrl, mlen;
	int err, nsegs, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, *sgl;
	uint32_t wr_hi, wr_lo, sgl_flits;
	bus_dma_segment_t segs[TX_MAX_SEGS];

	struct tx_desc *txd;

	pi = qs->port;
	sc = pi->adapter;
	txq = &qs->txq[TXQ_ETH];
	txd = &txq->desc[txq->pidx];
	txsd = &txq->sdesc[txq->pidx];
	sgl = txq->txq_sgl;
	m0 = *m;

	mtx_assert(&qs->lock, MA_OWNED);
	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
	KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));

	if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
	    m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);

	if (m0->m_nextpkt != NULL) {
		busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
		ndesc = 1;
		mlen = 0;
	} else {
		if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
		    &m0, segs, &nsegs))) {
			if (cxgb_debug)
				printf("failed ... err=%d\n", err);
			return (err);
		}
		mlen = m0->m_pkthdr.len;
		ndesc = calc_tx_descs(m0, nsegs);
	}
	txq_prod(txq, ndesc, &txqs);

	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
	txsd->m = m0;

	if (m0->m_nextpkt != NULL) {
		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
		int i, fidx;

		if (nsegs > 7)
			panic("trying to coalesce %d packets into one WR", nsegs);
		txq->txq_coalesced += nsegs;
		wrp = (struct work_request_hdr *)txd;
		flits = nsegs*2 + 1;

		for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
			struct cpl_tx_pkt_batch_entry *cbe;
			uint64_t flit;
			uint32_t *hflit = (uint32_t *)&flit;
			int cflags = m0->m_pkthdr.csum_flags;

			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
			GET_VTAG(cntrl, m0);
			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
			if (__predict_false(!(cflags & CSUM_IP)))
				cntrl |= F_TXPKT_IPCSUM_DIS;
			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
			    CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
				cntrl |= F_TXPKT_L4CSUM_DIS;

			hflit[0] = htonl(cntrl);
			hflit[1] = htonl(segs[i].ds_len | 0x80000000);
			flit |= htobe64(1 << 24);
			cbe = &cpl_batch->pkt_entry[i];
			cbe->cntrl = hflit[0];
			cbe->len = hflit[1];
			cbe->addr = htobe64(segs[i].ds_addr);
		}

		wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) |
		    htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
		wr_lo = htonl(V_WR_LEN(flits) |
		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
		set_wr_hdr(wrp, wr_hi, wr_lo);
		wmb();
		ETHER_BPF_MTAP(pi->ifp, m0);
		wr_gen2(txd, txqs.gen);
		check_ring_tx_db(sc, txq, 0);
		return (0);
	} else if (tso_info) {
		uint16_t eth_type;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
		struct ether_header *eh;
		void *l3hdr;
		struct tcphdr *tcp;

		txd->flit[2] = 0;
		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
		hdr->cntrl = htonl(cntrl);
		hdr->len = htonl(mlen | 0x80000000);

		if (__predict_false(mlen < TCPPKTHDRSIZE)) {
			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz,
			    (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
			panic("tx tso packet too small");
		}

		/* Make sure that ether, ip, tcp headers are all in m0 */
		if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
			m0 = m_pullup(m0, TCPPKTHDRSIZE);
			if (__predict_false(m0 == NULL)) {
				/* XXX panic probably an overreaction */
				panic("couldn't fit header into mbuf");
			}
		}

		eh = mtod(m0, struct ether_header *);
		eth_type = eh->ether_type;
		if (eth_type == htons(ETHERTYPE_VLAN)) {
			struct ether_vlan_header *evh = (void *)eh;

			tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
			l3hdr = evh + 1;
			eth_type = evh->evl_proto;
		} else {
			tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
			l3hdr = eh + 1;
		}

		if (eth_type == htons(ETHERTYPE_IP)) {
			struct ip *ip = l3hdr;

			tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
			tcp = (struct tcphdr *)(ip + 1);
		} else if (eth_type == htons(ETHERTYPE_IPV6)) {
			struct ip6_hdr *ip6 = l3hdr;

			KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
			    ("%s: CSUM_TSO with ip6_nxt %d",
			    __func__, ip6->ip6_nxt));

			tso_info |= F_LSO_IPV6;
			tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
			tcp = (struct tcphdr *)(ip6 + 1);
		} else
			panic("%s: CSUM_TSO but neither ip nor ip6", __func__);

		tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
		hdr->lso_info = htonl(tso_info);

		if (__predict_false(mlen <= PIO_LEN)) {
			/*
			 * pkt not undersized but fits in PIO_LEN
			 * Indicates a TSO bug at the higher levels.
			 */
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
			flits = (mlen + 7) / 8 + 3;
			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
			wmb();
			ETHER_BPF_MTAP(pi->ifp, m0);
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq, 0);
			m_freem(m0);
			return (0);
		}
		flits = 3;
	} else {
		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;

		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
			cntrl |= F_TXPKT_IPCSUM_DIS;
		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
		    CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
			cntrl |= F_TXPKT_L4CSUM_DIS;
		cpl->cntrl = htonl(cntrl);
		cpl->len = htonl(mlen | 0x80000000);

		if (mlen <= PIO_LEN) {
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
			flits = (mlen + 7) / 8 + 2;

			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
			wmb();
			ETHER_BPF_MTAP(pi->ifp, m0);
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq, 0);
			m_freem(m0);
			return (0);
		}
		flits = 2;
	}
	wrp = (struct work_request_hdr *)txd;
	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	ETHER_BPF_MTAP(pi->ifp, m0);

	KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
	    sgl_flits, wr_hi, wr_lo);
	check_ring_tx_db(sc, txq, 0);

	return (0);
}
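
/*
 * t3_encap() therefore has three paths: coalesced mbuf chains become a
 * CPL_TX_PKT_BATCH work request, TSO packets a CPL_TX_PKT_LSO work
 * request, and everything else a plain CPL_TX_PKT; packets of at most
 * PIO_LEN bytes are copied directly into the descriptor flits instead of
 * being handed to the hardware through an SGL.
 */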
#ifdef NETDUMP
static int
cxgb_netdump_encap(struct sge_qset *qs, struct mbuf **m)
{
	int error;

	error = t3_encap(qs, m);
	if (error == 0)
		check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1);
	else if (*m != NULL) {
		m_freem(*m);
		*m = NULL;
	}
	return (error);
}
#endif
static void
cxgb_tx_watchdog(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	if (qs->coalescing != 0 &&
	    (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs))
		qs->coalescing = 0;
	else if (qs->coalescing == 0 &&
	    (txq->in_use >= cxgb_tx_coalesce_enable_start))
		qs->coalescing = 1;
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_FLUSHING;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_FLUSHING;
		TXQ_UNLOCK(qs);
	}
	if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
		callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
		    qs, txq->txq_watchdog.c_cpu);
}
static void
cxgb_tx_timeout(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
		qs->coalescing = 1;
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_TIMEOUT;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_TIMEOUT;
		TXQ_UNLOCK(qs);
	}
}
static void
cxgb_start_locked(struct sge_qset *qs)
{
	struct mbuf *m_head = NULL;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	struct port_info *pi = qs->port;
	struct ifnet *ifp = pi->ifp;

	if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
		reclaim_completed_tx(qs, 0, TXQ_ETH);

	if (!pi->link_config.link_ok) {
		TXQ_RING_FLUSH(qs);
		return;
	}
	TXQ_LOCK_ASSERT(qs);
	while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
	    pi->link_config.link_ok) {
		reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);

		if (txq->size - txq->in_use <= TX_MAX_DESC)
			break;

		if ((m_head = cxgb_dequeue(qs)) == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
		if (t3_encap(qs, &m_head) || m_head == NULL)
			break;

		m_head = NULL;
	}

	if (txq->db_pending)
		check_ring_tx_db(pi->adapter, txq, 1);

	if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
	    pi->link_config.link_ok)
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	if (m_head != NULL)
		m_freem(m_head);
}
static int
cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
{
	struct port_info *pi = qs->port;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	struct buf_ring *br = txq->txq_mr;
	int error, avail;

	avail = txq->size - txq->in_use;
	TXQ_LOCK_ASSERT(qs);

	/*
	 * We can only do a direct transmit if the following are true:
	 * - we aren't coalescing (ring < 3/4 full)
	 * - the link is up -- checked in caller
	 * - there are no packets enqueued already
	 * - there is space in hardware transmit queue
	 */
	if (check_pkt_coalesce(qs) == 0 &&
	    !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
		if (t3_encap(qs, &m)) {
			if (m != NULL &&
			    (error = drbr_enqueue(ifp, br, m)) != 0)
				return (error);
		} else {
			if (txq->db_pending)
				check_ring_tx_db(pi->adapter, txq, 1);

			/*
			 * We've bypassed the buf ring so we need to update
			 * the stats directly
			 */
			txq->txq_direct_packets++;
			txq->txq_direct_bytes += m->m_pkthdr.len;
		}
	} else if ((error = drbr_enqueue(ifp, br, m)) != 0)
		return (error);

	reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
	if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
	    (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
		cxgb_start_locked(qs);
	else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	return (0);
}
int
cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct sge_qset *qs;
	struct port_info *pi = ifp->if_softc;
	int error, qidx = pi->first_qset;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
	    ||(!pi->link_config.link_ok)) {
		m_freem(m);
		return (0);
	}

	/* check if flowid is set */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;

	qs = &pi->adapter->sge.qs[qidx];

	if (TXQ_TRYLOCK(qs)) {
		error = cxgb_transmit_locked(ifp, qs, m);
		TXQ_UNLOCK(qs);
	} else
		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
	return (error);
}
void
cxgb_qflush(struct ifnet *ifp)
{
	/*
	 * flush any enqueued mbufs in the buf_rings
	 * and in the transmit queues
	 * no-op for now
	 */
	return;
}
/**
 *	write_imm - write a packet into a Tx descriptor as immediate data
 *	@d: the Tx descriptor to write
 *	@src: the packet
 *	@len: the length of packet data to write as immediate data
 *	@gen: the generation bit value to write
 *
 *	Writes a packet as immediate data into a Tx descriptor.  The packet
 *	contains a work request at its beginning.  We must write the packet
 *	carefully so the SGE doesn't read it accidentally before it's written
 *	in its entirety.
 */
static __inline void
write_imm(struct tx_desc *d, caddr_t src,
	  unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = (struct work_request_hdr *)src;
	struct work_request_hdr *to = (struct work_request_hdr *)d;
	uint32_t wr_hi, wr_lo;

	KASSERT(len <= WR_LEN && len >= sizeof(*from),
	    ("%s: invalid len %d", __func__, len));

	memcpy(&to[1], &from[1], len - sizeof(*from));
	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
	    V_WR_BCNTLFLT(len & 7));
	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
	set_wr_hdr(to, wr_hi, wr_lo);
	wmb();
	wr_gen2(d, gen);
}
/**
 *	check_desc_avail - check descriptor availability on a send queue
 *	@adap: the adapter
 *	@q: the Tx queue
 *	@m: the packet needing the descriptors
 *	@ndesc: the number of Tx descriptors needed
 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 *	Checks if the requested number of Tx descriptors is available on an
 *	SGE send queue.  If the queue is already suspended or not enough
 *	descriptors are available the packet is queued for later transmission.
 *	Must be called with the Tx queue locked.
 *
 *	Returns 0 if enough descriptors are available, 1 if there aren't
 *	enough descriptors and the packet has been queued, and 2 if the caller
 *	needs to retry because there weren't enough descriptors at the
 *	beginning of the call but some freed up in the mean time.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
		 struct mbuf *m, unsigned int ndesc,
		 int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue
	 * the control queue is only used for binding qsets which happens
	 * at init time so we are guaranteed enough descriptors
	 */
	if (__predict_false(mbufq_len(&q->sendq))) {
addq_exit:	(void)mbufq_enqueue(&q->sendq, m);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {
		struct sge_qset *qs = txq_to_qset(q, qid);

		setbit(&qs->txq_stopped, qid);
		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
	return 0;
}
/**
 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 *	@q: the SGE control Tx queue
 *
 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
 *	that send only immediate data (presently just the control queues) and
 *	thus do not have any mbufs to free.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}
/**
 *	ctrl_xmit - send a packet through an SGE control Tx queue
 *	@adap: the adapter
 *	@q: the control queue
 *	@m: the packet
 *
 *	Send a packet through an SGE control Tx queue.  Packets sent through
 *	a control queue must fit entirely as immediate data in a single Tx
 *	descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
	struct sge_txq *q = &qs->txq[TXQ_CTRL];

	KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));

	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wrh_lo = htonl(V_WR_TID(q->token));

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			TXQ_UNLOCK(qs);
			return (ENOSPC);
		}
		goto again;
	}
	write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	TXQ_UNLOCK(qs);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));

	m_free(m);
	return (0);
}
/**
 *	restart_ctrlq - restart a suspended control queue
 *	@qs: the queue set containing the control queue
 *
 *	Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	while (q->in_use < q->size &&
	       (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
		m_free(m);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (mbufq_len(&q->sendq)) {
		setbit(&qs->txq_stopped, TXQ_CTRL);

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	TXQ_UNLOCK(qs);
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}
/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0], m);
}
/**
 *	t3_free_qset - free the resources of an SGE queue set
 *	@sc: the controller owning the queue set
 *	@q: the queue set
 *
 *	Release the HW and SW resources associated with an SGE queue set, such
 *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 *	queue set must be quiesced prior to calling this.
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	reclaim_completed_tx(q, 0, TXQ_ETH);
	if (q->txq[TXQ_ETH].txq_mr != NULL)
		buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
	if (q->txq[TXQ_ETH].txq_ifq != NULL) {
		ifq_delete(q->txq[TXQ_ETH].txq_ifq);
		free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
	}

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
					q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	mtx_unlock(&q->lock);
	MTX_DESTROY(&q->lock);
	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
					q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
					q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
	}

	if (q->rspq.desc) {
		mtx_lock_spin(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock_spin(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
				q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
		MTX_DESTROY(&q->rspq.lock);
	}

#if defined(INET6) || defined(INET)
	tcp_lro_free(&q->lro.ctrl);
#endif

	bzero(q, sizeof(*q));
}
/**
 *	t3_free_sge_resources - free SGE resources
 *	@sc: the adapter softc
 *
 *	Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc, int nqsets)
{
	int i;

	for (i = 0; i < nqsets; ++i) {
		TXQ_LOCK(&sc->sge.qs[i]);
		t3_free_qset(sc, &sc->sge.qs[i]);
	}
}
/**
 *	t3_sge_start - enable SGE
 *	@sc: the controller softc
 *
 *	Enables the SGE for DMAs.  This is the last step in starting packet
 *	transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}
/**
 *	t3_sge_stop - disable SGE operation
 *	@sc: the adapter
 *
 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
 *	from error interrupts) or from normal process context.  In the latter
 *	case it also disables any pending queue restart tasklets.  Note that
 *	if it is called in interrupt context it cannot disable the restart
 *	tasklets as it cannot wait, however the tasklets will have no effect
 *	since the doorbells are disabled and the driver will call this again
 *	later from process context, at which time the tasklets will be stopped
 *	if they are still running.
 */
void
t3_sge_stop(adapter_t *sc)
{
	int i, nqsets;

	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

	if (sc->tq == NULL)
		return;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; ++i) {
		struct sge_qset *qs = &sc->sge.qs[i];

		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
	}
}
/**
 *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
 *	@qs: the queue set containing the Tx queue
 *	@reclaimable: the number of descriptors to reclaim
 *	@queue: the Tx queue index (TXQ_ETH, TXQ_OFLD, or TXQ_CTRL)
 *
 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 *	Tx buffers.  Called with the Tx queue lock held.
 */
void
t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
{
	struct tx_sw_desc *txsd;
	unsigned int cidx, mask;
	struct sge_txq *q = &qs->txq[queue];

#ifdef T3_TRACE
	T3_TRACE2(sc->tb[q->cntxt_id & 7],
		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
#endif
	cidx = q->cidx;
	mask = q->size - 1;
	txsd = &q->sdesc[cidx];

	mtx_assert(&qs->lock, MA_OWNED);
	while (reclaimable--) {
		prefetch(q->sdesc[(cidx + 1) & mask].m);
		prefetch(q->sdesc[(cidx + 2) & mask].m);

		if (txsd->m != NULL) {
			if (txsd->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, txsd->map);
				txsd->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_freem_list(txsd->m);
			txsd->m = NULL;
		} else
			q->txq_skipped++;

		++txsd;
		if (++cidx == q->size) {
			cidx = 0;
			txsd = q->sdesc;
		}
	}
	q->cidx = cidx;
}
/**
 *	is_new_response - check if a response is newly written
 *	@r: the response descriptor
 *	@q: the response queue
 *
 *	Returns true if a response descriptor contains a yet unprocessed
 *	response.
 */
static __inline int
is_new_response(const struct rsp_desc *r,
    const struct sge_rspq *q)
{
	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
}
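
/*
 * A response is "new" when its generation bit matches the queue's current
 * generation: the queue's gen flag flips each time the ring wraps, so
 * stale descriptors left over from the previous pass around the ring fail
 * the comparison.
 */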
#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500
#ifdef TCP_OFFLOAD
/**
 *	write_ofld_wr - write an offload work request
 *	@adap: the adapter
 *	@m: the packet to send
 *	@q: the Tx queue
 *	@pidx: index of the first Tx descriptor to write
 *	@gen: the generation value to use
 *	@ndesc: number of descriptors the packet will occupy
 *
 *	Write an offload work request to send the supplied packet.  The packet
 *	data already carry the work request with most fields populated.
 */
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
    unsigned int pidx, unsigned int gen, unsigned int ndesc)
{
	unsigned int sgl_flits, flits;
	int i, idx, nsegs, wrlen;
	struct work_request_hdr *from;
	struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
	struct tx_desc *d = &q->desc[pidx];
	struct txq_state txqs;
	struct sglist_seg *segs;
	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
	struct sglist *sgl;

	from = (void *)(oh + 1);	/* Start of WR within mbuf */
	wrlen = m->m_len - sizeof(*oh);

	if (!(oh->flags & F_HDR_SGL)) {
		write_imm(d, (caddr_t)from, wrlen, gen);

		/*
		 * mbuf with "real" immediate tx data will be enqueue_wr'd by
		 * t3_push_frames and freed in wr_ack.  Others, like those sent
		 * down by close_conn, t3_send_reset, etc. should be freed here.
		 */
		if (!(oh->flags & F_HDR_DF))
			m_free(m);
		return;
	}

	memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));

	sgl = oh->sgl;
	flits = wrlen / 8;
	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;

	nsegs = sgl->sg_nseg;
	segs = sgl->sg_segs;
	for (idx = 0, i = 0; i < nsegs; i++) {
		KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
		if (i && idx == 0)
			++sgp;
		sgp->len[idx] = htobe32(segs[i].ss_len);
		sgp->addr[idx] = htobe64(segs[i].ss_paddr);
		idx ^= 1;
	}
	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}

	sgl_flits = sgl_len(nsegs);
	txqs.gen = gen;
	txqs.pidx = pidx;
	txqs.compl = 0;

	write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
	    from->wrh_hi, from->wrh_lo);
}
/**
 *	ofld_xmit - send a packet through an offload queue
 *	@adap: the adapter
 *	@q: the Tx offload queue
 *	@m: the packet
 *
 *	Send an offload packet through an SGE offload queue.
 */
static int
ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	int ret;
	unsigned int ndesc;
	unsigned int pidx, gen;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);

	ndesc = G_HDR_NDESC(oh->flags);

	TXQ_LOCK(qs);
again:	reclaim_completed_tx(qs, 16, TXQ_OFLD);
	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
	if (__predict_false(ret)) {
		if (ret == 1) {
			TXQ_UNLOCK(qs);
			return (EINTR);
		}
		goto again;
	}

	gen = q->gen;
	q->in_use += ndesc;
	pidx = q->pidx;
	q->pidx += ndesc;
	if (q->pidx >= q->size) {
		q->pidx -= q->size;
		q->gen ^= 1;
	}

	write_ofld_wr(adap, m, q, pidx, gen, ndesc);
	check_ring_tx_db(adap, q, 1);
	TXQ_UNLOCK(qs);

	return (0);
}
2330 * restart_offloadq - restart a suspended offload queue
2331 * @qs: the queue set containing the offload queue
2333 * Resumes transmission on a suspended Tx offload queue.
2336 restart_offloadq(void *data, int npending)
2339 struct sge_qset *qs = data;
2340 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2341 adapter_t *adap = qs->port->adapter;
2345 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
2347 while ((m = mbufq_first(&q->sendq)) != NULL) {
2348 unsigned int gen, pidx;
2349 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2350 unsigned int ndesc = G_HDR_NDESC(oh->flags);
2352 if (__predict_false(q->size - q->in_use < ndesc)) {
2353 setbit(&qs->txq_stopped, TXQ_OFLD);
2354 if (should_restart_tx(q) &&
2355 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2365 if (q->pidx >= q->size) {
2370 (void)mbufq_dequeue(&q->sendq);
2372 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
2376 set_bit(TXQ_RUNNING, &q->flags);
2377 set_bit(TXQ_LAST_PKT_DB, &q->flags);
2381 t3_write_reg(adap, A_SG_KDOORBELL,
2382 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2386 * t3_offload_tx - send an offload packet
2389 * Sends an offload packet. We use the packet priority to select the
2390 * appropriate Tx queue as follows: bit 0 indicates whether the packet
2391 * should be sent as regular or control, bits 1-3 select the queue set.
2394 t3_offload_tx(struct adapter *sc, struct mbuf *m)
2396 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2397 struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
2399 if (oh->flags & F_HDR_CTRL) {
2400 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
2401 return (ctrl_xmit(sc, qs, m));
2403 return (ofld_xmit(sc, qs, m));
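/*
 * Editor's note (illustrative): the dispatch above is driven entirely by the
 * ofld_hdr prepended to the mbuf; a sender would mark a control-queue packet
 * with F_HDR_CTRL and select its queue set via the HDR_QSET field, roughly
 * (the V_ accessor names are assumed from the F_/G_ macros used in this
 * file):
 *
 *	oh->flags = V_HDR_NDESC(ndesc) | V_HDR_QSET(qset) |
 *	    (use_ctrl_queue ? F_HDR_CTRL : 0);
 */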
2408 restart_tx(struct sge_qset *qs)
2410 struct adapter *sc = qs->port->adapter;
2412 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2413 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2414 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2415 qs->txq[TXQ_OFLD].restarts++;
2416 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2419 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2420 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2421 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2422 qs->txq[TXQ_CTRL].restarts++;
2423 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2428 * t3_sge_alloc_qset - initialize an SGE queue set
2429 * @sc: the controller softc
2430 * @id: the queue set id
2431 * @nports: how many Ethernet ports will be using this queue set
2432 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2433 * @p: configuration parameters for this queue set
2434 * @ntxq: number of Tx queues for the queue set
2435 * @pi: port info for queue set
2437 * Allocate resources and initialize an SGE queue set. A queue set
2438 * comprises a response queue, two Rx free-buffer queues, and up to 3
2439 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2440 * queue, offload queue, and control queue.
2443 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2444 const struct qset_params *p, int ntxq, struct port_info *pi)
2446 struct sge_qset *q = &sc->sge.qs[id];
2449 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2453 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2454 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
2455 device_printf(sc->dev, "failed to allocate mbuf ring\n");
2458 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
2459 M_NOWAIT | M_ZERO)) == NULL) {
2460 device_printf(sc->dev, "failed to allocate ifq\n");
2463 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);
2464 callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
2465 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
2466 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
2467 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
2469 init_qset_cntxt(q, id);
2471 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2472 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2473 &q->fl[0].desc, &q->fl[0].sdesc,
2474 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2475 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2476 printf("error %d from alloc ring fl0\n", ret);
2480 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2481 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2482 &q->fl[1].desc, &q->fl[1].sdesc,
2483 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2484 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2485 printf("error %d from alloc ring fl1\n", ret);
2489 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2490 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2491 &q->rspq.desc_tag, &q->rspq.desc_map,
2492 NULL, NULL)) != 0) {
2493 printf("error %d from alloc ring rspq\n", ret);
2497 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2498 device_get_unit(sc->dev), irq_vec_idx);
2499 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2501 for (i = 0; i < ntxq; ++i) {
2502 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2504 if ((ret = alloc_ring(sc, p->txq_size[i],
2505 sizeof(struct tx_desc), sz,
2506 &q->txq[i].phys_addr, &q->txq[i].desc,
2507 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2508 &q->txq[i].desc_map,
2509 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2510 printf("error %d from alloc ring tx %d\n", ret, i);
2513 mbufq_init(&q->txq[i].sendq, INT_MAX);
2515 q->txq[i].size = p->txq_size[i];
2519 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2521 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2522 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2523 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2525 q->fl[0].gen = q->fl[1].gen = 1;
2526 q->fl[0].size = p->fl_size;
2527 q->fl[1].size = p->jumbo_size;
2531 q->rspq.size = p->rspq_size;
2533 q->txq[TXQ_ETH].stop_thres = nports *
2534 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2536 q->fl[0].buf_size = MCLBYTES;
2537 q->fl[0].zone = zone_pack;
2538 q->fl[0].type = EXT_PACKET;
2540 if (p->jumbo_buf_size == MJUM16BYTES) {
2541 q->fl[1].zone = zone_jumbo16;
2542 q->fl[1].type = EXT_JUMBO16;
2543 } else if (p->jumbo_buf_size == MJUM9BYTES) {
2544 q->fl[1].zone = zone_jumbo9;
2545 q->fl[1].type = EXT_JUMBO9;
2546 } else if (p->jumbo_buf_size == MJUMPAGESIZE) {
2547 q->fl[1].zone = zone_jumbop;
2548 q->fl[1].type = EXT_JUMBOP;
2550 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
2554 q->fl[1].buf_size = p->jumbo_buf_size;
2556 /* Allocate and setup the lro_ctrl structure */
2557 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2558 #if defined(INET6) || defined(INET)
2559 ret = tcp_lro_init(&q->lro.ctrl);
2561 printf("error %d from tcp_lro_init\n", ret);
2565 q->lro.ctrl.ifp = pi->ifp;
2567 mtx_lock_spin(&sc->sge.reg_lock);
2568 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2569 q->rspq.phys_addr, q->rspq.size,
2570 q->fl[0].buf_size, 1, 0);
2572 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2576 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2577 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2578 q->fl[i].phys_addr, q->fl[i].size,
2579 q->fl[i].buf_size, p->cong_thres, 1,
2582 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2587 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2588 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2589 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2592 printf("error %d from t3_sge_init_ecntxt\n", ret);
2597 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2598 USE_GTS, SGE_CNTXT_OFLD, id,
2599 q->txq[TXQ_OFLD].phys_addr,
2600 q->txq[TXQ_OFLD].size, 0, 1, 0);
2602 printf("error %d from t3_sge_init_ecntxt\n", ret);
2608 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2610 q->txq[TXQ_CTRL].phys_addr,
2611 q->txq[TXQ_CTRL].size,
2612 q->txq[TXQ_CTRL].token, 1, 0);
2614 printf("error %d from t3_sge_init_ecntxt\n", ret);
2619 mtx_unlock_spin(&sc->sge.reg_lock);
2620 t3_update_qset_coalesce(q, p);
2622 refill_fl(sc, &q->fl[0], q->fl[0].size);
2623 refill_fl(sc, &q->fl[1], q->fl[1].size);
2624 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2626 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2627 V_NEWTIMER(q->rspq.holdoff_tmr));
2632 mtx_unlock_spin(&sc->sge.reg_lock);
2635 t3_free_qset(sc, q);
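/*
 * Editor's note (summary of the allocation above): a fully initialized queue
 * set consists of
 *
 *	qs->rspq          response queue (rx events, credits, async notifies)
 *	qs->fl[0]         free list of MCLBYTES clusters
 *	qs->fl[1]         free list of jumbo clusters (page/9K/16K sized)
 *	qs->txq[TXQ_ETH]  Ethernet transmit queue
 *	qs->txq[TXQ_OFLD] offload (TOE) transmit queue
 *	qs->txq[TXQ_CTRL] control transmit queue
 */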
2641 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2642 * ethernet data. Hardware assistance with various checksums and any vlan tag
2643 * will also be taken into account here.
2646 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
2648 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2649 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2650 struct ifnet *ifp = pi->ifp;
2652 if (cpl->vlan_valid) {
2653 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2654 m->m_flags |= M_VLANTAG;
2657 m->m_pkthdr.rcvif = ifp;
2659 * adjust after conversion to mbuf chain
2661 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2662 m->m_len -= (sizeof(*cpl) + ethpad);
2663 m->m_data += (sizeof(*cpl) + ethpad);
2665 if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
2666 struct ether_header *eh = mtod(m, void *);
2669 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2670 struct ether_vlan_header *evh = mtod(m, void *);
2672 eh_type = evh->evl_proto;
2674 eh_type = eh->ether_type;
2676 if (ifp->if_capenable & IFCAP_RXCSUM &&
2677 eh_type == htons(ETHERTYPE_IP)) {
2678 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
2679 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2680 m->m_pkthdr.csum_data = 0xffff;
2681 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
2682 eh_type == htons(ETHERTYPE_IPV6)) {
2683 m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
2685 m->m_pkthdr.csum_data = 0xffff;
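/*
 * Editor's note: csum_data = 0xffff together with CSUM_DATA_VALID and
 * CSUM_PSEUDO_HDR is the stock FreeBSD idiom for "the L4 checksum was
 * verified in hardware"; the stack treats 0xffff as a precomputed
 * pseudo-header checksum and skips its own verification of the payload.
 */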
2691 * get_packet - return the next ingress packet buffer from a free list
2692 * @adap: the adapter that received the packet
2693 * @drop_thres: # of remaining buffers before we start dropping packets
2694 * @qs: the qset that the SGE free list holding the packet belongs to
2695 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2696 * @r: response descriptor
2698 * Get the next packet from a free list and complete setup of the
2699 * mbuf. If the packet is small, we make a copy and recycle the
2700 * original buffer, otherwise we use the original buffer itself. If a
2701 * positive drop threshold is supplied packets are dropped and their
2702 * buffers recycled if (a) the number of remaining buffers is under the
2703 * threshold and the packet is too big to copy, or (b) the packet should
2704 * be copied but there is no memory for the copy.
2707 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2708 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2711 unsigned int len_cq = ntohl(r->len_cq);
2712 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2713 int mask, cidx = fl->cidx;
2714 struct rx_sw_desc *sd = &fl->sdesc[cidx];
2715 uint32_t len = G_RSPD_LEN(len_cq);
2716 uint32_t flags = M_EXT;
2717 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
2722 mask = fl->size - 1;
2723 prefetch(fl->sdesc[(cidx + 1) & mask].m);
2724 prefetch(fl->sdesc[(cidx + 2) & mask].m);
2725 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
2726 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);
2729 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2731 if (recycle_enable && len <= SGE_RX_COPY_THRES &&
2732 sopeop == RSPQ_SOP_EOP) {
2733 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
2735 cl = mtod(m, void *);
2736 memcpy(cl, sd->rxsd_cl, len);
2737 recycle_rx_buf(adap, fl, fl->cidx);
2738 m->m_pkthdr.len = m->m_len = len;
2740 mh->mh_head = mh->mh_tail = m;
2745 bus_dmamap_unload(fl->entry_tag, sd->map);
2749 if ((sopeop == RSPQ_SOP_EOP) ||
2750 (sopeop == RSPQ_SOP))
2752 m_init(m, M_NOWAIT, MT_DATA, flags);
2753 if (fl->zone == zone_pack) {
2755 * restore clobbered data pointer
2757 m->m_data = m->m_ext.ext_buf;
2759 m_cljset(m, cl, fl->type);
2768 mh->mh_head = mh->mh_tail = m;
2769 m->m_pkthdr.len = len;
2774 case RSPQ_NSOP_NEOP:
2775 if (mh->mh_tail == NULL) {
2776 log(LOG_ERR, "discarding intermediate descriptor entry\n");
2780 mh->mh_tail->m_next = m;
2782 mh->mh_head->m_pkthdr.len += len;
2786 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
2788 if (++fl->cidx == fl->size)
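/*
 * Editor's note: the SGE_RX_COPY_THRES test above implements the usual
 * small-packet optimization: payloads at or below the threshold are copied
 * into a freshly allocated mbuf so the larger cluster can be recycled onto
 * the free list right away, while bigger payloads hand the cluster itself
 * up the stack to avoid the copy.
 */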
2795 * handle_rsp_cntrl_info - handles control information in a response
2796 * @qs: the queue set corresponding to the response
2797 * @flags: the response control flags
2799 * Handles the control information of an SGE response, such as GTS
2800 * indications and completion credits for the queue set's Tx queues.
2801 * HW coalesces credits; we don't do any extra SW coalescing.
2803 static __inline void
2804 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2806 unsigned int credits;
2809 if (flags & F_RSPD_TXQ0_GTS)
2810 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2812 credits = G_RSPD_TXQ0_CR(flags);
2814 qs->txq[TXQ_ETH].processed += credits;
2816 credits = G_RSPD_TXQ2_CR(flags);
2818 qs->txq[TXQ_CTRL].processed += credits;
2821 if (flags & F_RSPD_TXQ1_GTS)
2822 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2824 credits = G_RSPD_TXQ1_CR(flags);
2826 qs->txq[TXQ_OFLD].processed += credits;
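/*
 * Editor's note (derived from the code above): the response-descriptor
 * control fields map onto the queue set's tx queues as
 *
 *	TXQ0 (GTS/CR) -> txq[TXQ_ETH]
 *	TXQ1 (GTS/CR) -> txq[TXQ_OFLD]
 *	TXQ2 (CR)     -> txq[TXQ_CTRL]
 *
 * A GTS bit clears TXQ_RUNNING (the queue needs a doorbell to restart) and
 * each CR field returns completion credits that advance the corresponding
 * queue's "processed" count.
 */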
2831 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2832 unsigned int sleeping)
2838 * process_responses - process responses from an SGE response queue
2839 * @adap: the adapter
2840 * @qs: the queue set to which the response queue belongs
2841 * @budget: how many responses can be processed in this round
2843 * Process responses from an SGE response queue up to the supplied budget.
2844 * Responses include received packets as well as credits and other events
2845 * for the queues that belong to the response queue's queue set.
2846 * A negative budget is effectively unlimited.
2848 * Additionally choose the interrupt holdoff time for the next interrupt
2849 * on this queue. If the system is short on memory, use a fairly
2850 * long delay to help recovery.
2853 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2855 struct sge_rspq *rspq = &qs->rspq;
2856 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2857 int budget_left = budget;
2858 unsigned int sleeping = 0;
2859 #if defined(INET6) || defined(INET)
2860 int lro_enabled = qs->lro.enabled;
2862 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2864 struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
2866 static int last_holdoff = 0;
2867 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2868 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2869 last_holdoff = rspq->holdoff_tmr;
2872 rspq->next_holdoff = rspq->holdoff_tmr;
2874 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2875 int eth, eop = 0, ethpad = 0;
2876 uint32_t flags = ntohl(r->flags);
2877 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2878 uint8_t opcode = r->rss_hdr.opcode;
2880 eth = (opcode == CPL_RX_PKT);
2882 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2886 printf("async notification\n");
2888 if (mh->mh_head == NULL) {
2889 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
2892 m = m_gethdr(M_NOWAIT, MT_DATA);
2897 memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2898 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2899 *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF;
2900 opcode = CPL_ASYNC_NOTIF;
2902 rspq->async_notif++;
2904 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2905 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
2909 rspq->next_holdoff = NOMEM_INTR_DELAY;
2913 if (mh->mh_head == NULL)
2916 mh->mh_tail->m_next = m;
2919 get_imm_packet(adap, r, m);
2920 mh->mh_head->m_pkthdr.len += m->m_len;
2923 } else if (r->len_cq) {
2924 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2926 eop = get_packet(adap, drop_thresh, qs, mh, r);
2928 if (r->rss_hdr.hash_type && !adap->timestamp) {
2929 M_HASHTYPE_SET(mh->mh_head,
2930 M_HASHTYPE_OPAQUE_HASH);
2931 mh->mh_head->m_pkthdr.flowid = rss_hash;
2940 if (flags & RSPD_CTRL_MASK) {
2941 sleeping |= flags & RSPD_GTS_MASK;
2942 handle_rsp_cntrl_info(qs, flags);
2946 rspq->offload_pkts++;
2948 adap->cpl_handler[opcode](qs, r, mh->mh_head);
2950 m_freem(mh->mh_head);
2953 } else if (eth && eop) {
2954 struct mbuf *m = mh->mh_head;
2956 t3_rx_eth(adap, m, ethpad);
2959 * The T304 sends incoming packets on any qset. If LRO
2960 * is also enabled, we could end up sending the packet up
2961 * lro_ctrl->ifp's input. That is incorrect.
2963 * The mbuf's rcvif was derived from the cpl header and
2964 * is accurate. Skip LRO and just use that.
2966 #if defined(INET6) || defined(INET)
2967 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
2969 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
2970 && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
2972 /* successfully queued for LRO */
2977 * LRO not enabled, packet unsuitable for LRO,
2978 * or unable to queue. Pass it up right now in this context.
2981 struct ifnet *ifp = m->m_pkthdr.rcvif;
2982 (*ifp->if_input)(ifp, m);
2989 if (__predict_false(++rspq->cidx == rspq->size)) {
2995 if (++rspq->credits >= 64) {
2996 refill_rspq(adap, rspq, rspq->credits);
2999 __refill_fl_lt(adap, &qs->fl[0], 32);
3000 __refill_fl_lt(adap, &qs->fl[1], 32);
3004 #if defined(INET6) || defined(INET)
3006 tcp_lro_flush_all(lro_ctrl);
3010 check_ring_db(adap, qs, sleeping);
3012 mb(); /* commit Tx queue processed updates */
3013 if (__predict_false(qs->txq_stopped > 1))
3016 __refill_fl_lt(adap, &qs->fl[0], 512);
3017 __refill_fl_lt(adap, &qs->fl[1], 512);
3018 budget -= budget_left;
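/*
 * Editor's note: the value computed above (budget minus budget_left) is
 * handed back as the number of responses consumed, so a poller with a finite
 * budget can tell whether more work remains, while a negative budget, as
 * used by process_responses_gts() below, simply drains the queue.
 */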
3023 * A helper function that processes responses and issues GTS.
3026 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
3029 static int last_holdoff = 0;
3031 work = process_responses(adap, rspq_to_qset(rq), -1);
3033 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3034 printf("next_holdoff=%d\n", rq->next_holdoff);
3035 last_holdoff = rq->next_holdoff;
3037 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3038 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3045 cxgb_netdump_poll_rx(adapter_t *adap, struct sge_qset *qs)
3048 return (process_responses_gts(adap, &qs->rspq));
3053 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3054 * Handles data events from SGE response queues as well as error and other
3055 * async events as they all use the same interrupt pin. We use one SGE
3056 * response queue per port in this mode and protect all response queues with
3060 t3b_intr(void *data)
3063 adapter_t *adap = data;
3064 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3066 t3_write_reg(adap, A_PL_CLI, 0);
3067 map = t3_read_reg(adap, A_SG_DATA_INTR);
3072 if (__predict_false(map & F_ERRINTR)) {
3073 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3074 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3075 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3078 mtx_lock(&q0->lock);
3079 for_each_port(adap, i)
3081 process_responses_gts(adap, &adap->sge.qs[i].rspq);
3082 mtx_unlock(&q0->lock);
3086 * The MSI interrupt handler. This needs to handle data events from SGE
3087 * response queues as well as error and other async events as they all use
3088 * the same MSI vector. We use one SGE response queue per port in this mode
3089 * and protect all response queues with queue 0's lock.
3092 t3_intr_msi(void *data)
3094 adapter_t *adap = data;
3095 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3096 int i, new_packets = 0;
3098 mtx_lock(&q0->lock);
3100 for_each_port(adap, i)
3101 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3103 mtx_unlock(&q0->lock);
3104 if (new_packets == 0) {
3105 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3106 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3107 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3112 t3_intr_msix(void *data)
3114 struct sge_qset *qs = data;
3115 adapter_t *adap = qs->port->adapter;
3116 struct sge_rspq *rspq = &qs->rspq;
3118 if (process_responses_gts(adap, rspq) == 0)
3119 rspq->unhandled_irqs++;
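/*
 * Editor's note: taken together, the three handlers above cover the driver's
 * interrupt models: INTx and MSI funnel every port's response queue through
 * queue 0's lock and re-dispatch error interrupts to the slow-interrupt
 * task, while MSI-X gives each queue set its own vector and handler, so no
 * shared lock is needed on the fast path.
 */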
3122 #define QDUMP_SBUF_SIZE (32 * 400)
3124 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3126 struct sge_rspq *rspq;
3127 struct sge_qset *qs;
3128 int i, err, dump_end, idx;
3130 struct rsp_desc *rspd;
3134 qs = rspq_to_qset(rspq);
3135 if (rspq->rspq_dump_count == 0)
3137 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3139 "dump count is too large %d\n", rspq->rspq_dump_count);
3140 rspq->rspq_dump_count = 0;
3143 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3145 "dump start of %d is greater than queue size\n",
3146 rspq->rspq_dump_start);
3147 rspq->rspq_dump_start = 0;
3150 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3153 err = sysctl_wire_old_buffer(req, 0);
3156 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3158 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3159 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3160 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3161 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3162 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3164 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3165 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3167 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3168 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3169 idx = i & (RSPQ_Q_SIZE-1);
3171 rspd = &rspq->desc[idx];
3172 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3173 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3174 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3175 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3176 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3177 be32toh(rspd->len_cq), rspd->intr_gen);
3180 err = sbuf_finish(sb);
3186 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3188 struct sge_txq *txq;
3189 struct sge_qset *qs;
3190 int i, j, err, dump_end;
3192 struct tx_desc *txd;
3193 uint32_t *WR, wr_hi, wr_lo, gen;
3197 qs = txq_to_qset(txq, TXQ_ETH);
3198 if (txq->txq_dump_count == 0) {
3201 if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3203 "dump count is too large %d\n", txq->txq_dump_count);
3204 txq->txq_dump_count = 1;
3207 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3209 "dump start of %d is greater than queue size\n",
3210 txq->txq_dump_start);
3211 txq->txq_dump_start = 0;
3214 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3217 err = sysctl_wire_old_buffer(req, 0);
3220 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3222 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3223 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3224 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3225 sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3226 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3227 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3228 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3229 txq->txq_dump_start,
3230 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3232 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3233 for (i = txq->txq_dump_start; i < dump_end; i++) {
3234 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3235 WR = (uint32_t *)txd->flit;
3236 wr_hi = ntohl(WR[0]);
3237 wr_lo = ntohl(WR[1]);
3238 gen = G_WR_GEN(wr_lo);
3240 sbuf_printf(sb, " wr_hi %08x wr_lo %08x gen %d\n",
3242 for (j = 2; j < 30; j += 4)
3243 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3244 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3247 err = sbuf_finish(sb);
3253 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3255 struct sge_txq *txq;
3256 struct sge_qset *qs;
3257 int i, j, err, dump_end;
3259 struct tx_desc *txd;
3260 uint32_t *WR, wr_hi, wr_lo, gen;
3263 qs = txq_to_qset(txq, TXQ_CTRL);
3264 if (txq->txq_dump_count == 0) {
3267 if (txq->txq_dump_count > 256) {
3269 "dump count is too large %d\n", txq->txq_dump_count);
3270 txq->txq_dump_count = 1;
3273 if (txq->txq_dump_start > 255) {
3275 "dump start of %d is greater than queue size\n",
3276 txq->txq_dump_start);
3277 txq->txq_dump_start = 0;
3281 err = sysctl_wire_old_buffer(req, 0);
3284 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3285 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3286 txq->txq_dump_start,
3287 (txq->txq_dump_start + txq->txq_dump_count) & 255);
3289 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3290 for (i = txq->txq_dump_start; i < dump_end; i++) {
3291 txd = &txq->desc[i & (255)];
3292 WR = (uint32_t *)txd->flit;
3293 wr_hi = ntohl(WR[0]);
3294 wr_lo = ntohl(WR[1]);
3295 gen = G_WR_GEN(wr_lo);
3297 sbuf_printf(sb, " wr_hi %08x wr_lo %08x gen %d\n",
3299 for (j = 2; j < 30; j += 4)
3300 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3301 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3304 err = sbuf_finish(sb);
3310 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3312 adapter_t *sc = arg1;
3313 struct qset_params *qsp = &sc->params.sge.qset[0];
3315 struct sge_qset *qs;
3316 int i, j, err, nqsets = 0;
3319 if ((sc->flags & FULL_INIT_DONE) == 0)
3322 coalesce_usecs = qsp->coalesce_usecs;
3323 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3328 if (coalesce_usecs == qsp->coalesce_usecs)
3331 for (i = 0; i < sc->params.nports; i++)
3332 for (j = 0; j < sc->port[i].nqsets; j++)
3335 coalesce_usecs = max(1, coalesce_usecs);
3337 for (i = 0; i < nqsets; i++) {
3338 qs = &sc->sge.qs[i];
3339 qsp = &sc->params.sge.qset[i];
3340 qsp->coalesce_usecs = coalesce_usecs;
3342 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3343 &sc->sge.qs[0].rspq.lock;
3346 t3_update_qset_coalesce(qs, qsp);
3347 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3348 V_NEWTIMER(qs->rspq.holdoff_tmr));
3356 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
3358 adapter_t *sc = arg1;
3361 if ((sc->flags & FULL_INIT_DONE) == 0)
3364 timestamp = sc->timestamp;
3365 rc = sysctl_handle_int(oidp, ×tamp, arg2, req);
3370 if (timestamp != sc->timestamp) {
3371 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
3372 timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
3373 sc->timestamp = timestamp;
3380 t3_add_attach_sysctls(adapter_t *sc)
3382 struct sysctl_ctx_list *ctx;
3383 struct sysctl_oid_list *children;
3385 ctx = device_get_sysctl_ctx(sc->dev);
3386 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3388 /* random information */
3389 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3391 CTLFLAG_RD, sc->fw_version,
3392 0, "firmware version");
3393 SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3395 CTLFLAG_RD, &sc->params.rev,
3397 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3399 CTLFLAG_RD, sc->port_types,
3400 0, "type of ports");
3401 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3403 CTLFLAG_RW, &cxgb_debug,
3404 0, "enable verbose debugging output");
3405 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3406 CTLFLAG_RD, &sc->tunq_coalesce,
3407 "#tunneled packets freed");
3408 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3410 CTLFLAG_RD, &txq_fills,
3411 0, "#times txq overrun");
3412 SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3414 CTLFLAG_RD, &sc->params.vpd.cclk,
3415 0, "core clock frequency (in KHz)");
3419 static const char *rspq_name = "rspq";
3420 static const char *txq_names[] =
3428 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3430 struct port_info *p = arg1;
3436 cxgb_refresh_stats(p);
3437 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3439 return (sysctl_handle_64(oidp, parg, 0, req));
3443 t3_add_configured_sysctls(adapter_t *sc)
3445 struct sysctl_ctx_list *ctx;
3446 struct sysctl_oid_list *children;
3449 ctx = device_get_sysctl_ctx(sc->dev);
3450 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3452 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3454 CTLTYPE_INT|CTLFLAG_RW, sc,
3455 0, t3_set_coalesce_usecs,
3456 "I", "interrupt coalescing timer (us)");
3458 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3460 CTLTYPE_INT | CTLFLAG_RW, sc,
3461 0, t3_pkt_timestamp,
3462 "I", "provide packet timestamp instead of connection hash");
3464 for (i = 0; i < sc->params.nports; i++) {
3465 struct port_info *pi = &sc->port[i];
3466 struct sysctl_oid *poid;
3467 struct sysctl_oid_list *poidlist;
3468 struct mac_stats *mstats = &pi->mac.stats;
3470 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3471 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3472 pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3473 poidlist = SYSCTL_CHILDREN(poid);
3474 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
3475 "nqsets", CTLFLAG_RD, &pi->nqsets,
3478 for (j = 0; j < pi->nqsets; j++) {
3479 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3480 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3481 *ctrlqpoid, *lropoid;
3482 struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3483 *txqpoidlist, *ctrlqpoidlist,
3485 struct sge_txq *txq = &qs->txq[TXQ_ETH];
3487 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3489 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3490 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3491 qspoidlist = SYSCTL_CHILDREN(qspoid);
3493 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3494 CTLFLAG_RD, &qs->fl[0].empty, 0,
3495 "freelist #0 empty");
3496 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3497 CTLFLAG_RD, &qs->fl[1].empty, 0,
3498 "freelist #1 empty");
3500 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3501 rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3502 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3504 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3505 txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3506 txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3508 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3509 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3510 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3512 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3513 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3514 lropoidlist = SYSCTL_CHILDREN(lropoid);
3516 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3517 CTLFLAG_RD, &qs->rspq.size,
3518 0, "#entries in response queue");
3519 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3520 CTLFLAG_RD, &qs->rspq.cidx,
3521 0, "consumer index");
3522 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3523 CTLFLAG_RD, &qs->rspq.credits,
3525 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
3526 CTLFLAG_RD, &qs->rspq.starved,
3527 0, "#times starved");
3528 SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3529 CTLFLAG_RD, &qs->rspq.phys_addr,
3530 "physical_address_of the queue");
3531 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3532 CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3533 0, "start rspq dump entry");
3534 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3535 CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3536 0, "#rspq entries to dump");
3537 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3538 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3539 0, t3_dump_rspq, "A", "dump of the response queue");
3541 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
3542 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
3543 "#tunneled packets dropped");
3544 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3545 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len,
3546 0, "#tunneled packets waiting to be sent");
3548 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3549 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr->br_prod,
3550 0, "#tunneled packets queue producer index");
3551 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3552 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr->br_cons,
3553 0, "#tunneled packets queue consumer index");
3555 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
3556 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3557 0, "#tunneled packets processed by the card");
3558 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3559 CTLFLAG_RD, &txq->cleaned,
3560 0, "#tunneled packets cleaned");
3561 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3562 CTLFLAG_RD, &txq->in_use,
3563 0, "#tunneled packet slots in use");
3564 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
3565 CTLFLAG_RD, &txq->txq_frees,
3566 "#tunneled packets freed");
3567 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3568 CTLFLAG_RD, &txq->txq_skipped,
3569 0, "#tunneled packet descriptors skipped");
3570 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3571 CTLFLAG_RD, &txq->txq_coalesced,
3572 "#tunneled packets coalesced");
3573 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3574 CTLFLAG_RD, &txq->txq_enqueued,
3575 0, "#tunneled packets enqueued to hardware");
3576 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3577 CTLFLAG_RD, &qs->txq_stopped,
3578 0, "tx queues stopped");
3579 SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3580 CTLFLAG_RD, &txq->phys_addr,
3581 "physical_address_of the queue");
3582 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3583 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3584 0, "txq generation");
3585 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3586 CTLFLAG_RD, &txq->cidx,
3587 0, "hardware queue cidx");
3588 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3589 CTLFLAG_RD, &txq->pidx,
3590 0, "hardware queue pidx");
3591 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3592 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3593 0, "txq start idx for dump");
3594 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3595 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3596 0, "txq #entries to dump");
3597 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3598 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3599 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3601 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3602 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3603 0, "ctrlq start idx for dump");
3604 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3605 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3606 0, "ctrl #entries to dump");
3607 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3608 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3609 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3611 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued",
3612 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3613 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3614 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3615 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3616 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3617 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3618 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3621 /* Now add a node for mac stats. */
3622 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3623 CTLFLAG_RD, NULL, "MAC statistics");
3624 poidlist = SYSCTL_CHILDREN(poid);
3627 * We (ab)use the length argument (arg2) to pass on the offset
3628 * of the data that we are interested in. This is only required
3629 * for the quad counters that are updated from the hardware (we
3630 * make sure that we return the latest value).
3631 * sysctl_handle_macstat first updates *all* the counters from
3632 * the hardware, and then returns the latest value of the
3633 * requested counter. Best would be to update only the
3634 * requested counter from hardware, but t3_mac_update_stats()
3635 * hides all the register details and we don't want to dive into them here.
3638 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3639 (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3640 sysctl_handle_macstat, "QU", 0)
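/*
 * Editor's note (illustrative): CXGB_SYSCTL_ADD_QUAD(tx_octets), for
 * example, creates a read-only 64-bit node named "tx_octets" whose arg2 is
 * offsetof(struct mac_stats, tx_octets); sysctl_handle_macstat() refreshes
 * the stats block and then reads the counter at that offset from
 * &p->mac.stats.
 */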
3641 CXGB_SYSCTL_ADD_QUAD(tx_octets);
3642 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3643 CXGB_SYSCTL_ADD_QUAD(tx_frames);
3644 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3645 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3646 CXGB_SYSCTL_ADD_QUAD(tx_pause);
3647 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3648 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3649 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3650 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3651 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3652 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3653 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3654 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3655 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3656 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3657 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3658 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3659 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3660 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3661 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3662 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3663 CXGB_SYSCTL_ADD_QUAD(rx_octets);
3664 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3665 CXGB_SYSCTL_ADD_QUAD(rx_frames);
3666 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3667 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3668 CXGB_SYSCTL_ADD_QUAD(rx_pause);
3669 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3670 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3671 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3672 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3673 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3674 CXGB_SYSCTL_ADD_QUAD(rx_runt);
3675 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3676 CXGB_SYSCTL_ADD_QUAD(rx_short);
3677 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3678 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3679 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3680 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3681 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3682 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3683 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3684 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3685 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3686 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3687 #undef CXGB_SYSCTL_ADD_QUAD
3689 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3690 CTLFLAG_RD, &mstats->a, 0)
3691 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3692 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3693 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3694 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3695 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3696 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3697 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3698 CXGB_SYSCTL_ADD_ULONG(num_toggled);
3699 CXGB_SYSCTL_ADD_ULONG(num_resets);
3700 CXGB_SYSCTL_ADD_ULONG(link_faults);
3701 #undef CXGB_SYSCTL_ADD_ULONG
3706 * t3_get_desc - dump an SGE descriptor for debugging purposes
3707 * @qs: the queue set
3708 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
3709 * @idx: the descriptor index in the queue
3710 * @data: where to dump the descriptor contents
3712 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3713 * size of the descriptor.
3716 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3717 unsigned char *data)
3723 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3725 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3726 return sizeof(struct tx_desc);
3730 if (!qs->rspq.desc || idx >= qs->rspq.size)
3732 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3733 return sizeof(struct rsp_desc);
3737 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3739 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3740 return sizeof(struct rx_desc);
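/*
 * Editor's note (illustrative): e.g. t3_get_desc(qs, 3, qs->rspq.cidx, buf)
 * copies out the response descriptor at the current consumer index; qnum
 * 0..2 select the tx queues and 4..5 the free lists, per the dispatch above.
 */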