/**************************************************************************

Copyright (c) 2007-2008, Chelsio Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>

#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <sys/sched.h>

#include <sys/systm.h>
#include <sys/syslog.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <cxgb_include.h>
/*
 * XXX don't re-enable this until TOE stops assuming
 * we have an m_ext
 */
static int recycle_enable = 0;
extern int cxgb_txq_buf_ring_size;
int cxgb_cached_allocations;
int cxgb_ext_freed = 0;
int cxgb_ext_inited = 0;
int cxgb_mbufs_outstanding = 0;
extern int cxgb_use_16k_clusters;
extern int cxgb_pcpu_cache_enable;
extern int nmbjumbo4;
extern int nmbjumbo9;
extern int nmbjumbo16;

static int fl_q_size = 0;
static int jumbo_q_size = 0;

#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE	1536
#define SGE_RX_DROP_THRES	16
#define SGE_RX_COPY_THRES	128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD	(hz >> 1)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB	= 1 << 1,	/* last packet rang the doorbell */
};

struct tx_desc {			/* Tx descriptor */
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rsp_desc {			/* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP	G_RSPD_SOP_EOP(0)
#define RSPQ_EOP	G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP	G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP	G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {			/* SW state per Tx descriptor */
	struct mbuf_iovec	mi;
	bus_dmamap_t		map;
	int			flags;
};

struct rx_sw_desc {			/* SW state per Rx descriptor */
	caddr_t			rxsd_cl;
	caddr_t			data;
	bus_dmamap_t		map;
	int			flags;
};

struct txq_state {
	unsigned int	compl;
	unsigned int	gen;
	unsigned int	pidx;
};

struct refill_fl_cb_arg {
	int			error;
	bus_dma_segment_t	seg;
	int			nseg;
};

/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 *	desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};
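
/*
 * Illustrative sketch (not part of the driver): the table above simply
 * memoizes the formula in the comment preceding it.  Computed directly,
 * assuming WR_FLITS from the SGE headers, it would look like:
 *
 *	static __inline unsigned int
 *	flits_to_desc_formula(unsigned int flits)
 *	{
 *		return (flits <= 2 ? 1 : 1 + (flits - 2) / (WR_FLITS - 1));
 *	}
 *
 * e.g. with WR_FLITS == 15, 16 flits map to 2 descriptors, matching
 * flit_desc_map[16] in the SGE_NUM_GENBITS == 2 table.
 */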

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);

/**
 *	reclaim_completed_tx - reclaims completed Tx descriptors
 *	@adapter: the adapter
 *	@q: the Tx queue to reclaim completed descriptors from
 *
 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
 *	and frees the associated buffers if possible.  Called with the Tx
 *	queue's lock held.
 */
static __inline int
reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
{
	int reclaim = desc_reclaimable(q);

	if (reclaim < reclaim_min)
		return (0);

	mtx_assert(&q->lock, MA_OWNED);
	if (reclaim > 0) {
		t3_free_tx_desc(q, reclaim);
		q->cleaned += reclaim;
		q->in_use -= reclaim;
	}
	return (reclaim);
}

/**
 *	should_restart_tx - are there enough resources to restart a Tx queue?
 *	@q: the Tx queue
 *
 *	Checks if there are enough descriptors to restart a suspended Tx queue.
 */
static __inline int
should_restart_tx(const struct sge_txq *q)
{
	unsigned int r = q->processed - q->cleaned;

	return (q->in_use - r < (q->size >> 1));
}
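
/*
 * Note: the check above provides hysteresis: a suspended queue is
 * restarted only once the in-use count, net of descriptors the SGE has
 * already processed but software has not yet cleaned, drops below half
 * the queue size, which avoids rapid stop/start cycling under load.
 */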

/**
 *	t3_sge_init - initialize SGE
 *	@adap: the adapter
 *	@p: the SGE parameters
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queue sets here, instead the driver
 *	top-level must request those individually.  We also do not enable DMA
 *	here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
	u_int ctrl, ups;

	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
	ctrl |= F_EGRGENCTRL;
#endif
	if (adap->params.rev > 0) {
		if (!(adap->flags & (USING_MSIX | USING_MSI)))
			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
	}
	t3_write_reg(adap, A_SG_CONTROL, ctrl);
	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
		     V_LORCQDRBTHRSH(512));
	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
		     adap->params.rev < T3_REV_C ? 1000 : 500);
	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}

/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits needed for a scatter/gather list that
 *	can hold the given number of entries.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	return ((3 * n) / 2 + (n & 1));
}
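
/*
 * Each struct sg_ent packs two 8-byte addresses and two 4-byte lengths
 * into 3 flits, covering two segments, hence (illustrative values):
 *
 *	sgl_len(1) == 2, sgl_len(2) == 3, sgl_len(3) == 5, sgl_len(4) == 6
 */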

/**
 *	get_imm_packet - return the next ingress packet buffer from a response
 *	@resp: the response descriptor containing the packet data
 *
 *	Return a packet containing the immediate data of the given response.
 */
static int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
{

	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
	m->m_ext.ext_buf = NULL;
	m->m_ext.ext_type = 0;
	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
	return (0);
}

static __inline u_int
flits_to_desc(u_int n)
{
	return (flit_desc_map[n]);
}

#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
		    F_HIRCQPARITYERROR)

#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)

#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
		      F_RSPQDISABLED)

/**
 *	t3_sge_err_intr_handler - SGE async event interrupt handler
 *	@adapter: the adapter
 *
 *	Interrupt handler for SGE asynchronous (non-data) events.
 */
void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int v, status;

	status = t3_read_reg(adapter, A_SG_INT_CAUSE);

	if (status & SGE_PARERR)
		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
			 status & SGE_PARERR);
	if (status & SGE_FRAMINGERR)
		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
			 status & SGE_FRAMINGERR);
	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
			 "packet delivered to disabled response queue (0x%x)\n",
			 (v >> S_RSPQ0DISABLED) & 0xff);
	}

	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & SGE_FATALERR)
		t3_fatal_err(adapter);
}

void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
	int i, nqsets;

	nqsets = min(SGE_QSETS, mp_ncpus*4);

	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);

	while (!powerof2(fl_q_size))
		fl_q_size--;
#if __FreeBSD_version >= 700111
	if (cxgb_use_16k_clusters)
		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
	else
		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
#else
	jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
#endif
	while (!powerof2(jumbo_q_size))
		jumbo_q_size--;

	/* XXX Does ETHER_ALIGN need to be accounted for here? */
	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		if (adap->params.nports > 2) {
			q->coalesce_usecs = 50;
		} else {
#ifdef INVARIANTS
			q->coalesce_usecs = 10;
#else
			q->coalesce_usecs = 5;
#endif
		}
		q->rspq_size = RSPQ_Q_SIZE;
		q->fl_size = fl_q_size;
		q->jumbo_size = jumbo_q_size;
		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
		q->txq_size[TXQ_OFLD] = 1024;
		q->txq_size[TXQ_CTRL] = 256;
	}
}

int
t3_sge_alloc(adapter_t *sc)
{

	/* The parent tag. */
	if (bus_dma_tag_create( NULL,			/* parent */
				1, 0,			/* algnmnt, boundary */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
				BUS_SPACE_UNRESTRICTED, /* nsegments */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
				0,			/* flags */
				NULL, NULL,		/* lock, lockarg */
				&sc->parent_dmat)) {
		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for normal sized RX frames
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for jumbo sized RX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for TX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}

int
t3_sge_free(struct adapter * sc)
{

	if (sc->tx_dmat != NULL)
		bus_dma_tag_destroy(sc->tx_dmat);

	if (sc->rx_jumbo_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);

	if (sc->rx_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_dmat);

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return (0);
}

void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}
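
/*
 * The factor of 10 above converts microseconds into SGE timer ticks:
 * A_SG_TIMER_TICK is programmed to core_ticks_per_usec(adap) / 10 in
 * t3_sge_init(), i.e. one tick per 0.1 us, so e.g. coalesce_usecs = 5
 * yields a holdoff timer value of 50 ticks.
 */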

#if !defined(__i386__) && !defined(__amd64__)
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;
}
#endif

/**
 *	refill_fl - refill an SGE free-buffer list
 *	@sc: the controller softc
 *	@q: the free-list to refill
 *	@n: the number of new buffers to allocate
 *
 *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
 *	The caller must assure that @n does not exceed the queue's capacity.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	caddr_t cl;
	int err;
	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
	    sizeof(struct m_ext_) + sizeof(uint32_t);

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster, mbuf allocation happens after rx
		 */
		if ((cl = cxgb_cache_get(q->zone)) == NULL) {
			log(LOG_WARNING, "Failed to allocate cluster\n");
			goto done;
		}

		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
#if !defined(__i386__) && !defined(__amd64__)
		err = bus_dmamap_load(q->entry_tag, sd->map,
		    cl + header_size, q->buf_size,
		    refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
			/*
			 * XXX free cluster
			 */
			return;
		}
#else
		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
#endif
		sd->flags |= RX_SW_DESC_INUSE;
		sd->rxsd_cl = cl;
		sd->data = cl + header_size;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
	}

done:
	t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}

/**
 *	free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
 *	@q: the SGE free list to clean up
 *
 *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 *	this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			uma_zfree(q->zone, d->rxsd_cl);
		}
		d->rxsd_cl = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

static __inline void
__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
{
	if ((fl->size - fl->credits) < max)
		refill_fl(adap, fl, min(max, fl->size - fl->credits));
}

void
refill_fl_service(adapter_t *adap, struct sge_fl *fl)
{
	__refill_fl_lt(adap, fl, 512);
}

/**
 *	recycle_rx_buf - recycle a receive buffer
 *	@adapter: the adapter
 *	@q: the SGE free list
 *	@idx: index of buffer to recycle
 *
 *	Recycles the specified buffer on the given free list by adding it at
 *	the next available slot on the list.
 */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
	struct rx_desc *from = &q->desc[idx];
	struct rx_desc *to   = &q->desc[q->pidx];

	q->sdesc[q->pidx] = q->sdesc[idx];
	to->addr_lo = from->addr_lo;	// already big endian
	to->addr_hi = from->addr_hi;	// likewise
	wmb();
	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
	q->credits++;

	if (++q->pidx == q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}

static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	uint32_t *addr;

	addr = arg;
	*addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
				      BUS_SPACE_MAXADDR_32BIT,
				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
				      len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
				    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
				      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
				      NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}

	return (0);
}

static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
}

/**
 *	sge_timer_cb - perform periodic maintenance of an SGE qset
 *	@data: the SGE queue set to maintain
 *
 *	Runs periodically from a timer to perform maintenance of an SGE queue
 *	set.  It performs the following tasks:
 *
 *	a) Cleans up any completed Tx descriptors that may still be pending.
 *	Normal descriptor cleanup happens when new packets are added to a Tx
 *	queue so this timer is relatively infrequent and does any cleanup only
 *	if the Tx queue has not seen any new packets in a while.  We make a
 *	best effort attempt to reclaim descriptors, in that we don't wait
 *	around if we cannot get a queue's lock (which most likely is because
 *	someone else is queueing new packets and so will also handle the clean
 *	up).  Since control queues use immediate data exclusively we don't
 *	bother cleaning them up here.
 *
 *	b) Replenishes Rx queues that have run out due to memory shortage.
 *	Normally new Rx buffers are added when existing ones are consumed but
 *	when out of memory a queue can become empty.  We try to add only a few
 *	buffers here, the queue will be replenished fully as these new buffers
 *	are used up if memory shortage has subsided.
 *
 *	c) Return coalesced response queue credits in case a response queue is
 *	starved.
 *
 *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
 *	fifo overflows and the FW doesn't implement any recovery scheme yet.
 */
static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;
#ifndef IFNET_MULTIQUEUE
	struct port_info *pi;
	struct sge_qset *qs;
	struct sge_txq  *txq;
	int i, j;
	int reclaim_ofl, refill_rx;

	for (i = 0; i < sc->params.nports; i++) {
		pi = &sc->port[i];
		for (j = 0; j < pi->nqsets; j++) {
			qs = &sc->sge.qs[pi->first_qset + j];
			txq = &qs->txq[0];
			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
			    (qs->fl[1].credits < qs->fl[1].size));
			if (reclaim_ofl || refill_rx) {
				taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
				break;
			}
		}
	}
#endif
	if (sc->params.nports > 2) {
		int i;

		for_each_port(sc, i) {
			struct port_info *pi = &sc->port[i];

			t3_write_reg(sc, A_SG_KDOORBELL,
				     F_SELEGRCNTX |
				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
		}
	}
	if (sc->open_device_map != 0)
		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}

/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c
 */
int
t3_sge_init_adapter(adapter_t *sc)
{
	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
	mi_init();
	cxgb_cache_init();
	return (0);
}

int
t3_sge_reset_adapter(adapter_t *sc)
{
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	return (0);
}

int
t3_sge_init_port(struct port_info *pi)
{
	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
	return (0);
}

void
t3_sge_deinit_sw(adapter_t *sc)
{
	mi_deinit();
}

/**
 *	refill_rspq - replenish an SGE response queue
 *	@adapter: the adapter
 *	@q: the response queue to replenish
 *	@credits: how many new responses to make available
 *
 *	Replenishes a response queue by making the supplied number of responses
 *	available to HW.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

	/* mbufs are allocated on demand when a rspq entry is processed. */
	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}

static void
sge_txq_reclaim_(struct sge_txq *txq, int force)
{

	if (desc_reclaimable(txq) < 16)
		return;
	if (mtx_trylock(&txq->lock) == 0)
		return;
	reclaim_completed_tx_(txq, 16);
	mtx_unlock(&txq->lock);
}

static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_txq *q = arg;

	sge_txq_reclaim_(q, TRUE);
}

static void
sge_timer_reclaim(void *arg, int ncount)
{
	struct port_info *pi = arg;
	int i, nqsets = pi->nqsets;
	adapter_t *sc = pi->adapter;
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct mtx *lock;

#ifdef IFNET_MULTIQUEUE
	panic("%s should not be called with multiqueue support\n", __FUNCTION__);
#endif
	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[pi->first_qset + i];

		txq = &qs->txq[TXQ_OFLD];
		sge_txq_reclaim_(txq, FALSE);

		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			if (status & (1 << qs->rspq.cntxt_id)) {
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}

/**
 *	init_qset_cntxt - initialize an SGE queue set context info
 *	@qs: the queue set
 *	@id: the queue set id
 *
 *	Initializes the TIDs and context ids for the queues of a queue set.
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{

	qs->rspq.cntxt_id = id;
	qs->fl[0].cntxt_id = 2 * id;
	qs->fl[1].cntxt_id = 2 * id + 1;
	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;

	mbufq_init(&qs->txq[TXQ_ETH].sendq);
	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
}

static __inline void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
	txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of queue
	 * presumably start handles this when we bump against the end
	 */
	txqs->gen = txq->gen;
	txq->unacked += ndesc;
	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
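	/*
	 * The expression above asks the SGE for a WR completion roughly
	 * once every 32 descriptors: unacked accumulates modulo 32 (see
	 * the mask below), and its carry into bit 5 is shifted up into
	 * the F_WR_COMPL position of the WR header.
	 */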
	txq->unacked &= 31;
	txqs->pidx = txq->pidx;
	txq->pidx += ndesc;
#ifdef INVARIANTS
	if (((txqs->pidx > txq->cidx) &&
		(txq->pidx < txqs->pidx) &&
		(txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
		(txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
		(txq->cidx < txqs->pidx)))
		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
		    txqs->pidx, txq->pidx, txq->cidx);
#endif
	if (txq->pidx >= txq->size) {
		txq->pidx -= txq->size;
		txq->gen ^= 1;
	}
}

/**
 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
 *	@m: the packet mbufs
 *	@nsegs: the number of segments
 *
 *	Returns the number of Tx descriptors needed for the given Ethernet
 *	packet.  Ethernet packets require addition of WR and CPL headers.
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
		return (1);

	flits = sgl_len(nsegs) + 2;
#ifdef VLAN_SUPPORTED
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		flits++;
#endif
	return (flits_to_desc(flits));
}
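
/*
 * Worked example (illustrative): a non-TSO packet spanning 3 DMA
 * segments needs sgl_len(3) + 2 = 7 flits, which flits_to_desc() maps
 * to a single Tx descriptor; a TSO packet adds one more flit for the
 * LSO header.
 */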

static int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m0;
	int err, pktlen, pass = 0;

retry:
	err = 0;
	m0 = *m;
	pktlen = m0->m_pkthdr.len;
#if defined(__i386__) || defined(__amd64__)
	if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
		goto done;
	} else
#endif
		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);

	if (err == 0)
		goto done;
	if (err == EFBIG && pass == 0) {
		pass = 1;
		/* Too many segments, try to defrag */
		m0 = m_defrag(m0, M_DONTWAIT);
		if (m0 == NULL) {
			m_freem(*m);
			*m = NULL;
			return (ENOBUFS);
		}
		*m = m0;
		goto retry;
	} else if (err == ENOMEM) {
		return (err);
	} else if (err) {
		printf("map failure err=%d pktlen=%d\n", err, pktlen);
		m_freem(m0);
		*m = NULL;
		return (err);
	}
done:
#if !defined(__i386__) && !defined(__amd64__)
	bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
#endif
	txsd->flags |= TX_SW_DESC_MAPPED;

	return (0);
}

/**
 *	make_sgl - populate a scatter/gather list for a packet
 *	@sgp: the SGL to populate
 *	@segs: the packet dma segments
 *	@nsegs: the number of segments
 *
 *	Generates a scatter/gather list for the buffers that make up a packet
 *	and returns the SGL size in 8-byte words.  The caller must size the SGL
 *	appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++) {
		/*
		 * firmware doesn't like empty segments
		 */
		if (segs[i].ds_len == 0)
			continue;
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
		idx ^= 1;
	}

	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}
}

/**
 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 *	@adap: the adapter
 *	@q: the Tx queue
 *
 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 *	where the HW is going to sleep just after we checked, however,
 *	then the interrupt handler will detect the outstanding TX packet
 *	and ring the doorbell for us.
 *
 *	When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
			  q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	wmb();			/* write descriptors before telling HW */
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}

static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}

/**
 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *	@ndesc: number of Tx descriptors spanned by the SGL
 *	@txd: first Tx descriptor to be written
 *	@txqs: txq state (generation and producer index)
 *	@txq: the SGE Tx queue
 *	@sgl: the SGL
 *	@flits: number of flits to the start of the SGL in the first descriptor
 *	@sgl_flits: the SGL size in flits
 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *	Write a work request header and an associated SGL.  If the SGL is
 *	small enough to fit into one Tx descriptor it has already been written
 *	and we just need to write the WR header.  Otherwise we distribute the
 *	SGL across the number of descriptors it spans.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{
	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;
		wmb();
		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
		    V_WR_GEN(txqs->gen)) | wr_lo;
		/* XXX gen? */
		wr_gen2(txd, txqs->gen);
	} else {
		unsigned int ogen = txqs->gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			txsd++;
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			KASSERT(txsd->mi.mi_base == NULL,
			    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
			wrp = (struct work_request_hdr *)txd;
			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
			    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		wrp->wr_hi |= htonl(F_WR_EOP);
		wmb();
		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}

/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)

#ifdef VLAN_SUPPORTED
#define GET_VTAG(cntrl, m) \
do { \
	if ((m)->m_flags & M_VLANTAG)					            \
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
} while (0)

#define GET_VTAG_MI(cntrl, mi) \
do { \
	if ((mi)->mi_flags & M_VLANTAG) \
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \
} while (0)
#else
#define GET_VTAG(cntrl, m)
#define GET_VTAG_MI(cntrl, m)
#endif

int
t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_txq *txq;
	struct txq_state txqs;
	struct port_info *pi;
	unsigned int ndesc, flits, cntrl, mlen;
	int err, nsegs, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, *sgl;
	uint32_t wr_hi, wr_lo, sgl_flits;
	bus_dma_segment_t segs[TX_MAX_SEGS];

	struct tx_desc *txd;
	struct mbuf_vec *mv;
	struct mbuf_iovec *mi;

	DPRINTF("t3_encap cpu=%d ", curcpu);

	mi = NULL;
	pi = qs->port;
	sc = pi->adapter;
	txq = &qs->txq[TXQ_ETH];
	txd = &txq->desc[txq->pidx];
	txsd = &txq->sdesc[txq->pidx];
	sgl = txq->txq_sgl;
	m0 = *m;

	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
	if (cxgb_debug)
		printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx);

	mtx_assert(&txq->lock, MA_OWNED);
	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
	/*
	 * XXX need to add VLAN support for 6.x
	 */
#ifdef VLAN_SUPPORTED
	if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif
	KASSERT(txsd->mi.mi_base == NULL,
	    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
	if (count > 1) {
		panic("count > 1 not supported in CVS\n");
		if ((err = busdma_map_sg_vec(m, &m0, segs, count)))
			return (err);
		nsegs = count;
	} else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) {
		if (cxgb_debug)
			printf("failed ... err=%d\n", err);
		return (err);
	}
	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));

	if (!(m0->m_pkthdr.len <= PIO_LEN)) {
		mi_collapse_mbuf(&txsd->mi, m0);
		mi = &txsd->mi;
	}
	if (count > 1) {
		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
		int i, fidx;
		struct mbuf_iovec *batchmi;

		mv = mtomv(m0);
		batchmi = mv->mv_vec;

		wrp = (struct work_request_hdr *)txd;

		flits = count*2 + 1;
		txq_prod(txq, 1, &txqs);

		for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) {
			struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i];

			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
			GET_VTAG_MI(cntrl, batchmi);
			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
			if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
				cntrl |= F_TXPKT_IPCSUM_DIS;
			if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
				cntrl |= F_TXPKT_L4CSUM_DIS;
			cbe->cntrl = htonl(cntrl);
			cbe->len = htonl(batchmi->mi_len | 0x80000000);
			cbe->addr = htobe64(segs[i].ds_addr);
			txd->flit[fidx] |= htobe64(1 << 24);
		}

		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
		wmb();
		wrp->wr_lo = htonl(V_WR_LEN(flits) |
		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
		/* XXX gen? */
		wr_gen2(txd, txqs.gen);
		check_ring_tx_db(sc, txq);

		return (0);
	} else if (tso_info) {
		int min_size = TCPPKTHDRSIZE, eth_type, tagged;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
		struct ip *ip;
		struct tcphdr *tcp;
		char *pkthdr;

		txd->flit[2] = 0;
		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
		hdr->cntrl = htonl(cntrl);
		mlen = m0->m_pkthdr.len;
		hdr->len = htonl(mlen | 0x80000000);

		DPRINTF("tso buf len=%d\n", mlen);

		tagged = m0->m_flags & M_VLANTAG;
		if (!tagged)
			min_size -= ETHER_VLAN_ENCAP_LEN;

		if (__predict_false(mlen < min_size)) {
			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz,
			    m0->m_pkthdr.csum_flags, m0->m_flags);
			panic("tx tso packet too small");
		}

		/* Make sure that ether, ip, tcp headers are all in m0 */
		if (__predict_false(m0->m_len < min_size)) {
			m0 = m_pullup(m0, min_size);
			if (__predict_false(m0 == NULL)) {
				/* XXX panic probably an overreaction */
				panic("couldn't fit header into mbuf");
			}
		}
		pkthdr = m0->m_data;

		if (tagged) {
			eth_type = CPL_ETH_II_VLAN;
			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
			    ETHER_VLAN_ENCAP_LEN);
		} else {
			eth_type = CPL_ETH_II;
			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
		}
		tcp = (struct tcphdr *)((uint8_t *)ip +
		    sizeof(*ip));

		tso_info |= V_LSO_ETH_TYPE(eth_type) |
			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
			    V_LSO_TCPHDR_WORDS(tcp->th_off);
		hdr->lso_info = htonl(tso_info);

		if (__predict_false(mlen <= PIO_LEN)) {
			/*
			 * pkt not undersized but fits in PIO_LEN
			 * Indicates a TSO bug at the higher levels.
			 */
			DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
			txq_prod(txq, 1, &txqs);
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
			m_freem(m0);
			m0 = NULL;
			flits = (mlen + 7) / 8 + 3;
			hdr->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
					  F_WR_SOP | F_WR_EOP | txqs.compl);
			wmb();
			hdr->wr.wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));

			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			return (0);
		}
		flits = 3;
	} else {
		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;

		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
			cntrl |= F_TXPKT_IPCSUM_DIS;
		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
			cntrl |= F_TXPKT_L4CSUM_DIS;
		cpl->cntrl = htonl(cntrl);
		mlen = m0->m_pkthdr.len;
		cpl->len = htonl(mlen | 0x80000000);

		if (mlen <= PIO_LEN) {
			txq_prod(txq, 1, &txqs);
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
			m_freem(m0);
			m0 = NULL;
			flits = (mlen + 7) / 8 + 2;
			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
					  F_WR_SOP | F_WR_EOP | txqs.compl);
			wmb();
			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));

			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			DPRINTF("pio buf\n");
			return (0);
		}
		DPRINTF("regular buf\n");
		flits = 2;
	}
	wrp = (struct work_request_hdr *)txd;
#ifdef	nomore
	/*
	 * XXX need to move into one of the helper routines above
	 */
	if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0)
		return (err);
	m0 = *m;
#endif
	ndesc = calc_tx_descs(m0, nsegs);

	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
	txq_prod(txq, ndesc, &txqs);
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
	check_ring_tx_db(pi->adapter, txq);

	if ((m0->m_type == MT_DATA) &&
	    ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
	    (m0->m_ext.ext_type != EXT_PACKET)) {
		m0->m_flags &= ~M_EXT;
		cxgb_mbufs_outstanding--;
		m_free(m0);
	}

	return (0);
}

/**
 *	write_imm - write a packet into a Tx descriptor as immediate data
 *	@d: the Tx descriptor to write
 *	@m: the packet
 *	@len: the length of packet data to write as immediate data
 *	@gen: the generation bit value to write
 *
 *	Writes a packet as immediate data into a Tx descriptor.  The packet
 *	contains a work request at its beginning.  We must write the packet
 *	carefully so the SGE doesn't read it accidentally before it's written
 *	in its entirety.
 */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
	  unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
	struct work_request_hdr *to = (struct work_request_hdr *)d;

	if (len > WR_LEN)
		panic("len too big %d\n", len);
	if (len < sizeof(*from))
		panic("len too small %d", len);

	memcpy(&to[1], &from[1], len - sizeof(*from));
	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
					V_WR_BCNTLFLT(len & 7));
	wmb();
	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
					V_WR_LEN((len + 7) / 8));
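	/*
	 * Example (illustrative): for len = 42, V_WR_BCNTLFLT(42 & 7)
	 * records the 2 bytes beyond the last full flit, while
	 * V_WR_LEN((42 + 7) / 8) = V_WR_LEN(6) gives the total length
	 * rounded up to 6 flits.
	 */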
	wr_gen2(d, gen);

	/*
	 * This check is a hack; we should really fix the logic so
	 * that this can't happen.
	 */
	if (m->m_type != MT_DONTFREE)
		m_free(m);
}

/**
 *	check_desc_avail - check descriptor availability on a send queue
 *	@adap: the adapter
 *	@q: the Tx queue
 *	@m: the packet needing the descriptors
 *	@ndesc: the number of Tx descriptors needed
 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 *	Checks if the requested number of Tx descriptors is available on an
 *	SGE send queue.  If the queue is already suspended or not enough
 *	descriptors are available the packet is queued for later transmission.
 *	Must be called with the Tx queue locked.
 *
 *	Returns 0 if enough descriptors are available, 1 if there aren't
 *	enough descriptors and the packet has been queued, and 2 if the caller
 *	needs to retry because there weren't enough descriptors at the
 *	beginning of the call but some freed up in the mean time.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
		 struct mbuf *m, unsigned int ndesc,
		 int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue
	 * the control queue is only used for binding qsets which happens
	 * at init time so we are guaranteed enough descriptors
	 */
	if (__predict_false(!mbufq_empty(&q->sendq))) {
addq_exit:	mbufq_tail(&q->sendq, m);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {
		struct sge_qset *qs = txq_to_qset(q, qid);

		printf("stopping q\n");

		setbit(&qs->txq_stopped, qid);
		smp_mb();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
	return 0;
}

/**
 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 *	@q: the SGE control Tx queue
 *
 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
 *	that send only immediate data (presently just the control queues) and
 *	thus do not have any mbufs to release.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	mtx_assert(&q->lock, MA_OWNED);

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}

static __inline int
immediate(const struct mbuf *m)
{
	return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
}
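
/*
 * A packet qualifies as immediate data only if it fits, work request
 * header included, within the WR_LEN bytes of a single work request;
 * the m_len test additionally guarantees the data are contiguous in
 * one mbuf, since write_imm() copies from a single buffer.
 */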

/**
 *	ctrl_xmit - send a packet through an SGE control Tx queue
 *	@adap: the adapter
 *	@q: the control queue
 *	@m: the packet
 *
 *	Send a packet through an SGE control Tx queue.  Packets sent through
 *	a control queue must fit entirely as immediate data in a single Tx
 *	descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);

	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return 0;
	}

	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wr_lo = htonl(V_WR_TID(q->token));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			mtx_unlock(&q->lock);
			log(LOG_ERR, "no desc available\n");
			return (ENOSPC);
		}
		goto again;
	}
	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	mtx_unlock(&q->lock);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}

/**
 *	restart_ctrlq - restart a suspended control queue
 *	@qs: the queue set containing the control queue
 *
 *	Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	while (q->in_use < q->size &&
	       (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (!mbufq_empty(&q->sendq)) {
		setbit(&qs->txq_stopped, TXQ_CTRL);
		smp_mb();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	mtx_unlock(&q->lock);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}

/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
}

/**
 *	free_qset - free the resources of an SGE queue set
 *	@sc: the controller owning the queue set
 *	@q: the queue set
 *
 *	Release the HW and SW resources associated with an SGE queue set, such
 *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 *	queue set must be quiesced prior to calling this.
 */
void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	t3_free_tx_desc_all(&q->txq[TXQ_ETH]);

	for (i = 0; i < SGE_TXQ_PER_SET; i++)
		if (q->txq[i].txq_mr.br_ring != NULL) {
			free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
			mtx_destroy(&q->txq[i].txq_mr.br_lock);
		}

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
					q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
					q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
					q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
			MTX_DESTROY(&q->txq[i].lock);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
	}

	if (q->rspq.desc) {
		mtx_lock_spin(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock_spin(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
				q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
		MTX_DESTROY(&q->rspq.lock);
	}

#ifdef LRO_SUPPORTED
	tcp_lro_free(&q->lro.ctrl);
#endif

	bzero(q, sizeof(*q));
}

/**
 *	t3_free_sge_resources - free SGE resources
 *	@sc: the adapter softc
 *
 *	Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
	int i, nqsets;

#ifdef IFNET_MULTIQUEUE
	panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
#endif
	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; ++i)
		t3_free_qset(sc, &sc->sge.qs[i]);
}

/**
 *	t3_sge_start - enable SGE
 *	@sc: the controller softc
 *
 *	Enables the SGE for DMAs.  This is the last step in starting packet
 *	transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}

/**
 *	t3_sge_stop - disable SGE operation
 *	@sc: the adapter
 *
 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
 *	from error interrupts) or from normal process context.  In the latter
 *	case it also disables any pending queue restart tasklets.  Note that
 *	if it is called in interrupt context it cannot disable the restart
 *	tasklets as it cannot wait, however the tasklets will have no effect
 *	since the doorbells are disabled and the driver will call this again
 *	later from process context, at which time the tasklets will be stopped
 *	if they are still running.
 */
void
t3_sge_stop(adapter_t *sc)
{
	int i, nqsets;

	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

	if (sc->tq == NULL)
		return;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; ++i) {
		struct sge_qset *qs = &sc->sge.qs[i];

		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
	}
}

/**
 *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
 *	@adapter: the adapter
 *	@q: the Tx queue to reclaim descriptors from
 *	@reclaimable: the number of descriptors to reclaim
 *
 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 *	Tx buffers.  Called with the Tx queue lock held.
 *
 *	Returns the number of buffers reclaimed.
 */
int
t3_free_tx_desc(struct sge_txq *q, int reclaimable)
{
	struct tx_sw_desc *txsd;
	unsigned int cidx;
	int reclaimed = 0;

#ifdef T3_TRACE
	T3_TRACE2(sc->tb[q->cntxt_id & 7],
	    "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
#endif
	cidx = q->cidx;
	txsd = &q->sdesc[cidx];
	DPRINTF("reclaiming %d WR\n", reclaimable);
	mtx_assert(&q->lock, MA_OWNED);
	while (reclaimable--) {
		DPRINTF("cidx=%d d=%p\n", cidx, txsd);
		if (txsd->mi.mi_base != NULL) {
			if (txsd->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, txsd->map);
				txsd->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_freem_iovec(&txsd->mi);
			buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
			txsd->mi.mi_base = NULL;
			reclaimed++;
			/*
			 * XXX check for cache hit rate here
			 */
			q->port->ifp->if_opackets++;
#if defined(DIAGNOSTIC) && 0
			if (m_get_priority(txsd->m[0]) != cidx)
				printf("pri=%d cidx=%d\n",
				    (int)m_get_priority(txsd->m[0]), cidx);
#endif
		}
		++txsd;
		if (++cidx == q->size) {
			cidx = 0;
			txsd = q->sdesc;
		}
	}
	q->cidx = cidx;

	return (reclaimed);
}

void
t3_free_tx_desc_all(struct sge_txq *q)
{
	int i;
	struct tx_sw_desc *txsd;

	for (i = 0; i < q->size; i++) {
		txsd = &q->sdesc[i];
		if (txsd->mi.mi_base != NULL) {
			if (txsd->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, txsd->map);
				txsd->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_freem_iovec(&txsd->mi);
			bzero(&txsd->mi, sizeof(txsd->mi));
		}
	}
}

/**
 *	is_new_response - check if a response is newly written
 *	@r: the response descriptor
 *	@q: the response queue
 *
 *	Returns true if a response descriptor contains a yet unprocessed
 *	response.
 */
static __inline int
is_new_response(const struct rsp_desc *r,
    const struct sge_rspq *q)
{
	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
}
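
/*
 * The queue's gen bit is flipped each time the response ring wraps, so
 * a descriptor still carrying the previous pass's generation value in
 * F_RSPD_GEN2 is stale; only a matching generation marks a response
 * the hardware has newly written.
 */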

#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500

/**
 *	write_ofld_wr - write an offload work request
 *	@adap: the adapter
 *	@m: the packet to send
 *	@q: the Tx queue
 *	@pidx: index of the first Tx descriptor to write
 *	@gen: the generation value to use
 *	@ndesc: number of descriptors the packet will occupy
 *
 *	Write an offload work request to send the supplied packet.  The packet
 *	data already carry the work request with most fields populated.
 */
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m,
    struct sge_txq *q, unsigned int pidx,
    unsigned int gen, unsigned int ndesc,
    bus_dma_segment_t *segs, unsigned int nsegs)
{
	unsigned int sgl_flits, flits;
	struct work_request_hdr *from;
	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
	struct tx_desc *d = &q->desc[pidx];
	struct txq_state txqs;

	if (immediate(m) && nsegs == 0) {
		write_imm(d, m, m->m_len, gen);
		return;
	}

	/* Only TX_DATA builds SGLs */
	from = mtod(m, struct work_request_hdr *);
	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));

	flits = m->m_len / 8;
	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;

	make_sgl(sgp, segs, nsegs);
	sgl_flits = sgl_len(nsegs);

	txqs.gen = gen;
	txqs.pidx = pidx;
	txqs.compl = 0;

	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
	    from->wr_hi, from->wr_lo);
}

/**
 *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 *	@m: the packet
 *
 *	Returns the number of Tx descriptors needed for the given offload
 *	packet.  These packets are already fully constructed.
 */
static __inline unsigned int
calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
{
	unsigned int flits, cnt = 0;
	unsigned int ndescs;

	if (m->m_len <= WR_LEN && nsegs == 0)
		return (1);	/* packet fits as immediate data */

	if (m->m_flags & M_IOVEC)
		cnt = mtomv(m)->mv_count;
	else
		cnt = nsegs;

	/* headers */
	flits = m->m_len / 8;

	ndescs = flits_to_desc(flits + sgl_len(cnt));

	return (ndescs);
}

/**
 *	ofld_xmit - send a packet through an offload queue
 *	@adap: the adapter
 *	@q: the Tx offload queue
 *	@m: the packet
 *
 *	Send an offload packet through an SGE offload queue.
 */
static int
ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret, nsegs;
	unsigned int ndesc;
	unsigned int pidx, gen;
	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
	struct tx_sw_desc *stx;

	nsegs = m_get_sgllen(m);
	vsegs = m_get_sgl(m);
	ndesc = calc_tx_descs_ofld(m, nsegs);
	busdma_map_sgl(vsegs, segs, nsegs);

	stx = &q->sdesc[q->pidx];
	KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_(q, 16);
	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
	if (__predict_false(ret)) {
		if (ret == 1) {
			printf("no ofld desc avail\n");

			m_set_priority(m, ndesc);	/* save for restart */
			mtx_unlock(&q->lock);
			return (EINTR);
		}
		goto again;
	}

	gen = q->gen;
	q->in_use += ndesc;
	pidx = q->pidx;
	q->pidx += ndesc;
	if (q->pidx >= q->size) {
		q->pidx -= q->size;
		q->gen ^= 1;
	}
#ifdef T3_TRACE
	T3_TRACE5(adap->tb[q->cntxt_id & 7],
		  "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
		  ndesc, pidx, skb->len, skb->len - skb->data_len,
		  skb_shinfo(skb)->nr_frags);
#endif
	mtx_unlock(&q->lock);

	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
	check_ring_tx_db(adap, q);
	return (0);
}

/**
 *	restart_offloadq - restart a suspended offload queue
 *	@qs: the queue set containing the offload queue
 *
 *	Resumes transmission on a suspended Tx offload queue.
 */
static void
restart_offloadq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = data;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	adapter_t *adap = qs->port->adapter;
	bus_dma_segment_t segs[TX_MAX_SEGS];
	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
	int nsegs, cleaned;

	mtx_lock(&q->lock);
again:	cleaned = reclaim_completed_tx_(q, 16);

	while ((m = mbufq_peek(&q->sendq)) != NULL) {
		unsigned int gen, pidx;
		unsigned int ndesc = m_get_priority(m);

		if (__predict_false(q->size - q->in_use < ndesc)) {
			setbit(&qs->txq_stopped, TXQ_OFLD);
			smp_mb();

			if (should_restart_tx(q) &&
			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
				goto again;
			q->stops++;
			break;
		}

		gen = q->gen;
		q->in_use += ndesc;
		pidx = q->pidx;
		q->pidx += ndesc;
		if (q->pidx >= q->size) {
			q->pidx -= q->size;
			q->gen ^= 1;
		}

		(void)mbufq_dequeue(&q->sendq);
		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
		mtx_unlock(&q->lock);
		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
		mtx_lock(&q->lock);
	}
	mtx_unlock(&q->lock);

#if USE_GTS
	set_bit(TXQ_RUNNING, &q->flags);
	set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}

/**
 *	queue_set - return the queue set a packet should use
 *	@m: the packet
 *
 *	Maps a packet to the SGE queue set it should use.  The desired queue
 *	set is carried in bits 1-3 in the packet's priority.
 */
static __inline int
queue_set(const struct mbuf *m)
{
	return m_get_priority(m) >> 1;
}

/**
 *	is_ctrl_pkt - return whether an offload packet is a control packet
 *	@m: the packet
 *
 *	Determines whether an offload packet should use an OFLD or a CTRL
 *	Tx queue.  This is indicated by bit 0 in the packet's priority.
 */
static __inline int
is_ctrl_pkt(const struct mbuf *m)
{
	return m_get_priority(m) & 1;
}
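
/*
 * Illustrative sketch (not part of the driver): how a hypothetical
 * caller would encode the routing consumed by queue_set() and
 * is_ctrl_pkt() above into an mbuf's priority bits.
 */
#if 0
static __inline void
set_ofld_pkt_route(struct mbuf *m, unsigned int qset, int is_ctrl)
{
	/* bit 0 selects CTRL vs. OFLD queue, bits 1-3 the queue set */
	m_set_priority(m, (qset << 1) | (is_ctrl ? 1 : 0));
}
#endif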

/**
 *	t3_offload_tx - send an offload packet
 *	@tdev: the offload device to send to
 *	@m: the packet
 *
 *	Sends an offload packet.  We use the packet priority to select the
 *	appropriate Tx queue as follows: bit 0 indicates whether the packet
 *	should be sent as regular or control, bits 1-3 select the queue set.
 */
int
t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
	adapter_t *adap = tdev2adap(tdev);
	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];

	if (__predict_false(is_ctrl_pkt(m)))
		return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);

	return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
}

/**
 *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
 *	@tdev: the offload device that will be receiving the packets
 *	@q: the SGE response queue that assembled the bundle
 *	@m: the partial bundle
 *	@n: the number of packets in the bundle
 *
 *	Delivers a (partial) bundle of Rx offload packets to an offload device.
 */
static __inline void
deliver_partial_bundle(struct t3cdev *tdev,
			struct sge_rspq *q,
			struct mbuf *mbufs[], int n)
{
	if (n) {
		q->offload_bundles++;
		cxgb_ofld_recv(tdev, mbufs, n);
	}
}

static __inline int
rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
    struct mbuf *m, struct mbuf *rx_gather[],
    unsigned int gather_idx)
{
	rq->offload_pkts++;
	m->m_pkthdr.header = mtod(m, void *);
	rx_gather[gather_idx++] = m;
	if (gather_idx == RX_BUNDLE_SIZE) {
		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
		gather_idx = 0;
		rq->offload_bundles++;
	}
	return (gather_idx);
}

static void
restart_tx(struct sge_qset *qs)
{
	struct adapter *sc = qs->port->adapter;

	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
		qs->txq[TXQ_OFLD].restarts++;
		DPRINTF("restarting TXQ_OFLD\n");
		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
	}
	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
	    qs->txq[TXQ_CTRL].in_use);

	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
		qs->txq[TXQ_CTRL].restarts++;
		DPRINTF("restarting TXQ_CTRL\n");
		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
	}
}

/**
 *	t3_sge_alloc_qset - initialize an SGE queue set
 *	@sc: the controller softc
 *	@id: the queue set id
 *	@nports: how many Ethernet ports will be using this queue set
 *	@irq_vec_idx: the IRQ vector index for response queue interrupts
 *	@p: configuration parameters for this queue set
 *	@ntxq: number of Tx queues for the queue set
 *	@pi: port info for queue set
 *
 *	Allocate resources and initialize an SGE queue set.  A queue set
 *	comprises a response queue, two Rx free-buffer queues, and up to 3
 *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
 *	queue, offload queue, and control queue.
 */
int
t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
		  const struct qset_params *p, int ntxq, struct port_info *pi)
{
	struct sge_qset *q = &sc->sge.qs[id];
	int i, header_size, ret = 0;

	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
			    M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
			device_printf(sc->dev, "failed to allocate mbuf ring\n");
			goto err;
		}
		q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
		q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
		mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
	}

	init_qset_cntxt(q, id);
	q->idx = id;

	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
		    &q->fl[0].desc, &q->fl[0].sdesc,
		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
		printf("error %d from alloc ring fl0\n", ret);
		goto err;
	}

	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
		    &q->fl[1].desc, &q->fl[1].sdesc,
		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
		printf("error %d from alloc ring fl1\n", ret);
		goto err;
	}

	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
		    &q->rspq.desc_tag, &q->rspq.desc_map,
		    NULL, NULL)) != 0) {
		printf("error %d from alloc ring rspq\n", ret);
		goto err;
	}
2327 for (i = 0; i < ntxq; ++i) {
2329 * The control queue always uses immediate data so does not
2330 * need to keep track of any mbufs.
2331 * XXX Placeholder for future TOE support.
2333 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2335 if ((ret = alloc_ring(sc, p->txq_size[i],
2336 sizeof(struct tx_desc), sz,
2337 &q->txq[i].phys_addr, &q->txq[i].desc,
2338 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2339 &q->txq[i].desc_map,
2340 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2341 printf("error %d from alloc ring tx %d\n", ret, i);
2344 mbufq_init(&q->txq[i].sendq);
2346 q->txq[i].size = p->txq_size[i];
2347 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2348 device_get_unit(sc->dev), irq_vec_idx, i);
2349 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2352 q->txq[TXQ_ETH].port = pi;
2354 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2355 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2356 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2357 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2359 q->fl[0].gen = q->fl[1].gen = 1;
2360 q->fl[0].size = p->fl_size;
2361 q->fl[1].size = p->jumbo_size;
2365 q->rspq.size = p->rspq_size;
2368 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
2369 q->txq[TXQ_ETH].stop_thres = nports *
2370 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
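/*
 * Worked example for the stop threshold above: sgl_len(n) is the flit
 * count of an n-entry scatter-gather list, so sgl_len(TX_MAX_SEGS + 1)
 * covers a maximally fragmented packet, and the extra 3 flits presumably
 * cover the work-request and CPL headers. flits_to_desc() rounds the sum
 * up to whole Tx descriptors, and scaling by nports reserves room for one
 * worst-case packet per port before the Ethernet queue is stopped.
 */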
2372 q->fl[0].buf_size = (MCLBYTES - header_size);
2373 q->fl[0].zone = zone_clust;
2374 q->fl[0].type = EXT_CLUSTER;
2375 #if __FreeBSD_version > 800000
2376 if (cxgb_use_16k_clusters) {
2377 q->fl[1].buf_size = MJUM16BYTES - header_size;
2378 q->fl[1].zone = zone_jumbo16;
2379 q->fl[1].type = EXT_JUMBO16;
2381 q->fl[1].buf_size = MJUM9BYTES - header_size;
2382 q->fl[1].zone = zone_jumbo9;
2383 q->fl[1].type = EXT_JUMBO9;
2386 q->fl[1].buf_size = MJUMPAGESIZE - header_size;
2387 q->fl[1].zone = zone_jumbop;
2388 q->fl[1].type = EXT_JUMBOP;
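/*
 * Worked sizing example for the free lists configured above (assuming
 * the 9k jumbo zone is selected):
 *
 *	fl[1].buf_size = MJUM9BYTES - header_size
 *	               = 9216 - (m_hdr + pkthdr + m_ext_ + refcount)
 *
 * init_cluster_mbuf() later carves the mbuf header and a 32-bit
 * reference count out of the front of each cluster, so only the
 * remainder is usable as DMA payload.
 */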
2391 #ifdef LRO_SUPPORTED
2392 /* Allocate and setup the lro_ctrl structure */
2393 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2394 ret = tcp_lro_init(&q->lro.ctrl);
2396 printf("error %d from tcp_lro_init\n", ret);
2399 q->lro.ctrl.ifp = pi->ifp;
2402 mtx_lock_spin(&sc->sge.reg_lock);
2403 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2404 q->rspq.phys_addr, q->rspq.size,
2405 q->fl[0].buf_size, 1, 0);
2407 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2411 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2412 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2413 q->fl[i].phys_addr, q->fl[i].size,
2414 q->fl[i].buf_size, p->cong_thres, 1,
2417 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2422 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2423 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2424 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2427 printf("error %d from t3_sge_init_ecntxt\n", ret);
2432 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2433 USE_GTS, SGE_CNTXT_OFLD, id,
2434 q->txq[TXQ_OFLD].phys_addr,
2435 q->txq[TXQ_OFLD].size, 0, 1, 0);
2437 printf("error %d from t3_sge_init_ecntxt\n", ret);
2443 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2445 q->txq[TXQ_CTRL].phys_addr,
2446 q->txq[TXQ_CTRL].size,
2447 q->txq[TXQ_CTRL].token, 1, 0);
2449 printf("error %d from t3_sge_init_ecntxt\n", ret);
2454 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2455 device_get_unit(sc->dev), irq_vec_idx);
2456 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2458 mtx_unlock_spin(&sc->sge.reg_lock);
2459 t3_update_qset_coalesce(q, p);
2462 refill_fl(sc, &q->fl[0], q->fl[0].size);
2463 refill_fl(sc, &q->fl[1], q->fl[1].size);
2464 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2466 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2467 V_NEWTIMER(q->rspq.holdoff_tmr));
2472 mtx_unlock_spin(&sc->sge.reg_lock);
2474 t3_free_qset(sc, q);
2480 * Strip the CPL_RX_PKT header from the mbuf, leaving a regular mbuf that
2481 * carries the Ethernet frame. The hardware's checksum results and any VLAN
2482 * tag are also recorded in the packet header here.
2485 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2487 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2488 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2489 struct ifnet *ifp = pi->ifp;
2491 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2493 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2494 cpl->csum_valid && cpl->csum == 0xffff) {
2496 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2497 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2498 m->m_pkthdr.csum_data = 0xffff;
2501 * XXX need to add VLAN support for 6.x
2503 #ifdef VLAN_SUPPORTED
2504 if (__predict_false(cpl->vlan_valid)) {
2505 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2506 m->m_flags |= M_VLANTAG;
2510 m->m_pkthdr.rcvif = ifp;
2511 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2513 #ifndef DISABLE_MBUF_IOVEC
2517 * adjust after conversion to mbuf chain
2519 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2520 m->m_len -= (sizeof(*cpl) + ethpad);
2521 m->m_data += (sizeof(*cpl) + ethpad);
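/*
 * Illustrative sketch (not driver code) of how the stack consumes the
 * flags set above: with CSUM_DATA_VALID|CSUM_PSEUDO_HDR and csum_data
 * of 0xffff, TCP/UDP input can skip the software checksum entirely,
 * roughly:
 *
 *	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 *		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 *			sum = m->m_pkthdr.csum_data;	-- already final
 *	}
 */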
2525 ext_free_handler(void *arg1, void * arg2)
2527 uintptr_t type = (uintptr_t)arg2;
2532 zone = m_getzonefromtype(type);
2533 m->m_ext.ext_type = (int)type;
2535 cxgb_cache_put(zone, m);
2539 init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone)
2544 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
2545 sizeof(struct m_ext_) + sizeof(uint32_t);
2547 bzero(cl, header_size);
2548 m = (struct mbuf *)cl;
2551 SLIST_INIT(&m->m_pkthdr.tags);
2552 m->m_type = MT_DATA;
2553 m->m_flags = flags | M_NOFREE | M_EXT;
2554 m->m_data = cl + header_size;
2555 m->m_ext.ext_buf = cl;
2556 m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
2557 m->m_ext.ext_size = m_getsizefromtype(type);
2558 m->m_ext.ext_free = ext_free_handler;
2559 #if __FreeBSD_version >= 800016
2560 m->m_ext.ext_arg1 = cl;
2561 m->m_ext.ext_arg2 = (void *)(uintptr_t)type;
2563 m->m_ext.ext_args = (void *)(uintptr_t)type;
2565 m->m_ext.ext_type = EXT_EXTREF;
2566 *(m->m_ext.ref_cnt) = 1;
2567 DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt);
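/*
 * For reference, the in-cluster layout that init_cluster_mbuf() builds:
 *
 *	cl + 0					embedded struct mbuf
 *	cl + header_size - sizeof(uint32_t)	reference count (set to 1)
 *	cl + header_size			m_data, start of packet data
 *	cl + m_getsizefromtype(type)		end of the cluster
 *
 * M_NOFREE keeps the mbuf allocator from freeing the embedded header on
 * its own; the whole cluster is released through ext_free_handler().
 */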
2572 * get_packet - return the next ingress packet buffer from a free list
2573 * @adap: the adapter that received the packet
2574 * @drop_thres: # of remaining buffers before we start dropping packets
2575 * @qs: the qset that the SGE free list holding the packet belongs to
2576 * @mh: the mbuf header, which holds pointers to the head and tail of the mbuf chain
2577 * @r: response descriptor
2579 * Get the next packet from a free list and complete setup of the
2580 * mbuf. If the packet is small we make a copy and recycle the
2581 * original buffer, otherwise we use the original buffer itself. If a
2582 * positive drop threshold is supplied packets are dropped and their
2583 * buffers recycled if (a) the number of remaining buffers is under the
2584 * threshold and the packet is too big to copy, or (b) the packet should
2585 * be copied but there is no memory for the copy.
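/*
 * The SOP/EOP cases handled below, summarized (simplified sketch; the
 * real code also advances fl->cidx and applies the drop/copy policy):
 *
 *	RSPQ_SOP_EOP	whole packet in one buffer, eop = 1
 *	RSPQ_SOP	first fragment, start a new chain at mh_head
 *	RSPQ_NSOP_NEOP	middle fragment, append at mh_tail, eop = 0
 *	RSPQ_EOP	last fragment, append and report eop = 1
 */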
2587 #ifdef DISABLE_MBUF_IOVEC
2590 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2591 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2594 unsigned int len_cq = ntohl(r->len_cq);
2595 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2596 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2597 uint32_t len = G_RSPD_LEN(len_cq);
2598 uint32_t flags = ntohl(r->flags);
2599 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2601 struct mbuf *m, *m0;
2604 prefetch(sd->rxsd_cl);
2607 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2609 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2610 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2612 cl = mtod(m0, void *);
2613 memcpy(cl, sd->data, len);
2614 recycle_rx_buf(adap, fl, fl->cidx);
2620 bus_dmamap_unload(fl->entry_tag, sd->map);
2622 m = m0 = (struct mbuf *)cl;
2624 if ((sopeop == RSPQ_SOP_EOP) ||
2625 (sopeop == RSPQ_SOP))
2627 init_cluster_mbuf(cl, flags, fl->type, fl->zone);
2632 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2633 mh->mh_head = mh->mh_tail = m;
2634 m->m_pkthdr.len = len;
2637 case RSPQ_NSOP_NEOP:
2638 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2639 if (mh->mh_tail == NULL) {
2640 log(LOG_ERR, "discarding intermediate descriptor entry\n");
2644 mh->mh_tail->m_next = m;
2646 mh->mh_head->m_pkthdr.len += len;
2650 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2651 m->m_pkthdr.len = len;
2652 mh->mh_head = mh->mh_tail = m;
2656 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2657 mh->mh_head->m_pkthdr.len += len;
2658 mh->mh_tail->m_next = m;
2663 if (++fl->cidx == fl->size)
2672 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2673 struct mbuf **m, struct rsp_desc *r)
2676 unsigned int len_cq = ntohl(r->len_cq);
2677 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2678 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2679 uint32_t len = G_RSPD_LEN(len_cq);
2680 uint32_t flags = ntohl(r->flags);
2681 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2686 if ((sd + 1 )->rxsd_cl)
2687 prefetch((sd + 1)->rxsd_cl);
2688 if ((sd + 2)->rxsd_cl)
2689 prefetch((sd + 2)->rxsd_cl);
2691 DPRINTF("rx cpu=%d\n", curcpu);
2693 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2695 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2696 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2698 cl = mtod(m0, void *);
2699 memcpy(cl, sd->data, len);
2700 recycle_rx_buf(adap, fl, fl->cidx);
2704 bus_dmamap_unload(fl->entry_tag, sd->map);
2706 *m = m0 = (struct mbuf *)cl;
2711 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2712 if (cl == sd->rxsd_cl)
2713 init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone);
2714 m0->m_len = m0->m_pkthdr.len = len;
2718 case RSPQ_NSOP_NEOP:
2719 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2720 panic("chaining unsupported");
2724 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2725 panic("chaining unsupported");
2730 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2731 panic("chaining unsupported");
2735 panic("append not supported");
2737 m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
2740 if (++fl->cidx == fl->size)
2747 * handle_rsp_cntrl_info - handles control information in a response
2748 * @qs: the queue set corresponding to the response
2749 * @flags: the response control flags
2751 * Handles the control information of an SGE response, such as GTS
2752 * indications and completion credits for the queue set's Tx queues.
2753 * HW coalesces credits; we don't do any extra SW coalescing.
2755 static __inline void
2756 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2758 unsigned int credits;
2761 if (flags & F_RSPD_TXQ0_GTS)
2762 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2764 credits = G_RSPD_TXQ0_CR(flags);
2766 qs->txq[TXQ_ETH].processed += credits;
2768 credits = G_RSPD_TXQ2_CR(flags);
2770 qs->txq[TXQ_CTRL].processed += credits;
2773 if (flags & F_RSPD_TXQ1_GTS)
2774 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2776 credits = G_RSPD_TXQ1_CR(flags);
2778 qs->txq[TXQ_OFLD].processed += credits;
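/*
 * Example of the credit flow above, with illustrative numbers: a
 * response carrying G_RSPD_TXQ0_CR(flags) == 5 advances the Ethernet
 * queue's "processed" count by 5, letting the reclaim path free up to
 * five descriptors' worth of completed mbufs later on.
 */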
2783 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2784 unsigned int sleeping)
2790 * process_responses - process responses from an SGE response queue
2791 * @adap: the adapter
2792 * @qs: the queue set to which the response queue belongs
2793 * @budget: how many responses can be processed in this round
2795 * Process responses from an SGE response queue up to the supplied budget.
2796 * Responses include received packets as well as credits and other events
2797 * for the queues that belong to the response queue's queue set.
2798 * A negative budget is effectively unlimited.
2800 * Additionally choose the interrupt holdoff time for the next interrupt
2801 * on this queue. If the system is under memory pressure, use a fairly
2802 * long delay to aid recovery.
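/*
 * Calling conventions, for reference: the interrupt handlers below call
 * this through process_responses_gts(), i.e. with the unlimited budget
 * of -1; a bounded caller (hypothetical polling loop) would look like:
 *
 *	int done = process_responses(adap, qs, 64);	-- at most 64 responses
 */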
2805 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2807 struct sge_rspq *rspq = &qs->rspq;
2808 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2809 int budget_left = budget;
2810 unsigned int sleeping = 0;
2811 #ifdef LRO_SUPPORTED
2812 int lro_enabled = qs->lro.enabled;
2814 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2816 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2819 static int last_holdoff = 0;
2820 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2821 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2822 last_holdoff = rspq->holdoff_tmr;
2825 rspq->next_holdoff = rspq->holdoff_tmr;
2827 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2828 int eth, eop = 0, ethpad = 0;
2829 uint32_t flags = ntohl(r->flags);
2830 uint32_t rss_csum = *(const uint32_t *)r;
2831 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2833 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2835 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2839 printf("async notification\n");
2841 if (rspq->rspq_mh.mh_head == NULL) {
2842 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2843 m = rspq->rspq_mh.mh_head;
2845 m = m_gethdr(M_DONTWAIT, MT_DATA);
2848 /* XXX m is lost here if rspq->rspq_mh.mh_head is not NULL */
2853 memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2854 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2855 *mtod(m, char *) = CPL_ASYNC_NOTIF;
2856 rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
2858 rspq->async_notif++;
2860 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2861 struct mbuf *m = NULL;
2863 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2864 r->rss_hdr.opcode, rspq->cidx);
2865 if (rspq->rspq_mh.mh_head == NULL)
2866 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2868 m = m_gethdr(M_DONTWAIT, MT_DATA);
2870 if (rspq->rspq_mh.mh_head == NULL && m == NULL) {
2872 rspq->next_holdoff = NOMEM_INTR_DELAY;
2876 get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
2879 } else if (r->len_cq) {
2880 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2882 #ifdef DISABLE_MBUF_IOVEC
2883 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
2885 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
2887 #ifdef IFNET_MULTIQUEUE
2888 rspq->rspq_mh.mh_head->m_pkthdr.rss_hash = rss_hash;
2892 DPRINTF("pure response\n");
2896 if (flags & RSPD_CTRL_MASK) {
2897 sleeping |= flags & RSPD_GTS_MASK;
2898 handle_rsp_cntrl_info(qs, flags);
2902 if (__predict_false(++rspq->cidx == rspq->size)) {
2908 if (++rspq->credits >= (rspq->size / 4)) {
2909 refill_rspq(adap, rspq, rspq->credits);
2912 DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags);
2915 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2919 m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2922 ngathered = rx_offload(&adap->tdev, rspq,
2923 rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2924 rspq->rspq_mh.mh_head = NULL;
2925 DPRINTF("received offload packet\n");
2927 } else if (eth && eop) {
2928 struct mbuf *m = rspq->rspq_mh.mh_head;
2929 prefetch(mtod(m, uint8_t *));
2930 prefetch(mtod(m, uint8_t *) + L1_CACHE_BYTES);
2932 t3_rx_eth(adap, rspq, m, ethpad);
2934 #ifdef LRO_SUPPORTED
2936 * The T304 sends incoming packets on any qset. If LRO
2937 * is also enabled, we could end up sending the packet up
2938 * lro_ctrl->ifp's input. That is incorrect.
2940 * The mbuf's rcvif was derived from the cpl header and
2941 * is accurate. Skip LRO and just use that.
2943 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
2945 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro &&
2946 (tcp_lro_rx(lro_ctrl, m, 0) == 0)) {
2947 /* successfully queued for LRO */
2952 * LRO not enabled, packet unsuitable for LRO,
2953 * or unable to queue. Pass it up right now in this thread.
2956 struct ifnet *ifp = m->m_pkthdr.rcvif;
2957 (*ifp->if_input)(ifp, m);
2959 DPRINTF("received tunnel packet\n");
2960 rspq->rspq_mh.mh_head = NULL;
2963 __refill_fl_lt(adap, &qs->fl[0], 32);
2964 __refill_fl_lt(adap, &qs->fl[1], 32);
2968 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2970 #ifdef LRO_SUPPORTED
2972 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
2973 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
2974 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
2975 tcp_lro_flush(lro_ctrl, queued);
2980 check_ring_db(adap, qs, sleeping);
2982 smp_mb(); /* commit Tx queue processed updates */
2983 if (__predict_false(qs->txq_stopped > 1)) {
2984 printf("restarting tx on %p\n", qs);
2989 __refill_fl_lt(adap, &qs->fl[0], 512);
2990 __refill_fl_lt(adap, &qs->fl[1], 512);
2991 budget -= budget_left;
2996 * A helper function that processes responses and issues GTS.
2999 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
3002 static int last_holdoff = 0;
3004 work = process_responses(adap, rspq_to_qset(rq), -1);
3006 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3007 printf("next_holdoff=%d\n", rq->next_holdoff);
3008 last_holdoff = rq->next_holdoff;
3010 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3011 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3018 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3019 * Handles data events from SGE response queues as well as error and other
3020 * async events as they all use the same interrupt pin. We use one SGE
3021 * response queue per port in this mode and protect all response queues with queue 0's lock.
3025 t3b_intr(void *data)
3028 adapter_t *adap = data;
3029 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3031 t3_write_reg(adap, A_PL_CLI, 0);
3032 map = t3_read_reg(adap, A_SG_DATA_INTR);
3037 if (__predict_false(map & F_ERRINTR))
3038 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3040 mtx_lock(&q0->lock);
3041 for_each_port(adap, i)
3043 process_responses_gts(adap, &adap->sge.qs[i].rspq);
3044 mtx_unlock(&q0->lock);
3048 * The MSI interrupt handler. This needs to handle data events from SGE
3049 * response queues as well as error and other async events as they all use
3050 * the same MSI vector. We use one SGE response queue per port in this mode
3051 * and protect all response queues with queue 0's lock.
3054 t3_intr_msi(void *data)
3056 adapter_t *adap = data;
3057 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3058 int i, new_packets = 0;
3060 mtx_lock(&q0->lock);
3062 for_each_port(adap, i)
3063 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3065 mtx_unlock(&q0->lock);
3066 if (new_packets == 0)
3067 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3071 t3_intr_msix(void *data)
3073 struct sge_qset *qs = data;
3074 adapter_t *adap = qs->port->adapter;
3075 struct sge_rspq *rspq = &qs->rspq;
3076 #ifndef IFNET_MULTIQUEUE
3077 mtx_lock(&rspq->lock);
3079 if (mtx_trylock(&rspq->lock))
3083 if (process_responses_gts(adap, rspq) == 0)
3084 rspq->unhandled_irqs++;
3085 mtx_unlock(&rspq->lock);
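/*
 * The three dispatch models above, side by side, with a hypothetical
 * MSI-X registration sketch (resource and cookie names assumed):
 *
 *	INTx  (t3b_intr):     shared pin; A_SG_DATA_INTR reports which
 *	                      rspqs have work, all under queue 0's lock.
 *	MSI   (t3_intr_msi):  one vector; poll every port's rspq, fall
 *	                      back to the slow task if nothing was found.
 *	MSI-X (t3_intr_msix): one vector per queue set, no shared lock:
 *
 *		bus_setup_intr(sc->dev, irq_res[i], INTR_MPSAFE|INTR_TYPE_NET,
 *		    NULL, t3_intr_msix, &sc->sge.qs[i], &intr_tag[i]);
 */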
3089 #define QDUMP_SBUF_SIZE (32 * 400)
3091 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3093 struct sge_rspq *rspq;
3094 struct sge_qset *qs;
3095 int i, err, dump_end, idx;
3096 static int multiplier = 1;
3098 struct rsp_desc *rspd;
3102 qs = rspq_to_qset(rspq);
3103 if (rspq->rspq_dump_count == 0)
3105 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3107 "dump count is too large %d\n", rspq->rspq_dump_count);
3108 rspq->rspq_dump_count = 0;
3111 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3113 "dump start of %d is greater than queue size\n",
3114 rspq->rspq_dump_start);
3115 rspq->rspq_dump_start = 0;
3118 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3122 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3124 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3125 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3126 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3127 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3128 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3130 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3131 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3133 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3134 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3135 idx = i & (RSPQ_Q_SIZE-1);
3137 rspd = &rspq->desc[idx];
3138 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3139 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3140 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3141 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3142 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3143 be32toh(rspd->len_cq), rspd->intr_gen);
3145 if (sbuf_overflowed(sb)) {
3151 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
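/*
 * Usage sketch for the dump handler above (the OID prefix is assumed;
 * the exact path depends on unit, port, and qset numbering):
 *
 *	sysctl dev.cxgbc.0.port0.qs0.rspq.dump_start=0
 *	sysctl dev.cxgbc.0.port0.qs0.rspq.dump_count=32
 *	sysctl dev.cxgbc.0.port0.qs0.rspq.qdump
 *
 * The handler reads the queue context from hardware and pretty-prints
 * dump_count descriptors starting at dump_start.
 */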
3157 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3159 struct sge_txq *txq;
3160 struct sge_qset *qs;
3161 int i, j, err, dump_end;
3162 static int multiplier = 1;
3164 struct tx_desc *txd;
3165 uint32_t *WR, wr_hi, wr_lo, gen;
3169 qs = txq_to_qset(txq, TXQ_ETH);
3170 if (txq->txq_dump_count == 0) {
3173 if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3175 "dump count is too large %d\n", txq->txq_dump_count);
3176 txq->txq_dump_count = 1;
3179 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3181 "dump start of %d is greater than queue size\n",
3182 txq->txq_dump_start);
3183 txq->txq_dump_start = 0;
3186 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3192 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3194 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3195 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3196 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3197 sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3198 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3199 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3200 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3201 txq->txq_dump_start,
3202 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3204 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3205 for (i = txq->txq_dump_start; i < dump_end; i++) {
3206 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3207 WR = (uint32_t *)txd->flit;
3208 wr_hi = ntohl(WR[0]);
3209 wr_lo = ntohl(WR[1]);
3210 gen = G_WR_GEN(wr_lo);
3212 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3214 for (j = 2; j < 30; j += 4)
3215 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3216 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3219 if (sbuf_overflowed(sb)) {
3225 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3231 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3233 struct sge_txq *txq;
3234 struct sge_qset *qs;
3235 int i, j, err, dump_end;
3236 static int multiplier = 1;
3238 struct tx_desc *txd;
3239 uint32_t *WR, wr_hi, wr_lo, gen;
3242 qs = txq_to_qset(txq, TXQ_CTRL);
3243 if (txq->txq_dump_count == 0) {
3246 if (txq->txq_dump_count > 256) {
3248 "dump count is too large %d\n", txq->txq_dump_count);
3249 txq->txq_dump_count = 1;
3252 if (txq->txq_dump_start > 255) {
3254 "dump start of %d is greater than queue size\n",
3255 txq->txq_dump_start);
3256 txq->txq_dump_start = 0;
3261 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3262 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3263 txq->txq_dump_start,
3264 (txq->txq_dump_start + txq->txq_dump_count) & 255);
3266 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3267 for (i = txq->txq_dump_start; i < dump_end; i++) {
3268 txd = &txq->desc[i & (255)];
3269 WR = (uint32_t *)txd->flit;
3270 wr_hi = ntohl(WR[0]);
3271 wr_lo = ntohl(WR[1]);
3272 gen = G_WR_GEN(wr_lo);
3274 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3276 for (j = 2; j < 30; j += 4)
3277 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3278 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3281 if (sbuf_overflowed(sb)) {
3287 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3293 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3295 adapter_t *sc = arg1;
3296 struct qset_params *qsp = &sc->params.sge.qset[0];
3298 struct sge_qset *qs;
3299 int i, j, err, nqsets = 0;
3302 if ((sc->flags & FULL_INIT_DONE) == 0)
3305 coalesce_usecs = qsp->coalesce_usecs;
3306 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3311 if (coalesce_usecs == qsp->coalesce_usecs)
3314 for (i = 0; i < sc->params.nports; i++)
3315 for (j = 0; j < sc->port[i].nqsets; j++)
3318 coalesce_usecs = max(1, coalesce_usecs);
3320 for (i = 0; i < nqsets; i++) {
3321 qs = &sc->sge.qs[i];
3322 qsp = &sc->params.sge.qset[i];
3323 qsp->coalesce_usecs = coalesce_usecs;
3325 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3326 &sc->sge.qs[0].rspq.lock;
3329 t3_update_qset_coalesce(qs, qsp);
3330 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3331 V_NEWTIMER(qs->rspq.holdoff_tmr));
3340 t3_add_attach_sysctls(adapter_t *sc)
3342 struct sysctl_ctx_list *ctx;
3343 struct sysctl_oid_list *children;
3345 ctx = device_get_sysctl_ctx(sc->dev);
3346 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3348 /* random information */
3349 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3351 CTLFLAG_RD, &sc->fw_version,
3352 0, "firmware version");
3353 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3355 CTLFLAG_RD, &sc->params.rev,
3357 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3359 CTLFLAG_RW, &cxgb_debug,
3360 0, "enable verbose debugging output");
3361 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce",
3362 CTLFLAG_RD, &sc->tunq_coalesce,
3363 "#tunneled packets freed");
3364 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3366 CTLFLAG_RD, &txq_fills,
3367 0, "#times txq overrun");
3368 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3369 "pcpu_cache_enable",
3370 CTLFLAG_RW, &cxgb_pcpu_cache_enable,
3371 0, "enable driver local pcpu caches");
3372 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3374 CTLFLAG_RD, &cxgb_cached_allocations,
3375 0, "#times a cluster was allocated from cache");
3376 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3378 CTLFLAG_RD, &cxgb_cached,
3379 0, "#times a cluster was cached");
3380 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3382 CTLFLAG_RD, &cxgb_ext_freed,
3383 0, "#times a cluster was freed through ext_free");
3384 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3386 CTLFLAG_RD, &cxgb_ext_inited,
3387 0, "#times a cluster was initialized for ext_free");
3388 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3389 "mbufs_outstanding",
3390 CTLFLAG_RD, &cxgb_mbufs_outstanding,
3391 0, "#mbufs in flight in the driver");
3392 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3394 CTLFLAG_RD, &cxgb_pack_outstanding,
3395 0, "#packets in flight in the driver");
3399 static const char *rspq_name = "rspq";
3400 static const char *txq_names[] =
3408 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3410 struct port_info *p = arg1;
3416 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3419 t3_mac_update_stats(&p->mac);
3422 return (sysctl_handle_quad(oidp, parg, 0, req));
3426 t3_add_configured_sysctls(adapter_t *sc)
3428 struct sysctl_ctx_list *ctx;
3429 struct sysctl_oid_list *children;
3432 ctx = device_get_sysctl_ctx(sc->dev);
3433 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3435 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3437 CTLTYPE_INT|CTLFLAG_RW, sc,
3438 0, t3_set_coalesce_usecs,
3439 "I", "interrupt coalescing timer (us)");
3441 for (i = 0; i < sc->params.nports; i++) {
3442 struct port_info *pi = &sc->port[i];
3443 struct sysctl_oid *poid;
3444 struct sysctl_oid_list *poidlist;
3445 struct mac_stats *mstats = &pi->mac.stats;
3447 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3448 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3449 pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3450 poidlist = SYSCTL_CHILDREN(poid);
3451 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3452 "nqsets", CTLFLAG_RD, &pi->nqsets,
3455 for (j = 0; j < pi->nqsets; j++) {
3456 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3457 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid, *lropoid;
3458 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist, *lropoidlist;
3459 struct sge_txq *txq = &qs->txq[TXQ_ETH];
3461 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3463 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3464 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3465 qspoidlist = SYSCTL_CHILDREN(qspoid);
3467 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3468 rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3469 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3471 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3472 txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3473 txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3475 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3476 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3477 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3479 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3480 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3481 lropoidlist = SYSCTL_CHILDREN(lropoid);
3483 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3484 CTLFLAG_RD, &qs->rspq.size,
3485 0, "#entries in response queue");
3486 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3487 CTLFLAG_RD, &qs->rspq.cidx,
3488 0, "consumer index");
3489 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3490 CTLFLAG_RD, &qs->rspq.credits,
3492 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3493 CTLFLAG_RD, &qs->rspq.phys_addr,
3494 "physical_address_of the queue");
3495 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3496 CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3497 0, "start rspq dump entry");
3498 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3499 CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3500 0, "#rspq entries to dump");
3501 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3502 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3503 0, t3_dump_rspq, "A", "dump of the response queue");
3506 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3507 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3508 0, "#tunneled packets dropped");
3509 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3510 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3511 0, "#tunneled packets waiting to be sent");
3512 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3513 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3514 0, "#tunneled packets queue producer index");
3515 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3516 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3517 0, "#tunneled packets queue consumer index");
3518 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3519 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3520 0, "#tunneled packets processed by the card");
3521 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3522 CTLFLAG_RD, &txq->cleaned,
3523 0, "#tunneled packets cleaned");
3524 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3525 CTLFLAG_RD, &txq->in_use,
3526 0, "#tunneled packet slots in use");
3527 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3528 CTLFLAG_RD, &txq->txq_frees,
3529 "#tunneled packets freed");
3530 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3531 CTLFLAG_RD, &txq->txq_skipped,
3532 0, "#tunneled packet descriptors skipped");
3533 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced",
3534 CTLFLAG_RD, &txq->txq_coalesced,
3535 0, "#tunneled packets coalesced");
3536 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3537 CTLFLAG_RD, &txq->txq_enqueued,
3538 0, "#tunneled packets enqueued to hardware");
3539 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3540 CTLFLAG_RD, &qs->txq_stopped,
3541 0, "bitmap of stopped tx queues");
3542 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3543 CTLFLAG_RD, &txq->phys_addr,
3544 "physical_address_of the queue");
3545 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3546 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3547 0, "txq generation");
3548 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3549 CTLFLAG_RD, &txq->cidx,
3550 0, "hardware queue cidx");
3551 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3552 CTLFLAG_RD, &txq->pidx,
3553 0, "hardware queue pidx");
3554 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3555 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3556 0, "txq start idx for dump");
3557 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3558 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3559 0, "txq #entries to dump");
3560 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3561 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3562 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3564 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3565 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3566 0, "ctrlq start idx for dump");
3567 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3568 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3569 0, "ctrl #entries to dump");
3570 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3571 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3572 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3574 #ifdef LRO_SUPPORTED
3575 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3576 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3577 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3578 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3579 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3580 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3581 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3582 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3586 /* Now add a node for mac stats. */
3587 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3588 CTLFLAG_RD, NULL, "MAC statistics");
3589 poidlist = SYSCTL_CHILDREN(poid);
3592 * We (ab)use the length argument (arg2) to pass on the offset
3593 * of the data that we are interested in. This is only required
3594 * for the quad counters that are updated from the hardware (we
3595 * make sure that we return the latest value).
3596 * sysctl_handle_macstat first updates *all* the counters from
3597 * the hardware, and then returns the latest value of the
3598 * requested counter. Best would be to update only the
3599 * requested counter from hardware, but t3_mac_update_stats()
3600 * hides all the register details and we don't want to dive into that here.
3603 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3604 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3605 sysctl_handle_macstat, "QU", 0)
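/*
 * One expansion of the macro above, written out for clarity (this is
 * what the first invocation below produces):
 *
 *	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, "tx_octets",
 *	    (CTLTYPE_QUAD | CTLFLAG_RD), pi,
 *	    offsetof(struct mac_stats, tx_octets),
 *	    sysctl_handle_macstat, "QU", 0);
 *
 * arg2 thus carries the byte offset of the counter, which
 * sysctl_handle_macstat() adds to &p->mac.stats to locate the uint64_t.
 */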
3606 CXGB_SYSCTL_ADD_QUAD(tx_octets);
3607 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3608 CXGB_SYSCTL_ADD_QUAD(tx_frames);
3609 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3610 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3611 CXGB_SYSCTL_ADD_QUAD(tx_pause);
3612 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3613 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3614 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3615 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3616 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3617 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3618 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3619 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3620 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3621 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3622 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3623 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3624 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3625 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3626 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3627 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3628 CXGB_SYSCTL_ADD_QUAD(rx_octets);
3629 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3630 CXGB_SYSCTL_ADD_QUAD(rx_frames);
3631 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3632 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3633 CXGB_SYSCTL_ADD_QUAD(rx_pause);
3634 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3635 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3636 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3637 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3638 CXGB_SYSCTL_ADD_QUAD(rx_runt);
3639 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3640 CXGB_SYSCTL_ADD_QUAD(rx_short);
3641 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3642 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3643 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3644 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3645 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3646 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3647 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3648 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3649 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3650 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3651 #undef CXGB_SYSCTL_ADD_QUAD
3653 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3654 CTLFLAG_RD, &mstats->a, 0)
3655 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3656 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3657 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3658 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3659 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3660 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3661 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3662 CXGB_SYSCTL_ADD_ULONG(num_toggled);
3663 CXGB_SYSCTL_ADD_ULONG(num_resets);
3664 #undef CXGB_SYSCTL_ADD_ULONG
3669 * t3_get_desc - dump an SGE descriptor for debugging purposes
3670 * @qs: the queue set
3671 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3672 * @idx: the descriptor index in the queue
3673 * @data: where to dump the descriptor contents
3675 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3676 * size of the descriptor.
3679 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3680 unsigned char *data)
3686 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3688 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3689 return sizeof(struct tx_desc);
3693 if (!qs->rspq.desc || idx >= qs->rspq.size)
3695 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3696 return sizeof(struct rsp_desc);
3700 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3702 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3703 return sizeof(struct rx_desc);
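/*
 * Caller's view, as a sketch (the buffer is sized for tx_desc on the
 * assumption that it is the largest of the three descriptor types):
 *
 *	unsigned char buf[sizeof(struct tx_desc)];
 *	int len = t3_get_desc(qs, 0, idx, buf);	-- qnum 0: Ethernet Tx queue
 *	if (len > 0)
 *		hexdump(buf, len, NULL, 0);	-- e.g. dump for debugging
 */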