1 /**************************************************************************
3 Copyright (c) 2007, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Chelsio Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ***************************************************************************/
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/module.h>
43 #include <machine/bus.h>
44 #include <machine/resource.h>
45 #include <sys/bus_dma.h>
47 #include <sys/queue.h>
48 #include <sys/sysctl.h>
49 #include <sys/taskqueue.h>
53 #include <sys/sched.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/in.h>
59 #include <netinet/ip.h>
60 #include <netinet/tcp.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/cxgb/common/cxgb_common.h>
65 #include <dev/cxgb/common/cxgb_regs.h>
66 #include <dev/cxgb/common/cxgb_sge_defs.h>
67 #include <dev/cxgb/common/cxgb_t3_cpl.h>
68 #include <dev/cxgb/common/cxgb_firmware_exports.h>
72 #define SGE_RX_SM_BUF_SIZE 1536
73 #define SGE_RX_DROP_THRES 16
76 * Period of the Tx buffer reclaim timer. This timer does not need to run
77 * frequently as Tx buffers are usually reclaimed by new Tx packets.
79 #define TX_RECLAIM_PERIOD (hz >> 2)
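/* With hz callout ticks per second, (hz >> 2) is roughly a quarter of a second. */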
82 * work request size in bytes
84 #define WR_LEN (WR_FLITS * 8)
87 * Values for sge_txq.flags
90 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
91 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
95 uint64_t flit[TX_DESC_FLITS];
105 struct rsp_desc { /* response queue descriptor */
106 struct rss_header rss_hdr;
109 uint8_t imm_data[47];
113 #define RX_SW_DESC_MAP_CREATED (1 << 0)
114 #define RX_SW_DESC_INUSE (1 << 3)
115 #define TX_SW_DESC_MAPPED (1 << 4)
117 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
118 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
119 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
120 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
122 struct tx_sw_desc { /* SW state per Tx descriptor */
128 struct rx_sw_desc { /* SW state per Rx descriptor */
141 * Maps a number of flits to the number of Tx descriptors that can hold them.
144 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
146 * HW allows up to 4 descriptors to be combined into a WR.
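 *
 * Illustrative example of the formula above: if WR_FLITS were 15, a
 * 30-flit request would need 1 + (30 - 2) / (15 - 1) = 3 descriptors.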
148 static uint8_t flit_desc_map[] = {
150 #if SGE_NUM_GENBITS == 1
151 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
152 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
153 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
154 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
155 #elif SGE_NUM_GENBITS == 2
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
158 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
159 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
161 # error "SGE_NUM_GENBITS must be 1 or 2"
166 static int lro_default = 0;
169 static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
170 static void sge_timer_cb(void *arg);
171 static void sge_timer_reclaim(void *arg, int ncount);
172 static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec);
175 * reclaim_completed_tx - reclaims completed Tx descriptors
176 * @adapter: the adapter
177 * @q: the Tx queue to reclaim completed descriptors from
179 * Reclaims Tx descriptors that the SGE has indicated it has processed,
180 * and frees the associated buffers if possible. Called with the Tx
184 reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec)
186 int reclaimed, reclaim = desc_reclaimable(q);
189 mtx_assert(&q->lock, MA_OWNED);
192 n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec);
193 reclaimed = min(reclaim, nbufs);
194 q->cleaned += reclaimed;
195 q->in_use -= reclaimed;
202 * t3_sge_init - initialize SGE
204 * @p: the SGE parameters
206 * Performs SGE initialization needed every time after a chip reset.
 * We do not initialize any of the queue sets here; instead the top-level
 * driver must request them individually.  We also do not enable DMA
209 * here, that should be done after the queues have been set up.
212 t3_sge_init(adapter_t *adap, struct sge_params *p)
216 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
218 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
220 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
221 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
222 #if SGE_NUM_GENBITS == 1
223 ctrl |= F_EGRGENCTRL;
225 if (adap->params.rev > 0) {
226 if (!(adap->flags & (USING_MSIX | USING_MSI)))
227 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
228 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
230 t3_write_reg(adap, A_SG_CONTROL, ctrl);
231 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
232 V_LORCQDRBTHRSH(512));
233 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
234 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
235 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
236 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
237 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
238 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
239 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
240 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
241 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
246 * sgl_len - calculates the size of an SGL of the given capacity
247 * @n: the number of SGL entries
249 * Calculates the number of flits needed for a scatter/gather list that
250 * can hold the given number of entries.
252 static __inline unsigned int
253 sgl_len(unsigned int n)
255 return ((3 * n) / 2 + (n & 1));
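/*
 * Example: a pair of SGL entries packs into 3 flits (two 8-byte addresses
 * plus two 4-byte lengths), so sgl_len(5) = (3 * 5) / 2 + (5 & 1) = 8.
 */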
259 * get_imm_packet - return the next ingress packet buffer from a response
260 * @resp: the response descriptor containing the packet data
262 * Return a packet containing the immediate data of the given response.
265 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh)
269 uint32_t flags = ntohl(resp->flags);
270 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
272 m = m_gethdr(M_NOWAIT, MT_DATA);
273 len = G_RSPD_LEN(ntohl(resp->len_cq));
275 * would be a firmware bug
277 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
281 MH_ALIGN(m, IMMED_PKT_SIZE);
282 memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE);
287 mh->mh_head = mh->mh_tail = m;
288 m->m_pkthdr.len = len;
289 m->m_flags |= M_PKTHDR;
292 m->m_flags &= ~M_PKTHDR;
293 mh->mh_head->m_pkthdr.len += len;
294 mh->mh_tail->m_next = m;
303 static __inline u_int
304 flits_to_desc(u_int n)
306 return (flit_desc_map[n]);
310 t3_sge_err_intr_handler(adapter_t *adapter)
312 unsigned int v, status;
315 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
317 if (status & F_RSPQCREDITOVERFOW)
318 CH_ALERT(adapter, "SGE response queue credit overflow\n");
320 if (status & F_RSPQDISABLED) {
321 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
324 "packet delivered to disabled response queue (0x%x)\n",
325 (v >> S_RSPQ0DISABLED) & 0xff);
328 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
329 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
330 t3_fatal_err(adapter);
334 t3_sge_prep(adapter_t *adap, struct sge_params *p)
338 /* XXX Does ETHER_ALIGN need to be accounted for here? */
339 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);
341 for (i = 0; i < SGE_QSETS; ++i) {
342 struct qset_params *q = p->qset + i;
344 q->polling = adap->params.rev > 0;
345 q->coalesce_nsecs = 3500;
346 q->rspq_size = RSPQ_Q_SIZE;
347 q->fl_size = FL_Q_SIZE;
348 q->jumbo_size = JUMBO_Q_SIZE;
349 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
350 q->txq_size[TXQ_OFLD] = 1024;
351 q->txq_size[TXQ_CTRL] = 256;
357 t3_sge_alloc(adapter_t *sc)
360 /* The parent tag. */
361 if (bus_dma_tag_create( NULL, /* parent */
362 1, 0, /* algnmnt, boundary */
363 BUS_SPACE_MAXADDR, /* lowaddr */
364 BUS_SPACE_MAXADDR, /* highaddr */
365 NULL, NULL, /* filter, filterarg */
366 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
367 BUS_SPACE_UNRESTRICTED, /* nsegments */
368 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
370 NULL, NULL, /* lock, lockarg */
372 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
377 * DMA tag for normal sized RX frames
379 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
380 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
381 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
382 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
387 * DMA tag for jumbo sized RX frames.
389 if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR,
390 BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE,
391 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
392 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
397 * DMA tag for TX frames.
399 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
400 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
401 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
402 NULL, NULL, &sc->tx_dmat)) {
403 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
411 t3_sge_free(struct adapter * sc)
414 if (sc->tx_dmat != NULL)
415 bus_dma_tag_destroy(sc->tx_dmat);
417 if (sc->rx_jumbo_dmat != NULL)
418 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
420 if (sc->rx_dmat != NULL)
421 bus_dma_tag_destroy(sc->rx_dmat);
423 if (sc->parent_dmat != NULL)
424 bus_dma_tag_destroy(sc->parent_dmat);
430 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
433 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
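	/*
	 * The divide by 100 converts nanoseconds into SGE holdoff timer
	 * ticks; A_SG_TIMER_TICK is programmed to core_ticks_per_usec / 10
	 * in t3_sge_init, i.e. one tick every 100ns.
	 */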
434 qs->rspq.polling = 0 /* p->polling */;
439 * refill_fl - refill an SGE free-buffer list
440 * @sc: the controller softc
441 * @q: the free-list to refill
442 * @n: the number of new buffers to allocate
444 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
 * The caller must ensure that @n does not exceed the queue's capacity.
448 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
450 bus_dma_segment_t seg;
451 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
452 struct rx_desc *d = &q->desc[q->pidx];
457 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, q->buf_size);
460 log(LOG_WARNING, "Failed to allocate mbuf\n");
464 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
465 bus_dmamap_create(sc->rx_jumbo_dmat, 0, &sd->map);
466 sd->flags |= RX_SW_DESC_MAP_CREATED;
468 sd->flags |= RX_SW_DESC_INUSE;
470 m->m_pkthdr.len = m->m_len = q->buf_size;
471 err = bus_dmamap_load_mbuf_sg(sc->rx_jumbo_dmat, sd->map, m, &seg,
472 &nsegs, BUS_DMA_NOWAIT);
474 log(LOG_WARNING, "failure in refill_fl %d\n", err);
480 d->addr_lo = htobe32(seg.ds_addr & 0xffffffff);
481 d->addr_hi = htobe32(((uint64_t)seg.ds_addr >>32) & 0xffffffff);
482 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
483 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
488 if (++q->pidx == q->size) {
498 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
503 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
505 * @q: the SGE free list to clean up
507 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
508 * this queue should be stopped before calling this function.
511 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
513 u_int cidx = q->cidx;
515 while (q->credits--) {
516 struct rx_sw_desc *d = &q->sdesc[cidx];
518 if (d->flags & RX_SW_DESC_INUSE) {
519 bus_dmamap_unload(sc->rx_jumbo_dmat, d->map);
520 bus_dmamap_destroy(sc->rx_jumbo_dmat, d->map);
524 if (++cidx == q->size)
530 __refill_fl(adapter_t *adap, struct sge_fl *fl)
532 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
536 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
541 *addr = segs[0].ds_addr;
545 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
546 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
549 size_t len = nelem * elem_size;
554 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
555 BUS_SPACE_MAXADDR_32BIT,
556 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
557 len, 0, NULL, NULL, tag)) != 0) {
558 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
562 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
564 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
568 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
573 len = nelem * sw_size;
574 s = malloc(len, M_DEVBUF, M_WAITOK);
582 sge_slow_intr_handler(void *arg, int ncount)
586 t3_slow_intr_handler(sc);
590 sge_timer_cb(void *arg)
596 int reclaim_eth, reclaim_ofl, refill_rx;
598 for (i = 0; i < sc->params.nports; i++)
599 for (j = 0; j < sc->port[i].nqsets; j++) {
600 qs = &sc->sge.qs[i + j];
602 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
603 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
604 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
605 (qs->fl[1].credits < qs->fl[1].size));
606 if (reclaim_eth || reclaim_ofl || refill_rx) {
607 taskqueue_enqueue(sc->tq, &sc->timer_reclaim_task);
612 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
616 * This is meant to be a catch-all function to keep sge state private
621 t3_sge_init_sw(adapter_t *sc)
624 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
625 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
626 TASK_INIT(&sc->timer_reclaim_task, 0, sge_timer_reclaim, sc);
627 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
632 t3_sge_deinit_sw(adapter_t *sc)
634 callout_drain(&sc->sge_timer_ch);
635 taskqueue_drain(sc->tq, &sc->timer_reclaim_task);
636 taskqueue_drain(sc->tq, &sc->slow_intr_task);
640 * refill_rspq - replenish an SGE response queue
641 * @adapter: the adapter
642 * @q: the response queue to replenish
643 * @credits: how many new responses to make available
645 * Replenishes a response queue by making the supplied number of responses
649 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
652 /* mbufs are allocated on demand when a rspq entry is processed. */
653 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
654 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
659 sge_timer_reclaim(void *arg, int ncount)
666 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
669 * XXX assuming these quantities are allowed to change during operation
671 for (i = 0; i < sc->params.nports; i++)
672 nqsets += sc->port[i].nqsets;
674 for (i = 0; i < nqsets; i++) {
676 txq = &qs->txq[TXQ_ETH];
677 reclaimable = desc_reclaimable(txq);
678 if (reclaimable > 0) {
679 mtx_lock(&txq->lock);
680 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
681 mtx_unlock(&txq->lock);
683 for (i = 0; i < n; i++) {
688 txq = &qs->txq[TXQ_OFLD];
689 reclaimable = desc_reclaimable(txq);
690 if (reclaimable > 0) {
691 mtx_lock(&txq->lock);
692 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
693 mtx_unlock(&txq->lock);
695 for (i = 0; i < n; i++) {
700 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
701 &sc->sge.qs[0].rspq.lock;
703 if (mtx_trylock(lock)) {
704 /* XXX currently assume that we are *NOT* polling */
705 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
707 if (qs->fl[0].credits < qs->fl[0].size - 16)
708 __refill_fl(sc, &qs->fl[0]);
709 if (qs->fl[1].credits < qs->fl[1].size - 16)
710 __refill_fl(sc, &qs->fl[1]);
712 if (status & (1 << qs->rspq.cntxt_id)) {
713 if (qs->rspq.credits) {
714 refill_rspq(sc, &qs->rspq, 1);
716 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
717 1 << qs->rspq.cntxt_id);
726 * init_qset_cntxt - initialize an SGE queue set context info
728 * @id: the queue set id
730 * Initializes the TIDs and context ids for the queues of a queue set.
733 init_qset_cntxt(struct sge_qset *qs, u_int id)
736 qs->rspq.cntxt_id = id;
737 qs->fl[0].cntxt_id = 2 * id;
738 qs->fl[1].cntxt_id = 2 * id + 1;
739 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
740 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
741 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
742 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
743 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
748 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
750 txq->in_use += ndesc;
752 * XXX we don't handle stopping of queue
753 * presumably start handles this when we bump against the end
755 txqs->gen = txq->gen;
756 txq->unacked += ndesc;
757 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
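	/*
	 * Bit 3 of the running unacked WR count is folded into the WR_COMPL
	 * field, so the SGE is asked to generate a completion periodically
	 * rather than for every work request.
	 */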
759 txqs->pidx = txq->pidx;
762 if (txq->pidx >= txq->size) {
763 txq->pidx -= txq->size;
770 * calc_tx_descs - calculate the number of Tx descriptors for a packet
771 * @m: the packet mbufs
772 * @nsegs: the number of segments
774 * Returns the number of Tx descriptors needed for the given Ethernet
 * packet.  Ethernet packets require the addition of WR and CPL headers.
777 static __inline unsigned int
778 calc_tx_descs(const struct mbuf *m, int nsegs)
782 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
785 flits = sgl_len(nsegs) + 2;
787 if (m->m_pkthdr.tso_segsz)
790 return flits_to_desc(flits);
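/*
 * Worked example: a 3-segment packet too large for immediate data needs
 * sgl_len(3) + 2 = 7 flits (WR and CPL_TX_PKT header plus the SGL), with
 * typically one more flit when TSO is in use for the larger LSO header.
 */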
793 static __inline unsigned int
794 busdma_map_mbufs(struct mbuf **m, adapter_t *sc, struct tx_sw_desc *stx,
795 bus_dma_segment_t *segs, int *nsegs)
797 struct mbuf *m0, *mtmp;
801 pktlen = m0->m_pkthdr.len;
802 err = bus_dmamap_load_mbuf_sg(sc->tx_dmat, stx->map, m0, segs, nsegs, 0);
811 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
812 err, m0->m_pkthdr.len, n);
818 /* Too many segments, try to defrag */
819 m0 = m_defrag(m0, M_NOWAIT);
826 err = bus_dmamap_load_mbuf_sg(sc->tx_dmat, stx->map, m0, segs, nsegs, 0);
835 printf("map failure err=%d pktlen=%d\n", err, pktlen);
841 bus_dmamap_sync(sc->tx_dmat, stx->map, BUS_DMASYNC_PREWRITE);
842 stx->flags |= TX_SW_DESC_MAPPED;
848 * make_sgl - populate a scatter/gather list for a packet
849 * @sgp: the SGL to populate
850 * @segs: the packet dma segments
851 * @nsegs: the number of segments
853 * Generates a scatter/gather list for the buffers that make up a packet
854 * and returns the SGL size in 8-byte words. The caller must size the SGL
858 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
862 for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
866 sgp->len[idx] = htobe32(segs[i].ds_len);
867 sgp->addr[idx] = htobe64(segs[i].ds_addr);
875 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 * Ring the doorbell if a Tx queue is asleep.  There is a natural race
 * where the HW could go to sleep just after we checked; in that case the
 * interrupt handler will detect the outstanding TX packet and ring the
 * doorbell for us.
884 * When GTS is disabled we unconditionally ring the doorbell.
887 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
890 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
891 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
892 set_bit(TXQ_LAST_PKT_DB, &q->flags);
894 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
897 t3_write_reg(adap, A_SG_KDOORBELL,
898 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
901 wmb(); /* write descriptors before telling HW */
902 t3_write_reg(adap, A_SG_KDOORBELL,
903 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
908 wr_gen2(struct tx_desc *d, unsigned int gen)
910 #if SGE_NUM_GENBITS == 2
911 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
915 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
916 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
919 t3_encap(struct port_info *p, struct mbuf **m)
925 struct tx_sw_desc *stx;
926 struct txq_state txqs;
927 unsigned int nsegs, ndesc, flits, cntrl, mlen, tso_info;
930 struct work_request_hdr *wrp;
931 struct tx_sw_desc *txsd;
932 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
933 bus_dma_segment_t segs[TX_MAX_SEGS];
934 uint32_t wr_hi, wr_lo, sgl_flits;
937 struct cpl_tx_pkt *cpl;
939 DPRINTF("t3_encap ");
942 qs = &sc->sge.qs[p->first_qset];
943 txq = &qs->txq[TXQ_ETH];
944 stx = &txq->sdesc[txq->pidx];
945 txd = &txq->desc[txq->pidx];
946 cpl = (struct cpl_tx_pkt *)txd;
947 mlen = m0->m_pkthdr.len;
948 cpl->len = htonl(mlen | 0x80000000);
950 DPRINTF("mlen=%d\n", mlen);
952 * XXX handle checksum, TSO, and VLAN here
955 cntrl = V_TXPKT_INTF(p->port);
958 * XXX need to add VLAN support for 6.x
960 #ifdef VLAN_SUPPORTED
961 if (m0->m_flags & M_VLANTAG)
962 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
964 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
970 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
973 uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */
976 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
977 hdr->cntrl = htonl(cntrl);
979 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
981 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
986 if (__predict_false(m0->m_flags & M_VLANTAG)) {
987 eth_type = CPL_ETH_II_VLAN;
988 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
989 ETHER_VLAN_ENCAP_LEN);
991 eth_type = CPL_ETH_II;
992 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
994 tcp = (struct tcphdr *)((uint8_t *)ip +
997 tso_info |= V_LSO_ETH_TYPE(eth_type) |
998 V_LSO_IPHDR_WORDS(ip->ip_hl) |
999 V_LSO_TCPHDR_WORDS(tcp->th_off);
1000 hdr->lso_info = htonl(tso_info);
1004 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1005 cpl->cntrl = htonl(cntrl);
1007 if (mlen <= WR_LEN - sizeof(*cpl)) {
1008 txq_prod(txq, 1, &txqs);
1009 txq->sdesc[txqs.pidx].m = m0;
1011 if (m0->m_len == m0->m_pkthdr.len)
1012 memcpy(&txd->flit[2], m0->m_data, mlen);
1014 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1016 flits = (mlen + 7) / 8 + 2;
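		/*
		 * Payload rounded up to 8-byte flits, plus 2 flits for the
		 * embedded WR and CPL_TX_PKT header.
		 */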
1017 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1018 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1019 F_WR_SOP | F_WR_EOP | txqs.compl);
1021 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1022 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1024 wr_gen2(txd, txqs.gen);
1025 check_ring_tx_db(sc, txq);
1031 wrp = (struct work_request_hdr *)txd;
1033 if ((err = busdma_map_mbufs(m, sc, stx, segs, &nsegs)) != 0) {
1037 ndesc = calc_tx_descs(m0, nsegs);
1039 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : &sgl[0];
1040 make_sgl(sgp, segs, nsegs);
1042 sgl_flits = sgl_len(nsegs);
1044 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1045 txq_prod(txq, ndesc, &txqs);
1046 txsd = &txq->sdesc[txqs.pidx];
1047 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1048 wr_lo = htonl(V_WR_TID(txq->token));
1051 if (__predict_true(ndesc == 1)) {
1052 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1053 V_WR_SGLSFLT(flits)) | wr_hi;
1055 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1056 V_WR_GEN(txqs.gen)) | wr_lo;
1058 wr_gen2(txd, txqs.gen);
1060 unsigned int ogen = txqs.gen;
1061 const uint64_t *fp = (const uint64_t *)sgl;
1062 struct work_request_hdr *wp = wrp;
1064 /* XXX - CHECK ME */
1065 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1066 V_WR_SGLSFLT(flits)) | wr_hi;
1069 unsigned int avail = WR_FLITS - flits;
1071 if (avail > sgl_flits)
1073 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1082 if (++txqs.pidx == txq->size) {
1090 * when the head of the mbuf chain
1091 * is freed all clusters will be freed
1095 wrp = (struct work_request_hdr *)txd;
1096 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1097 V_WR_SGLSFLT(1)) | wr_hi;
1098 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1100 V_WR_GEN(txqs.gen)) | wr_lo;
1101 wr_gen2(txd, txqs.gen);
1105 skb->priority = pidx;
1107 wrp->wr_hi |= htonl(F_WR_EOP);
1109 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1110 wr_gen2((struct tx_desc *)wp, ogen);
1112 check_ring_tx_db(p->adapter, txq);
1119 * write_imm - write a packet into a Tx descriptor as immediate data
1120 * @d: the Tx descriptor to write
1122 * @len: the length of packet data to write as immediate data
1123 * @gen: the generation bit value to write
1125 * Writes a packet as immediate data into a Tx descriptor. The packet
1126 * contains a work request at its beginning. We must write the packet
 * carefully so the SGE doesn't read it accidentally before it's written
 * in its entirety.
1130 static __inline void write_imm(struct tx_desc *d, struct mbuf *m,
1131 unsigned int len, unsigned int gen)
1133 struct work_request_hdr *from = (struct work_request_hdr *)m->m_data;
1134 struct work_request_hdr *to = (struct work_request_hdr *)d;
1136 memcpy(&to[1], &from[1], len - sizeof(*from));
1137 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1138 V_WR_BCNTLFLT(len & 7));
1140 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1141 V_WR_LEN((len + 7) / 8));
1147 * check_desc_avail - check descriptor availability on a send queue
1148 * @adap: the adapter
1150 * @m: the packet needing the descriptors
1151 * @ndesc: the number of Tx descriptors needed
1152 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1154 * Checks if the requested number of Tx descriptors is available on an
1155 * SGE send queue. If the queue is already suspended or not enough
1156 * descriptors are available the packet is queued for later transmission.
1157 * Must be called with the Tx queue locked.
1159 * Returns 0 if enough descriptors are available, 1 if there aren't
1160 * enough descriptors and the packet has been queued, and 2 if the caller
1161 * needs to retry because there weren't enough descriptors at the
1162 * beginning of the call but some freed up in the mean time.
1165 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1166 struct mbuf *m, unsigned int ndesc,
	 * XXX We currently only use this for checking the control queue;
	 * the control queue is only used for binding qsets, which happens
	 * at init time, so we are guaranteed enough descriptors.
1175 if (__predict_false(!skb_queue_empty(&q->sendq))) {
1176 addq_exit: __skb_queue_tail(&q->sendq, skb);
1179 if (__predict_false(q->size - q->in_use < ndesc)) {
1181 struct sge_qset *qs = txq_to_qset(q, qid);
1183 set_bit(qid, &qs->txq_stopped);
1184 smp_mb__after_clear_bit();
1186 if (should_restart_tx(q) &&
1187 test_and_clear_bit(qid, &qs->txq_stopped))
1199 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1200 * @q: the SGE control Tx queue
1202 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1203 * that send only immediate data (presently just the control queues) and
 * thus do not have any mbufs to release.
1206 static __inline void
1207 reclaim_completed_tx_imm(struct sge_txq *q)
1209 unsigned int reclaim = q->processed - q->cleaned;
1211 mtx_assert(&q->lock, MA_OWNED);
1213 q->in_use -= reclaim;
1214 q->cleaned += reclaim;
1218 immediate(const struct mbuf *m)
1220 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
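/*
 * True when the packet is small enough (no more than WR_LEN bytes in
 * total) to be copied into a work request as immediate data.
 */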
1224 * ctrl_xmit - send a packet through an SGE control Tx queue
1225 * @adap: the adapter
1226 * @q: the control queue
1229 * Send a packet through an SGE control Tx queue. Packets sent through
1230 * a control queue must fit entirely as immediate data in a single Tx
1231 * descriptor and have no page fragments.
1234 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1237 struct work_request_hdr *wrp = (struct work_request_hdr *)m->m_data;
1239 if (__predict_false(!immediate(m))) {
1244 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1245 wrp->wr_lo = htonl(V_WR_TID(q->token));
1248 again: reclaim_completed_tx_imm(q);
1250 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1251 if (__predict_false(ret)) {
1253 mtx_unlock(&q->lock);
1259 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1262 if (++q->pidx >= q->size) {
1266 mtx_unlock(&q->lock);
1268 t3_write_reg(adap, A_SG_KDOORBELL,
1269 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1273 #ifdef RESTART_CTRLQ
1275 * restart_ctrlq - restart a suspended control queue
 * @qs: the queue set containing the control queue
1278 * Resumes transmission on a suspended Tx control queue.
1281 restart_ctrlq(unsigned long data)
1284 struct sge_qset *qs = (struct sge_qset *)data;
1285 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1286 adapter_t *adap = qs->port->adapter;
1289 again: reclaim_completed_tx_imm(q);
1291 while (q->in_use < q->size &&
1292 (skb = __skb_dequeue(&q->sendq)) != NULL) {
1294 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1296 if (++q->pidx >= q->size) {
1302 if (!skb_queue_empty(&q->sendq)) {
1303 set_bit(TXQ_CTRL, &qs->txq_stopped);
1304 smp_mb__after_clear_bit();
1306 if (should_restart_tx(q) &&
1307 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1312 mtx_unlock(&q->lock);
1313 t3_write_reg(adap, A_SG_KDOORBELL,
1314 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1319 * Send a management message through control queue 0
1322 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1324 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1328 * t3_sge_alloc_qset - initialize an SGE queue set
1329 * @sc: the controller softc
1330 * @id: the queue set id
1331 * @nports: how many Ethernet ports will be using this queue set
1332 * @irq_vec_idx: the IRQ vector index for response queue interrupts
1333 * @p: configuration parameters for this queue set
1334 * @ntxq: number of Tx queues for the queue set
1335 * @pi: port info for queue set
1337 * Allocate resources and initialize an SGE queue set. A queue set
1338 * comprises a response queue, two Rx free-buffer queues, and up to 3
1339 * Tx queues. The Tx queues are assigned roles in the order Ethernet
1340 * queue, offload queue, and control queue.
1343 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
1344 const struct qset_params *p, int ntxq, struct port_info *pi)
1346 struct sge_qset *q = &sc->sge.qs[id];
1349 init_qset_cntxt(q, id);
1351 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
1352 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
1353 &q->fl[0].desc, &q->fl[0].sdesc,
1354 &q->fl[0].desc_tag, &q->fl[0].desc_map)) != 0) {
1355 printf("error %d from alloc ring fl0\n", ret);
1359 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
1360 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
1361 &q->fl[1].desc, &q->fl[1].sdesc,
1362 &q->fl[1].desc_tag, &q->fl[1].desc_map)) != 0) {
1363 printf("error %d from alloc ring fl1\n", ret);
1367 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
1368 &q->rspq.phys_addr, &q->rspq.desc, NULL,
1369 &q->rspq.desc_tag, &q->rspq.desc_map)) != 0) {
1370 printf("error %d from alloc ring rspq\n", ret);
1374 for (i = 0; i < ntxq; ++i) {
1376 * The control queue always uses immediate data so does not
1377 * need to keep track of any mbufs.
1378 * XXX Placeholder for future TOE support.
1380 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
1382 if ((ret = alloc_ring(sc, p->txq_size[i],
1383 sizeof(struct tx_desc), sz,
1384 &q->txq[i].phys_addr, &q->txq[i].desc,
1385 &q->txq[i].sdesc, &q->txq[i].desc_tag,
1386 &q->txq[i].desc_map)) != 0) {
1387 printf("error %d from alloc ring tx %i\n", ret, i);
1392 q->txq[i].size = p->txq_size[i];
1393 mtx_init(&q->txq[i].lock, "t3 txq lock", NULL, MTX_DEF);
1396 q->fl[0].gen = q->fl[1].gen = 1;
1397 q->fl[0].size = p->fl_size;
1398 q->fl[1].size = p->jumbo_size;
1401 q->rspq.size = p->rspq_size;
1402 mtx_init(&q->rspq.lock, "t3 rspq lock", NULL, MTX_DEF);
1404 q->txq[TXQ_ETH].stop_thres = nports *
1405 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
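	/*
	 * Stop the Ethernet Tx queue while there is still room for each port
	 * to post one maximally fragmented packet (an SGL of TX_MAX_SEGS + 1
	 * entries plus header flits).
	 */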
1407 q->fl[0].buf_size = MCLBYTES;
1408 q->fl[1].buf_size = MJUMPAGESIZE;
1409 q->lro.enabled = lro_default;
1411 mtx_lock(&sc->sge.reg_lock);
1412 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
1413 q->rspq.phys_addr, q->rspq.size,
1414 q->fl[0].buf_size, 1, 0);
1416 printf("error %d from t3_sge_init_rspcntxt\n", ret);
1420 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1421 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
1422 q->fl[i].phys_addr, q->fl[i].size,
1423 q->fl[i].buf_size, p->cong_thres, 1,
1426 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
1431 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
1432 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
1433 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
1436 printf("error %d from t3_sge_init_ecntxt\n", ret);
1441 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
1442 USE_GTS, SGE_CNTXT_OFLD, id,
1443 q->txq[TXQ_OFLD].phys_addr,
1444 q->txq[TXQ_OFLD].size, 0, 1, 0);
1446 printf("error %d from t3_sge_init_ecntxt\n", ret);
1452 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
1454 q->txq[TXQ_CTRL].phys_addr,
1455 q->txq[TXQ_CTRL].size,
1456 q->txq[TXQ_CTRL].token, 1, 0);
1458 printf("error %d from t3_sge_init_ecntxt\n", ret);
1463 mtx_unlock(&sc->sge.reg_lock);
1464 t3_update_qset_coalesce(q, p);
1467 refill_fl(sc, &q->fl[0], q->fl[0].size);
1468 refill_fl(sc, &q->fl[1], q->fl[1].size);
1469 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
1471 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
1472 V_NEWTIMER(q->rspq.holdoff_tmr));
1477 mtx_unlock(&sc->sge.reg_lock);
1479 t3_free_qset(sc, q);
1486 * free_qset - free the resources of an SGE queue set
1487 * @sc: the controller owning the queue set
1490 * Release the HW and SW resources associated with an SGE queue set, such
1491 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1492 * queue set must be quiesced prior to calling this.
1495 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1499 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1500 if (q->fl[i].desc) {
1501 mtx_lock(&sc->sge.reg_lock);
1502 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1503 mtx_unlock(&sc->sge.reg_lock);
1504 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1505 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1507 bus_dma_tag_destroy(q->fl[i].desc_tag);
1509 if (q->fl[i].sdesc) {
1510 free_rx_bufs(sc, &q->fl[i]);
1511 free(q->fl[i].sdesc, M_DEVBUF);
1515 for (i = 0; i < SGE_TXQ_PER_SET; ++i) {
1516 if (q->txq[i].desc) {
1517 mtx_lock(&sc->sge.reg_lock);
1518 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1519 mtx_unlock(&sc->sge.reg_lock);
1520 bus_dmamap_unload(q->txq[i].desc_tag,
1521 q->txq[i].desc_map);
1522 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1523 q->txq[i].desc_map);
1524 bus_dma_tag_destroy(q->txq[i].desc_tag);
1526 if (q->txq[i].sdesc) {
1527 free(q->txq[i].sdesc, M_DEVBUF);
1529 if (mtx_initialized(&q->txq[i].lock)) {
1530 mtx_destroy(&q->txq[i].lock);
1535 mtx_lock(&sc->sge.reg_lock);
1536 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1537 mtx_unlock(&sc->sge.reg_lock);
1539 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1540 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1542 bus_dma_tag_destroy(q->rspq.desc_tag);
1544 if (mtx_initialized(&q->rspq.lock)) {
1545 mtx_destroy(&q->rspq.lock);
1548 bzero(q, sizeof(*q));
1552 * t3_free_sge_resources - free SGE resources
1553 * @sc: the adapter softc
1555 * Frees resources used by the SGE queue sets.
1558 t3_free_sge_resources(adapter_t *sc)
1562 for (i = 0; i < SGE_QSETS; ++i)
1563 t3_free_qset(sc, &sc->sge.qs[i]);
1567 * t3_sge_start - enable SGE
1568 * @sc: the controller softc
1570 * Enables the SGE for DMAs. This is the last step in starting packet
1574 t3_sge_start(adapter_t *sc)
1576 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1581 * free_tx_desc - reclaims Tx descriptors and their buffers
1582 * @adapter: the adapter
1583 * @q: the Tx queue to reclaim descriptors from
1584 * @n: the number of descriptors to reclaim
1586 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1587 * Tx buffers. Called with the Tx queue lock held.
1590 free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec)
1592 struct tx_sw_desc *d;
1593 unsigned int cidx = q->cidx;
1597 T3_TRACE2(sc->tb[q->cntxt_id & 7],
1598 "reclaiming %u Tx descriptors at cidx %u", n, cidx);
1600 d = &q->sdesc[cidx];
1603 DPRINTF("cidx=%d d=%p\n", cidx, d);
1605 if (d->flags & TX_SW_DESC_MAPPED) {
1606 bus_dmamap_unload(sc->tx_dmat, d->map);
1607 bus_dmamap_destroy(sc->tx_dmat, d->map);
1608 d->flags &= ~TX_SW_DESC_MAPPED;
1610 m_vec[nbufs] = d->m;
1615 if (++cidx == q->size) {
1626 * is_new_response - check if a response is newly written
1627 * @r: the response descriptor
1628 * @q: the response queue
1630 * Returns true if a response descriptor contains a yet unprocessed
1634 is_new_response(const struct rsp_desc *r,
1635 const struct sge_rspq *q)
1637 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
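/*
 * The generation bit flips each time the response ring wraps, so an entry
 * whose F_RSPD_GEN2 bit matches q->gen was written by the SGE since the
 * last wrap and has not yet been processed.
 */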
1640 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1641 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1642 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1643 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1644 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1646 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1647 #define NOMEM_INTR_DELAY 2500
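/* 2500 ticks of 0.1us each, i.e. a 250us holdoff. */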
1649 static __inline void
1650 deliver_partial_bundle(struct t3cdev *tdev,
1656 static __inline void
1657 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1662 rq->offload_skbs[rq->offload_skbs_idx++] = skb;
1663 if (rq->offload_skbs_idx == RX_BUNDLE_SIZE) {
1664 cxgb_ofld_recv(tdev, rq->offload_skbs, RX_BUNDLE_SIZE);
1665 rq->offload_skbs_idx = 0;
1666 rq->offload_bundles++;
1672 panic("implement offload enqueue\n");
1678 restart_tx(struct sge_qset *qs)
1684 t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad)
1686 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(m->m_data + ethpad);
1687 struct ifnet *ifp = pi->ifp;
1689 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, m->m_data, cpl->iff);
1690 if (&pi->adapter->port[cpl->iff] != pi)
1691 panic("bad port index %d m->m_data=%p\n", cpl->iff, m->m_data);
1694 m_adj(m, sizeof(*cpl) + ethpad);
1697 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
1698 cpl->csum_valid && cpl->csum == 0xffff) {
		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1702 m->m_pkthdr.csum_data = 0xffff;
1705 * XXX need to add VLAN support for 6.x
1707 #ifdef VLAN_SUPPORTED
1708 if (__predict_false(cpl->vlan_valid)) {
1709 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
1710 m->m_flags |= M_VLANTAG;
1713 m->m_pkthdr.rcvif = ifp;
1715 (*ifp->if_input)(ifp, m);
1719 * get_packet - return the next ingress packet buffer from a free list
1720 * @adap: the adapter that received the packet
1721 * @drop_thres: # of remaining buffers before we start dropping packets
1722 * @qs: the qset that the SGE free list holding the packet belongs to
1723 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
1724 * @r: response descriptor
1726 * Get the next packet from a free list and complete setup of the
 * mbuf.  If the packet is small we make a copy and recycle the
1728 * original buffer, otherwise we use the original buffer itself. If a
1729 * positive drop threshold is supplied packets are dropped and their
1730 * buffers recycled if (a) the number of remaining buffers is under the
1731 * threshold and the packet is too big to copy, or (b) the packet should
1732 * be copied but there is no memory for the copy.
1735 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
1736 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
1739 struct mbuf *m = NULL;
1740 unsigned int len_cq = ntohl(r->len_cq);
1741 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
1742 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
1743 uint32_t len = G_RSPD_LEN(len_cq);
1744 uint32_t flags = ntohl(r->flags);
1745 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
1748 prefetch(sd->m->m_data);
1751 bus_dmamap_sync(adap->rx_jumbo_dmat, sd->map, BUS_DMASYNC_POSTREAD);
1752 bus_dmamap_unload(adap->rx_jumbo_dmat, sd->map);
1758 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
1759 mh->mh_head = mh->mh_tail = m;
1760 m->m_pkthdr.len = len;
1761 m->m_flags |= M_PKTHDR;
1764 case RSPQ_NSOP_NEOP:
1765 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
1766 m->m_flags &= ~M_PKTHDR;
1767 if (mh->mh_tail == NULL) {
1769 printf("discarding intermediate descriptor entry\n");
1773 mh->mh_tail->m_next = m;
1775 mh->mh_head->m_pkthdr.len += len;
1779 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
1780 m->m_pkthdr.len = len;
1781 mh->mh_head = mh->mh_tail = m;
1782 m->m_flags |= M_PKTHDR;
1786 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
1787 m->m_flags &= ~M_PKTHDR;
1788 mh->mh_head->m_pkthdr.len += len;
1789 mh->mh_tail->m_next = m;
1794 if (++fl->cidx == fl->size)
1802 * handle_rsp_cntrl_info - handles control information in a response
1803 * @qs: the queue set corresponding to the response
1804 * @flags: the response control flags
1806 * Handles the control information of an SGE response, such as GTS
1807 * indications and completion credits for the queue set's Tx queues.
 * HW coalesces credits; we don't do any extra SW coalescing.
1810 static __inline void
1811 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
1813 unsigned int credits;
1816 if (flags & F_RSPD_TXQ0_GTS)
1817 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
1819 credits = G_RSPD_TXQ0_CR(flags);
1821 qs->txq[TXQ_ETH].processed += credits;
1822 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
1823 taskqueue_enqueue(qs->port->adapter->tq,
1824 &qs->port->adapter->timer_reclaim_task);
1827 credits = G_RSPD_TXQ2_CR(flags);
1829 qs->txq[TXQ_CTRL].processed += credits;
1832 if (flags & F_RSPD_TXQ1_GTS)
1833 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
1835 credits = G_RSPD_TXQ1_CR(flags);
1837 qs->txq[TXQ_OFLD].processed += credits;
1841 check_ring_db(adapter_t *adap, struct sge_qset *qs,
1842 unsigned int sleeping)
1848 * This is an awful hack to bind the ithread to CPU 1
1849 * to work around lack of ithread affinity
1852 bind_ithread(int cpu)
1854 KASSERT(cpu < mp_ncpus, ("invalid cpu identifier"));
1856 mtx_lock_spin(&sched_lock);
1857 sched_bind(curthread, cpu);
1858 mtx_unlock_spin(&sched_lock);
1864 * process_responses - process responses from an SGE response queue
1865 * @adap: the adapter
1866 * @qs: the queue set to which the response queue belongs
1867 * @budget: how many responses can be processed in this round
1869 * Process responses from an SGE response queue up to the supplied budget.
1870 * Responses include received packets as well as credits and other events
1871 * for the queues that belong to the response queue's queue set.
1872 * A negative budget is effectively unlimited.
1874 * Additionally choose the interrupt holdoff time for the next interrupt
1875 * on this queue. If the system is under memory shortage use a fairly
1876 * long delay to help recovery.
1879 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
1881 struct sge_rspq *rspq = &qs->rspq;
1882 struct rsp_desc *r = &rspq->desc[rspq->cidx];
1883 int budget_left = budget;
1884 unsigned int sleeping = 0;
1885 int lro = qs->lro.enabled;
1887 static uint8_t pinned[MAXCPU];
1890 static int last_holdoff = 0;
1891 if (rspq->holdoff_tmr != last_holdoff) {
1892 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
1893 last_holdoff = rspq->holdoff_tmr;
1896 if (pinned[qs->rspq.cntxt_id * adap->params.nports] == 0) {
1898 * Assumes that cntxt_id < mp_ncpus
1900 bind_ithread(qs->rspq.cntxt_id);
1901 pinned[qs->rspq.cntxt_id * adap->params.nports] = 1;
1903 rspq->next_holdoff = rspq->holdoff_tmr;
1905 while (__predict_true(budget_left && is_new_response(r, rspq))) {
1906 int eth, eop = 0, ethpad = 0;
1907 uint32_t flags = ntohl(r->flags);
1908 uint32_t rss_csum = *(const uint32_t *)r;
1909 uint32_t rss_hash = r->rss_hdr.rss_hash_val;
1911 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
1913 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
1915 printf("async notification\n");
1917 } else if (flags & F_RSPD_IMM_DATA_VALID) {
1919 printf("IMM DATA VALID\n");
1921 if(get_imm_packet(adap, r, &rspq->mh) == 0) {
1922 rspq->next_holdoff = NOMEM_INTR_DELAY;
1930 } else if (r->len_cq) {
1931 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
1934 eop = get_packet(adap, drop_thresh, qs, &rspq->mh, r);
1936 DPRINTF("pure response\n");
1940 if (flags & RSPD_CTRL_MASK) {
1941 sleeping |= flags & RSPD_GTS_MASK;
1942 handle_rsp_cntrl_info(qs, flags);
1946 if (__predict_false(++rspq->cidx == rspq->size)) {
1953 if (++rspq->credits >= (rspq->size / 4)) {
1954 refill_rspq(adap, rspq, rspq->credits);
1959 prefetch(rspq->mh.mh_head->m_data);
1960 prefetch(rspq->mh.mh_head->m_data + L1_CACHE_BYTES);
1963 t3_rx_eth_lro(adap, rspq, &rspq->mh, ethpad,
1964 rss_hash, rss_csum, lro);
1966 rspq->mh.mh_tail = rspq->mh.mh_head = NULL;
1969 if (__predict_false(r->rss_hdr.opcode == CPL_TRACE_PKT))
1972 rx_offload(&adap->tdev, rspq, m);
1976 taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task);
1978 __refill_fl(adap, &qs->fl[0]);
1979 __refill_fl(adap, &qs->fl[1]);
1985 t3_sge_lro_flush_all(adap, qs);
1986 deliver_partial_bundle(&adap->tdev, rspq);
1989 check_ring_db(adap, qs, sleeping);
1991 smp_mb(); /* commit Tx queue processed updates */
1992 if (__predict_false(qs->txq_stopped != 0))
1995 budget -= budget_left;
2000 * A helper function that processes responses and issues GTS.
2003 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2006 static int last_holdoff = 0;
2008 work = process_responses(adap, rspq_to_qset(rq), -1);
2010 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2011 printf("next_holdoff=%d\n", rq->next_holdoff);
2012 last_holdoff = rq->next_holdoff;
2015 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2016 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2022 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2023 * Handles data events from SGE response queues as well as error and other
2024 * async events as they all use the same interrupt pin. We use one SGE
2025 * response queue per port in this mode and protect all response queues with
2029 t3b_intr(void *data)
2032 adapter_t *adap = data;
2033 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2034 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2037 t3_write_reg(adap, A_PL_CLI, 0);
2038 map = t3_read_reg(adap, A_SG_DATA_INTR);
2043 if (__predict_false(map & F_ERRINTR))
2044 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2046 mtx_lock(&q0->lock);
2048 if (__predict_true(map & 1))
2049 process_responses_gts(adap, q0);
2052 process_responses_gts(adap, q1);
2054 mtx_unlock(&q0->lock);
2058 * The MSI interrupt handler. This needs to handle data events from SGE
2059 * response queues as well as error and other async events as they all use
2060 * the same MSI vector. We use one SGE response queue per port in this mode
2061 * and protect all response queues with queue 0's lock.
2064 t3_intr_msi(void *data)
2066 adapter_t *adap = data;
2067 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2068 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2069 int new_packets = 0;
2071 mtx_lock(&q0->lock);
2072 if (process_responses_gts(adap, q0)) {
2076 if (adap->params.nports == 2 &&
2077 process_responses_gts(adap, q1)) {
2082 mtx_unlock(&q0->lock);
2083 if (new_packets == 0)
2084 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2088 t3_intr_msix(void *data)
2090 struct sge_qset *qs = data;
2091 adapter_t *adap = qs->port->adapter;
2092 struct sge_rspq *rspq = &qs->rspq;
2094 mtx_lock(&rspq->lock);
2095 if (process_responses_gts(adap, rspq) == 0) {
2097 rspq->unhandled_irqs++;
2100 mtx_unlock(&rspq->lock);
2104 t3_lro_enable(SYSCTL_HANDLER_ARGS)
2107 int i, j, enabled, err, nqsets = 0;
2110 enabled = sc->sge.qs[0].lro.enabled;
2111 err = sysctl_handle_int(oidp, &enabled, arg2, req);
2116 if (enabled == sc->sge.qs[0].lro.enabled)
2119 for (i = 0; i < sc->params.nports; i++)
2120 for (j = 0; j < sc->port[i].nqsets; j++)
2123 for (i = 0; i < nqsets; i++) {
2124 sc->sge.qs[i].lro.enabled = enabled;
2131 t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
2133 adapter_t *sc = arg1;
2134 struct qset_params *qsp = &sc->params.sge.qset[0];
2136 struct sge_qset *qs;
2137 int i, j, err, nqsets = 0;
2140 coalesce_nsecs = qsp->coalesce_nsecs;
2141 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);
2146 if (coalesce_nsecs == qsp->coalesce_nsecs)
2149 for (i = 0; i < sc->params.nports; i++)
2150 for (j = 0; j < sc->port[i].nqsets; j++)
2153 coalesce_nsecs = max(100, coalesce_nsecs);
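	/* Clamp to at least 100ns, one SGE holdoff timer tick. */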
2155 for (i = 0; i < nqsets; i++) {
2156 qs = &sc->sge.qs[i];
2157 qsp = &sc->params.sge.qset[i];
2158 qsp->coalesce_nsecs = coalesce_nsecs;
2160 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
2161 &sc->sge.qs[0].rspq.lock;
2164 t3_update_qset_coalesce(qs, qsp);
2165 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2166 V_NEWTIMER(qs->rspq.holdoff_tmr));
2175 t3_add_sysctls(adapter_t *sc)
2177 struct sysctl_ctx_list *ctx;
2178 struct sysctl_oid_list *children;
2180 ctx = device_get_sysctl_ctx(sc->dev);
2181 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
2183 /* random information */
2184 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
2186 CTLFLAG_RD, &sc->fw_version,
2187 0, "firmware version");
2189 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
2191 CTLTYPE_INT|CTLFLAG_RW, sc,
2193 "I", "enable large receive offload");
2195 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
2197 CTLTYPE_INT|CTLFLAG_RW, sc,
2198 0, t3_set_coalesce_nsecs,
2199 "I", "interrupt coalescing timer (ns)");
2200 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2202 CTLFLAG_RW, &cxgb_debug,
2203 0, "enable verbose debugging output");
2208 * t3_get_desc - dump an SGE descriptor for debugging purposes
2209 * @qs: the queue set
2210 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
2211 * @idx: the descriptor index in the queue
2212 * @data: where to dump the descriptor contents
2214 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2215 * size of the descriptor.
2218 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2219 unsigned char *data)
2225 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2227 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2228 return sizeof(struct tx_desc);
2232 if (!qs->rspq.desc || idx >= qs->rspq.size)
2234 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2235 return sizeof(struct rsp_desc);
2239 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2241 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2242 return sizeof(struct rx_desc);