/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2016 Matthew Macy <mmacy@mattmacy.io>
 * All rights reserved.
 * Copyright (c) 2021 Rubicon Communications, LLC (Netgate)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "if_igc.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef VERBOSE_DEBUG
#define DPRINTF device_printf
#else
#define DPRINTF(...)
#endif
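
/*
 * This file implements the iflib descriptor-handling callbacks for igc(4)
 * (Intel I225/I226 class controllers): TX encapsulation and completion,
 * RX refill, availability checks and packet harvesting, all using the
 * "advanced" descriptor format.
 */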

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static int	igc_isc_txd_encap(void *arg, if_pkt_info_t pi);
static void	igc_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
static int	igc_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear);

static void	igc_isc_rxd_refill(void *arg, if_rxd_update_t iru);

static void	igc_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx);
static int	igc_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
    qidx_t budget);
static int	igc_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static int	igc_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi,
    uint32_t *cmd_type_len, uint32_t *olinfo_status);
static int	igc_tso_setup(struct tx_ring *txr, if_pkt_info_t pi,
    uint32_t *cmd_type_len, uint32_t *olinfo_status);

static void	igc_rx_checksum(uint32_t staterr, if_rxd_info_t ri, uint32_t ptype);
static int	igc_determine_rsstype(uint16_t pkt_info);

extern void	igc_if_enable_intr(if_ctx_t ctx);
extern int	igc_intr(void *arg);
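
/*
 * Descriptor-level operations handed to iflib.  The framework invokes
 * these to place packets on the TX ring, reclaim completed TX
 * descriptors, and refill/harvest the RX ring.
 */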
struct if_txrx igc_txrx = {
	.ift_txd_encap = igc_isc_txd_encap,
	.ift_txd_flush = igc_isc_txd_flush,
	.ift_txd_credits_update = igc_isc_txd_credits_update,
	.ift_rxd_available = igc_isc_rxd_available,
	.ift_rxd_pkt_get = igc_isc_rxd_pkt_get,
	.ift_rxd_refill = igc_isc_rxd_refill,
	.ift_rxd_flush = igc_isc_rxd_flush,
	.ift_legacy_intr = igc_intr
};
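
/*
 * Debugging aid: print the report-status (RS) bookkeeping for every TX
 * queue, along with any descriptors whose Descriptor Done (DD) bit is set.
 */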
void
igc_dump_rs(struct igc_adapter *adapter)
{
	if_softc_ctx_t scctx = adapter->shared;
	struct igc_tx_queue *que;
	struct tx_ring *txr;
	qidx_t i, ntxd, qid, cur;
	int16_t rs_cidx;
	uint8_t status;

	printf("\n");
	ntxd = scctx->isc_ntxd[0];
	for (qid = 0; qid < adapter->tx_num_queues; qid++) {
		que = &adapter->tx_queues[qid];
		txr = &que->txr;
		rs_cidx = txr->tx_rs_cidx;
		if (rs_cidx != txr->tx_rs_pidx) {
			cur = txr->tx_rsq[rs_cidx];
			status = txr->tx_base[cur].upper.fields.status;
			if (!(status & IGC_TXD_STAT_DD))
				printf("qid[%d]->tx_rsq[%d]: %d clear ", qid, rs_cidx, cur);
		} else {
			rs_cidx = (rs_cidx - 1) & (ntxd - 1);
			cur = txr->tx_rsq[rs_cidx];
			printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d ", qid, rs_cidx, cur);
		}
		printf("cidx_prev=%d rs_pidx=%d ", txr->tx_cidx_processed, txr->tx_rs_pidx);
		for (i = 0; i < ntxd; i++) {
			if (txr->tx_base[i].upper.fields.status & IGC_TXD_STAT_DD)
				printf("%d set ", i);
		}
		printf("\n");
	}
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
igc_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len,
    uint32_t *olinfo_status)
{
	struct igc_adv_tx_context_desc *TXD;
	uint32_t type_tucmd_mlhl = 0, vlan_macip_lens = 0;
	uint32_t mss_l4len_idx = 0;
	uint32_t paylen;

	switch (pi->ipi_etype) {
	case ETHERTYPE_IPV6:
		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV6;
		break;
	case ETHERTYPE_IP:
		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4;
		/* Tell transmit desc to also do IPv4 checksum. */
		*olinfo_status |= IGC_TXD_POPTS_IXSM << 8;
		break;
	default:
		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(pi->ipi_etype));
		break;
	}

	TXD = (struct igc_adv_tx_context_desc *)&txr->tx_base[pi->ipi_pidx];

	/* This is used in the transmit desc in encap */
	paylen = pi->ipi_len - pi->ipi_ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (pi->ipi_mflags & M_VLANTAG) {
		vlan_macip_lens |= (pi->ipi_vtag << IGC_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= pi->ipi_ehdrlen << IGC_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= pi->ipi_ip_hlen;
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (pi->ipi_tso_segsz << IGC_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (pi->ipi_tcp_hlen << IGC_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);
	*cmd_type_len |= IGC_ADVTXD_DCMD_TSE;
	*olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
	*olinfo_status |= paylen << IGC_ADVTXD_PAYLEN_SHIFT;

	return (1);
}
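
/*
 * Note: both igc_tso_setup() and igc_tx_ctx_setup() return the number of
 * descriptors they consumed (one when a context descriptor was written,
 * zero when no offload work was needed); igc_isc_txd_encap() advances its
 * ring index by that amount before writing the data descriptors.
 */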

/*********************************************************************
 *
 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
 *
 **********************************************************************/
static int
igc_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len,
    uint32_t *olinfo_status)
{
	struct igc_adv_tx_context_desc *TXD;
	uint32_t vlan_macip_lens, type_tucmd_mlhl;
	uint32_t mss_l4len_idx;
	mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0;

	/* First check if TSO is to be used */
	if (pi->ipi_csum_flags & CSUM_TSO)
		return (igc_tso_setup(txr, pi, cmd_type_len, olinfo_status));

	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= pi->ipi_len << IGC_ADVTXD_PAYLEN_SHIFT;

	/* Now ready a context descriptor */
	TXD = (struct igc_adv_tx_context_desc *)&txr->tx_base[pi->ipi_pidx];

	/*
	** In advanced descriptors the vlan tag must
	** be placed into the context descriptor. Hence
	** we need to make one even if not doing offloads.
	*/
	if (pi->ipi_mflags & M_VLANTAG) {
		vlan_macip_lens |= (pi->ipi_vtag << IGC_ADVTXD_VLAN_SHIFT);
	} else if ((pi->ipi_csum_flags & IGC_CSUM_OFFLOAD) == 0) {
		return (0);
	}

	/* Set the ether header length */
	vlan_macip_lens |= pi->ipi_ehdrlen << IGC_ADVTXD_MACLEN_SHIFT;

	switch (pi->ipi_etype) {
	case ETHERTYPE_IP:
		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4;
		break;
	case ETHERTYPE_IPV6:
		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV6;
		break;
	default:
		break;
	}

	vlan_macip_lens |= pi->ipi_ip_hlen;
	type_tucmd_mlhl |= IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DTYP_CTXT;

	switch (pi->ipi_ipproto) {
	case IPPROTO_TCP:
		if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) {
			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
			*olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
		}
		break;
	case IPPROTO_UDP:
		if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) {
			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP;
			*olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
		}
		break;
	case IPPROTO_SCTP:
		if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) {
			type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_SCTP;
			*olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
		}
		break;
	default:
		break;
	}

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	return (1);
}
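
/*
 * Encapsulate a packet: iflib hands us a DMA-mapped mbuf chain described
 * by if_pkt_info; we emit an optional context descriptor followed by one
 * advanced data descriptor per DMA segment, marking the last one EOP and,
 * when iflib asked for a completion interrupt, Report Status (RS).
 */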
static int
igc_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
	struct igc_adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct igc_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	int nsegs = pi->ipi_nsegs;
	bus_dma_segment_t *segs = pi->ipi_segs;
	union igc_adv_tx_desc *txd = NULL;
	int i, j, pidx_last;
	uint32_t olinfo_status, cmd_type_len, txd_flags;
	qidx_t ntxd;

	pidx_last = olinfo_status = 0;
	/* Basic descriptor defines */
	cmd_type_len = (IGC_ADVTXD_DTYP_DATA |
	    IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DCMD_DEXT);

	if (pi->ipi_mflags & M_VLANTAG)
		cmd_type_len |= IGC_ADVTXD_DCMD_VLE;

	i = pi->ipi_pidx;
	ntxd = scctx->isc_ntxd[0];
	txd_flags = pi->ipi_flags & IPI_TX_INTR ? IGC_ADVTXD_DCMD_RS : 0;
	/* Consume the first descriptor */
	i += igc_tx_ctx_setup(txr, pi, &cmd_type_len, &olinfo_status);
	if (i == scctx->isc_ntxd[0])
		i = 0;

	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;
		bus_addr_t segaddr;

		txd = (union igc_adv_tx_desc *)&txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(IGC_ADVTXD_DCMD_IFCS |
		    cmd_type_len | seglen);
		txd->read.olinfo_status = htole32(olinfo_status);
		pidx_last = i;
		if (++i == scctx->isc_ntxd[0]) {
			i = 0;
		}
	}
	if (txd_flags) {
		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
		txr->tx_rs_pidx = (txr->tx_rs_pidx + 1) & (ntxd - 1);
		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
	}

	txd->read.cmd_type_len |= htole32(IGC_ADVTXD_DCMD_EOP | txd_flags);
	pi->ipi_new_pidx = i;

	return (0);
}
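
/*
 * Publish the new TX producer index.  Writing the tail register (TDT)
 * tells the hardware that descriptors up to, but not including, pidx are
 * ready to be fetched and transmitted.
 */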
static void
igc_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
{
	struct igc_adapter *adapter = arg;
	struct igc_tx_queue *que = &adapter->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	IGC_WRITE_REG(&adapter->hw, IGC_TDT(txr->me), pidx);
}
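
/*
 * Report how many TX descriptors the hardware has completed.  The queue
 * records, in tx_rsq, the indices of descriptors that were sent with the
 * RS bit set; once such a descriptor has DD set, everything up to and
 * including it can be reclaimed.  With clear == false this only answers
 * whether there is anything to reclaim; with clear == true it also
 * advances the bookkeeping and returns the number of reclaimed slots.
 */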
static int
igc_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
{
	struct igc_adapter *adapter = arg;
	if_softc_ctx_t scctx = adapter->shared;
	struct igc_tx_queue *que = &adapter->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;
	qidx_t processed = 0;
	int updated;
	qidx_t cur, prev, ntxd, rs_cidx;
	int32_t delta;
	uint8_t status;

	rs_cidx = txr->tx_rs_cidx;
	if (rs_cidx == txr->tx_rs_pidx)
		return (0);
	cur = txr->tx_rsq[rs_cidx];
	status = ((union igc_adv_tx_desc *)&txr->tx_base[cur])->wb.status;
	updated = !!(status & IGC_TXD_STAT_DD);
	if (!updated)
		return (0);

	/* If clear is false just let caller know that there
	 * are descriptors to reclaim */
	if (!clear)
		return (1);

	prev = txr->tx_cidx_processed;
	ntxd = scctx->isc_ntxd[0];
	do {
		MPASS(prev != cur);
		delta = (int32_t)cur - (int32_t)prev;
		if (delta < 0)
			delta += ntxd;
		MPASS(delta > 0);
		processed += delta;
		prev = cur;
		rs_cidx = (rs_cidx + 1) & (ntxd - 1);
		if (rs_cidx == txr->tx_rs_pidx)
			break;
		cur = txr->tx_rsq[rs_cidx];
		status = ((union igc_adv_tx_desc *)&txr->tx_base[cur])->wb.status;
	} while ((status & IGC_TXD_STAT_DD));

	txr->tx_rs_cidx = rs_cidx;
	txr->tx_cidx_processed = prev;
	return (processed);
}
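
/*
 * Refill a span of RX descriptors with fresh buffer physical addresses
 * supplied by iflib in the if_rxd_update structure.
 */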
static void
igc_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct igc_adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	uint16_t rxqid = iru->iru_qsidx;
	struct igc_rx_queue *que = &sc->rx_queues[rxqid];
	union igc_adv_rx_desc *rxd;
	struct rx_ring *rxr = &que->rxr;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = (union igc_adv_rx_desc *)&rxr->rx_base[next_pidx];

		rxd->read.pkt_addr = htole64(paddrs[i]);
		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}
static void
igc_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
{
	struct igc_adapter *sc = arg;
	struct igc_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;

	IGC_WRITE_REG(&sc->hw, IGC_RDT(rxr->me), pidx);
}
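
/*
 * Count how many complete packets (descriptors with DD set, counted at
 * their EOP descriptor) are ready on the RX ring starting at idx, without
 * exceeding the budget iflib gave us.
 */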
static int
igc_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct igc_adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct igc_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union igc_adv_rx_desc *rxd;
	uint32_t staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = (union igc_adv_rx_desc *)&rxr->rx_base[i];
		staterr = le32toh(rxd->wb.upper.status_error);

		if ((staterr & IGC_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & IGC_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

/****************************************************************
 * This routine passes data that has been DMA'd into host memory
 * up to the upper layer, and initializes the ri structure.
 *
 * Returns 0 upon success, errno on failure
 ***************************************************************/
static int
igc_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct igc_adapter *adapter = arg;
	if_softc_ctx_t scctx = adapter->shared;
	struct igc_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	union igc_adv_rx_desc *rxd;
	uint16_t pkt_info, len;
	uint32_t ptype, staterr;
	int i, cidx;
	bool eop;

	staterr = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = (union igc_adv_rx_desc *)&rxr->rx_base[cidx];
		staterr = le32toh(rxd->wb.upper.status_error);
		pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info);

		MPASS((staterr & IGC_RXD_STAT_DD) != 0);

		len = le16toh(rxd->wb.upper.length);
		ptype = le32toh(rxd->wb.lower.lo_dword.data) & IGC_PKTTYPE_MASK;
		ri->iri_len += len;
		rxr->rx_bytes += ri->iri_len;

		rxd->wb.upper.status_error = 0;
		eop = ((staterr & IGC_RXD_STAT_EOP) == IGC_RXD_STAT_EOP);

		/* Make sure bad packets are discarded */
		if (eop && ((staterr & IGC_RXDEXT_STATERR_RXE) != 0)) {
			adapter->dropped_pkts++;
			return (EBADMSG);
		}
		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
#ifdef notyet
		if (rxr->hdr_split == true) {
			ri->iri_frags[i].irf_flid = 1;
			ri->iri_frags[i].irf_idx = cidx;
			if (++cidx == scctx->isc_nrxd[0])
				cidx = 0;
		}
#endif
		i++;
	} while (!eop);

	rxr->rx_packets++;

	if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0)
		igc_rx_checksum(staterr, ri, ptype);

	if (staterr & IGC_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->wb.upper.vlan);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
	ri->iri_rsstype = igc_determine_rsstype(pkt_info);
	ri->iri_cidx = cidx;
	ri->iri_nfrags = i;
	return (0);
}

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack doesn't spend time verifying it again.
 *
 *********************************************************************/
static void
igc_rx_checksum(uint32_t staterr, if_rxd_info_t ri, uint32_t ptype)
{
	uint16_t status = (uint16_t)staterr;
	uint8_t errors = (uint8_t)(staterr >> 24);

	/* Hardware asked us to ignore the checksum indication */
	if (__predict_false(status & IGC_RXD_STAT_IXSM))
		return;

	/* If there is a layer 3 or 4 error we are done */
	if (__predict_false(errors & (IGC_RXD_ERR_IPE | IGC_RXD_ERR_TCPE)))
		return;

	/* IP Checksum Good */
	if (status & IGC_RXD_STAT_IPCS)
		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);

	/* Valid L4E checksum */
	if (__predict_true(status &
	    (IGC_RXD_STAT_TCPCS | IGC_RXD_STAT_UDPCS))) {
		/* SCTP header present */
		if (__predict_false((ptype & IGC_RXDADV_PKTTYPE_ETQF) == 0 &&
		    (ptype & IGC_RXDADV_PKTTYPE_SCTP) != 0)) {
			ri->iri_csum_flags |= CSUM_SCTP_VALID;
		} else {
			ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
			ri->iri_csum_data = htons(0xffff);
		}
	}
}

/********************************************************************
 *
 *  Parse the packet type to determine the appropriate hash
 *
 ******************************************************************/
static int
igc_determine_rsstype(uint16_t pkt_info)
{
	switch (pkt_info & IGC_RXDADV_RSSTYPE_MASK) {
	case IGC_RXDADV_RSSTYPE_IPV4_TCP:
		return M_HASHTYPE_RSS_TCP_IPV4;
	case IGC_RXDADV_RSSTYPE_IPV4:
		return M_HASHTYPE_RSS_IPV4;
	case IGC_RXDADV_RSSTYPE_IPV6_TCP:
		return M_HASHTYPE_RSS_TCP_IPV6;
	case IGC_RXDADV_RSSTYPE_IPV6_EX:
		return M_HASHTYPE_RSS_IPV6_EX;
	case IGC_RXDADV_RSSTYPE_IPV6:
		return M_HASHTYPE_RSS_IPV6;
	case IGC_RXDADV_RSSTYPE_IPV6_TCP_EX:
		return M_HASHTYPE_RSS_TCP_IPV6_EX;
	default:
		return M_HASHTYPE_OPAQUE;
	}
}