2 * Copyright (c) 2016 Matthew Macy <mmacy@mattmacy.io>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <net/rss_config.h>
32 #include <netinet/in_rss.h>
36 #define DPRINTF device_printf
41 /*********************************************************************
42 * Local Function prototypes
43 *********************************************************************/
/* Transmit-side callbacks; referenced by the igb_txrx method table. */
44 static int igb_isc_txd_encap(void *arg, if_pkt_info_t pi);
45 static void igb_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
46 static int igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear);
/* Receive-side callbacks; referenced by the igb_txrx method table. */
48 static void igb_isc_rxd_refill(void *arg, if_rxd_update_t iru);
50 static void igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx);
51 static int igb_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget);
53 static int igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
/* Context-descriptor helpers used while encapsulating a transmit packet. */
55 static int igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status);
56 static int igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status);
/* Receive helpers: checksum status reporting and RSS hash type decoding. */
58 static void igb_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype);
59 static int igb_determine_rsstype(u16 pkt_info);
/* Declared here but defined outside this file. */
61 extern void igb_if_enable_intr(if_ctx_t ctx);
62 extern int em_intr(void *arg);
/*
 * TX/RX method table: binds each if_txrx hook to the igb implementation
 * defined in this file. The legacy interrupt hook points at the external
 * em_intr().
 * NOTE(review): the closing brace of this initializer is on a line that is
 * missing from this listing (embedded line numbers jump from 72 to 75).
 */
64 struct if_txrx igb_txrx = {
65 .ift_txd_encap = igb_isc_txd_encap,
66 .ift_txd_flush = igb_isc_txd_flush,
67 .ift_txd_credits_update = igb_isc_txd_credits_update,
68 .ift_rxd_available = igb_isc_rxd_available,
69 .ift_rxd_pkt_get = igb_isc_rxd_pkt_get,
70 .ift_rxd_refill = igb_isc_rxd_refill,
71 .ift_rxd_flush = igb_isc_rxd_flush,
72 .ift_legacy_intr = em_intr
/* Shared-context object declared here; it is not referenced in the visible code. */
75 extern if_shared_ctx_t em_sctx;
77 /**********************************************************************
79 * Setup work for hardware segmentation offload (TSO) on
80 * adapters using advanced tx descriptors
82 **********************************************************************/
/*
 * igb_tso_setup - fill an advanced TX context descriptor for a TSO packet.
 *
 * txr:           transmit ring; the descriptor written is
 *                txr->tx_base[pi->ipi_pidx].
 * pi:            packet info (ethertype, header lengths, VLAN tag, segment
 *                size).
 * cmd_type_len:  in/out; E1000_ADVTXD_DCMD_TSE is OR'ed in.
 * olinfo_status: in/out; checksum-insertion option bits and the payload
 *                length are OR'ed in.
 *
 * NOTE(review): the embedded line numbers skip, so this listing is missing
 * lines (return type, braces, case labels, the declaration of paylen and the
 * return statement). The return value is presumably the number of
 * descriptors consumed, because igb_tx_ctx_setup() returns it unchanged and
 * igb_isc_txd_encap() adds that to its ring index - confirm against the
 * full source.
 */
84 igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status)
86 struct e1000_adv_tx_context_desc *TXD;
87 struct adapter *adapter = txr->adapter;
88 u32 type_tucmd_mlhl = 0, vlan_macip_lens = 0;
89 u32 mss_l4len_idx = 0;
/*
 * Pick the IP-version flag from the ethertype. The case labels are not
 * present in this listing; an unsupported ethertype ends in the panic below.
 */
92 switch(pi->ipi_etype) {
94 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
97 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
98 /* Tell transmit desc to also do IPv4 checksum. */
99 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
102 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
103 __func__, ntohs(pi->ipi_etype));
/* The context descriptor is written into the ring slot at pi->ipi_pidx. */
107 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx];
109 /* This is used in the transmit desc in encap */
/* Payload = total length minus the Ethernet, IP and TCP header lengths. */
110 paylen = pi->ipi_len - pi->ipi_ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen;
112 /* VLAN MACLEN IPLEN */
113 if (pi->ipi_mflags & M_VLANTAG) {
114 vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT);
117 vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
118 vlan_macip_lens |= pi->ipi_ip_hlen;
119 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
121 /* ADV DTYPE TUCMD */
122 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
123 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
124 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
/* Pack the TSO segment size and the TCP header length into one field. */
127 mss_l4len_idx |= (pi->ipi_tso_segsz << E1000_ADVTXD_MSS_SHIFT);
128 mss_l4len_idx |= (pi->ipi_tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
129 /* 82575 needs the queue index added */
130 if (adapter->hw.mac.type == e1000_82575)
131 mss_l4len_idx |= txr->me << 4;
132 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
134 TXD->seqnum_seed = htole32(0);
/* Report back to the caller: enable TSE, request checksum insertion, set payload length. */
135 *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
136 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
137 *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
142 /*********************************************************************
144 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
146 **********************************************************************/
/*
 * igb_tx_ctx_setup - prepare the advanced TX context descriptor for a packet.
 *
 * txr:           transmit ring; the descriptor written is
 *                txr->tx_base[pi->ipi_pidx].
 * pi:            packet info (checksum flags, VLAN tag, ethertype, IP
 *                protocol, header lengths).
 * cmd_type_len:  in/out; only passed through to igb_tso_setup() in the
 *                visible code.
 * olinfo_status: in/out; the payload length and, for TCP/UDP/SCTP checksum
 *                offload, E1000_TXD_POPTS_TXSM << 8 are OR'ed in.
 *
 * When CSUM_TSO is set in pi->ipi_csum_flags the work is delegated to
 * igb_tso_setup() and its result is returned.
 *
 * NOTE(review): the embedded line numbers skip, so this listing is missing
 * lines (return type, braces, case labels, the declaration of mss_l4len_idx,
 * some branch bodies and the final return). The return value is presumably
 * the number of descriptors consumed, because igb_isc_txd_encap() adds it to
 * its ring index - confirm against the full source.
 */
148 igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status)
150 struct e1000_adv_tx_context_desc *TXD;
151 struct adapter *adapter = txr->adapter;
152 u32 vlan_macip_lens, type_tucmd_mlhl;
154 mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0;
156 /* First check if TSO is to be used */
157 if (pi->ipi_csum_flags & CSUM_TSO)
158 return (igb_tso_setup(txr, pi, cmd_type_len, olinfo_status));
160 /* Indicate the whole packet as payload when not doing TSO */
161 *olinfo_status |= pi->ipi_len << E1000_ADVTXD_PAYLEN_SHIFT;
163 /* Now ready a context descriptor */
164 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx];
167 ** In advanced descriptors the vlan tag must
168 ** be placed into the context descriptor. Hence
169 ** we need to make one even if not doing offloads.
171 if (pi->ipi_mflags & M_VLANTAG) {
172 vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT);
/*
 * No VLAN tag and no IGB_CSUM_OFFLOAD flag set.
 * NOTE(review): the body of this branch is missing from this listing.
 */
173 } else if ((pi->ipi_csum_flags & IGB_CSUM_OFFLOAD) == 0) {
177 /* Set the ether header length */
178 vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
/* Select the IP-version flag from the ethertype (case labels missing from this listing). */
180 switch(pi->ipi_etype) {
182 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
185 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
191 vlan_macip_lens |= pi->ipi_ip_hlen;
192 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
/*
 * Per L4 protocol: when the matching checksum flag is requested, set the L4
 * type and ask for checksum insertion (case labels missing from this listing).
 */
194 switch (pi->ipi_ipproto) {
196 if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) {
197 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
198 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
202 if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) {
203 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
204 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
208 if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) {
209 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
210 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
217 /* 82575 needs the queue index added */
218 if (adapter->hw.mac.type == e1000_82575)
219 mss_l4len_idx = txr->me << 4;
221 /* Now copy bits into descriptor */
222 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
223 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
224 TXD->seqnum_seed = htole32(0);
225 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
/*
 * igb_isc_txd_encap - write the transmit descriptors for one packet.
 *
 * arg: the driver softc (struct adapter).
 * pi:  packet info; supplies the queue index, DMA segment list and flags,
 *      and receives the updated ring index in pi->ipi_new_pidx.
 *
 * igb_tx_ctx_setup() is called first and its return value advances the ring
 * index. One advanced data descriptor is then filled per DMA segment, and
 * the last descriptor written gets EOP (plus RS when IPI_TX_INTR is set in
 * pi->ipi_flags).
 *
 * NOTE(review): the embedded line numbers skip, so this listing is missing
 * lines (return type, braces, declarations and initialisation of i, j,
 * ntxd, pidx_last, seglen and segaddr, ring wrap-around bodies, the guard
 * around the tx_rsq update and the return statement).
 */
231 igb_isc_txd_encap(void *arg, if_pkt_info_t pi)
233 struct adapter *sc = arg;
234 if_softc_ctx_t scctx = sc->shared;
235 struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
236 struct tx_ring *txr = &que->txr;
237 int nsegs = pi->ipi_nsegs;
238 bus_dma_segment_t *segs = pi->ipi_segs;
239 union e1000_adv_tx_desc *txd = NULL;
241 u32 olinfo_status, cmd_type_len, txd_flags;
244 pidx_last = olinfo_status = 0;
245 /* Basic descriptor defines */
246 cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
247 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
249 if (pi->ipi_mflags & M_VLANTAG)
250 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
253 ntxd = scctx->isc_ntxd[0];
/* Request a status report (RS) only when the caller asked for a TX interrupt. */
254 txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_ADVTXD_DCMD_RS : 0;
255 /* Consume the first descriptor */
256 i += igb_tx_ctx_setup(txr, pi, &cmd_type_len, &olinfo_status);
/* Ring wrap check; the statement it guards is missing from this listing. */
257 if (i == scctx->isc_ntxd[0])
260 /* 82575 needs the queue index added */
261 if (sc->hw.mac.type == e1000_82575)
262 olinfo_status |= txr->me << 4;
/* One data descriptor per DMA segment. */
264 for (j = 0; j < nsegs; j++) {
268 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
269 seglen = segs[j].ds_len;
270 segaddr = htole64(segs[j].ds_addr);
272 txd->read.buffer_addr = segaddr;
273 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
274 cmd_type_len | seglen);
275 txd->read.olinfo_status = htole32(olinfo_status);
/* Ring wrap check; the statement it guards is missing from this listing. */
277 if (++i == scctx->isc_ntxd[0]) {
/*
 * Record pidx_last in the report-status queue and advance its producer
 * index using the mask (ntxd-1); this assumes ntxd is a power of two -
 * TODO confirm. The condition guarding these lines is missing from this
 * listing.
 */
282 txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
283 txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1);
284 MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
/* Mark the last descriptor written as end-of-packet, adding RS if requested. */
287 txd->read.cmd_type_len |= htole32(E1000_TXD_CMD_EOP | txd_flags);
288 pi->ipi_new_pidx = i;
/*
 * igb_isc_txd_flush - write pidx to the TDT register of a transmit ring.
 *
 * arg:   the driver softc (struct adapter).
 * txqid: index into adapter->tx_queues.
 * pidx:  value written to E1000_TDT(txr->me).
 */
294 igb_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
296 struct adapter *adapter = arg;
297 struct em_tx_queue *que = &adapter->tx_queues[txqid];
298 struct tx_ring *txr = &que->txr;
300 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx);
/*
 * igb_isc_txd_credits_update - check for completed transmit descriptors.
 *
 * arg:   the driver softc (struct adapter).
 * txqid: index into adapter->tx_queues.
 * clear: per the existing comment below, when false the caller is only told
 *        that there are descriptors to reclaim.
 *
 * Walks the report-status queue (txr->tx_rsq) starting at txr->tx_rs_cidx,
 * testing the DD bit in the write-back status of each recorded descriptor,
 * then stores the new consumer position in txr->tx_rs_cidx and
 * txr->tx_cidx_processed.
 *
 * NOTE(review): the embedded line numbers skip, so this listing is missing
 * lines (return type, braces, declarations of status, updated and delta,
 * early returns, the loop head, the handling of delta and the final return).
 * The accumulator `processed` suggests the return value is a descriptor
 * count - confirm against the full source.
 */
304 igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
306 struct adapter *adapter = arg;
307 if_softc_ctx_t scctx = adapter->shared;
308 struct em_tx_queue *que = &adapter->tx_queues[txqid];
309 struct tx_ring *txr = &que->txr;
311 qidx_t processed = 0;
313 qidx_t cur, prev, ntxd, rs_cidx;
317 rs_cidx = txr->tx_rs_cidx;
/* Queue empty: consumer has caught up with producer (guarded statement missing from listing). */
318 if (rs_cidx == txr->tx_rs_pidx)
/* Look at the descriptor recorded at the head of the report-status queue. */
320 cur = txr->tx_rsq[rs_cidx];
321 status = ((union e1000_adv_tx_desc *)&txr->tx_base[cur])->wb.status;
322 updated = !!(status & E1000_TXD_STAT_DD);
327 /* If clear is false just let caller know that there
328 * are descriptors to reclaim */
332 prev = txr->tx_cidx_processed;
333 ntxd = scctx->isc_ntxd[0];
/* Signed distance from the last processed index to the completed one. */
336 delta = (int32_t)cur - (int32_t)prev;
/* Advance through the report-status queue while descriptors report DD. */
343 rs_cidx = (rs_cidx + 1) & (ntxd-1);
344 if (rs_cidx == txr->tx_rs_pidx)
346 cur = txr->tx_rsq[rs_cidx];
347 status = ((union e1000_adv_tx_desc *)&txr->tx_base[cur])->wb.status;
348 } while ((status & E1000_TXD_STAT_DD));
/* Persist the new consumer state for the next call. */
350 txr->tx_rs_cidx = rs_cidx;
351 txr->tx_cidx_processed = prev;
/*
 * igb_isc_rxd_refill - load buffer addresses into receive descriptors.
 *
 * arg: the driver softc (struct adapter).
 * iru: refill request; supplies the queue index (iru_qsidx), the address
 *      array (iru_paddrs), the starting ring index (iru_pidx) and the number
 *      of entries (iru_count).
 *
 * For each of the iru_count entries, the little-endian address is stored in
 * read.pkt_addr of the next descriptor, starting at iru_pidx.
 *
 * NOTE(review): the embedded line numbers skip, so this listing is missing
 * lines (return type, braces, declarations of paddrs, count and i, and the
 * statement guarded by the ring wrap check).
 */
356 igb_isc_rxd_refill(void *arg, if_rxd_update_t iru)
358 struct adapter *sc = arg;
359 if_softc_ctx_t scctx = sc->shared;
360 uint16_t rxqid = iru->iru_qsidx;
361 struct em_rx_queue *que = &sc->rx_queues[rxqid];
362 union e1000_adv_rx_desc *rxd;
363 struct rx_ring *rxr = &que->rxr;
365 uint32_t next_pidx, pidx;
369 paddrs = iru->iru_paddrs;
370 pidx = iru->iru_pidx;
371 count = iru->iru_count;
373 for (i = 0, next_pidx = pidx; i < count; i++) {
374 rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[next_pidx];
376 rxd->read.pkt_addr = htole64(paddrs[i]);
/* Ring wrap check against the ring size isc_nrxd[0]. */
377 if (++next_pidx == scctx->isc_nrxd[0])
/*
 * igb_isc_rxd_flush - write pidx to the RDT register of a receive ring.
 *
 * arg:   the driver softc (struct adapter).
 * rxqid: index into sc->rx_queues.
 * flid:  unused.
 * pidx:  value written to E1000_RDT(rxr->me).
 */
383 igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
385 struct adapter *sc = arg;
386 struct em_rx_queue *que = &sc->rx_queues[rxqid];
387 struct rx_ring *rxr = &que->rxr;
389 E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
/*
 * igb_isc_rxd_available - scan a receive ring for completed descriptors.
 *
 * arg:    the driver softc (struct adapter).
 * rxqid:  index into sc->rx_queues.
 * idx:    ring index at which the scan starts.
 * budget: upper bound used in the loop condition (cnt <= budget).
 *
 * Starting at idx, reads each descriptor's write-back status_error word and
 * tests the DD and EOP bits.
 *
 * NOTE(review): the embedded line numbers skip, so this listing is missing
 * lines (return type, braces, declarations of staterr, cnt and i, the
 * statements guarded by the three if-tests and the return statement).
 * Presumably the scan stops at the first descriptor without DD and cnt is
 * incremented on EOP - confirm against the full source.
 */
393 igb_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
395 struct adapter *sc = arg;
396 if_softc_ctx_t scctx = sc->shared;
397 struct em_rx_queue *que = &sc->rx_queues[rxqid];
398 struct rx_ring *rxr = &que->rxr;
399 union e1000_adv_rx_desc *rxd;
403 for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
404 rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[i];
405 staterr = le32toh(rxd->wb.upper.status_error);
/* Descriptor not yet written back by hardware. */
407 if ((staterr & E1000_RXD_STAT_DD) == 0)
/* Ring wrap check against the ring size isc_nrxd[0]. */
409 if (++i == scctx->isc_nrxd[0])
/* This descriptor ends a packet. */
411 if (staterr & E1000_RXD_STAT_EOP)
417 /****************************************************************
418 * Routine sends data which has been dma'ed into host memory
419 * to upper layer. Initialize ri structure.
421 * Returns 0 upon success, errno on failure
422 ***************************************************************/
/*
 * igb_isc_rxd_pkt_get - describe one received packet to the caller via ri.
 *
 * arg: the driver softc (struct adapter).
 * ri:  receive info; supplies the queue index (iri_qsidx) and starting ring
 *      index (iri_cidx), and is filled with the fragment list, checksum
 *      flags, VLAN flag and RSS type.
 *
 * For each descriptor the write-back status, length, packet type and VLAN
 * tag are read, the descriptor status is cleared and a fragment entry is
 * recorded. Checksum and VLAN results are reported only when the matching
 * interface capability is enabled.
 *
 * NOTE(review): the embedded line numbers skip, so this listing is missing
 * lines (return type, braces, several declarations, the loop construct, the
 * update of ri->iri_len, the else keyword between the two vlan reads, the
 * error return, the left-hand side of the RSS hash assignment and the final
 * return).
 */
425 igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
427 struct adapter *adapter = arg;
428 if_softc_ctx_t scctx = adapter->shared;
429 struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
430 struct rx_ring *rxr = &que->rxr;
431 struct ifnet *ifp = iflib_get_ifp(adapter->ctx);
432 union e1000_adv_rx_desc *rxd;
440 int cidx = ri->iri_cidx;
443 rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[cidx];
444 staterr = le32toh(rxd->wb.upper.status_error);
445 pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info);
/* The descriptor is expected to have DD set already. */
447 MPASS ((staterr & E1000_RXD_STAT_DD) != 0);
449 len = le16toh(rxd->wb.upper.length);
450 ptype = le32toh(rxd->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
453 rxr->rx_bytes += ri->iri_len;
/* Clear the status word in the descriptor after it has been read. */
455 rxd->wb.upper.status_error = 0;
456 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
/* On i350/i354 with the LB status bit set, the VLAN field is read as big-endian. */
458 if (((adapter->hw.mac.type == e1000_i350) ||
459 (adapter->hw.mac.type == e1000_i354)) &&
460 (staterr & E1000_RXDEXT_STATERR_LB))
461 vtag = be16toh(rxd->wb.upper.vlan);
463 vtag = le16toh(rxd->wb.upper.vlan);
465 /* Make sure bad packets are discarded */
466 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
467 adapter->dropped_pkts++;
/* Record this descriptor as fragment i of the packet (free list 0). */
471 ri->iri_frags[i].irf_flid = 0;
472 ri->iri_frags[i].irf_idx = cidx;
473 ri->iri_frags[i].irf_len = len;
/* Ring wrap check against the ring size isc_nrxd[0]. */
475 if (++cidx == scctx->isc_nrxd[0])
/*
 * Header-split handling, using free list 1.
 * NOTE(review): this may sit inside a preprocessor conditional that is not
 * visible in this listing - confirm.
 */
478 if (rxr->hdr_split == TRUE) {
479 ri->iri_frags[i].irf_flid = 1;
480 ri->iri_frags[i].irf_idx = cidx;
481 if (++cidx == scctx->isc_nrxd[0])
490 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
491 igb_rx_checksum(staterr, ri, ptype);
493 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
494 (staterr & E1000_RXD_STAT_VP) != 0) {
496 ri->iri_flags |= M_VLANTAG;
/* NOTE(review): the left-hand side of this expression is on a line missing from this listing. */
499 le32toh(rxd->wb.lower.hi_dword.rss);
500 ri->iri_rsstype = igb_determine_rsstype(pkt_info);
506 /*********************************************************************
508 * Verify that the hardware indicated that the checksum is valid.
509 * Inform the stack about the status of checksum so that stack
510 * doesn't spend time verifying the checksum.
512 *********************************************************************/
/*
 * igb_rx_checksum - translate descriptor checksum status into ri fields.
 *
 * staterr: descriptor status/error word; the low 16 bits are used as status
 *          and bits 24-31 as the error byte.
 * ri:      receive info; iri_csum_flags and iri_csum_data are written.
 * ptype:   packet type bits, used to detect SCTP.
 *
 * NOTE(review): the embedded line numbers skip, so this listing is missing
 * lines (return type, braces, the declaration and assignment of sctp, the
 * return after the IXSM check and the else keywords).
 */
514 igb_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype)
516 u16 status = (u16)staterr;
517 u8 errors = (u8) (staterr >> 24);
520 /* Ignore Checksum bit is set */
521 if (status & E1000_RXD_STAT_IXSM) {
522 ri->iri_csum_flags = 0;
/* Non-ETQF packet whose type has the SCTP bit set (guarded statement missing from listing). */
526 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
527 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
/* Hardware evaluated the IP header checksum. */
532 if (status & E1000_RXD_STAT_IPCS) {
534 if (!(errors & E1000_RXD_ERR_IPE)) {
535 /* IP Checksum Good */
536 ri->iri_csum_flags = CSUM_IP_CHECKED;
537 ri->iri_csum_flags |= CSUM_IP_VALID;
539 ri->iri_csum_flags = 0;
/* Hardware evaluated the L4 checksum; SCTP uses its own validity flag. */
542 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
543 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
544 if (sctp) /* reassign */
545 type = CSUM_SCTP_VALID;
547 if (!(errors & E1000_RXD_ERR_TCPE)) {
548 ri->iri_csum_flags |= type;
550 ri->iri_csum_data = htons(0xffff);
556 /********************************************************************
558 * Parse the packet type to determine the appropriate hash
560 ******************************************************************/
/*
 * igb_determine_rsstype - map the RSS type bits of pkt_info to an
 * M_HASHTYPE_* value.
 *
 * pkt_info: descriptor packet-info word; only the bits selected by
 *           E1000_RXDADV_RSSTYPE_MASK are examined.
 *
 * Returns the matching M_HASHTYPE_RSS_* constant for the six listed types.
 * NOTE(review): the label preceding the final return is missing from this
 * listing; it is presumably `default:`, making M_HASHTYPE_OPAQUE the
 * fallback for every other value - confirm.
 */
562 igb_determine_rsstype(u16 pkt_info)
564 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
565 case E1000_RXDADV_RSSTYPE_IPV4_TCP:
566 return M_HASHTYPE_RSS_TCP_IPV4;
567 case E1000_RXDADV_RSSTYPE_IPV4:
568 return M_HASHTYPE_RSS_IPV4;
569 case E1000_RXDADV_RSSTYPE_IPV6_TCP:
570 return M_HASHTYPE_RSS_TCP_IPV6;
571 case E1000_RXDADV_RSSTYPE_IPV6_EX:
572 return M_HASHTYPE_RSS_IPV6_EX;
573 case E1000_RXDADV_RSSTYPE_IPV6:
574 return M_HASHTYPE_RSS_IPV6;
575 case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
576 return M_HASHTYPE_RSS_TCP_IPV6_EX;
578 return M_HASHTYPE_OPAQUE;