2 * Copyright (c) 2016 Matt Macy <mmacy@nextbsd.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <net/rss_config.h>
32 #include <netinet/in_rss.h>
/* Debug print helper: DPRINTF is defined as an alias for device_printf. */
36 #define DPRINTF device_printf
41 /*********************************************************************
42 * Local Function prototypes
43 *********************************************************************/
/* Transmit-side handlers (defined later in this file). */
44 static int igb_isc_txd_encap(void *arg, if_pkt_info_t pi);
45 static void igb_isc_txd_flush(void *arg, uint16_t txqid, uint32_t pidx);
46 static int igb_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx, bool clear);
/* Receive-side handlers (defined later in this file). */
48 static void igb_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused,
49 uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buf_len __unused);
50 static void igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx);
/*
 * NOTE(review): the continuation line of the next prototype is not visible
 * here; the definition further down ends its parameter list with int budget.
 */
51 static int igb_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx,
53 static int igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
/* Helpers that fill the advanced TX context descriptor. */
55 static int igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status);
56 static int igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status);
/* Helpers called from igb_isc_rxd_pkt_get while filling the RX packet info. */
58 static void igb_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype);
59 static int igb_determine_rsstype(u16 pkt_info);
/* Symbols declared here but not defined in the visible part of this file. */
61 extern void igb_if_enable_intr(if_ctx_t ctx);
62 extern int em_intr(void *arg);
/*
 * Handler table of type struct if_txrx for this driver.
 * NOTE(review): only two initializers are visible and the end of the
 * initializer list is not; the remaining entries appear to be missing from
 * this view and must be restored before the table can be used.
 */
64 struct if_txrx igb_txrx = {
67 igb_isc_txd_credits_update,
68 igb_isc_rxd_available,
/* Shared context object defined outside the visible part of this file. */
75 extern if_shared_ctx_t em_sctx;
77 /**********************************************************************
79 * Setup work for hardware segmentation offload (TSO) on
80 * adapters using advanced tx descriptors
82 **********************************************************************/
/*
 * igb_tso_setup - fill the advanced TX context descriptor for a TSO packet.
 *
 * txr:           TX ring; the slot at tx_base[pi->ipi_pidx] is written as a
 *                struct e1000_adv_tx_context_desc.
 * pi:            packet info supplying the ethertype, header lengths, VLAN
 *                tag and TSO segment size.
 * cmd_type_len:  in/out; E1000_ADVTXD_DCMD_TSE is OR-ed in.
 * olinfo_status: in/out; checksum option bits and the TCP payload length
 *                are OR-ed in.
 *
 * NOTE(review): every line carries a stray leading number and the numbering
 * has gaps, so the return type, braces, case labels, the declaration of
 * paylen and the return statement are not visible here. Restore them before
 * building.
 */
84 igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status)
86 struct e1000_adv_tx_context_desc *TXD;
87 struct adapter *adapter = txr->adapter;
88 u32 type_tucmd_mlhl = 0, vlan_macip_lens = 0;
89 u32 mss_l4len_idx = 0;
/*
 * Pick the IP version bit from the ethertype. The visible lines set either
 * the IPV6 or the IPV4 bit (the latter together with the IXSM option) and
 * panic otherwise; the case labels themselves are not visible.
 */
92 switch(pi->ipi_etype) {
94 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
97 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
98 /* Tell transmit desc to also do IPv4 checksum. */
99 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
102 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
103 __func__, ntohs(pi->ipi_etype));
/* The context descriptor occupies the ring slot at the producer index. */
107 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx];
109 /* This is used in the transmit desc in encap */
/* Payload = total length minus Ethernet, IP and TCP header lengths. */
110 paylen = pi->ipi_len - pi->ipi_ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen;
112 /* VLAN MACLEN IPLEN */
113 if (pi->ipi_mflags & M_VLANTAG) {
114 vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT);
117 vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
118 vlan_macip_lens |= pi->ipi_ip_hlen;
119 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
121 /* ADV DTYPE TUCMD */
122 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
123 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
124 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
/* Segment size and TCP header length go into the same descriptor word. */
127 mss_l4len_idx |= (pi->ipi_tso_segsz << E1000_ADVTXD_MSS_SHIFT);
128 mss_l4len_idx |= (pi->ipi_tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
129 /* 82575 needs the queue index added */
130 if (adapter->hw.mac.type == e1000_82575)
131 mss_l4len_idx |= txr->me << 4;
132 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
134 TXD->seqnum_seed = htole32(0);
/* Values handed back to the caller for the data descriptors. */
135 *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
136 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
137 *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
142 /*********************************************************************
144 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
146 **********************************************************************/
/*
 * igb_tx_ctx_setup - prepare the advanced TX context descriptor for a packet.
 *
 * When pi->ipi_csum_flags has CSUM_TSO set the work is delegated to
 * igb_tso_setup and its result is returned. Otherwise the whole packet
 * length is recorded as payload in *olinfo_status and the context descriptor
 * at tx_base[pi->ipi_pidx] is filled with the VLAN tag, header lengths and
 * L3/L4 type bits.
 *
 * txr:           TX ring owning the descriptor.
 * pi:            packet info (flags, ethertype, protocol, header lengths).
 * cmd_type_len:  passed through to igb_tso_setup; not otherwise touched in
 *                the visible lines.
 * olinfo_status: in/out; payload length and, when offload is set, the TXSM
 *                option are OR-ed in.
 *
 * NOTE(review): every line carries a stray leading number and the numbering
 * has gaps. The return type, braces, case labels, the declarations of
 * mss_l4len_idx and offload, the code that sets offload, the preprocessor
 * else/endif lines and the final return are not visible here.
 */
148 igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status)
150 struct e1000_adv_tx_context_desc *TXD;
151 struct adapter *adapter = txr->adapter;
152 u32 vlan_macip_lens, type_tucmd_mlhl;
154 mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0;
157 /* First check if TSO is to be used */
158 if (pi->ipi_csum_flags & CSUM_TSO)
159 return (igb_tso_setup(txr, pi, cmd_type_len, olinfo_status));
161 /* Indicate the whole packet as payload when not doing TSO */
162 *olinfo_status |= pi->ipi_len << E1000_ADVTXD_PAYLEN_SHIFT;
164 /* Now ready a context descriptor */
165 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx];
/*
 * NOTE(review): the next three lines read like the inside of a block comment
 * whose opening and closing delimiters are missing from this view.
 */
168 ** In advanced descriptors the vlan tag must
169 ** be placed into the context descriptor. Hence
170 ** we need to make one even if not doing offloads.
/* NOTE(review): the body of the else-if branch below is not visible. */
172 if (pi->ipi_mflags & M_VLANTAG) {
173 vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT);
174 } else if ((pi->ipi_csum_flags & IGB_CSUM_OFFLOAD) == 0) {
178 /* Set the ether header length */
179 vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
/* L3 type bit selected by ethertype. */
181 switch(pi->ipi_etype) {
183 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
186 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
193 vlan_macip_lens |= pi->ipi_ip_hlen;
194 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
/*
 * L4 type bit selected by IP protocol. Each protocol tests one of two flag
 * sets depending on __FreeBSD_version.
 */
196 switch (pi->ipi_ipproto) {
198 #if __FreeBSD_version >= 1000000
199 if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
201 if (pi->ipi_csum_flags & CSUM_TCP)
203 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
206 #if __FreeBSD_version >= 1000000
207 if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
209 if (pi->ipi_csum_flags & CSUM_UDP)
211 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
214 #if __FreeBSD_version >= 800000
216 #if __FreeBSD_version >= 1000000
217 if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
219 if (pi->ipi_csum_flags & CSUM_SCTP)
221 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
229 if (offload) /* For the TX descriptor setup */
230 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
232 /* 82575 needs the queue index added */
233 if (adapter->hw.mac.type == e1000_82575)
234 mss_l4len_idx = txr->me << 4;
236 /* Now copy bits into descriptor */
237 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
238 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
239 TXD->seqnum_seed = htole32(0);
240 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
/*
 * igb_isc_txd_encap - write the TX descriptors for one packet.
 *
 * arg: the struct adapter for the device.
 * pi:  packet info; supplies the queue index, starting producer index, DMA
 *      segment list and flags, and receives the new producer index in
 *      pi->ipi_new_pidx.
 *
 * The visible lines call igb_tx_ctx_setup and advance the index by its
 * return value, write one advanced data descriptor per DMA segment, OR
 * EOP and RS into a descriptor command word, and store pidx_last in the eop
 * field of the buffer at the first index.
 *
 * NOTE(review): every line carries a stray leading number and the numbering
 * has gaps. The return type, braces, the declarations of seglen and segaddr,
 * the index wrap assignments, the assignment of pidx_last inside the loop and
 * the return statement are not visible here.
 */
246 igb_isc_txd_encap(void *arg, if_pkt_info_t pi)
248 struct adapter *sc = arg;
249 if_softc_ctx_t scctx = sc->shared;
250 struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
251 struct tx_ring *txr = &que->txr;
252 int nsegs = pi->ipi_nsegs;
253 bus_dma_segment_t *segs = pi->ipi_segs;
254 struct em_txbuffer *txbuf;
255 union e1000_adv_tx_desc *txd = NULL;
257 int i, j, first, pidx_last;
258 u32 olinfo_status, cmd_type_len;
260 pidx_last = olinfo_status = 0;
261 /* Basic descriptor defines */
262 cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
263 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
265 if (pi->ipi_mflags & M_VLANTAG)
266 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
268 first = i = pi->ipi_pidx;
270 /* Consume the first descriptor */
271 i += igb_tx_ctx_setup(txr, pi, &cmd_type_len, &olinfo_status);
/* Index reached the ring size; the wrap statement itself is not visible. */
272 if (i == scctx->isc_ntxd[0])
275 /* 82575 needs the queue index added */
276 if (sc->hw.mac.type == e1000_82575)
277 olinfo_status |= txr->me << 4;
/* One data descriptor per DMA segment. */
279 for (j = 0; j < nsegs; j++) {
283 txbuf = &txr->tx_buffers[i];
284 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
285 seglen = segs[j].ds_len;
286 segaddr = htole64(segs[j].ds_addr);
288 txd->read.buffer_addr = segaddr;
/*
 * NOTE(review): E1000_TXD_CMD_IFCS is OR-ed in here although cmd_type_len
 * already carries E1000_ADVTXD_DCMD_IFCS from the initialisation above.
 */
289 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
290 cmd_type_len | seglen);
291 txd->read.olinfo_status = htole32(olinfo_status);
293 if (++i == scctx->isc_ntxd[0]) {
/* txd still points at the most recently written data descriptor. */
298 txd->read.cmd_type_len |=
299 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
301 /* Set the EOP descriptor that will be marked done */
302 txbuf = &txr->tx_buffers[first];
303 txbuf->eop = pidx_last;
/* Report the next free producer index back through the packet info. */
305 pi->ipi_new_pidx = i;
/*
 * igb_isc_txd_flush - write pidx to the E1000_TDT register of a TX ring.
 *
 * arg:   the struct adapter for the device.
 * txqid: index into adapter->tx_queues selecting the ring.
 * pidx:  value written to the register.
 *
 * NOTE(review): the return type (static void per the prototype) and the
 * braces are not visible here; each line also carries a stray leading number.
 */
311 igb_isc_txd_flush(void *arg, uint16_t txqid, uint32_t pidx)
313 struct adapter *adapter = arg;
314 struct em_tx_queue *que = &adapter->tx_queues[txqid];
315 struct tx_ring *txr = &que->txr;
/* The register is selected by the ring number txr->me. */
317 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx);
/*
 * igb_isc_txd_credits_update - walk a TX ring looking for completed packets.
 *
 * arg:       the struct adapter for the device.
 * txqid:     index into adapter->tx_queues selecting the ring.
 * cidx_init: ring index at which the walk starts; the loop also stops when
 *            the index comes back round to this value. The forward
 *            declaration names this parameter cidx.
 * clear:     not referenced in any visible line.
 *
 * In the visible lines a buffer whose eop field is -1 means no work. For
 * any other value the descriptor at that index is checked for
 * E1000_TXD_STAT_DD; once it is set, eop is reset to -1 and the walk moves
 * on, wrapping at scctx->isc_ntxd[0]. The loop is also bounded by
 * adapter->tx_process_limit.
 *
 * NOTE(review): every line carries a stray leading number and the numbering
 * has gaps. The return type, braces, the declaration of limit, the initial
 * assignment of cidx, the updates of processed, the inner range-cleaning loop
 * header and the return statements are not visible here, so the exact return
 * value cannot be stated from this view.
 */
321 igb_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx_init, bool clear)
323 struct adapter *adapter = arg;
324 if_softc_ctx_t scctx = adapter->shared;
325 struct em_tx_queue *que = &adapter->tx_queues[txqid];
326 struct tx_ring *txr = &que->txr;
328 u32 cidx, ntxd, processed = 0;
330 struct em_txbuffer *buf;
331 union e1000_adv_tx_desc *txd, *eop;
336 buf = &txr->tx_buffers[cidx];
337 txd = (union e1000_adv_tx_desc *)&txr->tx_base[cidx];
338 ntxd = scctx->isc_ntxd[0];
339 limit = adapter->tx_process_limit;
342 if (buf->eop == -1) /* No work */
/* buf->eop holds the ring index of the last descriptor of the packet. */
345 eop = (union e1000_adv_tx_desc *)&txr->tx_base[buf->eop];
346 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
347 break; /* I/O not complete */
350 buf->eop = -1; /* clear indicate processed */
352 /* We clean the range if multi segment */
/* Advance with wrap; buf and txd are re-pointed at the new index. */
357 if (++cidx == scctx->isc_ntxd[0]) {
359 buf = txr->tx_buffers;
360 txd = (union e1000_adv_tx_desc *)txr->tx_base;
363 buf = &txr->tx_buffers[cidx];
370 /* Try the next packet */
374 /* reset with a wrap */
375 if (++cidx == scctx->isc_ntxd[0]) {
377 buf = txr->tx_buffers;
378 txd = (union e1000_adv_tx_desc *)txr->tx_base;
/* Stop when the limit is used up or the walk is back at its start index. */
382 } while (__predict_true(--limit) && cidx != cidx_init);
/*
 * igb_isc_rxd_refill - store buffer addresses into RX descriptors.
 *
 * arg:    the struct adapter for the device.
 * rxqid:  index into sc->rx_queues selecting the ring.
 * pidx:   ring index of the first descriptor to fill.
 * paddrs: array of count addresses; paddrs[i] is written little-endian into
 *         the pkt_addr field of the i-th descriptor filled.
 * count:  number of descriptors to fill.
 * flid, vaddrs and buf_len are marked __unused.
 *
 * NOTE(review): the return type (static void per the prototype), braces, the
 * declarations of i and next_pidx and the statement executed when next_pidx
 * reaches scctx->isc_nrxd[0] are not visible here; each line also carries a
 * stray leading number.
 */
388 igb_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused,
389 uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused,
390 uint16_t count, uint16_t buf_len __unused)
392 struct adapter *sc = arg;
393 if_softc_ctx_t scctx = sc->shared;
394 struct em_rx_queue *que = &sc->rx_queues[rxqid];
395 union e1000_adv_rx_desc *rxd;
396 struct rx_ring *rxr = &que->rxr;
400 for (i = 0, next_pidx = pidx; i < count; i++) {
401 rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[next_pidx];
403 rxd->read.pkt_addr = htole64(paddrs[i]);
/* End of ring reached; presumably wraps to index 0 - TODO confirm. */
404 if (++next_pidx == scctx->isc_nrxd[0])
/*
 * igb_isc_rxd_flush - write pidx to the E1000_RDT register of an RX ring.
 *
 * arg:   the struct adapter for the device.
 * rxqid: index into sc->rx_queues selecting the ring.
 * flid:  marked __unused.
 * pidx:  value written to the register.
 *
 * NOTE(review): the return type (static void per the prototype) and the
 * braces are not visible here; each line also carries a stray leading number.
 */
410 igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx)
412 struct adapter *sc = arg;
413 struct em_rx_queue *que = &sc->rx_queues[rxqid];
414 struct rx_ring *rxr = &que->rxr;
/* The register is selected by the ring number rxr->me. */
416 E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
/*
 * igb_isc_rxd_available - scan an RX ring for completed descriptors.
 *
 * arg:    the struct adapter for the device.
 * rxqid:  index into sc->rx_queues selecting the ring.
 * idx:    ring index at which the scan starts.
 * budget: upper bound used in the loop condition (iter <= budget).
 *
 * The loop reads the status word of each descriptor, tests the DD bit,
 * advances the index and tests the EOP bit. Afterwards the RDT and RDH
 * registers are read and passed to DPRINTF together with the loop counters.
 *
 * NOTE(review): every line carries a stray leading number and the numbering
 * has gaps. The return type, braces, the declarations of staterr, cnt, i,
 * iter, rdt and rdh, the statements controlled by the three if tests (DD
 * clear, index wrap, EOP set), the increment of iter and the return
 * statement are not visible here. Presumably cnt counts EOP descriptors and
 * is returned - TODO confirm.
 */
420 igb_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, int budget)
422 struct adapter *sc = arg;
423 if_softc_ctx_t scctx = sc->shared;
424 struct em_rx_queue *que = &sc->rx_queues[rxqid];
425 struct rx_ring *rxr = &que->rxr;
426 union e1000_adv_rx_desc *rxd;
/* Bounded by both the ring size and the caller budget. */
430 for (iter = cnt = 0, i = idx; iter < scctx->isc_nrxd[0] && iter <= budget;) {
431 rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[i];
432 staterr = le32toh(rxd->wb.upper.status_error);
434 if ((staterr & E1000_RXD_STAT_DD) == 0)
437 if (++i == scctx->isc_nrxd[0]) {
441 if (staterr & E1000_RXD_STAT_EOP)
/* Debug reporting: both ring registers are read for the message below. */
446 struct e1000_hw *hw = &sc->hw;
448 rdt = E1000_READ_REG(hw, E1000_RDT(rxr->me));
449 rdh = E1000_READ_REG(hw, E1000_RDH(rxr->me));
450 DPRINTF(iflib_get_dev(sc->ctx), "sidx:%d eidx:%d iter=%d pktcnt=%d RDT=%d RDH=%d\n", idx, i, iter, cnt, rdt, rdh);
455 /****************************************************************
456 * Routine sends data which has been dma'ed into host memory
457 * to upper layer. Initialize ri structure.
459 * Returns 0 upon success, errno on failure
460 ***************************************************************/
/*
 * igb_isc_rxd_pkt_get - describe one received packet in the ri structure.
 *
 * arg: the struct adapter for the device.
 * ri:  in/out; iri_qsidx and iri_cidx select the ring and starting
 *      descriptor, and the visible lines fill iri_frags[], iri_flags and
 *      iri_rsstype.
 *
 * For each descriptor the status, packet info, length and packet type are
 * read, the status word is cleared, and a fragment entry is recorded. When
 * the EOP descriptor reports a frame error, adapter->dropped_pkts is
 * incremented. After the fragments, checksum and VLAN information are added
 * when the matching interface capabilities are enabled.
 *
 * NOTE(review): every line carries a stray leading number and the numbering
 * has gaps. The return type, braces, the loop construct, the declarations of
 * staterr, pkt_info, len, ptype, eop, vtag and i, any update of ri->iri_len,
 * the else keyword before the second vtag assignment, the index wrap
 * assignments, the error return and the final return are not visible here.
 */
463 igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
465 struct adapter *adapter = arg;
466 if_softc_ctx_t scctx = adapter->shared;
467 struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
468 struct rx_ring *rxr = &que->rxr;
469 struct ifnet *ifp = iflib_get_ifp(adapter->ctx);
470 union e1000_adv_rx_desc *rxd;
478 int cidx = ri->iri_cidx;
481 rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[cidx];
482 staterr = le32toh(rxd->wb.upper.status_error);
483 pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info);
/* The caller is expected to pass only descriptors that are done. */
485 MPASS ((staterr & E1000_RXD_STAT_DD) != 0);
487 len = le16toh(rxd->wb.upper.length);
488 ptype = le32toh(rxd->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
/*
 * NOTE(review): this adds ri->iri_len, not len, to the byte counter; confirm
 * that is intended once the missing lines are restored.
 */
491 rxr->rx_bytes += ri->iri_len;
493 rxd->wb.upper.status_error = 0;
494 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
/* On i350 and i354 with the LB status bit set the tag is read big-endian. */
496 if (((adapter->hw.mac.type == e1000_i350) ||
497 (adapter->hw.mac.type == e1000_i354)) &&
498 (staterr & E1000_RXDEXT_STATERR_LB))
499 vtag = be16toh(rxd->wb.upper.vlan);
501 vtag = le16toh(rxd->wb.upper.vlan);
503 /* Make sure bad packets are discarded */
504 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
505 adapter->dropped_pkts++;
/* Record this descriptor as fragment i on free list 0. */
509 ri->iri_frags[i].irf_flid = 0;
510 ri->iri_frags[i].irf_idx = cidx;
511 ri->iri_frags[i].irf_len = len;
513 if (++cidx == scctx->isc_nrxd[0])
/* With header split the same fragment entry is re-pointed at free list 1. */
516 if (rxr->hdr_split == TRUE) {
517 ri->iri_frags[i].irf_flid = 1;
518 ri->iri_frags[i].irf_idx = cidx;
519 if (++cidx == scctx->isc_nrxd[0])
528 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
529 igb_rx_checksum(staterr, ri, ptype);
531 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
532 (staterr & E1000_RXD_STAT_VP) != 0) {
534 ri->iri_flags |= M_VLANTAG;
/* NOTE(review): the left-hand side of the next expression is not visible. */
537 le32toh(rxd->wb.lower.hi_dword.rss);
538 ri->iri_rsstype = igb_determine_rsstype(pkt_info);
544 /*********************************************************************
546 * Verify that the hardware indicated that the checksum is valid.
547 * Inform the stack about the status of checksum so that stack
548 * doesn't spend time verifying the checksum.
550 *********************************************************************/
/*
 * igb_rx_checksum - translate hardware checksum status into ri fields.
 *
 * staterr: descriptor status/error word; the low 16 bits are used as status
 *          and bits 24-31 as errors.
 * ri:      receives iri_csum_flags and iri_csum_data.
 * ptype:   packet type bits, tested for the ETQF and SCTP markers.
 *
 * NOTE(review): every line carries a stray leading number and the numbering
 * has gaps. The return type (static void per the prototype), braces, the
 * declaration and assignment of sctp, the early return after the IXSM test,
 * the else keyword and the endif line are not visible here.
 */
552 igb_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype)
554 u16 status = (u16)staterr;
555 u8 errors = (u8) (staterr >> 24);
558 /* Ignore Checksum bit is set */
559 if (status & E1000_RXD_STAT_IXSM) {
560 ri->iri_csum_flags = 0;
/* Packet type is SCTP and not an ETQF match; the action is not visible. */
564 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
565 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
/* Hardware checked the IP header checksum. */
570 if (status & E1000_RXD_STAT_IPCS) {
572 if (!(errors & E1000_RXD_ERR_IPE)) {
573 /* IP Checksum Good */
574 ri->iri_csum_flags = CSUM_IP_CHECKED;
575 ri->iri_csum_flags |= CSUM_IP_VALID;
577 ri->iri_csum_flags = 0;
/* Hardware checked the TCP or UDP checksum. */
580 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
581 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
582 #if __FreeBSD_version >= 800000
583 if (sctp) /* reassign */
584 type = CSUM_SCTP_VALID;
/* Only report the L4 checksum as valid when no TCPE error is flagged. */
587 if (!(errors & E1000_RXD_ERR_TCPE)) {
588 ri->iri_csum_flags |= type;
590 ri->iri_csum_data = htons(0xffff);
596 /********************************************************************
598 * Parse the packet type to determine the appropriate hash
600 ******************************************************************/
/*
 * igb_determine_rsstype - map the RSS type field of pkt_info to a hash type.
 *
 * pkt_info: descriptor packet info word; it is masked with
 *           E1000_RXDADV_RSSTYPE_MASK before the comparison.
 *
 * Returns the M_HASHTYPE_RSS_* constant paired with the matching
 * E1000_RXDADV_RSSTYPE_* value below, and M_HASHTYPE_OPAQUE on the last
 * visible line.
 *
 * NOTE(review): the return type (static int per the prototype), the opening
 * brace, the label before the final return (presumably default - TODO
 * confirm) and the closing braces are not visible here; each line also
 * carries a stray leading number.
 */
602 igb_determine_rsstype(u16 pkt_info)
604 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
605 case E1000_RXDADV_RSSTYPE_IPV4_TCP:
606 return M_HASHTYPE_RSS_TCP_IPV4;
607 case E1000_RXDADV_RSSTYPE_IPV4:
608 return M_HASHTYPE_RSS_IPV4;
609 case E1000_RXDADV_RSSTYPE_IPV6_TCP:
610 return M_HASHTYPE_RSS_TCP_IPV6;
611 case E1000_RXDADV_RSSTYPE_IPV6_EX:
612 return M_HASHTYPE_RSS_IPV6_EX;
613 case E1000_RXDADV_RSSTYPE_IPV6:
614 return M_HASHTYPE_RSS_IPV6;
615 case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
616 return M_HASHTYPE_RSS_TCP_IPV6_EX;
/* No listed RSS type matched. */
618 return M_HASHTYPE_OPAQUE;