1 /******************************************************************************
3 Copyright (c) 2001-2010, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
36 #ifdef HAVE_KERNEL_OPTION_HEADERS
37 #include "opt_device_polling.h"
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #if __FreeBSD_version >= 800000
45 #include <sys/buf_ring.h>
48 #include <sys/endian.h>
49 #include <sys/kernel.h>
50 #include <sys/kthread.h>
51 #include <sys/malloc.h>
53 #include <sys/module.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/taskqueue.h>
59 #include <sys/eventhandler.h>
62 #include <machine/smp.h>
63 #include <machine/bus.h>
64 #include <machine/resource.h>
67 #include <net/ethernet.h>
69 #include <net/if_arp.h>
70 #include <net/if_dl.h>
71 #include <net/if_media.h>
73 #include <net/if_types.h>
74 #include <net/if_vlan_var.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 #include <netinet/ip.h>
80 #include <netinet/ip6.h>
81 #include <netinet/tcp.h>
82 #include <netinet/tcp_lro.h>
83 #include <netinet/udp.h>
85 #include <machine/in_cksum.h>
86 #include <dev/led/led.h>
87 #include <dev/pci/pcivar.h>
88 #include <dev/pci/pcireg.h>
90 #include "e1000_api.h"
91 #include "e1000_82575.h"
94 /*********************************************************************
95 * Set this to one to display debug statistics
96 *********************************************************************/
/* Non-zero requests extra debug statistics output (see igb_print_hw_stats). */
int igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/
char igb_driver_version[] = "version - 1.9.5";
105 /*********************************************************************
106 * PCI Device ID Table
108 * Used by probe to select devices to load on
109 * Last field stores an index into e1000_strings
110 * Last entry must be all 0s
112 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113 *********************************************************************/
115 static igb_vendor_info_t igb_vendor_info_array[] =
117 { 0x8086, E1000_DEV_ID_82575EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
118 { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119 PCI_ANY_ID, PCI_ANY_ID, 0},
120 { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121 PCI_ANY_ID, PCI_ANY_ID, 0},
122 { 0x8086, E1000_DEV_ID_82576, PCI_ANY_ID, PCI_ANY_ID, 0},
123 { 0x8086, E1000_DEV_ID_82576_NS, PCI_ANY_ID, PCI_ANY_ID, 0},
124 { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
125 { 0x8086, E1000_DEV_ID_82576_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
126 { 0x8086, E1000_DEV_ID_82576_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
127 { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128 PCI_ANY_ID, PCI_ANY_ID, 0},
129 { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130 PCI_ANY_ID, PCI_ANY_ID, 0},
131 { 0x8086, E1000_DEV_ID_82580_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
132 { 0x8086, E1000_DEV_ID_82580_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
133 { 0x8086, E1000_DEV_ID_82580_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
134 { 0x8086, E1000_DEV_ID_82580_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
135 { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
136 PCI_ANY_ID, PCI_ANY_ID, 0},
137 /* required last entry */
141 /*********************************************************************
142 * Table of branding strings for all supported NICs.
143 *********************************************************************/
/* Branding strings for all supported NICs, indexed by the table's
 * String Index field; the array was missing its closing brace. */
static char *igb_strings[] = {
    "Intel(R) PRO/1000 Network Connection"
};
149 /*********************************************************************
150 * Function prototypes
151 *********************************************************************/
152 static int igb_probe(device_t);
153 static int igb_attach(device_t);
154 static int igb_detach(device_t);
155 static int igb_shutdown(device_t);
156 static int igb_suspend(device_t);
157 static int igb_resume(device_t);
158 static void igb_start(struct ifnet *);
159 static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
160 #if __FreeBSD_version >= 800000
161 static int igb_mq_start(struct ifnet *, struct mbuf *);
162 static int igb_mq_start_locked(struct ifnet *,
163 struct tx_ring *, struct mbuf *);
164 static void igb_qflush(struct ifnet *);
166 static int igb_ioctl(struct ifnet *, u_long, caddr_t);
167 static void igb_init(void *);
168 static void igb_init_locked(struct adapter *);
169 static void igb_stop(void *);
170 static void igb_media_status(struct ifnet *, struct ifmediareq *);
171 static int igb_media_change(struct ifnet *);
172 static void igb_identify_hardware(struct adapter *);
173 static int igb_allocate_pci_resources(struct adapter *);
174 static int igb_allocate_msix(struct adapter *);
175 static int igb_allocate_legacy(struct adapter *);
176 static int igb_setup_msix(struct adapter *);
177 static void igb_free_pci_resources(struct adapter *);
178 static void igb_local_timer(void *);
179 static void igb_reset(struct adapter *);
180 static void igb_setup_interface(device_t, struct adapter *);
181 static int igb_allocate_queues(struct adapter *);
182 static void igb_configure_queues(struct adapter *);
184 static int igb_allocate_transmit_buffers(struct tx_ring *);
185 static void igb_setup_transmit_structures(struct adapter *);
186 static void igb_setup_transmit_ring(struct tx_ring *);
187 static void igb_initialize_transmit_units(struct adapter *);
188 static void igb_free_transmit_structures(struct adapter *);
189 static void igb_free_transmit_buffers(struct tx_ring *);
191 static int igb_allocate_receive_buffers(struct rx_ring *);
192 static int igb_setup_receive_structures(struct adapter *);
193 static int igb_setup_receive_ring(struct rx_ring *);
194 static void igb_initialize_receive_units(struct adapter *);
195 static void igb_free_receive_structures(struct adapter *);
196 static void igb_free_receive_buffers(struct rx_ring *);
197 static void igb_free_receive_ring(struct rx_ring *);
199 static void igb_enable_intr(struct adapter *);
200 static void igb_disable_intr(struct adapter *);
201 static void igb_update_stats_counters(struct adapter *);
202 static bool igb_txeof(struct tx_ring *);
204 static __inline void igb_rx_discard(struct rx_ring *, int);
205 static __inline void igb_rx_input(struct rx_ring *,
206 struct ifnet *, struct mbuf *, u32);
208 static bool igb_rxeof(struct igb_queue *, int);
209 static void igb_rx_checksum(u32, struct mbuf *, u32);
210 static int igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
211 static bool igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
212 static void igb_set_promisc(struct adapter *);
213 static void igb_disable_promisc(struct adapter *);
214 static void igb_set_multi(struct adapter *);
215 static void igb_print_hw_stats(struct adapter *);
216 static void igb_update_link_status(struct adapter *);
217 static void igb_refresh_mbufs(struct rx_ring *, int);
219 static void igb_register_vlan(void *, struct ifnet *, u16);
220 static void igb_unregister_vlan(void *, struct ifnet *, u16);
221 static void igb_setup_vlan_hw_support(struct adapter *);
223 static int igb_xmit(struct tx_ring *, struct mbuf **);
224 static int igb_dma_malloc(struct adapter *, bus_size_t,
225 struct igb_dma_alloc *, int);
226 static void igb_dma_free(struct adapter *, struct igb_dma_alloc *);
227 static void igb_print_debug_info(struct adapter *);
228 static void igb_print_nvm_info(struct adapter *);
229 static int igb_is_valid_ether_addr(u8 *);
230 static int igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
231 static int igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
232 /* Management and WOL Support */
233 static void igb_init_manageability(struct adapter *);
234 static void igb_release_manageability(struct adapter *);
235 static void igb_get_hw_control(struct adapter *);
236 static void igb_release_hw_control(struct adapter *);
237 static void igb_enable_wakeup(device_t);
238 static void igb_led_func(void *, int);
240 static int igb_irq_fast(void *);
241 static void igb_add_rx_process_limit(struct adapter *, const char *,
242 const char *, int *, int);
243 static void igb_handle_rxtx(void *context, int pending);
244 static void igb_handle_que(void *context, int pending);
245 static void igb_handle_link(void *context, int pending);
247 /* These are MSIX only irq handlers */
248 static void igb_msix_que(void *);
249 static void igb_msix_link(void *);
251 #ifdef DEVICE_POLLING
252 static poll_handler_t igb_poll;
255 /*********************************************************************
256 * FreeBSD Device Interface Entry Points
257 *********************************************************************/
259 static device_method_t igb_methods[] = {
260 /* Device interface */
261 DEVMETHOD(device_probe, igb_probe),
262 DEVMETHOD(device_attach, igb_attach),
263 DEVMETHOD(device_detach, igb_detach),
264 DEVMETHOD(device_shutdown, igb_shutdown),
265 DEVMETHOD(device_suspend, igb_suspend),
266 DEVMETHOD(device_resume, igb_resume),
270 static driver_t igb_driver = {
271 "igb", igb_methods, sizeof(struct adapter),
static devclass_t igb_devclass;
/* Register the driver on the PCI bus; depends on the pci and ether modules. */
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
279 /*********************************************************************
280 * Tunable default values.
281 *********************************************************************/
283 /* Descriptor defaults */
284 static int igb_rxd = IGB_DEFAULT_RXD;
285 static int igb_txd = IGB_DEFAULT_TXD;
286 TUNABLE_INT("hw.igb.rxd", &igb_rxd);
287 TUNABLE_INT("hw.igb.txd", &igb_txd);
290 ** AIM: Adaptive Interrupt Moderation
291 ** which means that the interrupt rate
292 ** is varied over time based on the
293 ** traffic for that interrupt vector
295 static int igb_enable_aim = TRUE;
296 TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
299 * MSIX should be the default for best performance,
300 * but this allows it to be forced off for testing.
302 static int igb_enable_msix = 1;
303 TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
306 * Header split has seemed to be beneficial in
307 * many circumstances tested, however there have
308 * been some stability issues, so the default is
311 static bool igb_header_split = FALSE;
312 TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
315 ** This will autoconfigure based on
316 ** the number of CPUs if left at 0.
318 static int igb_num_queues = 0;
319 TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
321 /* How many packets rxeof tries to clean at a time */
322 static int igb_rx_process_limit = 100;
323 TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
325 /* Flow control setting - default to FULL */
326 static int igb_fc_setting = e1000_fc_full;
327 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
330 ** Shadow VFTA table, this is needed because
331 ** the real filter table gets cleared during
332 ** a soft reset and the driver needs to be able
335 static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
338 /*********************************************************************
339 * Device identification routine
341 * igb_probe determines if the driver should be loaded on
342 * adapter based on PCI vendor/device id of the adapter.
344 * return BUS_PROBE_DEFAULT on success, positive on failure
345 *********************************************************************/
348 igb_probe(device_t dev)
350 char adapter_name[60];
351 uint16_t pci_vendor_id = 0;
352 uint16_t pci_device_id = 0;
353 uint16_t pci_subvendor_id = 0;
354 uint16_t pci_subdevice_id = 0;
355 igb_vendor_info_t *ent;
357 INIT_DEBUGOUT("igb_probe: begin");
359 pci_vendor_id = pci_get_vendor(dev);
360 if (pci_vendor_id != IGB_VENDOR_ID)
363 pci_device_id = pci_get_device(dev);
364 pci_subvendor_id = pci_get_subvendor(dev);
365 pci_subdevice_id = pci_get_subdevice(dev);
367 ent = igb_vendor_info_array;
368 while (ent->vendor_id != 0) {
369 if ((pci_vendor_id == ent->vendor_id) &&
370 (pci_device_id == ent->device_id) &&
372 ((pci_subvendor_id == ent->subvendor_id) ||
373 (ent->subvendor_id == PCI_ANY_ID)) &&
375 ((pci_subdevice_id == ent->subdevice_id) ||
376 (ent->subdevice_id == PCI_ANY_ID))) {
377 sprintf(adapter_name, "%s %s",
378 igb_strings[ent->index],
380 device_set_desc_copy(dev, adapter_name);
381 return (BUS_PROBE_DEFAULT);
389 /*********************************************************************
390 * Device initialization routine
392 * The attach entry point is called when the driver is being loaded.
393 * This routine identifies the type of hardware, allocates all resources
394 * and initializes the hardware.
396 * return 0 on success, positive on failure
397 *********************************************************************/
/*
 * igb_attach - device initialization entry point (device_attach method).
 *
 * Identifies the hardware, allocates PCI resources, initializes the
 * shared e1000 code, validates the tunable descriptor counts, reads
 * and validates the NVM and MAC address, configures interrupts (MSIX
 * or MSI/legacy) and creates the network interface.
 * Returns 0 on success, a positive errno on failure.
 *
 * NOTE(review): locals such as 'error' and 'eeprom_data', the error
 * goto labels, several 'else' branches and closing braces are not
 * visible in this extract; confirm control flow against the full
 * source before relying on it.
 */
igb_attach(device_t dev)
	struct adapter	*adapter;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* sysctl: dump internal debug state on demand */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	/* sysctl: dump hardware statistics on demand */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	/* sysctl: flow control setting (backed by the global tunable) */
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_fc_setting, 0, "Flow Control");

	/* sysctl: adaptive interrupt moderation on/off */
	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	/* The local timer callout runs under the core mutex */
	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");

	e1000_get_bus_info(&adapter->hw);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper-media-specific PHY options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {

	/*
	** Start from a known state, this is
	** important in reading the nvm and
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state, call it again,
		** if it fails a second time its a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			    "The EEPROM Checksum Is Not Valid\n");

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"

	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

	/* Now get a good starting state */

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	/* Create an identify LED device node for this unit */
	adapter->led_dev = led_create(igb_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("igb_attach: end");

	/* error unwind path: release everything acquired above */
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
	igb_free_pci_resources(adapter);
	IGB_CORE_LOCK_DESTROY(adapter);
608 /*********************************************************************
609 * Device removal routine
611 * The detach entry point is called when the driver is being removed.
612 * This routine stops the adapter and deallocates all the resources
613 * that were allocated for driver operation.
615 * return 0 on success, positive on failure
616 *********************************************************************/
619 igb_detach(device_t dev)
621 struct adapter *adapter = device_get_softc(dev);
622 struct ifnet *ifp = adapter->ifp;
624 INIT_DEBUGOUT("igb_detach: begin");
626 /* Make sure VLANS are not using driver */
627 if (adapter->ifp->if_vlantrunk != NULL) {
628 device_printf(dev,"Vlan in use, detach first\n");
632 if (adapter->led_dev != NULL)
633 led_destroy(adapter->led_dev);
635 #ifdef DEVICE_POLLING
636 if (ifp->if_capenable & IFCAP_POLLING)
637 ether_poll_deregister(ifp);
640 IGB_CORE_LOCK(adapter);
641 adapter->in_detach = 1;
643 IGB_CORE_UNLOCK(adapter);
645 e1000_phy_hw_reset(&adapter->hw);
647 /* Give control back to firmware */
648 igb_release_manageability(adapter);
649 igb_release_hw_control(adapter);
652 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
653 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
654 igb_enable_wakeup(dev);
657 /* Unregister VLAN events */
658 if (adapter->vlan_attach != NULL)
659 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
660 if (adapter->vlan_detach != NULL)
661 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
663 ether_ifdetach(adapter->ifp);
665 callout_drain(&adapter->timer);
667 igb_free_pci_resources(adapter);
668 bus_generic_detach(dev);
671 igb_free_transmit_structures(adapter);
672 igb_free_receive_structures(adapter);
674 IGB_CORE_LOCK_DESTROY(adapter);
679 /*********************************************************************
681 * Shutdown entry point
683 **********************************************************************/
686 igb_shutdown(device_t dev)
688 return igb_suspend(dev);
692 * Suspend/resume device methods.
695 igb_suspend(device_t dev)
697 struct adapter *adapter = device_get_softc(dev);
699 IGB_CORE_LOCK(adapter);
703 igb_release_manageability(adapter);
704 igb_release_hw_control(adapter);
707 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
708 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
709 igb_enable_wakeup(dev);
712 IGB_CORE_UNLOCK(adapter);
714 return bus_generic_suspend(dev);
718 igb_resume(device_t dev)
720 struct adapter *adapter = device_get_softc(dev);
721 struct ifnet *ifp = adapter->ifp;
723 IGB_CORE_LOCK(adapter);
724 igb_init_locked(adapter);
725 igb_init_manageability(adapter);
727 if ((ifp->if_flags & IFF_UP) &&
728 (ifp->if_drv_flags & IFF_DRV_RUNNING))
731 IGB_CORE_UNLOCK(adapter);
733 return bus_generic_resume(dev);
737 /*********************************************************************
738 * Transmit entry point
740 * igb_start is called by the stack to initiate a transmit.
741 * The driver will remain in this routine as long as there are
742 * packets to transmit and transmit resources are available.
743 * In case resources are not available stack is notified and
744 * the packet is requeued.
745 **********************************************************************/
748 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
750 struct adapter *adapter = ifp->if_softc;
753 IGB_TX_LOCK_ASSERT(txr);
755 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
758 if (!adapter->link_active)
761 /* Call cleanup if number of TX descriptors low */
762 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
765 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
766 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
767 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
770 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
774 * Encapsulation can modify our pointer, and or make it
775 * NULL on failure. In that event, we can't requeue.
777 if (igb_xmit(txr, &m_head)) {
780 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
781 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
785 /* Send a copy of the frame to the BPF listener */
786 ETHER_BPF_MTAP(ifp, m_head);
788 /* Set watchdog on */
789 txr->watchdog_time = ticks;
790 txr->watchdog_check = TRUE;
795 * Legacy TX driver routine, called from the
796 * stack, always uses tx[0], and spins for it.
797 * Should not be used with multiqueue tx
800 igb_start(struct ifnet *ifp)
802 struct adapter *adapter = ifp->if_softc;
803 struct tx_ring *txr = adapter->tx_rings;
805 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
807 igb_start_locked(txr, ifp);
813 #if __FreeBSD_version >= 800000
815 ** Multiqueue Transmit driver
819 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
821 struct adapter *adapter = ifp->if_softc;
825 /* Which queue to use */
826 if ((m->m_flags & M_FLOWID) != 0)
827 i = m->m_pkthdr.flowid % adapter->num_queues;
829 txr = &adapter->tx_rings[i];
831 if (IGB_TX_TRYLOCK(txr)) {
832 err = igb_mq_start_locked(ifp, txr, m);
835 err = drbr_enqueue(ifp, txr->br, m);
841 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
843 struct adapter *adapter = txr->adapter;
847 IGB_TX_LOCK_ASSERT(txr);
849 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
850 IFF_DRV_RUNNING || adapter->link_active == 0) {
852 err = drbr_enqueue(ifp, txr->br, m);
856 /* Call cleanup if number of TX descriptors low */
857 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
862 next = drbr_dequeue(ifp, txr->br);
863 } else if (drbr_needs_enqueue(ifp, txr->br)) {
864 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
866 next = drbr_dequeue(ifp, txr->br);
870 /* Process the queue */
871 while (next != NULL) {
872 if ((err = igb_xmit(txr, &next)) != 0) {
874 err = drbr_enqueue(ifp, txr->br, next);
878 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
879 ETHER_BPF_MTAP(ifp, next);
880 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
882 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
883 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
886 next = drbr_dequeue(ifp, txr->br);
889 /* Set the watchdog */
890 txr->watchdog_check = TRUE;
891 txr->watchdog_time = ticks;
897 ** Flush all ring buffers
900 igb_qflush(struct ifnet *ifp)
902 struct adapter *adapter = ifp->if_softc;
903 struct tx_ring *txr = adapter->tx_rings;
906 for (int i = 0; i < adapter->num_queues; i++, txr++) {
908 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
914 #endif /* __FreeBSD_version >= 800000 */
916 /*********************************************************************
919 * igb_ioctl is called when the user wants to configure the
922 * return 0 on success, positive on failure
923 **********************************************************************/
/*
 * igb_ioctl - interface configuration entry point (if_ioctl).
 *
 * Dispatches SIOCSIFADDR / SIOCSIFMTU / SIOCSIFFLAGS /
 * SIOC(ADD|DEL)MULTI / SIOCxIFMEDIA / SIOCSIFCAP requests from the
 * stack.  Returns 0 on success, positive errno on failure.
 *
 * NOTE(review): the switch(command) statement, case labels, break
 * statements and several closing braces are not visible in this
 * extract; the IOCTL_DEBUGOUT strings below identify which case each
 * fragment belonged to.
 */
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
	struct ifaddr	*ifa = (struct ifaddr *)data;

	/* Refuse configuration while detach is in progress */
	if (adapter->in_detach)

	/* SIOCSIFADDR */
	if (ifa->ifa_addr->sa_family == AF_INET) {
		/*
		 * Since resetting hardware takes a very long time
		 * and results in link renegotiation we only
		 * initialize the hardware only when it is absolutely
		 */
		ifp->if_flags |= IFF_UP;
		if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			IGB_CORE_LOCK(adapter);
			igb_init_locked(adapter);
			IGB_CORE_UNLOCK(adapter);
		if (!(ifp->if_flags & IFF_NOARP))
			arp_ifinit(ifp, ifa);
	error = ether_ioctl(ifp, command, data);

	/* SIOCSIFMTU: 9234 appears to be the maximum frame size here */
	IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
	IGB_CORE_LOCK(adapter);
	max_frame_size = 9234;
	if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		IGB_CORE_UNLOCK(adapter);
	ifp->if_mtu = ifr->ifr_mtu;
	adapter->max_frame_size =
	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	/* SIOCSIFFLAGS: full reinit avoided when only promisc bits changed */
	IOCTL_DEBUGOUT("ioctl rcv'd:\
SIOCSIFFLAGS (Set Interface Flags)");
	IGB_CORE_LOCK(adapter);
	if (ifp->if_flags & IFF_UP) {
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			if ((ifp->if_flags ^ adapter->if_flags) &
			    (IFF_PROMISC | IFF_ALLMULTI)) {
				igb_disable_promisc(adapter);
				igb_set_promisc(adapter);
			igb_init_locked(adapter);
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
	adapter->if_flags = ifp->if_flags;
	IGB_CORE_UNLOCK(adapter);

	/* SIOC(ADD|DEL)MULTI: reprogram the multicast filter table */
	IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_CORE_LOCK(adapter);
		igb_disable_intr(adapter);
		igb_set_multi(adapter);
#ifdef DEVICE_POLLING
		if (!(ifp->if_capenable & IFCAP_POLLING))
		igb_enable_intr(adapter);
		IGB_CORE_UNLOCK(adapter);

	/* SIOCxIFMEDIA */
	/* Check SOL/IDER usage */
	IGB_CORE_LOCK(adapter);
	if (e1000_check_reset_block(&adapter->hw)) {
		IGB_CORE_UNLOCK(adapter);
		device_printf(adapter->dev, "Media change is"
		    " blocked due to SOL/IDER session.\n");
	IGB_CORE_UNLOCK(adapter);
	IOCTL_DEBUGOUT("ioctl rcv'd: \
SIOCxIFMEDIA (Get/Set Interface Media)");
	error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);

	/* SIOCSIFCAP: toggle capability bits; some require a reinit */
	IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
	mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
	if (mask & IFCAP_POLLING) {
		if (ifr->ifr_reqcap & IFCAP_POLLING) {
			error = ether_poll_register(igb_poll, ifp);
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			ifp->if_capenable |= IFCAP_POLLING;
			IGB_CORE_UNLOCK(adapter);
			error = ether_poll_deregister(ifp);
			/* Enable interrupt even in error case */
			IGB_CORE_LOCK(adapter);
			igb_enable_intr(adapter);
			ifp->if_capenable &= ~IFCAP_POLLING;
			IGB_CORE_UNLOCK(adapter);
	if (mask & IFCAP_HWCSUM) {
		ifp->if_capenable ^= IFCAP_HWCSUM;
	if (mask & IFCAP_TSO4) {
		ifp->if_capenable ^= IFCAP_TSO4;
	if (mask & IFCAP_VLAN_HWTAGGING) {
		ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
	if (mask & IFCAP_VLAN_HWFILTER) {
		ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
	if (mask & IFCAP_LRO) {
		ifp->if_capenable ^= IFCAP_LRO;
	if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
	VLAN_CAPABILITIES(ifp);

	/* default: hand anything else to the generic ethernet ioctl */
	error = ether_ioctl(ifp, command, data);
1093 /*********************************************************************
1096 * This routine is used in two ways. It is used by the stack as
1097 * init entry point in network interface structure. It is also used
1098 * by the driver as a hw/sw initialization routine to get to a
1101 * return 0 on success, positive on failure
1102 **********************************************************************/
/*
 * igb_init_locked - hw/sw (re)initialization routine.
 *
 * Used both as the stack's if_init entry (via igb_init) and
 * internally to bring the adapter to a known running state:
 * programs the MAC address, VLAN support, offload flags, TX/RX
 * structures, multicast and promiscuous state, then re-enables
 * interrupts.  Caller must hold the core lock.
 *
 * NOTE(review): some lines ('else' branches, closing braces, local
 * declarations such as 'ctrl') are not visible in this extract;
 * confirm control flow against the full source.
 */
igb_init_locked(struct adapter *adapter)
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_update_link_status(adapter);

	/* Set the default VLAN ethertype register */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			igb_setup_vlan_hw_support(adapter);
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		/* SCTP checksum offload: 82576 only */
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;

	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (ifp->if_mtu > ETHERMTU)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
		adapter->rx_mbuf_sz = MCLBYTES;

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* Set up VLAN tag offload and filter */
	igb_setup_vlan_hw_support(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
#endif /* DEVICE_POLLING */
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
/*
 * igb_init - unlocked init entry point used by the network stack
 * (the function signature line is elided in this view).
 * Simply takes the core lock and delegates to igb_init_locked().
 */
1219 struct adapter *adapter = arg;
1221 IGB_CORE_LOCK(adapter);
1222 igb_init_locked(adapter);
1223 IGB_CORE_UNLOCK(adapter);
/*
 * igb_handle_rxtx - taskqueue handler for deferred RX/TX processing in
 * legacy/MSI mode. Drains RX (re-queuing itself if more work remains),
 * then restarts transmit, and finally re-enables interrupts that
 * igb_irq_fast() masked.
 * NOTE(review): lines are elided here (e.g. the ifp declaration and the
 * TX lock/unlock calls are not visible) — verify against full source.
 */
1228 igb_handle_rxtx(void *context, int pending)
1230 struct igb_queue *que = context;
1231 struct adapter *adapter = que->adapter;
1232 struct tx_ring *txr = adapter->tx_rings;
1237 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
/* igb_rxeof() returning true means more packets pending: reschedule */
1238 if (igb_rxeof(que, adapter->rx_process_limit))
1239 taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1243 #if __FreeBSD_version >= 800000
/* Multiqueue (buf_ring) transmit path on 8.x+ */
1244 if (!drbr_empty(ifp, txr->br))
1245 igb_mq_start_locked(ifp, txr, NULL);
1247 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1248 igb_start_locked(txr, ifp);
/* Undo the mask applied by the fast interrupt handler */
1253 igb_enable_intr(adapter);
/*
 * igb_handle_que - per-queue taskqueue handler (MSI-X mode).
 * Processes RX with no limit (-1), restarts transmit, requeues itself
 * while work remains, and finally re-arms this queue's EIMS bit
 * (skipped when the interface is in polling mode).
 * NOTE(review): elided lines hide the TX-clean call and loop structure.
 */
1257 igb_handle_que(void *context, int pending)
1259 struct igb_queue *que = context;
1260 struct adapter *adapter = que->adapter;
1261 struct tx_ring *txr = que->txr;
1262 struct ifnet *ifp = adapter->ifp;
1265 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1266 more = igb_rxeof(que, -1);
1270 #if __FreeBSD_version >= 800000
1271 igb_mq_start_locked(ifp, txr, NULL);
1273 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1274 igb_start_locked(txr, ifp);
/* Still work outstanding: run again rather than re-enabling the irq */
1278 taskqueue_enqueue(que->tq, &que->que_task);
1283 /* Reenable this interrupt */
1284 #ifdef DEVICE_POLLING
1285 if (!(ifp->if_capenable & IFCAP_POLLING))
1287 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1290 /* Deal with link in a sleepable context */
1292 igb_handle_link(void *context, int pending)
1294 struct adapter *adapter = context;
/* Force a fresh PHY read on the next status check, then update now;
 * runs from a taskqueue because if_link_state_change() can sleep */
1296 adapter->hw.mac.get_link_status = 1;
1297 igb_update_link_status(adapter);
1300 /*********************************************************************
1302 * MSI/Legacy Deferred
1303 * Interrupt Service routine
1305 *********************************************************************/
/*
 * igb_irq_fast - filter (fast) interrupt handler for MSI/legacy.
 * Reads and thereby acknowledges ICR, rejects stray interrupts, masks
 * further interrupts, and defers all real work to taskqueues.
 * Returns FILTER_STRAY / FILTER_HANDLED per the filter-interrupt contract.
 */
1307 igb_irq_fast(void *arg)
1309 struct adapter *adapter = arg;
1313 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
/* All-ones read means the device is gone (hot unplug) */
1316 if (reg_icr == 0xffffffff)
1317 return FILTER_STRAY;
1319 /* Definitely not our interrupt. */
1321 return FILTER_STRAY;
1323 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1324 return FILTER_STRAY;
1327 * Mask interrupts until the taskqueue is finished running. This is
1328 * cheap, just assume that it is needed. This also works around the
1329 * MSI message reordering errata on certain systems.
1331 igb_disable_intr(adapter);
1332 taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1334 /* Link status change */
1335 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1336 taskqueue_enqueue(adapter->tq, &adapter->link_task);
/* Count receive overruns for statistics */
1338 if (reg_icr & E1000_ICR_RXO)
1339 adapter->rx_overruns++;
1340 return FILTER_HANDLED;
1343 #ifdef DEVICE_POLLING
1344 /*********************************************************************
1346 * Legacy polling routine : if using this code you MUST be sure that
1347 * multiqueue is not defined, ie, set igb_num_queues to 1.
1349 *********************************************************************/
/*
 * POLL_RETURN_COUNT: on FreeBSD 8.x+ poll handlers return the number of
 * packets processed; on older releases they return void, so the macro
 * swallows the value.
 */
1350 #if __FreeBSD_version >= 800000
1351 #define POLL_RETURN_COUNT(a) (a)
1354 #define POLL_RETURN_COUNT(a)
/*
 * igb_poll - DEVICE_POLLING entry point. Single-queue only (see the
 * comment block above). On POLL_AND_CHECK_STATUS also reads ICR to pick
 * up link changes / overruns, then cleans RX once and loops on TX clean,
 * finally restarting transmit.
 * NOTE(review): elided lines hide the TX lock usage and loop body detail.
 */
1357 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1359 struct adapter *adapter = ifp->if_softc;
1360 struct igb_queue *que = adapter->queues;
1361 struct tx_ring *txr = adapter->tx_rings;
1362 u32 reg_icr, rx_done = 0;
1363 u32 loop = IGB_MAX_LOOP;
1366 IGB_CORE_LOCK(adapter);
/* Nothing to do if the interface is not running */
1367 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1368 IGB_CORE_UNLOCK(adapter);
1369 return POLL_RETURN_COUNT(rx_done);
1372 if (cmd == POLL_AND_CHECK_STATUS) {
1373 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1374 /* Link status change */
1375 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1376 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1378 if (reg_icr & E1000_ICR_RXO)
1379 adapter->rx_overruns++;
1381 IGB_CORE_UNLOCK(adapter);
1383 /* TODO: rx_count */
1384 rx_done = igb_rxeof(que, count) ? 1 : 0;
/* Bounded TX-clean loop: iterate while work remains, at most
 * IGB_MAX_LOOP passes */
1388 more = igb_txeof(txr);
1389 } while (loop-- && more);
1390 #if __FreeBSD_version >= 800000
1391 if (!drbr_empty(ifp, txr->br))
1392 igb_mq_start_locked(ifp, txr, NULL);
1394 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1395 igb_start_locked(txr, ifp);
1398 return POLL_RETURN_COUNT(rx_done);
1400 #endif /* DEVICE_POLLING */
1402 /*********************************************************************
1404 * MSIX TX Interrupt Service routine
1406 **********************************************************************/
/*
 * igb_msix_que - MSI-X per-queue interrupt handler.
 * Masks this queue's vector (EIMC), cleans TX and RX, optionally runs
 * Adaptive Interrupt Moderation (AIM) to recompute the EITR value from
 * average frame sizes, defers remaining work to the queue task, and
 * re-arms the vector (EIMS).
 * NOTE(review): elided lines hide TX locking and the newitr branch
 * structure — verify against full source.
 */
1408 igb_msix_que(void *arg)
1410 struct igb_queue *que = arg;
1411 struct adapter *adapter = que->adapter;
1412 struct tx_ring *txr = que->txr;
1413 struct rx_ring *rxr = que->rxr;
1415 bool more_tx, more_rx;
/* Mask this vector while we service it */
1417 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1421 more_tx = igb_txeof(txr);
1424 more_rx = igb_rxeof(que, adapter->rx_process_limit);
/* AIM disabled: skip the moderation logic entirely */
1426 if (igb_enable_aim == FALSE)
1429 ** Do Adaptive Interrupt Moderation:
1430 ** - Write out last calculated setting
1431 ** - Calculate based on average size over
1432 ** the last interval.
1434 if (que->eitr_setting)
1435 E1000_WRITE_REG(&adapter->hw,
1436 E1000_EITR(que->msix), que->eitr_setting);
1438 que->eitr_setting = 0;
1440 /* Idle, do nothing */
1441 if ((txr->bytes == 0) && (rxr->bytes == 0))
1444 /* Used half Default if sub-gig */
1445 if (adapter->link_speed != 1000)
1446 newitr = IGB_DEFAULT_ITR / 2;
/* Base the new rate on average bytes/packet, TX and RX */
1448 if ((txr->bytes) && (txr->packets))
1449 newitr = txr->bytes/txr->packets;
1450 if ((rxr->bytes) && (rxr->packets))
1451 newitr = max(newitr,
1452 (rxr->bytes / rxr->packets));
1453 newitr += 24; /* account for hardware frame, crc */
1454 /* set an upper boundary */
1455 newitr = min(newitr, 3000);
1456 /* Be nice to the mid range */
1457 if ((newitr > 300) && (newitr < 1200))
1458 newitr = (newitr / 3);
1460 newitr = (newitr / 2);
1462 newitr &= 0x7FFC; /* Mask invalid bits */
/* 82575 uses a different EITR layout than later parts */
1463 if (adapter->hw.mac.type == e1000_82575)
1464 newitr |= newitr << 16;
1466 newitr |= E1000_EITR_CNT_IGNR;
1468 /* save for next interrupt */
1469 que->eitr_setting = newitr;
1478 /* Schedule a clean task if needed*/
1479 if (more_tx || more_rx)
1480 taskqueue_enqueue(que->tq, &que->que_task);
1482 /* Reenable this interrupt */
1483 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1488 /*********************************************************************
1490 * MSIX Link Interrupt Service routine
1492 **********************************************************************/
/*
 * igb_msix_link - MSI-X link (other-cause) interrupt handler.
 * Counts the interrupt, schedules the deferred link task on an LSC
 * event, and re-arms the LSC mask and the link vector's EIMS bit.
 */
1495 igb_msix_link(void *arg)
1497 struct adapter *adapter = arg;
1500 ++adapter->link_irq;
1501 icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
/* Only a Link Status Change is handled here; the non-LSC path
 * (body elided in this view) skips the taskqueue enqueue */
1502 if (!(icr & E1000_ICR_LSC))
1504 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1508 E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1509 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1514 /*********************************************************************
1516 * Media Ioctl callback
1518 * This routine is called whenever the user queries the status of
1519 * the interface using ifconfig.
1521 **********************************************************************/
/*
 * igb_media_status - ifmedia status callback (ifconfig queries).
 * Refreshes link state under the core lock and reports media
 * type/speed/duplex via ifmr. Fiber/serdes links are always reported
 * full-duplex with the fiber_type subtype.
 */
1523 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1525 struct adapter *adapter = ifp->if_softc;
1526 u_char fiber_type = IFM_1000_SX;
1528 INIT_DEBUGOUT("igb_media_status: begin");
1530 IGB_CORE_LOCK(adapter);
1531 igb_update_link_status(adapter);
1533 ifmr->ifm_status = IFM_AVALID;
1534 ifmr->ifm_active = IFM_ETHER;
/* No link: report valid-but-inactive and bail */
1536 if (!adapter->link_active) {
1537 IGB_CORE_UNLOCK(adapter);
1541 ifmr->ifm_status |= IFM_ACTIVE;
1543 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1544 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1545 ifmr->ifm_active |= fiber_type | IFM_FDX;
/* Copper: map link_speed to the media subtype (case labels for
 * 10/100/1000 are elided in this view) */
1547 switch (adapter->link_speed) {
1549 ifmr->ifm_active |= IFM_10_T;
1552 ifmr->ifm_active |= IFM_100_TX;
1555 ifmr->ifm_active |= IFM_1000_T;
1558 if (adapter->link_duplex == FULL_DUPLEX)
1559 ifmr->ifm_active |= IFM_FDX;
1561 ifmr->ifm_active |= IFM_HDX;
1563 IGB_CORE_UNLOCK(adapter);
1566 /*********************************************************************
1568 * Media Ioctl callback
1570 * This routine is called when the user changes speed/duplex using
1571 * media/mediopt option with ifconfig.
1573 **********************************************************************/
/*
 * igb_media_change - ifmedia change callback (ifconfig media/mediaopt).
 * Translates the requested media word into autoneg/forced speed+duplex
 * settings in the shared-code hw structure, then reinitializes the
 * adapter so they take effect.
 * NOTE(review): the case labels (IFM_AUTO/IFM_1000_T/IFM_100_TX/IFM_10_T)
 * and the return statements are elided in this view.
 */
1575 igb_media_change(struct ifnet *ifp)
1577 struct adapter *adapter = ifp->if_softc;
1578 struct ifmedia *ifm = &adapter->media;
1580 INIT_DEBUGOUT("igb_media_change: begin");
/* Only Ethernet media words are acceptable */
1582 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1585 IGB_CORE_LOCK(adapter);
1586 switch (IFM_SUBTYPE(ifm->ifm_media)) {
/* Autoselect: advertise the full default set */
1588 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1589 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
/* 1000baseT is autoneg-only per spec */
1594 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1595 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
/* Forced 100 Mb, duplex from the FDX media flag */
1598 adapter->hw.mac.autoneg = FALSE;
1599 adapter->hw.phy.autoneg_advertised = 0;
1600 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1601 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1603 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
/* Forced 10 Mb, duplex from the FDX media flag */
1606 adapter->hw.mac.autoneg = FALSE;
1607 adapter->hw.phy.autoneg_advertised = 0;
1608 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1609 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1611 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1614 device_printf(adapter->dev, "Unsupported media type\n");
1617 /* As the speed/duplex settings my have changed we need to
1620 adapter->hw.phy.reset_disable = FALSE;
1622 igb_init_locked(adapter);
1623 IGB_CORE_UNLOCK(adapter);
1629 /*********************************************************************
1631 * This routine maps the mbufs to Advanced TX descriptors.
1632 * used by the 82575 adapter.
1634 **********************************************************************/
/*
 * igb_xmit - map an mbuf chain into Advanced TX descriptors and hand it
 * to the hardware.
 *
 * *m_headp may be replaced (m_defrag) or freed on error — callers must
 * use the possibly-updated pointer. The first descriptor's tx_buffer
 * records the index of the EOP descriptor, which is the only one that
 * receives a DONE writeback.
 * NOTE(review): error-return statements, the TSO/hdrlen declarations and
 * several closing braces are elided in this view.
 */
1637 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1639 struct adapter *adapter = txr->adapter;
1640 bus_dma_segment_t segs[IGB_MAX_SCATTER];
1642 struct igb_tx_buffer *tx_buffer, *tx_buffer_mapped;
1643 union e1000_adv_tx_desc *txd = NULL;
1644 struct mbuf *m_head;
1645 u32 olinfo_status = 0, cmd_type_len = 0;
1646 int nsegs, i, j, error, first, last = 0;
1652 /* Set basic descriptor constants */
1653 cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1654 cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1655 if (m_head->m_flags & M_VLANTAG)
1656 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1659 * Force a cleanup if number of TX descriptors
1660 * available hits the threshold
1662 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1664 /* Now do we at least have a minimal? */
1665 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1666 txr->no_desc_avail++;
1672 * Map the packet for DMA.
1674 * Capture the first descriptor index,
1675 * this descriptor will have the index
1676 * of the EOP which is the only one that
1677 * now gets a DONE bit writeback.
1679 first = txr->next_avail_desc;
1680 tx_buffer = &txr->tx_buffers[first];
1681 tx_buffer_mapped = tx_buffer;
1682 map = tx_buffer->map;
1684 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1685 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
/* EFBIG: too many segments — defragment the chain and retry once */
1687 if (error == EFBIG) {
1690 m = m_defrag(*m_headp, M_DONTWAIT);
1692 adapter->mbuf_defrag_failed++;
1700 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1701 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1703 if (error == ENOMEM) {
1704 adapter->no_tx_dma_setup++;
1706 } else if (error != 0) {
1707 adapter->no_tx_dma_setup++;
1712 } else if (error == ENOMEM) {
1713 adapter->no_tx_dma_setup++;
1715 } else if (error != 0) {
1716 adapter->no_tx_dma_setup++;
1722 /* Check again to be sure we have enough descriptors */
1723 if (nsegs > (txr->tx_avail - 2)) {
1724 txr->no_desc_avail++;
1725 bus_dmamap_unload(txr->txtag, map);
1731 * Set up the context descriptor:
1732 * used when any hardware offload is done.
1733 * This includes CSUM, VLAN, and TSO. It
1734 * will use the first descriptor.
1736 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1737 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1738 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1739 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1740 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1743 } else if (igb_tx_ctx_setup(txr, m_head))
1744 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1746 /* Calculate payload length */
1747 olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1748 << E1000_ADVTXD_PAYLEN_SHIFT);
1750 /* 82575 needs the queue index added */
1751 if (adapter->hw.mac.type == e1000_82575)
1752 olinfo_status |= txr->me << 4;
1754 /* Set up our transmit descriptors */
1755 i = txr->next_avail_desc;
1756 for (j = 0; j < nsegs; j++) {
1758 bus_addr_t seg_addr;
1760 tx_buffer = &txr->tx_buffers[i];
1761 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1762 seg_addr = segs[j].ds_addr;
1763 seg_len = segs[j].ds_len;
1765 txd->read.buffer_addr = htole64(seg_addr);
1766 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1767 txd->read.olinfo_status = htole32(olinfo_status);
/* Ring wrap */
1769 if (++i == adapter->num_tx_desc)
1771 tx_buffer->m_head = NULL;
1772 tx_buffer->next_eop = -1;
1775 txr->next_avail_desc = i;
1776 txr->tx_avail -= nsegs;
/* The last buffer holds the mbuf; swap its (unused) map with the
 * first buffer's so unload on clean works on the right map */
1778 tx_buffer->m_head = m_head;
1779 tx_buffer_mapped->map = tx_buffer->map;
1780 tx_buffer->map = map;
1781 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1784 * Last Descriptor of Packet
1785 * needs End Of Packet (EOP)
1786 * and Report Status (RS)
1788 txd->read.cmd_type_len |=
1789 htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1791 * Keep track in the first buffer which
1792 * descriptor will be written back
1794 tx_buffer = &txr->tx_buffers[first];
1795 tx_buffer->next_eop = last;
1796 txr->watchdog_time = ticks;
1799 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1800 * that this frame is available to transmit.
1802 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1803 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1804 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
/*
 * igb_set_promisc - program RCTL promiscuous bits from interface flags:
 * IFF_PROMISC sets both unicast (UPE) and multicast (MPE) promiscuous
 * mode; IFF_ALLMULTI sets MPE only (and clears UPE).
 */
1812 igb_set_promisc(struct adapter *adapter)
1814 struct ifnet *ifp = adapter->ifp;
1817 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1819 if (ifp->if_flags & IFF_PROMISC) {
1820 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1821 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1822 } else if (ifp->if_flags & IFF_ALLMULTI) {
1823 reg_rctl |= E1000_RCTL_MPE;
1824 reg_rctl &= ~E1000_RCTL_UPE;
1825 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
/*
 * igb_disable_promisc - clear both unicast (UPE) and multicast (MPE)
 * promiscuous bits in RCTL.
 */
1830 igb_disable_promisc(struct adapter *adapter)
1834 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1836 reg_rctl &= (~E1000_RCTL_UPE);
1837 reg_rctl &= (~E1000_RCTL_MPE);
1838 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1842 /*********************************************************************
1845 * This routine is called whenever multicast address list is updated.
1847 **********************************************************************/
/*
 * igb_set_multi - rebuild the hardware multicast filter table from the
 * interface's multicast address list. If the list exceeds
 * MAX_NUM_MULTICAST_ADDRESSES, fall back to multicast-promiscuous (MPE).
 * Uses IF_ADDR_LOCK on pre-8.0 and if_maddr_rlock on 8.0+.
 */
1850 igb_set_multi(struct adapter *adapter)
1852 struct ifnet *ifp = adapter->ifp;
1853 struct ifmultiaddr *ifma;
/* Flat array of 6-byte addresses handed to the shared code */
1855 u8 mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1859 IOCTL_DEBUGOUT("igb_set_multi: begin");
1861 #if __FreeBSD_version < 800000
1864 if_maddr_rlock(ifp);
1866 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1867 if (ifma->ifma_addr->sa_family != AF_LINK)
/* Stop collecting once the table is full */
1870 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1873 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1874 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1877 #if __FreeBSD_version < 800000
1878 IF_ADDR_UNLOCK(ifp);
1880 if_maddr_runlock(ifp);
1883 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1884 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1885 reg_rctl |= E1000_RCTL_MPE;
1886 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1888 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1892 /*********************************************************************
1894 * This routine checks for link status,
1895 * updates statistics, and does the watchdog.
1897 **********************************************************************/
/*
 * igb_local_timer - once-per-second callout (core lock held by callout).
 * Refreshes link state and statistics, runs the per-queue TX watchdog,
 * kicks an RX interrupt on all queues (unless polling), and reschedules
 * itself. On watchdog expiry it logs ring state and reinitializes the
 * adapter.
 * NOTE(review): the goto/label separating the normal path from the
 * timeout path is elided in this view.
 */
1900 igb_local_timer(void *arg)
1902 struct adapter *adapter = arg;
1903 struct ifnet *ifp = adapter->ifp;
1904 device_t dev = adapter->dev;
1905 struct tx_ring *txr = adapter->tx_rings;
1908 IGB_CORE_LOCK_ASSERT(adapter);
1910 igb_update_link_status(adapter);
1911 igb_update_stats_counters(adapter);
1913 if (igb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
1914 igb_print_hw_stats(adapter);
1917 ** Watchdog: check for time since any descriptor was cleaned
1919 for (int i = 0; i < adapter->num_queues; i++, txr++) {
/* Only rings with pending work arm the watchdog */
1920 if (txr->watchdog_check == FALSE)
1922 if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1926 /* Trigger an RX interrupt on all queues */
1927 #ifdef DEVICE_POLLING
1928 if (!(ifp->if_capenable & IFCAP_POLLING))
1930 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1931 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
/* --- watchdog timeout path: dump ring state and reset --- */
1935 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1936 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1937 E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1938 E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1939 device_printf(dev,"TX(%d) desc avail = %d,"
1940 "Next TX to Clean = %d\n",
1941 txr->me, txr->tx_avail, txr->next_to_clean);
1942 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1943 adapter->watchdog_events++;
1944 igb_init_locked(adapter);
/*
 * igb_update_link_status - determine current link state per media type
 * (copper PHY read, fiber STATUS register, serdes flag) and, on a
 * transition, update speed/duplex/baudrate and notify the stack via
 * if_link_state_change(). On link-down, disarm all TX watchdogs.
 */
1948 igb_update_link_status(struct adapter *adapter)
1950 struct e1000_hw *hw = &adapter->hw;
1951 struct ifnet *ifp = adapter->ifp;
1952 device_t dev = adapter->dev;
1953 struct tx_ring *txr = adapter->tx_rings;
1956 /* Get the cached link value or read for real */
1957 switch (hw->phy.media_type) {
1958 case e1000_media_type_copper:
1959 if (hw->mac.get_link_status) {
1960 /* Do the work to read phy */
1961 e1000_check_for_link(hw);
/* get_link_status is cleared by the shared code when link is up */
1962 link_check = !hw->mac.get_link_status;
1966 case e1000_media_type_fiber:
1967 e1000_check_for_link(hw);
1968 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1971 case e1000_media_type_internal_serdes:
1972 e1000_check_for_link(hw);
1973 link_check = adapter->hw.mac.serdes_has_link;
1976 case e1000_media_type_unknown:
1980 /* Now we check if a transition has happened */
1981 if (link_check && (adapter->link_active == 0)) {
1982 e1000_get_speed_and_duplex(&adapter->hw,
1983 &adapter->link_speed, &adapter->link_duplex);
1985 device_printf(dev, "Link is up %d Mbps %s\n",
1986 adapter->link_speed,
1987 ((adapter->link_duplex == FULL_DUPLEX) ?
1988 "Full Duplex" : "Half Duplex"));
1989 adapter->link_active = 1;
/* link_speed is in Mbps; baudrate wants bits/sec */
1990 ifp->if_baudrate = adapter->link_speed * 1000000;
1991 /* This can sleep */
1992 if_link_state_change(ifp, LINK_STATE_UP);
1993 } else if (!link_check && (adapter->link_active == 1)) {
1994 ifp->if_baudrate = adapter->link_speed = 0;
1995 adapter->link_duplex = 0;
1997 device_printf(dev, "Link is Down\n");
1998 adapter->link_active = 0;
1999 /* This can sleep */
2000 if_link_state_change(ifp, LINK_STATE_DOWN);
2001 /* Turn off watchdogs */
2002 for (int i = 0; i < adapter->num_queues; i++, txr++)
2003 txr->watchdog_check = FALSE;
2007 /*********************************************************************
2009 * This routine disables all traffic on the adapter by issuing a
2010 * global reset on the MAC and deallocates TX/RX buffers.
2012 **********************************************************************/
/*
 * igb_stop - disable all traffic: mask interrupts, stop the timer, mark
 * the interface down, disarm per-ring watchdogs, reset the MAC, clear
 * wake-up control, and turn off the LED. (The function signature line
 * is elided in this view; see the header comment above.)
 */
2017 struct adapter *adapter = arg;
2018 struct ifnet *ifp = adapter->ifp;
2019 struct tx_ring *txr = adapter->tx_rings;
2021 IGB_CORE_LOCK_ASSERT(adapter);
2023 INIT_DEBUGOUT("igb_stop: begin");
2025 igb_disable_intr(adapter);
2027 callout_stop(&adapter->timer);
2029 /* Tell the stack that the interface is no longer active */
2030 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2032 /* Unarm watchdog timer. */
2033 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2035 txr->watchdog_check = FALSE;
2039 e1000_reset_hw(&adapter->hw);
/* Clear Wake Up Control so the device does not wake the system */
2040 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2042 e1000_led_off(&adapter->hw);
2043 e1000_cleanup_led(&adapter->hw);
2047 /*********************************************************************
2049 * Determine hardware revision.
2051 **********************************************************************/
/*
 * igb_identify_hardware - verify PCI command-register bits (bus master
 * and memory access), re-enabling them if the BIOS left them off, then
 * cache vendor/device/revision/subsystem IDs and run the shared-code
 * MAC-type setup.
 */
2053 igb_identify_hardware(struct adapter *adapter)
2055 device_t dev = adapter->dev;
2057 /* Make sure our PCI config space has the necessary stuff set */
2058 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2059 if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2060 (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2061 device_printf(dev, "Memory Access and/or Bus Master bits "
/* Re-enable the bits the BIOS failed to set */
2063 adapter->hw.bus.pci_cmd_word |=
2064 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2065 pci_write_config(dev, PCIR_COMMAND,
2066 adapter->hw.bus.pci_cmd_word, 2);
2069 /* Save off the information about this board */
2070 adapter->hw.vendor_id = pci_get_vendor(dev);
2071 adapter->hw.device_id = pci_get_device(dev);
2072 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2073 adapter->hw.subsystem_vendor_id =
2074 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2075 adapter->hw.subsystem_device_id =
2076 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2078 /* Do Shared Code Init and Setup */
2079 if (e1000_set_mac_type(&adapter->hw)) {
2080 device_printf(dev, "Setup init failure\n");
/*
 * igb_allocate_pci_resources - map the device's register BAR, record the
 * bus-space tag/handle for the shared code, default to a single queue,
 * and attempt MSI-X/MSI setup via igb_setup_msix().
 */
2086 igb_allocate_pci_resources(struct adapter *adapter)
2088 device_t dev = adapter->dev;
2092 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2094 if (adapter->pci_mem == NULL) {
2095 device_printf(dev, "Unable to allocate bus resource: memory\n");
2098 adapter->osdep.mem_bus_space_tag =
2099 rman_get_bustag(adapter->pci_mem);
2100 adapter->osdep.mem_bus_space_handle =
2101 rman_get_bushandle(adapter->pci_mem);
/* Shared code accesses registers through this handle pointer */
2102 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2104 adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2106 /* This will setup either MSI/X or MSI */
2107 adapter->msix = igb_setup_msix(adapter);
2108 adapter->hw.back = &adapter->osdep;
2113 /*********************************************************************
2115 * Setup the Legacy or MSI Interrupt handler
2117 **********************************************************************/
/*
 * igb_allocate_legacy - set up the legacy/MSI interrupt path: allocate
 * the single IRQ resource, create the rxtx and link taskqueues, and
 * register igb_irq_fast() as a filter handler.
 * NOTE(review): rid initialization and return statements are elided in
 * this view.
 */
2119 igb_allocate_legacy(struct adapter *adapter)
2121 device_t dev = adapter->dev;
2122 struct igb_queue *que = adapter->queues;
2125 /* Turn off all interrupts */
2126 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
/* MSI uses rid 1; legacy INTx uses rid 0 (assignment elided) */
2129 if (adapter->msix == 1)
2132 /* We allocate a single interrupt resource */
2133 adapter->res = bus_alloc_resource_any(dev,
2134 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2135 if (adapter->res == NULL) {
2136 device_printf(dev, "Unable to allocate bus resource: "
2142 * Try allocating a fast interrupt and the associated deferred
2143 * processing contexts.
2145 TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, que);
2146 /* Make tasklet for deferred link handling */
2147 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2148 adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2149 taskqueue_thread_enqueue, &adapter->tq);
2150 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2151 device_get_nameunit(adapter->dev));
/* Filter-only handler: igb_irq_fast defers everything to the tq */
2152 if ((error = bus_setup_intr(dev, adapter->res,
2153 INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2154 adapter, &adapter->tag)) != 0) {
2155 device_printf(dev, "Failed to register fast interrupt "
2156 "handler: %d\n", error);
2157 taskqueue_free(adapter->tq);
2166 /*********************************************************************
2168 * Setup the MSIX Queue Interrupt handlers:
2170 **********************************************************************/
/*
 * igb_allocate_msix - set up MSI-X vectors: one IRQ + handler + taskqueue
 * per queue (optionally CPU-bound), plus a final vector for link events.
 * NOTE(review): rid computation (typically vector + 1) and error returns
 * are elided in this view.
 */
2172 igb_allocate_msix(struct adapter *adapter)
2174 device_t dev = adapter->dev;
2175 struct igb_queue *que = adapter->queues;
2176 int error, rid, vector = 0;
2179 for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2181 que->res = bus_alloc_resource_any(dev,
2182 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2183 if (que->res == NULL) {
2185 "Unable to allocate bus resource: "
2186 "MSIX Queue Interrupt\n");
2189 error = bus_setup_intr(dev, que->res,
2190 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2191 igb_msix_que, que, &que->tag);
2194 device_printf(dev, "Failed to register Queue handler");
/* EIMS bit layout differs: 82575 uses TX_QUEUE0<<i, later parts
 * use a plain per-vector bit */
2198 if (adapter->hw.mac.type == e1000_82575)
2199 que->eims = E1000_EICR_TX_QUEUE0 << i;
2201 que->eims = 1 << vector;
2203 ** Bind the msix vector, and thus the
2204 ** rings to the corresponding cpu.
2206 if (adapter->num_queues > 1)
2207 bus_bind_intr(dev, que->res, i);
2208 /* Make tasklet for deferred handling */
2209 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2210 que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2211 taskqueue_thread_enqueue, &que->tq);
2212 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2213 device_get_nameunit(adapter->dev));
/* --- link (other-cause) vector --- */
2218 adapter->res = bus_alloc_resource_any(dev,
2219 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2220 if (adapter->res == NULL) {
2222 "Unable to allocate bus resource: "
2223 "MSIX Link Interrupt\n");
2226 if ((error = bus_setup_intr(dev, adapter->res,
2227 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2228 igb_msix_link, adapter, &adapter->tag)) != 0) {
2229 device_printf(dev, "Failed to register Link handler");
2232 adapter->linkvec = vector;
2234 /* Make tasklet for deferred handling */
2235 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2236 adapter->tq = taskqueue_create_fast("igb_link", M_NOWAIT,
2237 taskqueue_thread_enqueue, &adapter->tq);
2238 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s link",
2239 device_get_nameunit(adapter->dev));
/*
 * igb_configure_queues - program MSI-X interrupt routing per MAC type:
 * IVAR-based mapping for 82576/82580-class parts, MSIXBM bitmaps for
 * the 82575, plus the link/other-cause vector; then set the starting
 * EITR interrupt rate for every queue.
 * NOTE(review): switch case labels and several closing braces are elided
 * in this view — the three per-MAC sections are annotated below.
 */
2246 igb_configure_queues(struct adapter *adapter)
2248 struct e1000_hw *hw = &adapter->hw;
2249 struct igb_queue *que;
2251 u32 newitr = IGB_DEFAULT_ITR;
2253 /* First turn on RSS capability */
2254 if (adapter->hw.mac.type > e1000_82575)
2255 E1000_WRITE_REG(hw, E1000_GPIE,
2256 E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2257 E1000_GPIE_PBA | E1000_GPIE_NSICR);
2260 switch (adapter->hw.mac.type) {
/* --- first IVAR-based section: RX then TX vector mapping --- */
2263 for (int i = 0; i < adapter->num_queues; i++) {
2265 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2266 que = &adapter->queues[i];
2269 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2272 ivar |= que->msix | E1000_IVAR_VALID;
2274 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2277 for (int i = 0; i < adapter->num_queues; i++) {
2279 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2280 que = &adapter->queues[i];
2283 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2286 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2288 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2289 adapter->eims_mask |= que->eims;
2292 /* And for the link interrupt */
2293 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2294 adapter->link_mask = 1 << adapter->linkvec;
2295 adapter->eims_mask |= adapter->link_mask;
2296 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
/* --- second IVAR-based section (different byte offsets) --- */
2300 for (int i = 0; i < adapter->num_queues; i++) {
2301 u32 index = i & 0x7; /* Each IVAR has two entries */
2302 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2303 que = &adapter->queues[i];
2306 ivar |= que->msix | E1000_IVAR_VALID;
2309 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2311 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2312 adapter->eims_mask |= que->eims;
2315 for (int i = 0; i < adapter->num_queues; i++) {
2316 u32 index = i & 0x7; /* Each IVAR has two entries */
2317 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2318 que = &adapter->queues[i];
2321 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2324 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2326 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2327 adapter->eims_mask |= que->eims;
2330 /* And for the link interrupt */
2331 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2332 adapter->link_mask = 1 << adapter->linkvec;
2333 adapter->eims_mask |= adapter->link_mask;
2334 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
/* --- 82575: MSIXBM bitmap routing instead of IVARs --- */
2338 /* enable MSI-X support*/
2339 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2340 tmp |= E1000_CTRL_EXT_PBA_CLR;
2341 /* Auto-Mask interrupts upon ICR read. */
2342 tmp |= E1000_CTRL_EXT_EIAME;
2343 tmp |= E1000_CTRL_EXT_IRCA;
2344 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2347 for (int i = 0; i < adapter->num_queues; i++) {
2348 que = &adapter->queues[i];
2349 tmp = E1000_EICR_RX_QUEUE0 << i;
2350 tmp |= E1000_EICR_TX_QUEUE0 << i;
2352 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2354 adapter->eims_mask |= que->eims;
2358 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2360 adapter->link_mask |= E1000_EIMS_OTHER;
2361 adapter->eims_mask |= adapter->link_mask;
2366 /* Set the starting interrupt rate */
2367 if (hw->mac.type == e1000_82575)
2368 newitr |= newitr << 16;
2370 newitr |= E1000_EITR_CNT_IGNR;
2372 for (int i = 0; i < adapter->num_queues; i++) {
2373 que = &adapter->queues[i];
2374 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
/*
 * igb_free_pci_resources - tear down, in order: per-queue interrupt
 * handlers and IRQ resources, the legacy/link interrupt, MSI allocation,
 * the MSI-X table BAR, and finally the register BAR. Bails out early if
 * attach failed before interrupt resources existed (adapter->res NULL).
 */
2382 igb_free_pci_resources(struct adapter *adapter)
2384 struct igb_queue *que = adapter->queues;
2385 device_t dev = adapter->dev;
2389 ** There is a slight possibility of a failure mode
2390 ** in attach that will result in entering this function
2391 ** before interrupt resources have been initialized, and
2392 ** in that case we do not want to execute the loops below
2393 ** We can detect this reliably by the state of the adapter
2396 if (adapter->res == NULL)
2400 * First release all the interrupt resources:
2402 for (int i = 0; i < adapter->num_queues; i++, que++) {
/* MSI-X queue rids are vector + 1 */
2403 rid = que->msix + 1;
2404 if (que->tag != NULL) {
2405 bus_teardown_intr(dev, que->res, que->tag);
2408 if (que->res != NULL)
2409 bus_release_resource(dev,
2410 SYS_RES_IRQ, rid, que->res);
2413 /* Clean the Legacy or Link interrupt last */
2414 if (adapter->linkvec) /* we are doing MSIX */
2415 rid = adapter->linkvec + 1;
2417 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2419 if (adapter->tag != NULL) {
2420 bus_teardown_intr(dev, adapter->res, adapter->tag);
2421 adapter->tag = NULL;
2423 if (adapter->res != NULL)
2424 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2428 pci_release_msi(dev);
2430 if (adapter->msix_mem != NULL)
2431 bus_release_resource(dev, SYS_RES_MEMORY,
2432 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2434 if (adapter->pci_mem != NULL)
2435 bus_release_resource(dev, SYS_RES_MEMORY,
2436 PCIR_BAR(0), adapter->pci_mem);
2441 * Setup Either MSI/X or MSI
/*
 * igb_setup_msix - negotiate MSI-X (preferred) or MSI.
 * Maps the MSI-X table BAR, computes a queue count from available
 * vectors and CPU count (tunable override via igb_num_queues; 82575
 * capped at 4), requests queues+1 vectors (the extra one for link),
 * and falls back to single MSI when MSI-X is unavailable.
 * NOTE(review): the returned vector counts and the fallback `goto msi`
 * paths are elided in this view.
 */
2444 igb_setup_msix(struct adapter *adapter)
2446 device_t dev = adapter->dev;
2447 int rid, want, queues, msgs;
2449 /* tuneable override */
2450 if (igb_enable_msix == 0)
2453 /* First try MSI/X */
2454 rid = PCIR_BAR(IGB_MSIX_BAR);
2455 adapter->msix_mem = bus_alloc_resource_any(dev,
2456 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2457 if (!adapter->msix_mem) {
2458 /* May not be enabled */
2459 device_printf(adapter->dev,
2460 "Unable to map MSIX table \n");
2464 msgs = pci_msix_count(dev);
2465 if (msgs == 0) { /* system has msix disabled */
2466 bus_release_resource(dev, SYS_RES_MEMORY,
2467 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2468 adapter->msix_mem = NULL;
2472 /* Figure out a reasonable auto config value */
2473 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2475 /* Manual override */
2476 if (igb_num_queues != 0)
2477 queues = igb_num_queues;
2479 /* Can have max of 4 queues on 82575 */
2480 if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2484 ** One vector (RX/TX pair) per queue
2485 ** plus an additional for Link interrupt
2491 device_printf(adapter->dev,
2492 "MSIX Configuration Problem, "
2493 "%d vectors configured, but %d queues wanted!\n",
2497 if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2498 device_printf(adapter->dev,
2499 "Using MSIX interrupts with %d vectors\n", msgs);
2500 adapter->num_queues = queues;
/* MSI-X failed or disabled: fall back to a single MSI vector */
2504 msgs = pci_msi_count(dev);
2505 if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2506 device_printf(adapter->dev,"Using MSI interrupt\n");
2510 /*********************************************************************
2512 * Set up a fresh starting state: program the packet buffer split,
* flow-control watermarks, issue a global reset, and (on 82580)
* configure DMA coalescing.
2514 **********************************************************************/
2516 igb_reset(struct adapter *adapter)
2518 	device_t dev = adapter->dev;
2519 	struct e1000_hw *hw = &adapter->hw;
2520 	struct e1000_fc_info *fc = &hw->fc;
2521 	struct ifnet *ifp = adapter->ifp;
2525 	INIT_DEBUGOUT("igb_reset: begin");
2527 	/* Let the firmware know the OS is in control */
2528 	igb_get_hw_control(adapter);
2531 	 * Packet Buffer Allocation (PBA)
2532 	 * Writing PBA sets the receive portion of the buffer
2533 	 * the remainder is used for the transmit buffer.
2535 	switch (hw->mac.type) {
2537 		pba = E1000_PBA_32K;
2540 		pba = E1000_PBA_64K;
2543 		pba = E1000_PBA_35K;
2548 	/* Special needs in case of Jumbo frames */
2549 	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2550 		u32 tx_space, min_tx, min_rx;
2551 		pba = E1000_READ_REG(hw, E1000_PBA);
		/* upper 16 bits of PBA is the TX allocation */
2552 		tx_space = pba >> 16;
		/* need room for two full frames plus descriptor, sans FCS */
2554 		min_tx = (adapter->max_frame_size +
2555 		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2556 		min_tx = roundup2(min_tx, 1024);
2558 		min_rx = adapter->max_frame_size;
2559 		min_rx = roundup2(min_rx, 1024);
2561 		if (tx_space < min_tx &&
2562 		    ((min_tx - tx_space) < pba)) {
2563 			pba = pba - (min_tx - tx_space);
2565 			 * if short on rx space, rx wins
2566 			 * and must trump tx adjustment
2571 		E1000_WRITE_REG(hw, E1000_PBA, pba);
2574 	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2577 	 * These parameters control the automatic generation (Tx) and
2578 	 * response (Rx) to Ethernet PAUSE frames.
2579 	 * - High water mark should allow for at least two frames to be
2580 	 *   received after sending an XOFF.
2581 	 * - Low water mark works best when it is very near the high water mark.
2582 	 *   This allows the receiver to restart by sending XON when it has
2585 	hwm = min(((pba << 10) * 9 / 10),
2586 	    ((pba << 10) - 2 * adapter->max_frame_size));
2588 	if (hw->mac.type < e1000_82576) {
2589 		fc->high_water = hwm & 0xFFF8;	/* 8-byte granularity */
2590 		fc->low_water = fc->high_water - 8;
2592 		fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
2593 		fc->low_water = fc->high_water - 16;
2596 	fc->pause_time = IGB_FC_PAUSE_TIME;
2597 	fc->send_xon = TRUE;
2599 	/* Set Flow control, use the tunable location if sane */
	/*
	 * FIX: was "|| " which is always true, so an out-of-range
	 * tunable (e.g. -1 or 7) was written straight into
	 * fc->requested_mode.  The sanity check requires BOTH bounds:
	 * valid e1000 fc modes are 0..3.
	 */
2600 	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2601 		fc->requested_mode = igb_fc_setting;
2603 		fc->requested_mode = e1000_fc_none;
2605 	fc->current_mode = fc->requested_mode;
2607 	/* Issue a global reset */
	/* clear Wake Up Control so wake-on-LAN state does not persist */
2609 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2611 	if (e1000_init_hw(hw) < 0)
2612 		device_printf(dev, "Hardware Initialization Failed\n");
2614 	if (hw->mac.type == e1000_82580) {
2617 		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2619 		 * 0x80000000 - enable DMA COAL
2620 		 * 0x10000000 - use L0s as low power
2621 		 * 0x20000000 - use L1 as low power
2622 		 * X << 16 - exit dma coal when rx data exceeds X kB
2623 		 * Y - upper limit to stay in dma coal in units of 32usecs
2625 		E1000_WRITE_REG(hw, E1000_DMACR,
2626 		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
2628 		/* set hwm to PBA - 2 * max frame size */
2629 		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2631 		 * This sets the time to wait before requesting transition to
2632 		 * low power state to number of usecs needed to receive 1 512
2633 		 * byte frame at gigabit line rate
2635 		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2637 		/* free space in tx packet buffer to wake from DMA coal */
2638 		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2639 		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2641 		/* make low power state decision controlled by DMA coal */
2642 		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2643 		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2644 		    reg | E1000_PCIEMISC_LX_DECISION);
	/* re-program the VLAN ethertype lost across the reset */
2647 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2648 	e1000_get_phy_info(hw);
2649 	e1000_check_for_link(hw);
2653 /*********************************************************************
2655 *  Setup networking device structure and register an interface.
2657 **********************************************************************/
/*
 * igb_setup_interface
 *  Allocate and initialize the ifnet, attach the ethernet layer,
 *  advertise capabilities (checksum, TSO4, jumbo, VLAN, polling),
 *  and register the supported media types.
 */
2659 igb_setup_interface(device_t dev, struct adapter *adapter)
2663 	INIT_DEBUGOUT("igb_setup_interface: begin");
2665 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2667 		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2668 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2669 	ifp->if_mtu = ETHERMTU;
2670 	ifp->if_init =  igb_init;
2671 	ifp->if_softc = adapter;
2672 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2673 	ifp->if_ioctl = igb_ioctl;
2674 	ifp->if_start = igb_start;
2675 #if __FreeBSD_version >= 800000
	/* multiqueue transmit entry points (8.x and newer) */
2676 	ifp->if_transmit = igb_mq_start;
2677 	ifp->if_qflush = igb_qflush;
2679 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2680 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2681 	IFQ_SET_READY(&ifp->if_snd);
2683 	ether_ifattach(ifp, adapter->hw.mac.addr);
2685 	ifp->if_capabilities = ifp->if_capenable = 0;
2687 	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2688 	ifp->if_capabilities |= IFCAP_TSO4;
2689 	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2690 	if (igb_header_split)
2691 		ifp->if_capabilities |= IFCAP_LRO;
2693 	ifp->if_capenable = ifp->if_capabilities;
2694 #ifdef DEVICE_POLLING
	/* polling is advertised but not enabled by default */
2695 	ifp->if_capabilities |= IFCAP_POLLING;
2699 	 * Tell the upper layer(s) we
2700 	 * support full VLAN capability.
2702 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2703 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2704 	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2707 	** Dont turn this on by default, if vlans are
2708 	** created on another pseudo device (eg. lagg)
2709 	** then vlan events are not passed thru, breaking
2710 	** operation, but with HW FILTER off it works. If
2711 	** using vlans directly on the em driver you can
2712 	** enable this and get full hardware tag filtering.
2714 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2717 	 * Specify the media types supported by this adapter and register
2718 	 * callbacks to update media and link information
2720 	ifmedia_init(&adapter->media, IFM_IMASK,
2721 	    igb_media_change, igb_media_status);
2722 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2723 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2724 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2726 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2728 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2729 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2731 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2733 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
		/* 1000baseT not offered on IFE-class PHYs */
2735 		if (adapter->hw.phy.type != e1000_phy_ife) {
2736 			ifmedia_add(&adapter->media,
2737 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2738 			ifmedia_add(&adapter->media,
2739 				IFM_ETHER | IFM_1000_T, 0, NULL);
2742 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2743 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2748 * Manage DMA'able memory.
/*
 * igb_dmamap_cb
 *  bus_dmamap_load callback: stores the single mapped segment's bus
 *  address into the caller-supplied bus_addr_t (arg).
 *  NOTE(review): the error/nseg guard lines are not visible in this
 *  sampled view.
 */
2751 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2755 	*(bus_addr_t *) arg = segs[0].ds_addr;
/*
 * igb_dma_malloc
 *  Allocate a DMA-safe memory region of 'size' bytes for descriptor
 *  rings: create a tag, allocate/map the memory, and load it to obtain
 *  the bus address (stored in dma->dma_paddr via igb_dmamap_cb).
 *  Cleans up partially-acquired resources on failure.
 */
2759 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2760         struct igb_dma_alloc *dma, int mapflags)
2764 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2765 				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2766 				BUS_SPACE_MAXADDR,	/* lowaddr */
2767 				BUS_SPACE_MAXADDR,	/* highaddr */
2768 				NULL, NULL,		/* filter, filterarg */
2771 				size,			/* maxsegsize */
2773 				NULL,			/* lockfunc */
2777 		device_printf(adapter->dev,
2778 		    "%s: bus_dma_tag_create failed: %d\n",
2783 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2784 	    BUS_DMA_NOWAIT, &dma->dma_map);
2786 		device_printf(adapter->dev,
2787 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2788 		    __func__, (uintmax_t)size, error);
2793 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2794 	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2795 	if (error || dma->dma_paddr == 0) {
2796 		device_printf(adapter->dev,
2797 		    "%s: bus_dmamap_load failed: %d\n",
	/* failure unwind: release in reverse order of acquisition */
2805 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2807 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2808 	bus_dma_tag_destroy(dma->dma_tag);
2810 	dma->dma_map = NULL;
2811 	dma->dma_tag = NULL;
/*
 * igb_dma_free
 *  Release a region allocated by igb_dma_malloc: sync, unload, free
 *  the memory, and destroy the tag.  Safe to call when nothing was
 *  allocated (tag == NULL).
 */
2817 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2819 	if (dma->dma_tag == NULL)
2821 	if (dma->dma_map != NULL) {
2822 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2823 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2824 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2825 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2826 		dma->dma_map = NULL;
2828 	bus_dma_tag_destroy(dma->dma_tag);
2829 	dma->dma_tag = NULL;
2833 /*********************************************************************
2835 *  Allocate memory for the transmit and receive rings, and then
2836 *  the descriptors associated with each, called only once at attach.
2838 **********************************************************************/
2840 igb_allocate_queues(struct adapter *adapter)
2842 	device_t dev = adapter->dev;
2843 	struct igb_queue *que = NULL;
2844 	struct tx_ring *txr = NULL;
2845 	struct rx_ring *rxr = NULL;
2846 	int rsize, tsize, error = E1000_SUCCESS;
	/* txconf/rxconf count rings set up so far, for failure unwind */
2847 	int txconf = 0, rxconf = 0;
2849 	/* First allocate the top level queue structs */
2850 	if (!(adapter->queues =
2851 	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2852 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2853 		device_printf(dev, "Unable to allocate queue memory\n");
2858 	/* Next allocate the TX ring struct memory */
2859 	if (!(adapter->tx_rings =
2860 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2861 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2862 		device_printf(dev, "Unable to allocate TX ring memory\n");
2867 	/* Now allocate the RX */
2868 	if (!(adapter->rx_rings =
2869 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2870 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2871 		device_printf(dev, "Unable to allocate RX ring memory\n");
2876 	tsize = roundup2(adapter->num_tx_desc *
2877 	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2879 	 * Now set up the TX queues, txconf is needed to handle the
2880 	 * possibility that things fail midcourse and we need to
2881 	 * undo memory gracefully
2883 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2884 		/* Set up some basics */
2885 		txr = &adapter->tx_rings[i];
2886 		txr->adapter = adapter;
2889 		/* Initialize the TX lock */
2890 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2891 		    device_get_nameunit(dev), txr->me);
2892 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2894 		if (igb_dma_malloc(adapter, tsize,
2895 		    &txr->txdma, BUS_DMA_NOWAIT)) {
2897 			    "Unable to allocate TX Descriptor memory\n");
2901 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2902 		bzero((void *)txr->tx_base, tsize);
2904         	/* Now allocate transmit buffers for the ring */
2905         	if (igb_allocate_transmit_buffers(txr)) {
2907 			    "Critical Failure setting up transmit buffers\n");
2911 #if __FreeBSD_version >= 800000
2912 		/* Allocate a buf ring */
2913 		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2914 		    M_WAITOK, &txr->tx_mtx);
2919 	 * Next the RX queues...
2921 	rsize = roundup2(adapter->num_rx_desc *
2922 	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2923 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2924 		rxr = &adapter->rx_rings[i];
2925 		rxr->adapter = adapter;
2928 		/* Initialize the RX lock */
2929 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		/*
		 * FIX: was txr->me (copy-paste from the TX loop above),
		 * which named every RX mutex after the last TX ring and
		 * made lock diagnostics (witness, ddb) misleading.
		 */
2930 		    device_get_nameunit(dev), rxr->me);
2931 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2933 		if (igb_dma_malloc(adapter, rsize,
2934 		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
2936 			    "Unable to allocate RxDescriptor memory\n");
2940 		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2941 		bzero((void *)rxr->rx_base, rsize);
2943         	/* Allocate receive buffers for the ring*/
2944         	if (igb_allocate_receive_buffers(rxr)) {
2946 			    "Critical Failure setting up receive buffers\n");
2953 	** Finally set up the queue holding structs
2955 	for (int i = 0; i < adapter->num_queues; i++) {
2956 		que = &adapter->queues[i];
2957 		que->adapter = adapter;
2958 		que->txr = &adapter->tx_rings[i];
2959 		que->rxr = &adapter->rx_rings[i];
	/* failure unwind: free whatever DMA rings were configured */
2965 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2966 		igb_dma_free(adapter, &rxr->rxdma);
2968 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2969 		igb_dma_free(adapter, &txr->txdma);
2970 	free(adapter->rx_rings, M_DEVBUF);
2972 		buf_ring_free(txr->br, M_DEVBUF);
2973 	free(adapter->tx_rings, M_DEVBUF);
2975 	free(adapter->queues, M_DEVBUF);
2980 /*********************************************************************
2982 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2983 *  the information needed to transmit a packet on the wire. This is
2984 *  called only once at attach, setup is done every reset.
2986 **********************************************************************/
2988 igb_allocate_transmit_buffers(struct tx_ring *txr)
2990 	struct adapter *adapter = txr->adapter;
2991 	device_t dev = adapter->dev;
2992 	struct igb_tx_buffer *txbuf;
2996 	 * Setup DMA descriptor areas.
2998 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2999 			       1, 0,			/* alignment, bounds */
3000 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3001 			       BUS_SPACE_MAXADDR,	/* highaddr */
3002 			       NULL, NULL,		/* filter, filterarg */
3003 			       IGB_TSO_SIZE,		/* maxsize */
3004 			       IGB_MAX_SCATTER,		/* nsegments */
3005 			       PAGE_SIZE,		/* maxsegsize */
3007 			       NULL,			/* lockfunc */
3008 			       NULL,			/* lockfuncarg */
3010 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3014 	if (!(txr->tx_buffers =
3015 	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3016 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3017 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3022         /* Create the descriptor buffer dma maps */
3023 	txbuf = txr->tx_buffers;
3024 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3025 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3027 			device_printf(dev, "Unable to create TX DMA map\n");
	/* failure path */
3034 	/* We free all, it handles case where we are in the middle */
3035 	igb_free_transmit_structures(adapter);
3039 /*********************************************************************
3041 *  Initialize a transmit ring.
3043 **********************************************************************/
/*
 * igb_setup_transmit_ring
 *  Reset one TX ring to a clean state: zero the descriptors, reset
 *  head/tail indices, free any mbufs still attached to buffers, and
 *  sync the descriptor DMA area for device use.
 */
3045 igb_setup_transmit_ring(struct tx_ring *txr)
3047 	struct adapter *adapter = txr->adapter;
3048 	struct igb_tx_buffer *txbuf;
3051 	/* Clear the old descriptor contents */
3053 	bzero((void *)txr->tx_base,
3054 	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc;
3056 	txr->next_avail_desc = 0;
3057 	txr->next_to_clean = 0;
3059 	/* Free any existing tx buffers. */
3060         txbuf = txr->tx_buffers;
3061 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3062 		if (txbuf->m_head != NULL) {
3063 			bus_dmamap_sync(txr->txtag, txbuf->map,
3064 			    BUS_DMASYNC_POSTWRITE);
3065 			bus_dmamap_unload(txr->txtag, txbuf->map);
3066 			m_freem(txbuf->m_head);
3067 			txbuf->m_head = NULL;
3069 		/* clear the watch index */
3070 		txbuf->next_eop = -1;
3073 	/* Set number of descriptors available */
3074 	txr->tx_avail = adapter->num_tx_desc;
3076 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3077 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3081 /*********************************************************************
3083 *  Initialize all transmit rings.
3085 **********************************************************************/
/*
 * igb_setup_transmit_structures
 *  Re-initialize every TX ring via igb_setup_transmit_ring().
 */
3087 igb_setup_transmit_structures(struct adapter *adapter)
3089 	struct tx_ring *txr = adapter->tx_rings;
3091 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3092 		igb_setup_transmit_ring(txr);
3097 /*********************************************************************
3099 *  Enable transmit unit.
3101 **********************************************************************/
/*
 * igb_initialize_transmit_units
 *  Program each TX ring's base address, length, head/tail pointers
 *  and TXDCTL thresholds into the hardware, then enable the
 *  transmitter via TCTL.
 */
3103 igb_initialize_transmit_units(struct adapter *adapter)
3105 	struct tx_ring *txr = adapter->tx_rings;
3106 	struct e1000_hw *hw = &adapter->hw;
3109 	 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3111 	/* Setup the Tx Descriptor Rings */
3112 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3113 		u64 bus_addr = txr->txdma.dma_paddr;
3115 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3116 		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3117 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3118 		    (uint32_t)(bus_addr >> 32));
3119 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3120 		    (uint32_t)bus_addr);
3122 		/* Setup the HW Tx Head and Tail descriptor pointers */
3123 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3124 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3126 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3127 		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3128 		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3130 		txr->watchdog_check = FALSE;
		/* prefetch/host/writeback thresholds, then enable queue */
3132 		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3133 		txdctl |= IGB_TX_PTHRESH;
3134 		txdctl |= IGB_TX_HTHRESH << 8;
3135 		txdctl |= IGB_TX_WTHRESH << 16;
3136 		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3137 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3140 	/* Program the Transmit Control Register */
3141 	tctl = E1000_READ_REG(hw, E1000_TCTL);
3142 	tctl &= ~E1000_TCTL_CT;
3143 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3144 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3146 	e1000_config_collision_dist(hw);
3148 	/* This write will effectively turn on the transmit unit. */
3149 	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3152 /*********************************************************************
3154 *  Free all transmit rings.
3156 **********************************************************************/
/*
 * igb_free_transmit_structures
 *  For each TX ring: free its buffers, release the descriptor DMA
 *  region, destroy the ring mutex; then free the ring array itself.
 */
3158 igb_free_transmit_structures(struct adapter *adapter)
3160 	struct tx_ring *txr = adapter->tx_rings;
3162 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3164 		igb_free_transmit_buffers(txr);
3165 		igb_dma_free(adapter, &txr->txdma);
3167 		IGB_TX_LOCK_DESTROY(txr);
3169 	free(adapter->tx_rings, M_DEVBUF);
3172 /*********************************************************************
3174 *  Free transmit ring related data structures.
3176 **********************************************************************/
/*
 * igb_free_transmit_buffers
 *  Release every tx_buffer's mbuf and DMA map, the optional buf_ring,
 *  the buffer array, and finally the TX DMA tag.  Tolerates partial
 *  initialization (NULL checks throughout).
 */
3178 igb_free_transmit_buffers(struct tx_ring *txr)
3180 	struct adapter *adapter = txr->adapter;
3181 	struct igb_tx_buffer *tx_buffer;
3184 	INIT_DEBUGOUT("free_transmit_ring: begin");
3186 	if (txr->tx_buffers == NULL)
3189 	tx_buffer = txr->tx_buffers;
3190 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3191 		if (tx_buffer->m_head != NULL) {
3192 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3193 			    BUS_DMASYNC_POSTWRITE);
3194 			bus_dmamap_unload(txr->txtag,
3196 			m_freem(tx_buffer->m_head);
3197 			tx_buffer->m_head = NULL;
3198 			if (tx_buffer->map != NULL) {
3199 				bus_dmamap_destroy(txr->txtag,
3201 				tx_buffer->map = NULL;
		/* no mbuf attached, but a map may still exist */
3203 		} else if (tx_buffer->map != NULL) {
3204 			bus_dmamap_unload(txr->txtag,
3206 			bus_dmamap_destroy(txr->txtag,
3208 			tx_buffer->map = NULL;
3211 #if __FreeBSD_version >= 800000
3212 	if (txr->br != NULL)
3213 		buf_ring_free(txr->br, M_DEVBUF);
3215 	if (txr->tx_buffers != NULL) {
3216 		free(txr->tx_buffers, M_DEVBUF);
3217 		txr->tx_buffers = NULL;
3219 	if (txr->txtag != NULL) {
3220 		bus_dma_tag_destroy(txr->txtag);
3226 /**********************************************************************
3228 *  Setup work for hardware segmentation offload (TSO)
3230 **********************************************************************/
/*
 * igb_tso_setup
 *  Build an advanced context descriptor for a TSO (IPv4/TCP only)
 *  frame.  Computes header lengths from the mbuf, fills in the
 *  vlan/maclen/iplen and mss/l4len fields, consumes one descriptor,
 *  and returns the total header length via *hdrlen.
 */
3232 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3234 	struct adapter *adapter = txr->adapter;
3235 	struct e1000_adv_tx_context_desc *TXD;
3236 	struct igb_tx_buffer        *tx_buffer;
3237 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3238 	u32 mss_l4len_idx = 0;
3240 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3241 	struct ether_vlan_header *eh;
3247 	 * Determine where frame payload starts.
3248 	 * Jump over vlan headers if already present
3250 	eh = mtod(mp, struct ether_vlan_header *);
3251 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3252 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3254 		ehdrlen = ETHER_HDR_LEN;
3256 	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3257 	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3260 	/* Only supports IPV4 for now */
3261 	ctxd = txr->next_avail_desc;
3262 	tx_buffer = &txr->tx_buffers[ctxd];
3263 	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3265 	ip = (struct ip *)(mp->m_data + ehdrlen);
3266 	if (ip->ip_p != IPPROTO_TCP)
3267                 return FALSE;   /* 0 */
3269 	ip_hlen = ip->ip_hl << 2;
3270 	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
	/* pre-seed the TCP pseudo-header checksum for the hardware */
3271 	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3272 	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3273 	tcp_hlen = th->th_off << 2;
3275 	 * Calculate header length, this is used
3276 	 * in the transmit desc in igb_xmit
3278 	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3280 	/* VLAN MACLEN IPLEN */
3281 	if (mp->m_flags & M_VLANTAG) {
3282 		vtag = htole16(mp->m_pkthdr.ether_vtag);
3283 		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3286 	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3287 	vlan_macip_lens |= ip_hlen;
3288 	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3290 	/* ADV DTYPE TUCMD */
3291 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3292 	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3293 	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3294 	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3297 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3298 	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3299 	/* 82575 needs the queue index added */
3300 	if (adapter->hw.mac.type == e1000_82575)
3301 		mss_l4len_idx |= txr->me << 4;
3302 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3304 	TXD->seqnum_seed = htole32(0);
3305 	tx_buffer->m_head = NULL;
3306 	tx_buffer->next_eop = -1;
	/* advance ring index, wrapping at ring end */
3308 	if (++ctxd == adapter->num_tx_desc)
3312 	txr->next_avail_desc = ctxd;
3317 /*********************************************************************
3319 *  Context Descriptor setup for VLAN or CSUM
3321 **********************************************************************/
/*
 * igb_tx_ctx_setup
 *  Build an advanced context descriptor carrying the VLAN tag and/or
 *  checksum-offload parameters (IPv4/IPv6, TCP/UDP/SCTP) for a frame.
 *  Consumes one descriptor slot.
 */
3324 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3326 	struct adapter *adapter = txr->adapter;
3327 	struct e1000_adv_tx_context_desc *TXD;
3328 	struct igb_tx_buffer        *tx_buffer;
3329 	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3330 	struct ether_vlan_header *eh;
3331 	struct ip *ip = NULL;
3332 	struct ip6_hdr *ip6;
3333 	int  ehdrlen, ctxd, ip_hlen = 0;
3334 	u16	etype, vtag = 0;
3336 	bool	offload = TRUE;
3338 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3341 	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3342 	ctxd = txr->next_avail_desc;
3343 	tx_buffer = &txr->tx_buffers[ctxd];
3344 	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3347 	** In advanced descriptors the vlan tag must 
3348 	** be placed into the context descriptor, thus
3349 	** we need to be here just for that setup.
3351 	if (mp->m_flags & M_VLANTAG) {
3352 		vtag = htole16(mp->m_pkthdr.ether_vtag);
3353 		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	/* no vlan and no csum work -> nothing to do */
3354 	} else if (offload == FALSE)
3358 	 * Determine where frame payload starts.
3359 	 * Jump over vlan headers if already present,
3360 	 * helpful for QinQ too.
3362 	eh = mtod(mp, struct ether_vlan_header *);
3363 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3364 		etype = ntohs(eh->evl_proto);
3365 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3367 		etype = ntohs(eh->evl_encap_proto);
3368 		ehdrlen = ETHER_HDR_LEN;
3371 	/* Set the ether header length */
3372 	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
	/* switch on ethertype (case labels not visible in this view) */
3376 		ip = (struct ip *)(mp->m_data + ehdrlen);
3377 		ip_hlen = ip->ip_hl << 2;
3378 		if (mp->m_len < ehdrlen + ip_hlen) {
3383 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3385 	case ETHERTYPE_IPV6:
3386 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3387 		ip_hlen = sizeof(struct ip6_hdr);
3388 		if (mp->m_len < ehdrlen + ip_hlen)
3390 		ipproto = ip6->ip6_nxt;
3391 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3398 	vlan_macip_lens |= ip_hlen;
3399 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3403 		if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3404 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3407 		if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3408 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3410 #if __FreeBSD_version >= 800000
3412 		if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3413 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3421 	/* 82575 needs the queue index added */
3422 	if (adapter->hw.mac.type == e1000_82575)
3423 		mss_l4len_idx = txr->me << 4;
3425 	/* Now copy bits into descriptor */
3426 	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3427 	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3428 	TXD->seqnum_seed = htole32(0);
3429 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3431 	tx_buffer->m_head = NULL;
3432 	tx_buffer->next_eop = -1;
3434 	/* We've consumed the first desc, adjust counters */
3435 	if (++ctxd == adapter->num_tx_desc)
3437 	txr->next_avail_desc = ctxd;
3444 /**********************************************************************
3446 *  Examine each tx_buffer in the used queue.  If the hardware is done
3447 *  processing the packet then free associated resources.  The
3448 *  tx_buffer is put back on the free queue.
3450 *  TRUE return means there's work in the ring to clean, FALSE its empty.
3451 **********************************************************************/
3453 igb_txeof(struct tx_ring *txr)
3455 	struct adapter	*adapter = txr->adapter;
3456         int first, last, done;
3457         struct igb_tx_buffer *tx_buffer;
3458         struct e1000_tx_desc   *tx_desc, *eop_desc;
3459 	struct ifnet   *ifp = adapter->ifp;
3461 	IGB_TX_LOCK_ASSERT(txr);
	/* nothing sent -> nothing to clean */
3463         if (txr->tx_avail == adapter->num_tx_desc)
3466         first = txr->next_to_clean;
3467         tx_desc = &txr->tx_base[first];
3468         tx_buffer = &txr->tx_buffers[first];
3469 	last = tx_buffer->next_eop;
3470         eop_desc = &txr->tx_base[last];
3473 	 * What this does is get the index of the
3474 	 * first descriptor AFTER the EOP of the 
3475 	 * first packet, that way we can do the
3476 	 * simple comparison on the inner while loop.
3478 	if (++last == adapter->num_tx_desc)
3482 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3483 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	/* outer loop: one completed (DD-set) packet per iteration */
3485         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3486 		/* We clean the range of the packet */
3487 		while (first != done) {
3488 			tx_desc->upper.data = 0;
3489 			tx_desc->lower.data = 0;
3490 			tx_desc->buffer_addr = 0;
3493 			if (tx_buffer->m_head) {
3495 				    tx_buffer->m_head->m_pkthdr.len;
3496 				bus_dmamap_sync(txr->txtag,
3498 				    BUS_DMASYNC_POSTWRITE);
3499 				bus_dmamap_unload(txr->txtag,
3502 				m_freem(tx_buffer->m_head);
3503 				tx_buffer->m_head = NULL;
3505 			tx_buffer->next_eop = -1;
			/* feed the watchdog while progress is being made */
3506 			txr->watchdog_time = ticks;
3508 	                if (++first == adapter->num_tx_desc)
3511 	                tx_buffer = &txr->tx_buffers[first];
3512 			tx_desc = &txr->tx_base[first];
3516 		/* See if we can continue to the next packet */
3517 		last = tx_buffer->next_eop;
3519 			eop_desc = &txr->tx_base[last];
3520 			/* Get new done point */
3521 			if (++last == adapter->num_tx_desc) last = 0;
3526 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3527 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3529         txr->next_to_clean = first;
3532          * If we have enough room, clear IFF_DRV_OACTIVE
3533          * to tell the stack that it is OK to send packets.
3535         if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {                
3536                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3537 		/* All clean, turn off the watchdog */
3538                 if (txr->tx_avail == adapter->num_tx_desc) {
3539 			txr->watchdog_check = FALSE;
3548 /*********************************************************************
3550 *  Refresh mbuf buffers for RX descriptor rings
3551 *   - now keeps its own state so discards due to resource
3552 *     exhaustion are unnecessary, if an mbuf cannot be obtained
3553 *     it just returns, keeping its placeholder, thus it can simply
3554 *     be recalled to try again.
3556 **********************************************************************/
3558 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3560 	struct adapter		*adapter = rxr->adapter;
3561 	bus_dma_segment_t	hseg[1];
3562 	bus_dma_segment_t	pseg[1];
3563 	struct igb_rx_buf	*rxbuf;
3564 	struct mbuf		*mh, *mp;
3565 	int			i, nsegs, error, cleaned;
3567 	i = rxr->next_to_refresh;
3568 	cleaned = -1; /* Signify no completions */
3569 	while (i != limit) {
3570 		rxbuf = &rxr->rx_buffers[i];
		/* replenish the header buffer if it was consumed */
3571 		if (rxbuf->m_head == NULL) {
3572 			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3575 			mh->m_pkthdr.len = mh->m_len = MHLEN;
3577 			mh->m_flags |= M_PKTHDR;
			/* align IP header on a 32-bit boundary */
3578 			m_adj(mh, ETHER_ALIGN);
3579 			/* Get the memory mapping */
3580 			error = bus_dmamap_load_mbuf_sg(rxr->htag,
3581 			    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3583 				printf("GET BUF: dmamap load"
3584 				    " failure - %d\n", error);
3589 			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3590 			    BUS_DMASYNC_PREREAD);
3591 			rxr->rx_base[i].read.hdr_addr =
3592 			    htole64(hseg[0].ds_addr);
		/* replenish the packet (payload) buffer if consumed */
3595 		if (rxbuf->m_pack == NULL) {
3596 			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3597 			    M_PKTHDR, adapter->rx_mbuf_sz);
3600 			mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3601 			/* Get the memory mapping */
3602 			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3603 			    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3605 				printf("GET BUF: dmamap load"
3606 				    " failure - %d\n", error);
3611 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3612 			    BUS_DMASYNC_PREREAD);
3613 			rxr->rx_base[i].read.pkt_addr =
3614 			    htole64(pseg[0].ds_addr);
3618 		/* Calculate next index */
3619 		if (++i == adapter->num_rx_desc)
3621 		/* This is the work marker for refresh */
3622 		rxr->next_to_refresh = i;
3625 	if (cleaned != -1) /* If we refreshed some, bump tail */
3626 		E1000_WRITE_REG(&adapter->hw,
3627 		    E1000_RDT(rxr->me), cleaned);
3632 /*********************************************************************
3634 *  Allocate memory for rx_buffer structures. Since we use one
3635 *  rx_buffer per received packet, the maximum number of rx_buffer's
3636 *  that we'll need is equal to the number of receive descriptors
3637 *  that we've allocated.
3639 **********************************************************************/
3641 igb_allocate_receive_buffers(struct rx_ring *rxr)
3643 	struct	adapter 	*adapter = rxr->adapter;
3644 	device_t 		dev = adapter->dev;
3645 	struct igb_rx_buf	*rxbuf;
3646 	int             	i, bsize, error;
3648 	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3649 	if (!(rxr->rx_buffers =
3650 	    (struct igb_rx_buf *) malloc(bsize,
3651 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3652 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
	/* tag for small header buffers (header-split path) */
3657 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3658 				   1, 0,		/* alignment, bounds */
3659 				   BUS_SPACE_MAXADDR,	/* lowaddr */
3660 				   BUS_SPACE_MAXADDR,	/* highaddr */
3661 				   NULL, NULL,		/* filter, filterarg */
3662 				   MSIZE,		/* maxsize */
3664 				   MSIZE,		/* maxsegsize */
3666 				   NULL,		/* lockfunc */
3667 				   NULL,		/* lockfuncarg */
3669 		device_printf(dev, "Unable to create RX DMA tag\n");
	/* tag for jumbo-page payload buffers */
3673 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3674 				   1, 0,		/* alignment, bounds */
3675 				   BUS_SPACE_MAXADDR,	/* lowaddr */
3676 				   BUS_SPACE_MAXADDR,	/* highaddr */
3677 				   NULL, NULL,		/* filter, filterarg */
3678 				   MJUMPAGESIZE,	/* maxsize */
3680 				   MJUMPAGESIZE,	/* maxsegsize */
3682 				   NULL,		/* lockfunc */
3683 				   NULL,		/* lockfuncarg */
3685 		device_printf(dev, "Unable to create RX payload DMA tag\n");
3689 	for (i = 0; i < adapter->num_rx_desc; i++) {
3690 		rxbuf = &rxr->rx_buffers[i];
3691 		error = bus_dmamap_create(rxr->htag,
3692 		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3695 			    "Unable to create RX head DMA maps\n");
3698 		error = bus_dmamap_create(rxr->ptag,
3699 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3702 			    "Unable to create RX packet DMA maps\n");
3710 	/* Frees all, but can handle partial completion */
3711 	igb_free_receive_structures(adapter);
/*
 * igb_free_receive_ring:
 *   Release every mbuf held by this ring's rx_buffers: sync and unload
 *   the header (hmap) and payload (pmap) DMA maps, free the mbufs, and
 *   NULL the pointers so the slots are seen as empty by refill code.
 *   The DMA maps and tags themselves are NOT destroyed here (see
 *   igb_free_receive_buffers for full teardown).
 */
3717 igb_free_receive_ring(struct rx_ring *rxr)
3719 struct adapter *adapter;
3720 struct igb_rx_buf *rxbuf;
3723 adapter = rxr->adapter;
3724 for (i = 0; i < adapter->num_rx_desc; i++) {
3725 rxbuf = &rxr->rx_buffers[i];
3726 if (rxbuf->m_head != NULL) {
3727 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3728 BUS_DMASYNC_POSTREAD);
3729 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
/* Restore M_PKTHDR so m_freem() treats it as a packet header mbuf */
3730 rxbuf->m_head->m_flags |= M_PKTHDR;
3731 m_freem(rxbuf->m_head);
3733 if (rxbuf->m_pack != NULL) {
3734 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3735 BUS_DMASYNC_POSTREAD);
3736 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3737 rxbuf->m_pack->m_flags |= M_PKTHDR;
3738 m_freem(rxbuf->m_pack);
3740 rxbuf->m_head = NULL;
3741 rxbuf->m_pack = NULL;
3746 /*********************************************************************
3748 * Initialize a receive ring and its buffers.
3750 **********************************************************************/
/*
 * igb_setup_receive_ring:
 *   (Re)initialize one RX ring: zero the descriptor area, drop any old
 *   mbufs, load a fresh header mbuf + payload cluster into every slot,
 *   reset the ring indices, and optionally enable header split and LRO.
 *   Returns 0 on success; on mbuf/DMA failure the ring is freed again
 *   (cleanup path at the bottom, labels elided in this extract).
 * NOTE(review): 'mh' is used at 3789 but its assignment (presumably
 * mh = rxbuf->m_head) is on an elided line — confirm against full file.
 */
3752 igb_setup_receive_ring(struct rx_ring *rxr)
3754 struct adapter *adapter;
3757 struct igb_rx_buf *rxbuf;
3758 bus_dma_segment_t pseg[1], hseg[1];
3759 struct lro_ctrl *lro = &rxr->lro;
3760 int rsize, nsegs, error = 0;
3762 adapter = rxr->adapter;
3766 /* Clear the ring contents */
3768 rsize = roundup2(adapter->num_rx_desc *
3769 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3770 bzero((void *)rxr->rx_base, rsize);
3773 ** Free current RX buffer structures and their mbufs
3775 igb_free_receive_ring(rxr);
3777 /* Now replenish the ring mbufs */
3778 for (int j = 0; j != adapter->num_rx_desc; ++j) {
3779 struct mbuf *mh, *mp;
3781 rxbuf = &rxr->rx_buffers[j];
3783 /* First the header */
3784 rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3785 if (rxbuf->m_head == NULL)
/* ETHER_ALIGN keeps the IP header 4-byte aligned after the 14-byte MAC hdr */
3787 m_adj(rxbuf->m_head, ETHER_ALIGN);
3789 mh->m_len = mh->m_pkthdr.len = MHLEN;
3790 mh->m_flags |= M_PKTHDR;
3791 /* Get the memory mapping */
3792 error = bus_dmamap_load_mbuf_sg(rxr->htag,
3793 rxbuf->hmap, rxbuf->m_head, hseg,
3794 &nsegs, BUS_DMA_NOWAIT);
3795 if (error != 0) /* Nothing elegant to do here */
3797 bus_dmamap_sync(rxr->htag,
3798 rxbuf->hmap, BUS_DMASYNC_PREREAD);
3799 /* Update descriptor */
3800 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3802 /* Now the payload cluster */
3803 rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3804 M_PKTHDR, adapter->rx_mbuf_sz);
3805 if (rxbuf->m_pack == NULL)
3808 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3809 /* Get the memory mapping */
3810 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3811 rxbuf->pmap, mp, pseg,
3812 &nsegs, BUS_DMA_NOWAIT);
3815 bus_dmamap_sync(rxr->ptag,
3816 rxbuf->pmap, BUS_DMASYNC_PREREAD);
3817 /* Update descriptor */
3818 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3821 /* Setup our descriptor indices */
3822 rxr->next_to_check = 0;
3823 rxr->next_to_refresh = 0;
3824 rxr->lro_enabled = FALSE;
3826 if (igb_header_split)
3827 rxr->hdr_split = TRUE;
3829 ifp->if_capabilities &= ~IFCAP_LRO;
3833 rxr->discard = FALSE;
/* Push the freshly written descriptors out to the device */
3835 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3836 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3839 ** Now set up the LRO interface, we
3840 ** also only do head split when LRO
3841 ** is enabled, since so often they
3842 ** are undesirable in similar setups.
3844 if (ifp->if_capenable & IFCAP_LRO) {
3845 int err = tcp_lro_init(lro);
3847 device_printf(dev, "LRO Initialization failed!\n");
3850 INIT_DEBUGOUT("RX LRO Initialized\n");
3851 rxr->lro_enabled = TRUE;
3852 lro->ifp = adapter->ifp;
/* Error path: undo any slots we managed to fill */
3859 igb_free_receive_ring(rxr);
3864 /*********************************************************************
3866 * Initialize all receive rings.
3868 **********************************************************************/
/*
 * igb_setup_receive_structures:
 *   Initialize every RX ring; on the first ring that fails, unwind the
 *   rings that completed (the failing ring cleans up after itself).
 * NOTE(review): the unwind loop "for (--i; i > 0; ...)" never frees
 * ring index i-1 (it iterates rings 0..i-2 via rxr++ while i counts
 * down) — looks off-by-one; confirm against the full file / upstream fix.
 * NOTE(review): the inner j loop calls igb_free_receive_ring(rxr)
 * num_rx_desc times per ring, but that function already walks all
 * descriptors itself — one call per ring would suffice.
 */
3870 igb_setup_receive_structures(struct adapter *adapter)
3872 struct rx_ring *rxr = adapter->rx_rings;
3875 for (i = 0; i < adapter->num_queues; i++, rxr++)
3876 if (igb_setup_receive_ring(rxr))
3882 * Free RX buffers allocated so far, we will only handle
3883 * the rings that completed, the failing case will have
3884 * cleaned up for itself. The value of 'i' will be the
3885 * failed ring so we must pre-decrement it.
3887 rxr = adapter->rx_rings;
3888 for (--i; i > 0; i--, rxr++) {
3889 for (j = 0; j < adapter->num_rx_desc; j++)
3890 igb_free_receive_ring(rxr);
3896 /*********************************************************************
3898 * Enable receive unit.
3900 **********************************************************************/
/*
 * igb_initialize_receive_units:
 *   Program the hardware RX path: disable receives, configure SRRCTL
 *   (header-split vs one-buffer descriptors, buffer sizes, jumbo),
 *   write each queue's descriptor base/length and RXDCTL thresholds,
 *   set up RSS (redirection table + random hash key) when multiqueue,
 *   configure RX checksum offload, then re-enable receives via RCTL
 *   and prime the head/tail pointers.
 */
3902 igb_initialize_receive_units(struct adapter *adapter)
3904 struct rx_ring *rxr = adapter->rx_rings;
3905 struct ifnet *ifp = adapter->ifp;
3906 struct e1000_hw *hw = &adapter->hw;
3907 u32 rctl, rxcsum, psize, srrctl = 0;
3909 INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3912 * Make sure receives are disabled while setting
3913 * up the descriptor ring
3915 rctl = E1000_READ_REG(hw, E1000_RCTL);
3916 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3919 ** Set up for header split
3921 if (rxr->hdr_split) {
3922 /* Use a standard mbuf for the header */
3923 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3924 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3926 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3929 ** Set up for jumbo frames
3931 if (ifp->if_mtu > ETHERMTU) {
3932 rctl |= E1000_RCTL_LPE;
3933 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3934 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3936 /* Set maximum packet len */
3937 psize = adapter->max_frame_size;
3938 /* are we on a vlan? */
3939 if (adapter->ifp->if_vlantrunk != NULL)
3940 psize += VLAN_TAG_SIZE;
3941 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3943 rctl &= ~E1000_RCTL_LPE;
3944 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3945 rctl |= E1000_RCTL_SZ_2048;
3948 /* Setup the Base and Length of the Rx Descriptor Rings */
3949 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3950 u64 bus_addr = rxr->rxdma.dma_paddr;
/* NOTE(review): RDLEN uses sizeof(struct e1000_rx_desc) though the ring
 * holds union e1000_adv_rx_desc — both are 16 bytes, so this is benign. */
3953 E1000_WRITE_REG(hw, E1000_RDLEN(i),
3954 adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3955 E1000_WRITE_REG(hw, E1000_RDBAH(i),
3956 (uint32_t)(bus_addr >> 32));
3957 E1000_WRITE_REG(hw, E1000_RDBAL(i),
3958 (uint32_t)bus_addr);
3959 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3960 /* Enable this Queue */
3961 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3962 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
/* Clear the prefetch/host/writeback threshold fields, then set ours */
3963 rxdctl &= 0xFFF00000;
3964 rxdctl |= IGB_RX_PTHRESH;
3965 rxdctl |= IGB_RX_HTHRESH << 8;
3966 rxdctl |= IGB_RX_WTHRESH << 16;
3967 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3971 ** Setup for RX MultiQueue
3973 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3974 if (adapter->num_queues >1) {
3975 u32 random[10], mrqc, shift = 0;
/* Seed the RSS hash key with random data */
3981 arc4rand(&random, sizeof(random), 0);
3982 if (adapter->hw.mac.type == e1000_82575)
3984 /* Warning FM follows */
/* Fill the 128-entry redirection table round-robin over the queues */
3985 for (int i = 0; i < 128; i++) {
3987 (i % adapter->num_queues) << shift;
3990 E1000_RETA(i >> 2), reta.dword);
3992 /* Now fill in hash table */
3993 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3994 for (int i = 0; i < 10; i++)
3995 E1000_WRITE_REG_ARRAY(hw,
3996 E1000_RSSRK(0), i, random[i]);
3998 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3999 E1000_MRQC_RSS_FIELD_IPV4_TCP);
4000 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4001 E1000_MRQC_RSS_FIELD_IPV6_TCP);
4002 mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4003 E1000_MRQC_RSS_FIELD_IPV6_UDP);
4004 mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4005 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4007 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4010 ** NOTE: Receive Full-Packet Checksum Offload
4011 ** is mutually exclusive with Multiqueue. However
4012 ** this is not the same as TCP/IP checksums which
4015 rxcsum |= E1000_RXCSUM_PCSD;
4016 #if __FreeBSD_version >= 800000
4017 /* For SCTP Offload */
4018 if ((hw->mac.type == e1000_82576)
4019 && (ifp->if_capenable & IFCAP_RXCSUM))
4020 rxcsum |= E1000_RXCSUM_CRCOFL;
4024 if (ifp->if_capenable & IFCAP_RXCSUM) {
4025 rxcsum |= E1000_RXCSUM_IPPCSE;
4026 #if __FreeBSD_version >= 800000
4027 if (adapter->hw.mac.type == e1000_82576)
4028 rxcsum |= E1000_RXCSUM_CRCOFL;
4031 rxcsum &= ~E1000_RXCSUM_TUOFL;
4033 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4035 /* Setup the Receive Control Register */
4036 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4037 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4038 E1000_RCTL_RDMTS_HALF |
4039 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4040 /* Strip CRC bytes. */
4041 rctl |= E1000_RCTL_SECRC;
4042 /* Make sure VLAN Filters are off */
4043 rctl &= ~E1000_RCTL_VFE;
4044 /* Don't store bad packets */
4045 rctl &= ~E1000_RCTL_SBP;
4047 /* Enable Receives */
4048 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4051 * Setup the HW Rx Head and Tail Descriptor Pointers
4052 * - needs to be after enable
4054 for (int i = 0; i < adapter->num_queues; i++) {
4055 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4056 E1000_WRITE_REG(hw, E1000_RDT(i),
4057 adapter->num_rx_desc - 1);
4062 /*********************************************************************
4064 * Free receive rings.
4066 **********************************************************************/
/*
 * igb_free_receive_structures:
 *   Tear down all RX rings: free each ring's buffers/maps/tags and its
 *   descriptor DMA memory, then free the rx_rings array itself.
 *   (An elided line around 4075 presumably releases the per-ring LRO
 *   state — confirm against the full file.)
 */
4068 igb_free_receive_structures(struct adapter *adapter)
4070 struct rx_ring *rxr = adapter->rx_rings;
4072 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4073 struct lro_ctrl *lro = &rxr->lro;
4074 igb_free_receive_buffers(rxr);
4076 igb_dma_free(adapter, &rxr->rxdma);
4079 free(adapter->rx_rings, M_DEVBUF);
4082 /*********************************************************************
4084 * Free receive ring data structures.
4086 **********************************************************************/
/*
 * igb_free_receive_buffers:
 *   Full per-ring teardown: free any held mbufs (unloading their DMA
 *   maps first), destroy every header/payload dmamap, free the
 *   rx_buffers array, and finally destroy the two DMA tags.
 *   Safe to call on partially constructed rings (NULL checks throughout).
 */
4088 igb_free_receive_buffers(struct rx_ring *rxr)
4090 struct adapter *adapter = rxr->adapter;
4091 struct igb_rx_buf *rxbuf;
4094 INIT_DEBUGOUT("free_receive_structures: begin");
4096 /* Cleanup any existing buffers */
4097 if (rxr->rx_buffers != NULL) {
4098 for (i = 0; i < adapter->num_rx_desc; i++) {
4099 rxbuf = &rxr->rx_buffers[i];
4100 if (rxbuf->m_head != NULL) {
4101 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4102 BUS_DMASYNC_POSTREAD);
4103 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4104 rxbuf->m_head->m_flags |= M_PKTHDR;
4105 m_freem(rxbuf->m_head);
4107 if (rxbuf->m_pack != NULL) {
4108 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4109 BUS_DMASYNC_POSTREAD);
4110 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4111 rxbuf->m_pack->m_flags |= M_PKTHDR;
4112 m_freem(rxbuf->m_pack);
4114 rxbuf->m_head = NULL;
4115 rxbuf->m_pack = NULL;
4116 if (rxbuf->hmap != NULL) {
4117 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4120 if (rxbuf->pmap != NULL) {
4121 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4125 if (rxr->rx_buffers != NULL) {
4126 free(rxr->rx_buffers, M_DEVBUF);
4127 rxr->rx_buffers = NULL;
4131 if (rxr->htag != NULL) {
4132 bus_dma_tag_destroy(rxr->htag);
4135 if (rxr->ptag != NULL) {
4136 bus_dma_tag_destroy(rxr->ptag);
/*
 * igb_rx_discard:
 *   Drop the frame at descriptor slot 'i': abandon any partially
 *   assembled chain (rxr->fmp) and recycle the slot's header/payload
 *   mbufs in place — the DMA maps stay loaded, only the mbuf fields
 *   are reset so the descriptor can be reused without reallocating.
 * NOTE(review): the assignments of 'mh'/'mp' and the freeing of
 * fmp/lmp are on elided lines here — comments are best-effort.
 */
4141 static __inline void
4142 igb_rx_discard(struct rx_ring *rxr, int i)
4144 struct adapter *adapter = rxr->adapter;
4145 struct igb_rx_buf *rbuf;
4146 struct mbuf *mh, *mp;
4148 rbuf = &rxr->rx_buffers[i];
4149 if (rxr->fmp != NULL) {
4150 rxr->fmp->m_flags |= M_PKTHDR;
4159 /* Reuse loaded DMA map and just update mbuf chain */
4161 mh->m_flags |= M_PKTHDR;
4164 mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
/* Rewind data pointer to the start of the external cluster */
4165 mp->m_data = mp->m_ext.ext_buf;
/*
 * igb_rx_input:
 *   Hand a completed frame to the stack, attempting LRO first when the
 *   ring has LRO enabled and the descriptor type/checksum bits show an
 *   IPv4/TCP packet with a hardware-validated checksum. If LRO is not
 *   applicable, has no resources, or the enqueue fails, fall through
 *   to the normal ifnet input path.
 */
4170 static __inline void
4171 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4175 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
4176 * should be computed by hardware. Also it should not have VLAN tag in
4179 if (rxr->lro_enabled &&
4180 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4181 (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4182 (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4183 (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4184 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4185 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4187 * Send to the stack if:
4188 ** - LRO not enabled, or
4189 ** - no LRO resources, or
4190 ** - lro enqueue fails
4192 if (rxr->lro.lro_cnt != 0)
4193 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4196 (*ifp->if_input)(ifp, m);
4199 /*********************************************************************
4201 * This routine executes in interrupt context. It replenishes
4202 * the mbufs in the descriptor and sends data which has been
4203 * dma'ed into host memory to upper layer.
4205 * We loop at most count times if count is > 0, or until done if
4208 * Return TRUE if more to clean, FALSE otherwise
4209 *********************************************************************/
/*
 * igb_rxeof:
 *   RX completion processing (runs in interrupt context). Walks the
 *   descriptor ring from next_to_check, assembling frames (optionally
 *   header-split across a header mbuf + payload cluster, or chained
 *   over multiple descriptors via fmp/lmp), attaching checksum/VLAN/
 *   flowid metadata, and passing completed frames up via igb_rx_input.
 *   Mbuf slots are replenished every 8 processed descriptors and once
 *   more at the end; pending LRO work is flushed before returning.
 *   Returns TRUE if more descriptors remain to clean, FALSE otherwise.
 */
4211 igb_rxeof(struct igb_queue *que, int count)
4213 struct adapter *adapter = que->adapter;
4214 struct rx_ring *rxr = que->rxr;
4215 struct ifnet *ifp = adapter->ifp;
4216 struct lro_ctrl *lro = &rxr->lro;
4217 struct lro_entry *queued;
4218 int i, processed = 0;
4219 u32 ptype, staterr = 0;
4220 union e1000_adv_rx_desc *cur;
4223 /* Sync the ring. */
4224 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4225 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4227 /* Main clean loop */
4228 for (i = rxr->next_to_check; count != 0;) {
4229 struct mbuf *sendmp, *mh, *mp;
4230 struct igb_rx_buf *rxbuf;
4231 u16 hlen, plen, hdr, vtag;
4234 cur = &rxr->rx_base[i];
4235 staterr = le32toh(cur->wb.upper.status_error);
/* DD clear means the hardware hasn't written this descriptor back yet */
4236 if ((staterr & E1000_RXD_STAT_DD) == 0)
4238 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4241 sendmp = mh = mp = NULL;
4242 cur->wb.upper.status_error = 0;
4243 rxbuf = &rxr->rx_buffers[i];
4244 plen = le16toh(cur->wb.upper.length);
4245 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4246 vtag = le16toh(cur->wb.upper.vlan);
4247 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4248 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4250 /* Make sure all segments of a bad packet are discarded */
4251 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4254 ++rxr->rx_discarded;
4255 if (!eop) /* Catch subsequent segs */
4256 rxr->discard = TRUE;
4258 rxr->discard = FALSE;
4259 igb_rx_discard(rxr, i);
4264 ** The way the hardware is configured to
4265 ** split, it will ONLY use the header buffer
4266 ** when header split is enabled, otherwise we
4267 ** get normal behavior, ie, both header and
4268 ** payload are DMA'd into the payload buffer.
4270 ** The fmp test is to catch the case where a
4271 ** packet spans multiple descriptors, in that
4272 ** case only the first header is valid.
4274 if (rxr->hdr_split && rxr->fmp == NULL) {
4275 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4276 E1000_RXDADV_HDRBUFLEN_SHIFT;
4277 if (hlen > IGB_HDR_BUF)
4279 /* Handle the header mbuf */
4280 mh = rxr->rx_buffers[i].m_head;
4282 /* clear buf info for refresh */
4283 rxbuf->m_head = NULL;
4285 ** Get the payload length, this
4286 ** could be zero if its a small
4290 mp = rxr->rx_buffers[i].m_pack;
4293 /* clear buf info for refresh */
4294 rxbuf->m_pack = NULL;
4295 rxr->rx_split_packets++;
4299 ** Either no header split, or a
4300 ** secondary piece of a fragmented
4303 mh = rxr->rx_buffers[i].m_pack;
4305 /* clear buf info for refresh */
4306 rxbuf->m_pack = NULL;
4309 ++processed; /* So we know when to refresh */
4311 /* Initial frame - setup */
4312 if (rxr->fmp == NULL) {
4313 mh->m_pkthdr.len = mh->m_len;
4314 /* Store the first mbuf */
4318 /* Add payload if split */
4319 mh->m_pkthdr.len += mp->m_len;
4320 rxr->lmp = mh->m_next;
4323 /* Chain mbuf's together */
4324 rxr->lmp->m_next = mh;
4325 rxr->lmp = rxr->lmp->m_next;
4326 rxr->fmp->m_pkthdr.len += mh->m_len;
/* End-of-packet: finalize metadata and queue for delivery */
4330 rxr->fmp->m_pkthdr.rcvif = ifp;
4333 /* capture data for AIM */
4335 rxr->bytes += rxr->fmp->m_pkthdr.len;
4336 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4338 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4339 igb_rx_checksum(staterr, rxr->fmp, ptype);
4341 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4342 (staterr & E1000_RXD_STAT_VP) != 0) {
4343 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4344 rxr->fmp->m_flags |= M_VLANTAG;
4346 #if __FreeBSD_version >= 800000
/* Tag with the MSI-X vector so netisr can keep per-queue ordering */
4347 rxr->fmp->m_pkthdr.flowid = que->msix;
4348 rxr->fmp->m_flags |= M_FLOWID;
4351 /* Make sure to set M_PKTHDR. */
4352 sendmp->m_flags |= M_PKTHDR;
4358 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4359 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4361 /* Advance our pointers to the next descriptor. */
4362 if (++i == adapter->num_rx_desc)
4365 ** Send to the stack or LRO
4368 igb_rx_input(rxr, ifp, sendmp, ptype);
4370 /* Every 8 descriptors we go to refresh mbufs */
4371 if (processed == 8) {
4372 igb_refresh_mbufs(rxr, i);
4377 /* Catch any remainders */
4378 if (processed != 0) {
4379 igb_refresh_mbufs(rxr, i);
4383 rxr->next_to_check = i;
4386 * Flush any outstanding LRO work
4388 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4389 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4390 tcp_lro_flush(lro, queued);
4396 ** We still have cleaning to do?
4397 ** Schedule another interrupt if so.
4399 if ((staterr & E1000_RXD_STAT_DD) != 0)
4405 /*********************************************************************
4407 * Verify that the hardware indicated that the checksum is valid.
4408 * Inform the stack about the status of checksum so that stack
4409 * doesn't spend time verifying the checksum.
4411 *********************************************************************/
/*
 * igb_rx_checksum:
 *   Translate the descriptor's status/error bits into mbuf csum_flags
 *   so the stack can skip software checksum verification. Handles the
 *   "ignore checksum" bit, IP header checksum, and TCP/UDP (and, on
 *   FreeBSD >= 8 with an elided 'sctp' determination above, SCTP)
 *   payload checksum results.
 */
4413 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4415 u16 status = (u16)staterr;
/* Error bits live in byte 3 of the status/error word */
4416 u8 errors = (u8) (staterr >> 24);
4419 /* Ignore Checksum bit is set */
4420 if (status & E1000_RXD_STAT_IXSM) {
4421 mp->m_pkthdr.csum_flags = 0;
4425 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4426 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4430 if (status & E1000_RXD_STAT_IPCS) {
4432 if (!(errors & E1000_RXD_ERR_IPE)) {
4433 /* IP Checksum Good */
4434 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4435 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4437 mp->m_pkthdr.csum_flags = 0;
4440 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4441 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4442 #if __FreeBSD_version >= 800000
4443 if (sctp) /* reassign */
4444 type = CSUM_SCTP_VALID;
4447 if (!(errors & E1000_RXD_ERR_TCPE)) {
4448 mp->m_pkthdr.csum_flags |= type;
/* Full pseudo-header checksum already verified by hardware */
4450 mp->m_pkthdr.csum_data = htons(0xffff);
4457 * This routine is run via an vlan
/* (config hook when a VLAN is created/registered on this interface) */
4461 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4463 struct adapter *adapter = ifp->if_softc;
4466 if (ifp->if_softc != arg) /* Not our event */
4469 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
/* Set the VFTA shadow bit for this VLAN id (word index = vtag/32;
 * the bit computation is on an elided line — presumably vtag & 0x1F) */
4472 index = (vtag >> 5) & 0x7F;
4474 igb_shadow_vfta[index] |= (1 << bit);
4475 ++adapter->num_vlans;
4476 /* Re-init to load the changes */
4481 * This routine is run via an vlan
/* (unconfig hook when a VLAN is destroyed/unregistered) */
4485 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4487 struct adapter *adapter = ifp->if_softc;
4490 if (ifp->if_softc != arg)
4493 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
/* Clear the VFTA shadow bit for this VLAN id (mirror of register path) */
4496 index = (vtag >> 5) & 0x7F;
4498 igb_shadow_vfta[index] &= ~(1 << bit);
4499 --adapter->num_vlans;
4500 /* Re-init to load the changes */
/*
 * igb_setup_vlan_hw_support:
 *   After a soft reset (which clears the hardware VFTA), repopulate the
 *   VLAN filter table from the shadow copy, enable VLAN tag stripping
 *   (CTRL.VME) and the VLAN filter (RCTL.VFE), and grow the max frame
 *   size to accommodate the tag. No-op when no VLANs are registered.
 */
4505 igb_setup_vlan_hw_support(struct adapter *adapter)
4507 struct e1000_hw *hw = &adapter->hw;
4511 ** We get here thru init_locked, meaning
4512 ** a soft reset, this has already cleared
4513 ** the VFTA and other state, so if there
4514 ** have been no vlan's registered do nothing.
4516 if (adapter->num_vlans == 0)
4520 ** A soft reset zero's out the VFTA, so
4521 ** we need to repopulate it now.
4523 for (int i = 0; i < IGB_VFTA_SIZE; i++)
4524 if (igb_shadow_vfta[i] != 0)
4525 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4526 i, igb_shadow_vfta[i]);
/* Enable hardware VLAN tag stripping */
4528 reg = E1000_READ_REG(hw, E1000_CTRL);
4529 reg |= E1000_CTRL_VME;
4530 E1000_WRITE_REG(hw, E1000_CTRL, reg);
4532 /* Enable the Filter Table */
4533 reg = E1000_READ_REG(hw, E1000_RCTL);
4534 reg &= ~E1000_RCTL_CFIEN;
4535 reg |= E1000_RCTL_VFE;
4536 E1000_WRITE_REG(hw, E1000_RCTL, reg);
4538 /* Update the frame size */
4539 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4540 adapter->max_frame_size + VLAN_TAG_SIZE);
/*
 * igb_enable_intr:
 *   Unmask interrupts. With MSI-X, program the extended auto-clear/
 *   auto-mask/set registers (EIAC/EIAM/EIMS) with the per-queue mask
 *   plus the legacy IMS bits (elided operand at 4555); otherwise just
 *   write IMS. The final flush posts the writes to hardware.
 */
4544 igb_enable_intr(struct adapter *adapter)
4546 /* With RSS set up what to auto clear */
4547 if (adapter->msix_mem) {
4548 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4549 adapter->eims_mask);
4550 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4551 adapter->eims_mask);
4552 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4553 adapter->eims_mask);
4554 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4557 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4560 E1000_WRITE_FLUSH(&adapter->hw);
/*
 * igb_disable_intr:
 *   Mask all interrupts: clear the MSI-X extended mask/auto-clear
 *   registers when MSI-X is active, then the legacy IMC, and flush.
 */
4566 igb_disable_intr(struct adapter *adapter)
4568 if (adapter->msix_mem) {
4569 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4570 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4572 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4573 E1000_WRITE_FLUSH(&adapter->hw);
4578 * Bit of a misnomer, what this really means is
4579 * to enable OS management of the system... aka
4580 * to disable special hardware management features
/* Routes management traffic (ports 623/664) to the host and stops the
 * BMC from intercepting ARP while the OS driver is active. */
4583 igb_init_manageability(struct adapter *adapter)
4585 if (adapter->has_manage) {
4586 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4587 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4589 /* disable hardware interception of ARP */
4590 manc &= ~(E1000_MANC_ARP_EN);
4592 /* enable receiving management packets to the host */
4593 manc |= E1000_MANC_EN_MNG2HOST;
4594 manc2h |= 1 << 5; /* Mng Port 623 */
4595 manc2h |= 1 << 6; /* Mng Port 664 */
4596 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4597 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4602 * Give control back to hardware management
4603 * controller if there is one.
/* Inverse of igb_init_manageability: re-enable BMC ARP interception and
 * stop forwarding management packets to the host. */
4606 igb_release_manageability(struct adapter *adapter)
4608 if (adapter->has_manage) {
4609 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4611 /* re-enable hardware interception of ARP */
4612 manc |= E1000_MANC_ARP_EN;
4613 manc &= ~E1000_MANC_EN_MNG2HOST;
4615 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4620 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4621 * For ASF and Pass Through versions of f/w this means that
4622 * the driver is loaded.
4626 igb_get_hw_control(struct adapter *adapter)
4630 /* Let firmware know the driver has taken over */
4631 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4632 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4633 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4637 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4638 * For ASF and Pass Through versions of f/w this means that the
4639 * driver is no longer loaded.
4643 igb_release_hw_control(struct adapter *adapter)
4647 /* Let firmware taken over control of h/w */
4648 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4649 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4650 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
/*
 * igb_is_valid_ether_addr:
 *   Reject multicast addresses (low bit of first octet set) and the
 *   all-zeros address. The return statements are on elided lines.
 */
4654 igb_is_valid_ether_addr(uint8_t *addr)
4656 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4658 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4667 * Enable PCI Wake On Lan capability
4670 igb_enable_wakeup(device_t dev)
4675 /* First find the capabilities pointer*/
4676 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4677 /* Read the PM Capabilities */
4678 id = pci_read_config(dev, cap, 1);
4679 if (id != PCIY_PMG) /* Something wrong */
4681 /* OK, we have the power capabilities, so
4682 now get the status register */
4683 cap += PCIR_POWER_STATUS;
4684 status = pci_read_config(dev, cap, 2);
/* Assert PME status and enable so the device can wake the system */
4685 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4686 pci_write_config(dev, cap, status, 2);
/*
 * igb_led_func:
 *   led(4)-style callback: turn the identify LED on (setup + led_on)
 *   or off (led_off + cleanup) under the core lock. The onoff branch
 *   keywords are on elided lines.
 */
4691 igb_led_func(void *arg, int onoff)
4693 struct adapter *adapter = arg;
4695 IGB_CORE_LOCK(adapter);
4697 e1000_setup_led(&adapter->hw);
4698 e1000_led_on(&adapter->hw);
4700 e1000_led_off(&adapter->hw);
4701 e1000_cleanup_led(&adapter->hw);
4703 IGB_CORE_UNLOCK(adapter);
4706 /**********************************************************************
4708 * Update the board statistics counters.
4710 **********************************************************************/
/*
 * igb_update_stats_counters:
 *   Accumulate the hardware statistics registers (which clear on read)
 *   into the driver's running totals, then derive the ifnet collision,
 *   input-error and output-error counters from them.
 */
4712 igb_update_stats_counters(struct adapter *adapter)
/* Symbol/sequence errors only meaningful with copper media or link up */
4716 if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4717 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4718 adapter->stats.symerrs +=
4719 E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4720 adapter->stats.sec +=
4721 E1000_READ_REG(&adapter->hw, E1000_SEC);
4723 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4724 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4725 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4726 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4728 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4729 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4730 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4731 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4732 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4733 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4734 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4735 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4736 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4737 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4738 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4739 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4740 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4741 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4742 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4743 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4744 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4745 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4746 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4747 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4749 /* For the 64-bit byte counters the low dword must be read first. */
4750 /* Both registers clear on the read of the high dword */
/* NOTE(review): the low-dword (GORCL/GOTCL) reads appear to be on elided
 * lines; only the high-dword reads are visible here. */
4752 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4753 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4755 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4756 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4757 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4758 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4759 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4761 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4762 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4764 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4765 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4766 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4767 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4768 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4769 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4770 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4771 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4772 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4773 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4775 adapter->stats.algnerrc +=
4776 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4777 adapter->stats.rxerrc +=
4778 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4779 adapter->stats.tncrs +=
4780 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4781 adapter->stats.cexterr +=
4782 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4783 adapter->stats.tsctc +=
4784 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4785 adapter->stats.tsctfc +=
4786 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
/* Export derived totals through the ifnet counters */
4789 ifp->if_collisions = adapter->stats.colc;
4792 ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4793 adapter->stats.crcerrs + adapter->stats.algnerrc +
4794 adapter->stats.ruc + adapter->stats.roc +
4795 adapter->stats.mpc + adapter->stats.cexterr;
4798 ifp->if_oerrors = adapter->stats.ecol +
4799 adapter->stats.latecol + adapter->watchdog_events;
4803 /**********************************************************************
4805 * This routine is called only when igb_display_debug_stats is enabled.
4806 * This routine provides a way to take a look at important statistics
4807 * maintained by the driver and hardware.
4809 **********************************************************************/
/*
 * igb_print_debug_info:
 *   Dump driver/hardware debug state to the console: key registers
 *   (CTRL/RCTL, optionally IMS/EIMS), packet-buffer split, flow-control
 *   watermarks, per-queue TX/RX head-tail pointers and counters, LRO
 *   statistics, per-vector IRQ counts, and driver failure counters.
 *   Diagnostic only; no hardware state is modified beyond register reads.
 */
4811 igb_print_debug_info(struct adapter *adapter)
4813 device_t dev = adapter->dev;
4814 struct igb_queue *que = adapter->queues;
4815 struct rx_ring *rxr = adapter->rx_rings;
4816 struct tx_ring *txr = adapter->tx_rings;
4817 uint8_t *hw_addr = adapter->hw.hw_addr;
4819 device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4820 device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4821 E1000_READ_REG(&adapter->hw, E1000_CTRL),
4822 E1000_READ_REG(&adapter->hw, E1000_RCTL));
4824 #if (DEBUG_HW > 0) /* Dont output these errors normally */
4825 device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4826 E1000_READ_REG(&adapter->hw, E1000_IMS),
4827 E1000_READ_REG(&adapter->hw, E1000_EIMS));
/* PBA register: high word = TX buffer KB, low word = RX buffer KB */
4830 device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4831 ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
4832 (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4833 device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4834 adapter->hw.fc.high_water,
4835 adapter->hw.fc.low_water);
4837 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4838 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d ", i,
4839 E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4840 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4841 device_printf(dev, "rdh = %d, rdt = %d\n",
4842 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4843 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4844 device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4845 txr->me, (long long)txr->no_desc_avail);
4846 device_printf(dev, "TX(%d) Packets sent = %lld\n",
4847 txr->me, (long long)txr->tx_packets);
4848 device_printf(dev, "RX(%d) Packets received = %lld ",
4849 rxr->me, (long long)rxr->rx_packets);
/* NOTE(review): rxr is re-walked from index 0 here, but it was left
 * pointing past the last queue by the loop above — the reset line
 * (rxr = adapter->rx_rings) appears to be elided; confirm. */
4852 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4853 struct lro_ctrl *lro = &rxr->lro;
4854 device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4855 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4856 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4857 device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4858 (long long)rxr->rx_packets);
4859 device_printf(dev, " Split Packets = %lld ",
4860 (long long)rxr->rx_split_packets);
4861 device_printf(dev, " Byte count = %lld\n",
4862 (long long)rxr->rx_bytes);
4863 device_printf(dev,"RX(%d) LRO Queued= %d ",
4864 i, lro->lro_queued);
4865 device_printf(dev,"LRO Flushed= %d\n",lro->lro_flushed);
4868 for (int i = 0; i < adapter->num_queues; i++, que++)
4869 device_printf(dev,"QUE(%d) IRQs = %llx\n",
4870 i, (long long)que->irqs);
4872 device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4873 device_printf(dev, "Mbuf defrag failed = %ld\n",
4874 adapter->mbuf_defrag_failed);
4875 device_printf(dev, "Std mbuf header failed = %ld\n",
4876 adapter->mbuf_header_failed);
4877 device_printf(dev, "Std mbuf packet failed = %ld\n",
4878 adapter->mbuf_packet_failed);
4879 device_printf(dev, "Driver dropped packets = %ld\n",
4880 adapter->dropped_pkts);
4881 device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4882 adapter->no_tx_dma_setup);
/*
 * igb_print_hw_stats
 *
 * Dump the driver's cached copy of the MAC statistics counters
 * (adapter->stats) to the console via device_printf().  Debug/service
 * aid only; invoked from the igb_sysctl_stats handler.  Counters are
 * cast to (long long) so the %lld format specifier always matches
 * regardless of the underlying counter width.
 * (The "static void" header line and the function braces fall outside
 * this extract.)
 */
4886 igb_print_hw_stats(struct adapter *adapter)
4888 	device_t dev = adapter->dev;
	/* Transmit collision / deferral counters */
4890 	device_printf(dev, "Excessive collisions = %lld\n",
4891 	(long long)adapter->stats.ecol);
4892 #if (DEBUG_HW > 0) /* Don't output these errors normally */
4893 	device_printf(dev, "Symbol errors = %lld\n",
4894 	(long long)adapter->stats.symerrs);
	/* NOTE(review): matching #endif for DEBUG_HW is not visible in this extract */
4896 	device_printf(dev, "Sequence errors = %lld\n",
4897 	(long long)adapter->stats.sec);
4898 	device_printf(dev, "Defer count = %lld\n",
4899 	(long long)adapter->stats.dc);
	/* Receive-side drop and error counters */
4900 	device_printf(dev, "Missed Packets = %lld\n",
4901 	(long long)adapter->stats.mpc);
4902 	device_printf(dev, "Receive No Buffers = %lld\n",
4903 	(long long)adapter->stats.rnbc);
4904 	/* RLEC is inaccurate on some hardware, calculate our own. */
4905 	device_printf(dev, "Receive Length Errors = %lld\n",
4906 	((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4907 	device_printf(dev, "Receive errors = %lld\n",
4908 	(long long)adapter->stats.rxerrc);
4909 	device_printf(dev, "Crc errors = %lld\n",
4910 	(long long)adapter->stats.crcerrs);
4911 	device_printf(dev, "Alignment errors = %lld\n",
4912 	(long long)adapter->stats.algnerrc);
4913 	/* On 82575 these are collision counts */
4914 	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4915 	(long long)adapter->stats.cexterr);
	/* Driver-maintained event counters (long, hence %ld) */
4916 	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4917 	device_printf(dev, "watchdog timeouts = %ld\n",
4918 	adapter->watchdog_events);
	/* Flow-control pause frame counters */
4919 	device_printf(dev, "XON Rcvd = %lld\n",
4920 	(long long)adapter->stats.xonrxc);
4921 	device_printf(dev, "XON Xmtd = %lld\n",
4922 	(long long)adapter->stats.xontxc);
4923 	device_printf(dev, "XOFF Rcvd = %lld\n",
4924 	(long long)adapter->stats.xoffrxc);
4925 	device_printf(dev, "XOFF Xmtd = %lld\n",
4926 	(long long)adapter->stats.xofftxc);
	/* Good (error-free) packet totals */
4927 	device_printf(dev, "Good Packets Rcvd = %lld\n",
4928 	(long long)adapter->stats.gprc);
4929 	device_printf(dev, "Good Packets Xmtd = %lld\n",
4930 	(long long)adapter->stats.gptc);
	/* TCP segmentation offload context counters */
4931 	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4932 	(long long)adapter->stats.tsctc);
4933 	device_printf(dev, "TSO Contexts Failed = %lld\n",
4934 	(long long)adapter->stats.tsctfc);
4937 /**********************************************************************
4939  * This routine provides a way to dump out the adapter eeprom,
4940  * often a useful debug/service tool. It dumps only the first
4941  * 32 words, since the fields that matter lie within that range.
4943  **********************************************************************/
/*
 * igb_print_nvm_info
 *
 * Hex-dump the first 32 16-bit words of the adapter NVM (EEPROM) to
 * the console, eight words per row, each row prefixed with its word
 * offset.  Locals i, j, row and eeprom_data are declared on lines not
 * shown in this extract; presumably row is advanced where j wraps at 8
 * -- the increment line is also outside this view.
 */
4945 igb_print_nvm_info(struct adapter *adapter)
4950 	/* It's a bit crude, but it gets the job done */
4951 	printf("\nInterface EEPROM Dump:\n");
4952 	printf("Offset\n0x0000 ");
4953 	for (i = 0, j = 0; i < 32; i++, j++) {
4954 		if (j == 8) { /* Make the offset block */
4956 			printf("\n0x00%x0 ",row);
		/* Read one 16-bit word at word-offset i from the NVM */
4958 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4959 		printf("%04x ", eeprom_data);
/*
 * igb_sysctl_debug_info
 *
 * Sysctl handler: writing an integer to this node triggers a console
 * debug dump.  sysctl_handle_int() copies the user-supplied value into
 * `result`; on error, or on a read access (req->newptr == NULL), the
 * handler bails out early (the return statement is outside this
 * extract).  One written value selects igb_print_debug_info(), another
 * selects the EEPROM hex dump -- the selecting `if` lines are not
 * visible here; presumably 1 and 2 respectively (TODO confirm).
 */
4965 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4967 	struct adapter *adapter;
4972 	error = sysctl_handle_int(oidp, &result, 0, req);
4974 	if (error || !req->newptr)
	/* arg1 carries the softc registered when the sysctl was created */
4978 	adapter = (struct adapter *)arg1;
4979 	igb_print_debug_info(adapter);
4982 	 * This value will cause a hex dump of the
4983 	 * first 32 16-bit words of the EEPROM to
4987 	adapter = (struct adapter *)arg1;
4988 	igb_print_nvm_info(adapter);
/*
 * igb_sysctl_stats
 *
 * Sysctl handler: writing a value to this node dumps the hardware
 * statistics counters via igb_print_hw_stats().  On copyin error, or
 * on a read access (req->newptr == NULL), it bails out early (the
 * return statement is outside this extract).
 */
4996 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4998 	struct adapter *adapter;
5003 	error = sysctl_handle_int(oidp, &result, 0, req);
5005 	if (error || !req->newptr)
	/* arg1 carries the softc registered when the sysctl was created */
5009 	adapter = (struct adapter *)arg1;
5010 	igb_print_hw_stats(adapter);
/*
 * igb_add_rx_process_limit
 *
 * Register a read/write integer sysctl named `name` under this
 * device's sysctl tree, exporting *limit as a runtime-tunable knob
 * (used for the RX processing limit).  NOTE(review): per sysctl(9),
 * when a variable pointer is supplied the `value` argument is not the
 * live value -- verify the caller initializes *limit itself.
 */
5017 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5018 	const char *description, int *limit, int value)
5021 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5022 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5023 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);