1 /******************************************************************************
3 Copyright (c) 2001-2010, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
36 #ifdef HAVE_KERNEL_OPTION_HEADERS
37 #include "opt_device_polling.h"
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #if __FreeBSD_version >= 800000
45 #include <sys/buf_ring.h>
48 #include <sys/endian.h>
49 #include <sys/kernel.h>
50 #include <sys/kthread.h>
51 #include <sys/malloc.h>
53 #include <sys/module.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/taskqueue.h>
59 #include <sys/eventhandler.h>
62 #include <machine/smp.h>
63 #include <machine/bus.h>
64 #include <machine/resource.h>
67 #include <net/ethernet.h>
69 #include <net/if_arp.h>
70 #include <net/if_dl.h>
71 #include <net/if_media.h>
73 #include <net/if_types.h>
74 #include <net/if_vlan_var.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 #include <netinet/ip.h>
80 #include <netinet/ip6.h>
81 #include <netinet/tcp.h>
82 #include <netinet/tcp_lro.h>
83 #include <netinet/udp.h>
85 #include <machine/in_cksum.h>
86 #include <dev/led/led.h>
87 #include <dev/pci/pcivar.h>
88 #include <dev/pci/pcireg.h>
90 #include "e1000_api.h"
91 #include "e1000_82575.h"
94 /*********************************************************************
95 * Set this to one to display debug statistics
96 *********************************************************************/
/* Global debug toggle; read by the sysctl stats/debug handlers. */
97 int igb_display_debug_stats = 0;
99 /*********************************************************************
101 *********************************************************************/
/* Human-readable driver version string. */
102 char igb_driver_version[] = "version - 1.9.5";
105 /*********************************************************************
106 * PCI Device ID Table
108 * Used by probe to select devices to load on
109 * Last field stores an index into e1000_strings
110 * Last entry must be all 0s
112 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113 *********************************************************************/
/*
 * PCI match table walked by igb_probe(); each row is
 * { vendor, device, subvendor, subdevice, branding-string index }.
 * NOTE(review): this excerpt is a numbered listing with gaps — the
 * opening brace, the all-zeros terminator row and the closing "};"
 * are on lines not shown here.
 */
115 static igb_vendor_info_t igb_vendor_info_array[] =
117 { 0x8086, E1000_DEV_ID_82575EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
118 { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119 PCI_ANY_ID, PCI_ANY_ID, 0},
120 { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121 PCI_ANY_ID, PCI_ANY_ID, 0},
122 { 0x8086, E1000_DEV_ID_82576, PCI_ANY_ID, PCI_ANY_ID, 0},
123 { 0x8086, E1000_DEV_ID_82576_NS, PCI_ANY_ID, PCI_ANY_ID, 0},
124 { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
125 { 0x8086, E1000_DEV_ID_82576_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
126 { 0x8086, E1000_DEV_ID_82576_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
127 { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128 PCI_ANY_ID, PCI_ANY_ID, 0},
129 { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130 PCI_ANY_ID, PCI_ANY_ID, 0},
131 { 0x8086, E1000_DEV_ID_82580_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
132 { 0x8086, E1000_DEV_ID_82580_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
133 { 0x8086, E1000_DEV_ID_82580_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
134 { 0x8086, E1000_DEV_ID_82580_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
135 { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
136 PCI_ANY_ID, PCI_ANY_ID, 0},
137 /* required last entry */
141 /*********************************************************************
142 * Table of branding strings for all supported NICs.
143 *********************************************************************/
/* Indexed by the last field of igb_vendor_info_array entries.
 * NOTE(review): the closing "};" is on a dropped line of this listing. */
145 static char *igb_strings[] = {
146 "Intel(R) PRO/1000 Network Connection"
149 /*********************************************************************
150 * Function prototypes
151 *********************************************************************/
/* Device-interface entry points (newbus methods). */
152 static int igb_probe(device_t);
153 static int igb_attach(device_t);
154 static int igb_detach(device_t);
155 static int igb_shutdown(device_t);
156 static int igb_suspend(device_t);
157 static int igb_resume(device_t);
/* Transmit entry points: legacy if_start and (FreeBSD 8+) multiqueue. */
158 static void igb_start(struct ifnet *);
159 static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
160 #if __FreeBSD_version >= 800000
161 static int igb_mq_start(struct ifnet *, struct mbuf *);
162 static int igb_mq_start_locked(struct ifnet *,
163 struct tx_ring *, struct mbuf *);
164 static void igb_qflush(struct ifnet *);
/* NOTE(review): the matching #endif is on a dropped line. */
166 static int igb_ioctl(struct ifnet *, u_long, caddr_t);
167 static void igb_init(void *);
168 static void igb_init_locked(struct adapter *);
169 static void igb_stop(void *);
170 static void igb_media_status(struct ifnet *, struct ifmediareq *);
171 static int igb_media_change(struct ifnet *);
172 static void igb_identify_hardware(struct adapter *);
173 static int igb_allocate_pci_resources(struct adapter *);
174 static int igb_allocate_msix(struct adapter *);
175 static int igb_allocate_legacy(struct adapter *);
176 static int igb_setup_msix(struct adapter *);
177 static void igb_free_pci_resources(struct adapter *);
178 static void igb_local_timer(void *);
179 static void igb_reset(struct adapter *);
180 static void igb_setup_interface(device_t, struct adapter *);
181 static int igb_allocate_queues(struct adapter *);
182 static void igb_configure_queues(struct adapter *);
/* Transmit-side ring setup/teardown. */
184 static int igb_allocate_transmit_buffers(struct tx_ring *);
185 static void igb_setup_transmit_structures(struct adapter *);
186 static void igb_setup_transmit_ring(struct tx_ring *);
187 static void igb_initialize_transmit_units(struct adapter *);
188 static void igb_free_transmit_structures(struct adapter *);
189 static void igb_free_transmit_buffers(struct tx_ring *);
/* Receive-side ring setup/teardown. */
191 static int igb_allocate_receive_buffers(struct rx_ring *);
192 static int igb_setup_receive_structures(struct adapter *);
193 static int igb_setup_receive_ring(struct rx_ring *);
194 static void igb_initialize_receive_units(struct adapter *);
195 static void igb_free_receive_structures(struct adapter *);
196 static void igb_free_receive_buffers(struct rx_ring *);
197 static void igb_free_receive_ring(struct rx_ring *);
199 static void igb_enable_intr(struct adapter *);
200 static void igb_disable_intr(struct adapter *);
201 static void igb_update_stats_counters(struct adapter *);
202 static bool igb_txeof(struct tx_ring *);
204 static __inline void igb_rx_discard(struct rx_ring *, int);
205 static __inline void igb_rx_input(struct rx_ring *,
206 struct ifnet *, struct mbuf *, u32);
208 static bool igb_rxeof(struct igb_queue *, int);
209 static void igb_rx_checksum(u32, struct mbuf *, u32);
210 static int igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
211 static bool igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
212 static void igb_set_promisc(struct adapter *);
213 static void igb_disable_promisc(struct adapter *);
214 static void igb_set_multi(struct adapter *);
215 static void igb_print_hw_stats(struct adapter *);
216 static void igb_update_link_status(struct adapter *);
217 static void igb_refresh_mbufs(struct rx_ring *, int);
/* VLAN event handlers registered with EVENTHANDLER in igb_attach(). */
219 static void igb_register_vlan(void *, struct ifnet *, u16);
220 static void igb_unregister_vlan(void *, struct ifnet *, u16);
221 static void igb_setup_vlan_hw_support(struct adapter *);
223 static int igb_xmit(struct tx_ring *, struct mbuf **);
224 static int igb_dma_malloc(struct adapter *, bus_size_t,
225 struct igb_dma_alloc *, int);
226 static void igb_dma_free(struct adapter *, struct igb_dma_alloc *);
227 static void igb_print_debug_info(struct adapter *);
228 static void igb_print_nvm_info(struct adapter *);
229 static int igb_is_valid_ether_addr(u8 *);
230 static int igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
231 static int igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
232 /* Management and WOL Support */
233 static void igb_init_manageability(struct adapter *);
234 static void igb_release_manageability(struct adapter *);
235 static void igb_get_hw_control(struct adapter *);
236 static void igb_release_hw_control(struct adapter *);
237 static void igb_enable_wakeup(device_t);
238 static void igb_led_func(void *, int);
/* Legacy/MSI interrupt filter and taskqueue deferred handlers. */
240 static int igb_irq_fast(void *);
241 static void igb_add_rx_process_limit(struct adapter *, const char *,
242 const char *, int *, int);
243 static void igb_handle_rxtx(void *context, int pending);
244 static void igb_handle_que(void *context, int pending);
245 static void igb_handle_link(void *context, int pending);
247 /* These are MSIX only irq handlers */
248 static void igb_msix_que(void *);
249 static void igb_msix_link(void *);
251 #ifdef DEVICE_POLLING
252 static poll_handler_t igb_poll;
255 /*********************************************************************
256 * FreeBSD Device Interface Entry Points
257 *********************************************************************/
/* newbus method table wiring the static entry points above into the
 * device framework.
 * NOTE(review): the terminating {0, 0} entry and closing "};" of
 * igb_methods, and parts of igb_driver, are on dropped lines of this
 * listing. */
259 static device_method_t igb_methods[] = {
260 /* Device interface */
261 DEVMETHOD(device_probe, igb_probe),
262 DEVMETHOD(device_attach, igb_attach),
263 DEVMETHOD(device_detach, igb_detach),
264 DEVMETHOD(device_shutdown, igb_shutdown),
265 DEVMETHOD(device_suspend, igb_suspend),
266 DEVMETHOD(device_resume, igb_resume),
/* Driver descriptor: name "igb", softc is struct adapter. */
270 static driver_t igb_driver = {
271 "igb", igb_methods, sizeof(struct adapter),
274 static devclass_t igb_devclass;
/* Register on the pci bus and declare module dependencies. */
275 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
276 MODULE_DEPEND(igb, pci, 1, 1, 1);
277 MODULE_DEPEND(igb, ether, 1, 1, 1);
279 /*********************************************************************
280 * Tunable default values.
281 *********************************************************************/
283 /* Descriptor defaults */
/* RX/TX descriptor ring sizes; validated against IGB_MIN/MAX in attach. */
284 static int igb_rxd = IGB_DEFAULT_RXD;
285 static int igb_txd = IGB_DEFAULT_TXD;
286 TUNABLE_INT("hw.igb.rxd", &igb_rxd);
287 TUNABLE_INT("hw.igb.txd", &igb_txd);
290 ** AIM: Adaptive Interrupt Moderation
291 ** which means that the interrupt rate
292 ** is varied over time based on the
293 ** traffic for that interrupt vector
295 static int igb_enable_aim = TRUE;
296 TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
299 * MSIX should be the default for best performance,
300 * but this allows it to be forced off for testing.
302 static int igb_enable_msix = 1;
303 TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
306 * Header split has seemed to be beneficial in
307 * many circumstances tested, however there have
308 * been some stability issues, so the default is
311 static bool igb_header_split = FALSE;
312 TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
315 ** This will autoconfigure based on
316 ** the number of CPUs if left at 0.
318 static int igb_num_queues = 0;
319 TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
321 /* How many packets rxeof tries to clean at a time */
322 static int igb_rx_process_limit = 100;
323 TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
325 /* Flow control setting - default to FULL */
326 static int igb_fc_setting = e1000_fc_full;
327 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
330 ** Shadow VFTA table, this is needed because
331 ** the real filter table gets cleared during
332 ** a soft reset and the driver needs to be able
335 static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
338 /*********************************************************************
339 * Device identification routine
341 * igb_probe determines if the driver should be loaded on
342 * adapter based on PCI vendor/device id of the adapter.
344 * return BUS_PROBE_DEFAULT on success, positive on failure
345 *********************************************************************/
/* NOTE(review): the "static int" return-type line, opening/closing
 * braces and the not-found return path are on dropped lines of this
 * listing. */
348 igb_probe(device_t dev)
350 char adapter_name[60];
351 uint16_t pci_vendor_id = 0;
352 uint16_t pci_device_id = 0;
353 uint16_t pci_subvendor_id = 0;
354 uint16_t pci_subdevice_id = 0;
355 igb_vendor_info_t *ent;
357 INIT_DEBUGOUT("igb_probe: begin");
/* Reject non-Intel devices up front (early-return body is on a
 * dropped line). */
359 pci_vendor_id = pci_get_vendor(dev);
360 if (pci_vendor_id != IGB_VENDOR_ID)
363 pci_device_id = pci_get_device(dev);
364 pci_subvendor_id = pci_get_subvendor(dev);
365 pci_subdevice_id = pci_get_subdevice(dev);
/* Walk the match table until the all-zeros terminator entry. */
367 ent = igb_vendor_info_array;
368 while (ent->vendor_id != 0) {
369 if ((pci_vendor_id == ent->vendor_id) &&
370 (pci_device_id == ent->device_id) &&
372 ((pci_subvendor_id == ent->subvendor_id) ||
373 (ent->subvendor_id == PCI_ANY_ID)) &&
375 ((pci_subdevice_id == ent->subdevice_id) ||
376 (ent->subdevice_id == PCI_ANY_ID))) {
/* Build the probe description from the branding string table. */
377 sprintf(adapter_name, "%s %s",
378 igb_strings[ent->index],
380 device_set_desc_copy(dev, adapter_name);
381 return (BUS_PROBE_DEFAULT);
389 /*********************************************************************
390 * Device initialization routine
392 * The attach entry point is called when the driver is being loaded.
393 * This routine identifies the type of hardware, allocates all resources
394 * and initializes the hardware.
396 * return 0 on success, positive on failure
397 *********************************************************************/
/* NOTE(review): this listing has gaps — the "static int" line, the
 * declarations of error/eeprom_data, the "err_*" error labels, and the
 * final return are on dropped lines. */
400 igb_attach(device_t dev)
402 struct adapter *adapter;
406 INIT_DEBUGOUT("igb_attach: begin");
408 adapter = device_get_softc(dev);
409 adapter->dev = adapter->osdep.dev = dev;
410 IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
/* Per-device sysctl nodes: debug dump, stats dump, flow control, AIM. */
413 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
414 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
415 OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
416 igb_sysctl_debug_info, "I", "Debug Information");
418 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
419 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
420 OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
421 igb_sysctl_stats, "I", "Statistics");
423 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
424 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
425 OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
426 &igb_fc_setting, 0, "Flow Control");
428 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
429 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
430 OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
431 &igb_enable_aim, 1, "Interrupt Moderation");
/* Watchdog/link timer runs under the core mutex. */
433 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
435 /* Determine hardware and mac info */
436 igb_identify_hardware(adapter);
438 /* Setup PCI resources */
439 if (igb_allocate_pci_resources(adapter)) {
440 device_printf(dev, "Allocation of PCI resources failed\n");
445 /* Do Shared Code initialization */
446 if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
447 device_printf(dev, "Setup of Shared code failed\n");
452 e1000_get_bus_info(&adapter->hw);
454 /* Sysctls for limiting the amount of work done in the taskqueue */
455 igb_add_rx_process_limit(adapter, "rx_processing_limit",
456 "max number of rx packets to process", &adapter->rx_process_limit,
457 igb_rx_process_limit);
460 * Validate number of transmit and receive descriptors. It
461 * must not exceed hardware maximum, and must be multiple
462 * of E1000_DBA_ALIGN.
/* Fall back to defaults when the tunables are misaligned or out of
 * range (the "else" keywords are on dropped lines). */
464 if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
465 (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
466 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
467 IGB_DEFAULT_TXD, igb_txd);
468 adapter->num_tx_desc = IGB_DEFAULT_TXD;
470 adapter->num_tx_desc = igb_txd;
471 if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
472 (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
473 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
474 IGB_DEFAULT_RXD, igb_rxd);
475 adapter->num_rx_desc = IGB_DEFAULT_RXD;
477 adapter->num_rx_desc = igb_rxd;
/* Default autonegotiation policy before PHY setup. */
479 adapter->hw.mac.autoneg = DO_AUTO_NEG;
480 adapter->hw.phy.autoneg_wait_to_complete = FALSE;
481 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
484 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
485 adapter->hw.phy.mdix = AUTO_ALL_MODES;
486 adapter->hw.phy.disable_polarity_correction = FALSE;
487 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
491 * Set the frame limits assuming
492 * standard ethernet sized frames.
494 adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
495 adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
498 ** Allocate and Setup Queues
500 if (igb_allocate_queues(adapter)) {
506 ** Start from a known state, this is
507 ** important in reading the nvm and
510 e1000_reset_hw(&adapter->hw);
512 /* Make sure we have a good EEPROM before we read from it */
513 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
515 ** Some PCI-E parts fail the first check due to
516 ** the link being in sleep state, call it again,
517 ** if it fails a second time its a real issue.
519 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
521 "The EEPROM Checksum Is Not Valid\n");
528 ** Copy the permanent MAC address out of the EEPROM
530 if (e1000_read_mac_addr(&adapter->hw) < 0) {
531 device_printf(dev, "EEPROM read error while reading MAC"
536 /* Check its sanity */
537 if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
538 device_printf(dev, "Invalid MAC address\n");
544 ** Configure Interrupts
/* MSI-X when multiple vectors were granted and not disabled by tunable;
 * otherwise fall back to MSI/legacy. */
546 if ((adapter->msix > 1) && (igb_enable_msix))
547 error = igb_allocate_msix(adapter);
548 else /* MSI or Legacy */
549 error = igb_allocate_legacy(adapter);
553 /* Setup OS specific network interface */
554 igb_setup_interface(dev, adapter);
556 /* Now get a good starting state */
559 /* Initialize statistics */
560 igb_update_stats_counters(adapter);
562 adapter->hw.mac.get_link_status = 1;
563 igb_update_link_status(adapter);
565 /* Indicate SOL/IDER usage */
566 if (e1000_check_reset_block(&adapter->hw))
568 "PHY reset is blocked due to SOL/IDER session.\n");
570 /* Determine if we have to control management hardware */
571 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
576 /* APME bit in EEPROM is mapped to WUC.APME */
577 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
579 adapter->wol = E1000_WUFC_MAG;
581 /* Register for VLAN events */
582 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
583 igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
584 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
585 igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
587 /* Tell the stack that the interface is not active */
588 adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
590 adapter->led_dev = led_create(igb_led_func, adapter,
591 device_get_nameunit(dev));
593 INIT_DEBUGOUT("igb_attach: end");
/* Error unwind: free rings, give hw control back, release PCI
 * resources, destroy the core lock (labels are on dropped lines). */
598 igb_free_transmit_structures(adapter);
599 igb_free_receive_structures(adapter);
600 igb_release_hw_control(adapter);
602 igb_free_pci_resources(adapter);
603 IGB_CORE_LOCK_DESTROY(adapter);
608 /*********************************************************************
609 * Device removal routine
611 * The detach entry point is called when the driver is being removed.
612 * This routine stops the adapter and deallocates all the resources
613 * that were allocated for driver operation.
615 * return 0 on success, positive on failure
616 *********************************************************************/
/* NOTE(review): "static int", braces, igb_stop call and returns are on
 * dropped lines of this listing. */
619 igb_detach(device_t dev)
621 struct adapter *adapter = device_get_softc(dev);
622 struct ifnet *ifp = adapter->ifp;
624 INIT_DEBUGOUT("igb_detach: begin");
626 /* Make sure VLANS are not using driver */
627 if (adapter->ifp->if_vlantrunk != NULL) {
628 device_printf(dev,"Vlan in use, detach first\n");
632 if (adapter->led_dev != NULL)
633 led_destroy(adapter->led_dev);
635 #ifdef DEVICE_POLLING
636 if (ifp->if_capenable & IFCAP_POLLING)
637 ether_poll_deregister(ifp);
/* Mark in_detach under the core lock so igb_ioctl() refuses new work. */
640 IGB_CORE_LOCK(adapter)
641 adapter->in_detach = 1;
643 IGB_CORE_UNLOCK(adapter);
645 e1000_phy_hw_reset(&adapter->hw);
647 /* Give control back to firmware */
648 igb_release_manageability(adapter);
649 igb_release_hw_control(adapter);
/* Arm wake-on-LAN per adapter->wol before releasing the device. */
652 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
653 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
654 igb_enable_wakeup(dev);
657 /* Unregister VLAN events */
658 if (adapter->vlan_attach != NULL)
659 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
660 if (adapter->vlan_detach != NULL)
661 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
663 ether_ifdetach(adapter->ifp);
/* callout_drain waits for a running timer to finish; must be after
 * the core lock has been released. */
665 callout_drain(&adapter->timer);
667 igb_free_pci_resources(adapter);
668 bus_generic_detach(dev);
671 igb_free_transmit_structures(adapter);
672 igb_free_receive_structures(adapter);
674 IGB_CORE_LOCK_DESTROY(adapter);
679 /*********************************************************************
681 * Shutdown entry point
683 **********************************************************************/
/* Shutdown simply delegates to suspend: quiesce hw and arm wakeup. */
686 igb_shutdown(device_t dev)
688 return igb_suspend(dev);
692 * Suspend/resume device methods.
/* Quiesce the adapter, hand control to firmware, arm wake-on-LAN,
 * then let the generic bus code finish the suspend.
 * NOTE(review): the igb_stop call and braces are on dropped lines. */
695 igb_suspend(device_t dev)
697 struct adapter *adapter = device_get_softc(dev);
699 IGB_CORE_LOCK(adapter);
703 igb_release_manageability(adapter);
704 igb_release_hw_control(adapter);
/* Enable PME and program the wake filter from adapter->wol. */
707 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
708 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
709 igb_enable_wakeup(dev);
712 IGB_CORE_UNLOCK(adapter);
714 return bus_generic_suspend(dev);
/* Re-initialize the hardware after suspend; restart transmit if the
 * interface was up and running (the restart call is on a dropped line). */
718 igb_resume(device_t dev)
720 struct adapter *adapter = device_get_softc(dev);
721 struct ifnet *ifp = adapter->ifp;
723 IGB_CORE_LOCK(adapter);
724 igb_init_locked(adapter);
725 igb_init_manageability(adapter);
727 if ((ifp->if_flags & IFF_UP) &&
728 (ifp->if_drv_flags & IFF_DRV_RUNNING))
731 IGB_CORE_UNLOCK(adapter);
733 return bus_generic_resume(dev);
737 /*********************************************************************
738 * Transmit entry point
740 * igb_start is called by the stack to initiate a transmit.
741 * The driver will remain in this routine as long as there are
742 * packets to transmit and transmit resources are available.
743 * In case resources are not available stack is notified and
744 * the packet is requeued.
745 **********************************************************************/
/* Drains ifp->if_snd onto the given TX ring; caller holds the TX lock.
 * NOTE(review): the m_head declaration, several early returns/breaks
 * and the igb_txeof call are on dropped lines of this listing. */
748 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
750 struct adapter *adapter = ifp->if_softc;
753 IGB_TX_LOCK_ASSERT(txr);
/* Bail unless RUNNING and not OACTIVE, and the link is up. */
755 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
758 if (!adapter->link_active)
761 /* Call cleanup if number of TX descriptors low */
762 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
765 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
/* Out of descriptors: mark OACTIVE so the stack stops feeding us. */
766 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
767 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
770 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
774 * Encapsulation can modify our pointer, and or make it
775 * NULL on failure. In that event, we can't requeue.
777 if (igb_xmit(txr, &m_head)) {
780 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
781 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
785 /* Send a copy of the frame to the BPF listener */
786 ETHER_BPF_MTAP(ifp, m_head);
788 /* Set watchdog on */
789 txr->watchdog_time = ticks;
790 txr->watchdog_check = TRUE;
795 * Legacy TX driver routine, called from the
796 * stack, always uses tx[0], and spins for it.
797 * Should not be used with multiqueue tx
/* Legacy if_start: take the first ring's lock and drain if_snd.
 * NOTE(review): the lock/unlock calls around igb_start_locked are on
 * dropped lines of this listing. */
800 igb_start(struct ifnet *ifp)
802 struct adapter *adapter = ifp->if_softc;
803 struct tx_ring *txr = adapter->tx_rings;
805 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
807 igb_start_locked(txr, ifp);
813 #if __FreeBSD_version >= 800000
815 ** Multiqueue Transmit driver
/* if_transmit entry: pick a ring by flow id (curcpu fallback is on a
 * dropped line), then transmit directly if the ring lock is free or
 * enqueue into the ring's buf_ring otherwise. */
819 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
821 struct adapter *adapter = ifp->if_softc;
825 /* Which queue to use */
826 if ((m->m_flags & M_FLOWID) != 0)
827 i = m->m_pkthdr.flowid % adapter->num_queues;
829 txr = &adapter->tx_rings[i];
831 if (IGB_TX_TRYLOCK(txr)) {
832 err = igb_mq_start_locked(ifp, txr, m);
/* Lock contended: queue for later servicing by the lock holder. */
835 err = drbr_enqueue(ifp, txr->br, m);
/* Transmit m (may be NULL to just drain the buf_ring) on txr; caller
 * holds the TX lock. Returns 0 or an errno from drbr_enqueue/igb_xmit.
 * NOTE(review): the "static int" line, local declarations (err/next)
 * and several returns/breaks are on dropped lines of this listing. */
841 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
843 struct adapter *adapter = txr->adapter;
847 IGB_TX_LOCK_ASSERT(txr);
/* Not running, OACTIVE, or link down: just stash the mbuf. */
849 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
850 IFF_DRV_RUNNING || adapter->link_active == 0) {
852 err = drbr_enqueue(ifp, txr->br, m);
856 /* Call cleanup if number of TX descriptors low */
857 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
862 next = drbr_dequeue(ifp, txr->br);
863 } else if (drbr_needs_enqueue(ifp, txr->br)) {
864 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
866 next = drbr_dequeue(ifp, txr->br);
870 /* Process the queue */
871 while (next != NULL) {
/* On encapsulation failure requeue what's left, if anything. */
872 if ((err = igb_xmit(txr, &next)) != 0) {
874 err = drbr_enqueue(ifp, txr->br, next);
878 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
879 ETHER_BPF_MTAP(ifp, next);
880 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
882 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
883 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
886 next = drbr_dequeue(ifp, txr->br);
889 /* Set the watchdog */
890 txr->watchdog_check = TRUE;
891 txr->watchdog_time = ticks;
897 ** Flush all ring buffers
/* if_qflush: free every mbuf still queued in each ring's buf_ring
 * (the m_freem call and per-ring locking are on dropped lines), then
 * flush the interface queue. */
900 igb_qflush(struct ifnet *ifp)
902 struct adapter *adapter = ifp->if_softc;
903 struct tx_ring *txr = adapter->tx_rings;
906 for (int i = 0; i < adapter->num_queues; i++, txr++) {
908 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
914 #endif /* __FreeBSD_version >= 800000 */
916 /*********************************************************************
919 * igb_ioctl is called when the user wants to configure the
922 * return 0 on success, positive on failure
923 **********************************************************************/
/* NOTE(review): this listing drops the "static int" line, the switch
 * statement and every "case"/"break" label; the cases below are
 * identified from the visible IOCTL_DEBUGOUT strings. */
926 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
928 struct adapter *adapter = ifp->if_softc;
929 struct ifreq *ifr = (struct ifreq *)data;
931 struct ifaddr *ifa = (struct ifaddr *)data;
/* Refuse configuration while detach is tearing the device down. */
935 if (adapter->in_detach)
/* SIOCSIFADDR: bring the interface up lazily for AF_INET. */
941 if (ifa->ifa_addr->sa_family == AF_INET) {
944 * Since resetting hardware takes a very long time
945 * and results in link renegotiation we only
946 * initialize the hardware only when it is absolutely
949 ifp->if_flags |= IFF_UP;
950 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
951 IGB_CORE_LOCK(adapter);
952 igb_init_locked(adapter);
953 IGB_CORE_UNLOCK(adapter);
955 if (!(ifp->if_flags & IFF_NOARP))
956 arp_ifinit(ifp, ifa);
959 error = ether_ioctl(ifp, command, data);
/* SIOCSIFMTU: validate against the 9234-byte jumbo hardware limit,
 * then re-init with the new frame size. */
965 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
967 IGB_CORE_LOCK(adapter);
968 max_frame_size = 9234;
969 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
971 IGB_CORE_UNLOCK(adapter);
976 ifp->if_mtu = ifr->ifr_mtu;
977 adapter->max_frame_size =
978 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
979 igb_init_locked(adapter);
980 IGB_CORE_UNLOCK(adapter);
/* SIOCSIFFLAGS: toggle promisc/allmulti without a full re-init when
 * only those bits changed. */
984 IOCTL_DEBUGOUT("ioctl rcv'd:\
985 SIOCSIFFLAGS (Set Interface Flags)");
986 IGB_CORE_LOCK(adapter);
987 if (ifp->if_flags & IFF_UP) {
988 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
989 if ((ifp->if_flags ^ adapter->if_flags) &
990 (IFF_PROMISC | IFF_ALLMULTI)) {
991 igb_disable_promisc(adapter);
992 igb_set_promisc(adapter);
995 igb_init_locked(adapter);
997 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
999 adapter->if_flags = ifp->if_flags;
1000 IGB_CORE_UNLOCK(adapter);
/* SIOCADDMULTI/SIOCDELMULTI: reload the multicast table with
 * interrupts masked. */
1004 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1005 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1006 IGB_CORE_LOCK(adapter);
1007 igb_disable_intr(adapter);
1008 igb_set_multi(adapter);
1009 #ifdef DEVICE_POLLING
1010 if (!(ifp->if_capenable & IFCAP_POLLING))
1012 igb_enable_intr(adapter);
1013 IGB_CORE_UNLOCK(adapter);
/* SIOCxIFMEDIA: media changes are refused while SOL/IDER is active. */
1017 /* Check SOL/IDER usage */
1018 IGB_CORE_LOCK(adapter);
1019 if (e1000_check_reset_block(&adapter->hw)) {
1020 IGB_CORE_UNLOCK(adapter);
1021 device_printf(adapter->dev, "Media change is"
1022 " blocked due to SOL/IDER session.\n");
1025 IGB_CORE_UNLOCK(adapter);
1027 IOCTL_DEBUGOUT("ioctl rcv'd: \
1028 SIOCxIFMEDIA (Get/Set Interface Media)");
1029 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
/* SIOCSIFCAP: toggle offload capabilities; "reinit" (set on dropped
 * lines) forces a re-init when a toggle needs hardware reprogramming. */
1035 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1037 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1038 #ifdef DEVICE_POLLING
1039 if (mask & IFCAP_POLLING) {
1040 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1041 error = ether_poll_register(igb_poll, ifp);
1044 IGB_CORE_LOCK(adapter);
1045 igb_disable_intr(adapter);
1046 ifp->if_capenable |= IFCAP_POLLING;
1047 IGB_CORE_UNLOCK(adapter);
1049 error = ether_poll_deregister(ifp);
1050 /* Enable interrupt even in error case */
1051 IGB_CORE_LOCK(adapter);
1052 igb_enable_intr(adapter);
1053 ifp->if_capenable &= ~IFCAP_POLLING;
1054 IGB_CORE_UNLOCK(adapter);
1058 if (mask & IFCAP_HWCSUM) {
1059 ifp->if_capenable ^= IFCAP_HWCSUM;
1062 if (mask & IFCAP_TSO4) {
1063 ifp->if_capenable ^= IFCAP_TSO4;
1066 if (mask & IFCAP_VLAN_HWTAGGING) {
1067 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1070 if (mask & IFCAP_VLAN_HWFILTER) {
1071 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1074 if (mask & IFCAP_LRO) {
1075 ifp->if_capenable ^= IFCAP_LRO;
1078 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1080 VLAN_CAPABILITIES(ifp);
/* default: hand anything unrecognized to the common ethernet ioctl. */
1085 error = ether_ioctl(ifp, command, data);
1093 /*********************************************************************
1096 * This routine is used in two ways. It is used by the stack as
1097 * init entry point in network interface structure. It is also used
1098 * by the driver as a hw/sw initialization routine to get to a
1101 * return 0 on success, positive on failure
1102 **********************************************************************/
/* Core (re)initialization; caller must hold the core lock.
 * NOTE(review): the "static void" line, the ctrl declaration, the
 * igb_reset call and some braces/else keywords are on dropped lines
 * of this listing. */
1105 igb_init_locked(struct adapter *adapter)
1107 struct ifnet *ifp = adapter->ifp;
1108 device_t dev = adapter->dev;
1110 INIT_DEBUGOUT("igb_init: begin");
1112 IGB_CORE_LOCK_ASSERT(adapter);
1114 igb_disable_intr(adapter);
1115 callout_stop(&adapter->timer);
1117 /* Get the latest mac address, User can use a LAA */
1118 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1121 /* Put the address into the Receive Address Array */
1122 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1125 igb_update_link_status(adapter);
/* Program the VLAN ethertype register. */
1127 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1129 /* Use real VLAN Filter support? */
1130 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1131 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1132 /* Use real VLAN Filter support */
1133 igb_setup_vlan_hw_support(adapter);
/* Tag stripping only: set VME in CTRL. */
1136 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1137 ctrl |= E1000_CTRL_VME;
1138 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1142 /* Set hardware offload abilities */
1143 ifp->if_hwassist = 0;
1144 if (ifp->if_capenable & IFCAP_TXCSUM) {
1145 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1146 #if __FreeBSD_version >= 800000
/* SCTP checksum offload only exists on 82576 hardware. */
1147 if (adapter->hw.mac.type == e1000_82576)
1148 ifp->if_hwassist |= CSUM_SCTP;
1152 if (ifp->if_capenable & IFCAP_TSO4)
1153 ifp->if_hwassist |= CSUM_TSO;
1155 /* Configure for OS presence */
1156 igb_init_manageability(adapter);
1158 /* Prepare transmit descriptors and buffers */
1159 igb_setup_transmit_structures(adapter);
1160 igb_initialize_transmit_units(adapter);
1162 /* Setup Multicast table */
1163 igb_set_multi(adapter);
1166 ** Figure out the desired mbuf pool
1167 ** for doing jumbo/packetsplit
1169 if (ifp->if_mtu > ETHERMTU)
1170 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1172 adapter->rx_mbuf_sz = MCLBYTES;
1174 /* Prepare receive descriptors and buffers */
1175 if (igb_setup_receive_structures(adapter)) {
1176 device_printf(dev, "Could not setup receive structures\n");
1179 igb_initialize_receive_units(adapter);
1181 /* Don't lose promiscuous settings */
1182 igb_set_promisc(adapter);
/* Declare the interface running and restart the 1 Hz local timer. */
1184 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1185 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1187 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1188 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1190 if (adapter->msix > 1) /* Set up queue routing */
1191 igb_configure_queues(adapter);
1193 /* Set up VLAN tag offload and filter */
1194 igb_setup_vlan_hw_support(adapter);
1196 /* this clears any pending interrupts */
1197 E1000_READ_REG(&adapter->hw, E1000_ICR);
1198 #ifdef DEVICE_POLLING
1200 * Only enable interrupts if we are not polling, make sure
1201 * they are off otherwise.
1203 if (ifp->if_capenable & IFCAP_POLLING)
1204 igb_disable_intr(adapter);
1206 #endif /* DEVICE_POLLING */
1208 igb_enable_intr(adapter);
/* Fire a software link-status-change interrupt to settle link state. */
1209 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1212 /* Don't reset the phy next time init gets called */
1213 adapter->hw.phy.reset_disable = TRUE;
/* if_init entry point: takes the core lock around igb_init_locked().
 * NOTE(review): signature elided in this extract — presumably
 * igb_init(void *arg); confirm in full source. */
1219 struct adapter *adapter = arg;
1221 IGB_CORE_LOCK(adapter);
1222 igb_init_locked(adapter);
1223 IGB_CORE_UNLOCK(adapter);
/* Deferred rx/tx task for the legacy/MSI path: clean both rings,
 * restart transmit, then re-enable interrupts. */
1228 igb_handle_rxtx(void *context, int pending)
1230 struct igb_queue *que = context;
1231 struct adapter *adapter = que->adapter;
1232 struct tx_ring *txr = adapter->tx_rings;
1237 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
/* Requeue ourselves if igb_rxeof() reports more packets pending. */
1238 if (igb_rxeof(que, adapter->rx_process_limit))
1239 taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1243 #if __FreeBSD_version >= 800000
1244 if (!drbr_empty(ifp, txr->br))
1245 igb_mq_start_locked(ifp, txr, NULL);
1247 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1248 igb_start_locked(txr, ifp);
/* Interrupts were masked by igb_irq_fast(); re-enable them now. */
1253 igb_enable_intr(adapter);
/* Per-queue deferred task (MSI-X path): clean rx/tx for one queue
 * and restart transmit. */
1257 igb_handle_que(void *context, int pending)
1259 struct igb_queue *que = context;
1260 struct adapter *adapter = que->adapter;
1261 struct tx_ring *txr = que->txr;
1262 struct ifnet *ifp = adapter->ifp;
1265 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1266 more = igb_rxeof(que, -1);
1270 #if __FreeBSD_version >= 800000
1271 igb_mq_start_locked(ifp, txr, NULL);
1273 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1274 igb_start_locked(txr, ifp);
/* More work remained: requeue this task instead of unmasking. */
1278 taskqueue_enqueue(que->tq, &que->que_task);
1283 /* Reenable this interrupt */
1284 #ifdef DEVICE_POLLING
/* Leave the vector masked while kernel polling owns the device. */
1285 if (!(ifp->if_capenable & IFCAP_POLLING))
1287 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1290 /* Deal with link in a sleepable context */
1292 igb_handle_link(void *context, int pending)
1294 struct adapter *adapter = context;
/* Force a fresh PHY read on the next link-status update. */
1296 adapter->hw.mac.get_link_status = 1;
1297 igb_update_link_status(adapter);
1300 /*********************************************************************
1302 * MSI/Legacy Deferred
1303 * Interrupt Service routine
1305 *********************************************************************/
/* Filter-level handler: does no real work itself, only triages the
 * cause and defers processing to the taskqueue. */
1307 igb_irq_fast(void *arg)
1309 struct adapter *adapter = arg;
1313 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
/* All-ones read means the hardware has disappeared (hot eject). */
1316 if (reg_icr == 0xffffffff)
1317 return FILTER_STRAY;
1319 /* Definitely not our interrupt. */
1321 return FILTER_STRAY;
1323 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1324 return FILTER_STRAY;
1327 * Mask interrupts until the taskqueue is finished running. This is
1328 * cheap, just assume that it is needed. This also works around the
1329 * MSI message reordering errata on certain systems.
1331 igb_disable_intr(adapter);
1332 taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1334 /* Link status change */
1335 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1336 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1338 if (reg_icr & E1000_ICR_RXO)
1339 adapter->rx_overruns++;
1340 return FILTER_HANDLED;
1343 #ifdef DEVICE_POLLING
1344 /*********************************************************************
1346 * Legacy polling routine : if using this code you MUST be sure that
1347 * multiqueue is not defined, ie, set igb_num_queues to 1.
1349 *********************************************************************/
/* Pre-8.0 kernels use a void polling hook; 8.0+ returns the rx count. */
1350 #if __FreeBSD_version >= 800000
1351 #define POLL_RETURN_COUNT(a) (a)
1354 #define POLL_RETURN_COUNT(a)
1357 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1359 struct adapter *adapter = ifp->if_softc;
1360 struct igb_queue *que = adapter->queues;
1361 struct tx_ring *txr = adapter->tx_rings;
1362 u32 reg_icr, rx_done = 0;
1363 u32 loop = IGB_MAX_LOOP;
1366 IGB_CORE_LOCK(adapter);
/* Interface down: nothing to poll. */
1367 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1368 IGB_CORE_UNLOCK(adapter);
1369 return POLL_RETURN_COUNT(rx_done);
/* POLL_AND_CHECK_STATUS additionally checks link changes/overruns. */
1372 if (cmd == POLL_AND_CHECK_STATUS) {
1373 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1374 /* Link status change */
1375 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1376 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1378 if (reg_icr & E1000_ICR_RXO)
1379 adapter->rx_overruns++;
1381 IGB_CORE_UNLOCK(adapter);
1383 /* TODO: rx_count */
1384 rx_done = igb_rxeof(que, count) ? 1 : 0;
/* Drain completed transmits, bounded by IGB_MAX_LOOP iterations. */
1388 more = igb_txeof(txr);
1389 } while (loop-- && more);
1390 #if __FreeBSD_version >= 800000
1391 if (!drbr_empty(ifp, txr->br))
1392 igb_mq_start_locked(ifp, txr, NULL);
1394 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1395 igb_start_locked(txr, ifp);
1398 return POLL_RETURN_COUNT(rx_done);
1400 #endif /* DEVICE_POLLING */
1402 /*********************************************************************
1404 * MSIX TX Interrupt Service routine
1406 **********************************************************************/
/* NOTE(review): despite the header above, this handler services both
 * the TX and RX rings of one queue (see igb_txeof/igb_rxeof below). */
1408 igb_msix_que(void *arg)
1410 struct igb_queue *que = arg;
1411 struct adapter *adapter = que->adapter;
1412 struct tx_ring *txr = que->txr;
1413 struct rx_ring *rxr = que->rxr;
1415 bool more_tx, more_rx;
/* Mask this queue's vector while servicing it. */
1417 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1421 more_tx = igb_txeof(txr);
1424 more_rx = igb_rxeof(que, adapter->rx_process_limit);
/* Skip adaptive moderation entirely when the tunable is off. */
1426 if (igb_enable_aim == FALSE)
1429 ** Do Adaptive Interrupt Moderation:
1430 ** - Write out last calculated setting
1431 ** - Calculate based on average size over
1432 ** the last interval.
1434 if (que->eitr_setting)
1435 E1000_WRITE_REG(&adapter->hw,
1436 E1000_EITR(que->msix), que->eitr_setting);
1438 que->eitr_setting = 0;
1440 /* Idle, do nothing */
1441 if ((txr->bytes == 0) && (rxr->bytes == 0))
1444 /* Used half Default if sub-gig */
1445 if (adapter->link_speed != 1000)
1446 newitr = IGB_DEFAULT_ITR / 2;
/* Otherwise derive the interval from the average frame size seen
 * on both rings since the last interrupt. */
1448 if ((txr->bytes) && (txr->packets))
1449 newitr = txr->bytes/txr->packets;
1450 if ((rxr->bytes) && (rxr->packets))
1451 newitr = max(newitr,
1452 (rxr->bytes / rxr->packets));
1453 newitr += 24; /* account for hardware frame, crc */
1454 /* set an upper boundary */
1455 newitr = min(newitr, 3000);
1456 /* Be nice to the mid range */
1457 if ((newitr > 300) && (newitr < 1200))
1458 newitr = (newitr / 3);
1460 newitr = (newitr / 2);
1462 newitr &= 0x7FFC; /* Mask invalid bits */
/* 82575 EITR wants the interval replicated into the high half. */
1463 if (adapter->hw.mac.type == e1000_82575)
1464 newitr |= newitr << 16;
1466 newitr |= E1000_EITR_CNT_IGNR;
1468 /* save for next interrupt */
1469 que->eitr_setting = newitr;
1478 /* Schedule a clean task if needed*/
1479 if (more_tx || more_rx)
1480 taskqueue_enqueue(que->tq, &que->que_task);
1482 /* Reenable this interrupt */
1483 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1488 /*********************************************************************
1490 * MSIX Link Interrupt Service routine
1492 **********************************************************************/
1495 igb_msix_link(void *arg)
1497 struct adapter *adapter = arg;
1500 ++adapter->link_irq;
1501 icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
/* Only link-status-change matters here; defer real work to the task. */
1502 if (!(icr & E1000_ICR_LSC))
1504 taskqueue_enqueue(adapter->tq, &adapter->link_task);
/* Re-arm the LSC cause and the link vector. */
1508 E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1509 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1514 /*********************************************************************
1516 * Media Ioctl callback
1518 * This routine is called whenever the user queries the status of
1519 * the interface using ifconfig.
1521 **********************************************************************/
1523 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1525 struct adapter *adapter = ifp->if_softc;
1526 u_char fiber_type = IFM_1000_SX;
1528 INIT_DEBUGOUT("igb_media_status: begin");
/* Refresh link state under the core lock before reporting. */
1530 IGB_CORE_LOCK(adapter);
1531 igb_update_link_status(adapter);
1533 ifmr->ifm_status = IFM_AVALID;
1534 ifmr->ifm_active = IFM_ETHER;
/* No link: report only "valid" status and return early. */
1536 if (!adapter->link_active) {
1537 IGB_CORE_UNLOCK(adapter);
1541 ifmr->ifm_status |= IFM_ACTIVE;
/* Fiber/serdes is reported as 1000_SX full duplex. */
1543 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1544 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1545 ifmr->ifm_active |= fiber_type | IFM_FDX;
1547 switch (adapter->link_speed) {
1549 ifmr->ifm_active |= IFM_10_T;
1552 ifmr->ifm_active |= IFM_100_TX;
1555 ifmr->ifm_active |= IFM_1000_T;
1558 if (adapter->link_duplex == FULL_DUPLEX)
1559 ifmr->ifm_active |= IFM_FDX;
1561 ifmr->ifm_active |= IFM_HDX;
1563 IGB_CORE_UNLOCK(adapter);
1566 /*********************************************************************
1568 * Media Ioctl callback
1570 * This routine is called when the user changes speed/duplex using
1571 * media/mediaopt option with ifconfig.
1573 **********************************************************************/
1575 igb_media_change(struct ifnet *ifp)
1577 struct adapter *adapter = ifp->if_softc;
1578 struct ifmedia *ifm = &adapter->media;
1580 INIT_DEBUGOUT("igb_media_change: begin");
1582 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1585 IGB_CORE_LOCK(adapter);
1586 switch (IFM_SUBTYPE(ifm->ifm_media)) {
/* Autoselect: advertise the default set and autonegotiate. */
1588 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1589 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1594 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1595 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
/* Forced 100Mb: duplex comes from the IFM_GMASK bits. */
1598 adapter->hw.mac.autoneg = FALSE;
1599 adapter->hw.phy.autoneg_advertised = 0;
1600 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1601 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1603 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
/* Forced 10Mb. */
1606 adapter->hw.mac.autoneg = FALSE;
1607 adapter->hw.phy.autoneg_advertised = 0;
1608 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1609 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1611 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1614 device_printf(adapter->dev, "Unsupported media type\n");
1617 /* As the speed/duplex settings may have changed we need to
/* Allow the PHY to be reset on the reinit that follows. */
1620 adapter->hw.phy.reset_disable = FALSE;
1622 igb_init_locked(adapter);
1623 IGB_CORE_UNLOCK(adapter);
1629 /*********************************************************************
1631 * This routine maps the mbufs to Advanced TX descriptors.
1632 * used by the 82575 adapter.
1634 **********************************************************************/
/* Encapsulates one mbuf chain into advanced TX descriptors and bumps
 * the tail register. Caller owns the TX ring lock (per driver style —
 * not visible in this extract). */
1637 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1639 struct adapter *adapter = txr->adapter;
1640 bus_dma_segment_t segs[IGB_MAX_SCATTER];
1642 struct igb_tx_buffer *tx_buffer, *tx_buffer_mapped;
1643 union e1000_adv_tx_desc *txd = NULL;
1644 struct mbuf *m_head;
1645 u32 olinfo_status = 0, cmd_type_len = 0;
1646 int nsegs, i, j, error, first, last = 0;
1652 /* Set basic descriptor constants */
1653 cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1654 cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1655 if (m_head->m_flags & M_VLANTAG)
1656 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1659 * Force a cleanup if number of TX descriptors
1660 * available hits the threshold
1662 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1664 /* Now do we at least have a minimal? */
1665 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1666 txr->no_desc_avail++;
1672 * Map the packet for DMA.
1674 * Capture the first descriptor index,
1675 * this descriptor will have the index
1676 * of the EOP which is the only one that
1677 * now gets a DONE bit writeback.
1679 first = txr->next_avail_desc;
1680 tx_buffer = &txr->tx_buffers[first];
1681 tx_buffer_mapped = tx_buffer;
1682 map = tx_buffer->map;
1684 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1685 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
/* EFBIG: chain has too many segments — defrag once and retry. */
1687 if (error == EFBIG) {
1690 m = m_defrag(*m_headp, M_DONTWAIT);
1692 adapter->mbuf_defrag_failed++;
1700 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1701 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1703 if (error == ENOMEM) {
1704 adapter->no_tx_dma_setup++;
1706 } else if (error != 0) {
1707 adapter->no_tx_dma_setup++;
1712 } else if (error == ENOMEM) {
1713 adapter->no_tx_dma_setup++;
1715 } else if (error != 0) {
1716 adapter->no_tx_dma_setup++;
1722 /* Check again to be sure we have enough descriptors */
1723 if (nsegs > (txr->tx_avail - 2)) {
1724 txr->no_desc_avail++;
1725 bus_dmamap_unload(txr->txtag, map);
1731 * Set up the context descriptor:
1732 * used when any hardware offload is done.
1733 * This includes CSUM, VLAN, and TSO. It
1734 * will use the first descriptor.
1736 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1737 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1738 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1739 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1740 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1743 } else if (igb_tx_ctx_setup(txr, m_head))
1744 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1746 /* Calculate payload length */
1747 olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1748 << E1000_ADVTXD_PAYLEN_SHIFT);
1750 /* 82575 needs the queue index added */
1751 if (adapter->hw.mac.type == e1000_82575)
1752 olinfo_status |= txr->me << 4;
1754 /* Set up our transmit descriptors */
1755 i = txr->next_avail_desc;
1756 for (j = 0; j < nsegs; j++) {
1758 bus_addr_t seg_addr;
1760 tx_buffer = &txr->tx_buffers[i];
1761 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1762 seg_addr = segs[j].ds_addr;
1763 seg_len = segs[j].ds_len;
1765 txd->read.buffer_addr = htole64(seg_addr);
1766 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1767 txd->read.olinfo_status = htole32(olinfo_status);
/* Wrap the ring index at the end of the descriptor ring. */
1769 if (++i == adapter->num_tx_desc)
1771 tx_buffer->m_head = NULL;
1772 tx_buffer->next_eop = -1;
1775 txr->next_avail_desc = i;
1776 txr->tx_avail -= nsegs;
/* The mbuf and its loaded DMA map stay with the *last* buffer of the
 * packet; swap maps so the first buffer keeps a valid spare map. */
1778 tx_buffer->m_head = m_head;
1779 tx_buffer_mapped->map = tx_buffer->map;
1780 tx_buffer->map = map;
1781 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1784 * Last Descriptor of Packet
1785 * needs End Of Packet (EOP)
1786 * and Report Status (RS)
1788 txd->read.cmd_type_len |=
1789 htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1791 * Keep track in the first buffer which
1792 * descriptor will be written back
1794 tx_buffer = &txr->tx_buffers[first];
1795 tx_buffer->next_eop = last;
1796 txr->watchdog_time = ticks;
1799 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1800 * that this frame is available to transmit.
1802 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1803 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1804 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
/* Mirror the interface's IFF_PROMISC / IFF_ALLMULTI flags into the
 * receive control register (RCTL). */
1812 igb_set_promisc(struct adapter *adapter)
1814 struct ifnet *ifp = adapter->ifp;
1817 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1819 if (ifp->if_flags & IFF_PROMISC) {
/* Promiscuous: accept all unicast and multicast. */
1820 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1821 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1822 } else if (ifp->if_flags & IFF_ALLMULTI) {
/* All-multicast only: multicast promiscuous, unicast filtered. */
1823 reg_rctl |= E1000_RCTL_MPE;
1824 reg_rctl &= ~E1000_RCTL_UPE;
1825 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
/* Clear both unicast- and multicast-promiscuous bits in RCTL. */
1830 igb_disable_promisc(struct adapter *adapter)
1834 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1836 reg_rctl &= (~E1000_RCTL_UPE);
1837 reg_rctl &= (~E1000_RCTL_MPE);
1838 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1842 /*********************************************************************
1845 * This routine is called whenever multicast address list is updated.
1847 **********************************************************************/
1850 igb_set_multi(struct adapter *adapter)
1852 struct ifnet *ifp = adapter->ifp;
1853 struct ifmultiaddr *ifma;
1855 u8 mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1859 IOCTL_DEBUGOUT("igb_set_multi: begin");
/* Pre-8.0 kernels lock with IF_ADDR_LOCK; newer use if_maddr_rlock. */
1861 #if __FreeBSD_version < 800000
1864 if_maddr_rlock(ifp);
/* Gather link-layer multicast addresses into the flat mta[] table. */
1866 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1867 if (ifma->ifma_addr->sa_family != AF_LINK)
1870 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1873 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1874 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1877 #if __FreeBSD_version < 800000
1878 IF_ADDR_UNLOCK(ifp);
1880 if_maddr_runlock(ifp);
/* Table overflow: fall back to multicast-promiscuous mode. */
1883 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1884 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1885 reg_rctl |= E1000_RCTL_MPE;
1886 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1888 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1892 /*********************************************************************
1894 * This routine checks for link status,
1895 * updates statistics, and does the watchdog.
1897 **********************************************************************/
/* 1 Hz callout armed by igb_init_locked(); runs with the core lock. */
1900 igb_local_timer(void *arg)
1902 struct adapter *adapter = arg;
1903 struct ifnet *ifp = adapter->ifp;
1904 device_t dev = adapter->dev;
1905 struct tx_ring *txr = adapter->tx_rings;
1908 IGB_CORE_LOCK_ASSERT(adapter);
1910 igb_update_link_status(adapter);
1911 igb_update_stats_counters(adapter);
1913 if (igb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
1914 igb_print_hw_stats(adapter);
1917 ** Watchdog: check for time since any descriptor was cleaned
1919 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1920 if (txr->watchdog_check == FALSE)
/* A queue stalled past IGB_WATCHDOG ticks takes the reset path below. */
1922 if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1926 /* Trigger an RX interrupt on all queues */
1927 #ifdef DEVICE_POLLING
1928 if (!(ifp->if_capenable & IFCAP_POLLING))
1930 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
/* Re-arm ourselves for the next second. */
1931 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
/* Watchdog path: dump ring state and reinitialize the adapter. */
1935 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1936 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1937 E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1938 E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1939 device_printf(dev,"TX(%d) desc avail = %d,"
1940 "Next TX to Clean = %d\n",
1941 txr->me, txr->tx_avail, txr->next_to_clean);
1942 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1943 adapter->watchdog_events++;
1944 igb_init_locked(adapter);
/* Re-reads link state per media type, and on an up/down transition
 * updates speed/duplex, notifies the stack, and (on down) disarms
 * the per-queue TX watchdogs. */
1948 igb_update_link_status(struct adapter *adapter)
1950 struct e1000_hw *hw = &adapter->hw;
1951 struct ifnet *ifp = adapter->ifp;
1952 device_t dev = adapter->dev;
1953 struct tx_ring *txr = adapter->tx_rings;
1956 /* Get the cached link value or read for real */
1957 switch (hw->phy.media_type) {
1958 case e1000_media_type_copper:
1959 if (hw->mac.get_link_status) {
1960 /* Do the work to read phy */
1961 e1000_check_for_link(hw);
/* get_link_status is cleared by the shared code once link is valid. */
1962 link_check = !hw->mac.get_link_status;
1966 case e1000_media_type_fiber:
1967 e1000_check_for_link(hw);
1968 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1971 case e1000_media_type_internal_serdes:
1972 e1000_check_for_link(hw);
1973 link_check = adapter->hw.mac.serdes_has_link;
1976 case e1000_media_type_unknown:
1980 /* Now we check if a transition has happened */
1981 if (link_check && (adapter->link_active == 0)) {
/* Link came up: record speed/duplex and notify the stack. */
1982 e1000_get_speed_and_duplex(&adapter->hw,
1983 &adapter->link_speed, &adapter->link_duplex);
1985 device_printf(dev, "Link is up %d Mbps %s\n",
1986 adapter->link_speed,
1987 ((adapter->link_duplex == FULL_DUPLEX) ?
1988 "Full Duplex" : "Half Duplex"));
1989 adapter->link_active = 1;
1990 ifp->if_baudrate = adapter->link_speed * 1000000;
1991 /* This can sleep */
1992 if_link_state_change(ifp, LINK_STATE_UP);
1993 } else if (!link_check && (adapter->link_active == 1)) {
/* Link went down: clear cached state. */
1994 ifp->if_baudrate = adapter->link_speed = 0;
1995 adapter->link_duplex = 0;
1997 device_printf(dev, "Link is Down\n");
1998 adapter->link_active = 0;
1999 /* This can sleep */
2000 if_link_state_change(ifp, LINK_STATE_DOWN);
2001 /* Turn off watchdogs */
2002 for (int i = 0; i < adapter->num_queues; i++, txr++)
2003 txr->watchdog_check = FALSE;
2007 /*********************************************************************
2009 * This routine disables all traffic on the adapter by issuing a
2010 * global reset on the MAC and deallocates TX/RX buffers.
2012 **********************************************************************/
/* NOTE(review): function signature elided in this extract —
 * presumably igb_stop(void *arg); confirm in full source. */
2017 struct adapter *adapter = arg;
2018 struct ifnet *ifp = adapter->ifp;
2019 struct tx_ring *txr = adapter->tx_rings;
/* Core lock must be held; quiesce interrupts and the timer first. */
2021 IGB_CORE_LOCK_ASSERT(adapter);
2023 INIT_DEBUGOUT("igb_stop: begin");
2025 igb_disable_intr(adapter);
2027 callout_stop(&adapter->timer);
2029 /* Tell the stack that the interface is no longer active */
2030 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2032 /* Unarm watchdog timer. */
2033 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2035 txr->watchdog_check = FALSE;
2039 e1000_reset_hw(&adapter->hw);
/* Clear the Wake Up Control register. */
2040 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2042 e1000_led_off(&adapter->hw);
2043 e1000_cleanup_led(&adapter->hw);
2047 /*********************************************************************
2049 * Determine hardware revision.
2051 **********************************************************************/
2053 igb_identify_hardware(struct adapter *adapter)
2055 device_t dev = adapter->dev;
2057 /* Make sure our PCI config space has the necessary stuff set */
2058 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2059 if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2060 (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2061 device_printf(dev, "Memory Access and/or Bus Master bits "
/* Re-enable bus mastering / memory access if firmware left them off. */
2063 adapter->hw.bus.pci_cmd_word |=
2064 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2065 pci_write_config(dev, PCIR_COMMAND,
2066 adapter->hw.bus.pci_cmd_word, 2);
2069 /* Save off the information about this board */
2070 adapter->hw.vendor_id = pci_get_vendor(dev);
2071 adapter->hw.device_id = pci_get_device(dev);
2072 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2073 adapter->hw.subsystem_vendor_id =
2074 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2075 adapter->hw.subsystem_device_id =
2076 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2078 /* Do Shared Code Init and Setup */
/* Resolves hw.mac.type from the PCI IDs saved above. */
2079 if (e1000_set_mac_type(&adapter->hw)) {
2080 device_printf(dev, "Setup init failure\n");
/* Map the device register BAR and decide the interrupt scheme
 * (MSI-X vs MSI) via igb_setup_msix(). */
2086 igb_allocate_pci_resources(struct adapter *adapter)
2088 device_t dev = adapter->dev;
2092 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2094 if (adapter->pci_mem == NULL) {
2095 device_printf(dev, "Unable to allocate bus resource: memory\n");
/* Record the bus-space tag/handle for register access. */
2098 adapter->osdep.mem_bus_space_tag =
2099 rman_get_bustag(adapter->pci_mem);
2100 adapter->osdep.mem_bus_space_handle =
2101 rman_get_bushandle(adapter->pci_mem);
2102 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2104 adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2106 /* This will setup either MSI/X or MSI */
2107 adapter->msix = igb_setup_msix(adapter);
2108 adapter->hw.back = &adapter->osdep;
2113 /*********************************************************************
2115 * Setup the Legacy or MSI Interrupt handler
2117 **********************************************************************/
2119 igb_allocate_legacy(struct adapter *adapter)
2121 device_t dev = adapter->dev;
2122 struct igb_queue *que = adapter->queues;
2125 /* Turn off all interrupts */
2126 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
/* NOTE(review): rid assignment elided here — MSI presumably selects
 * rid 1, legacy INTx rid 0; confirm in full source. */
2129 if (adapter->msix == 1)
2132 /* We allocate a single interrupt resource */
2133 adapter->res = bus_alloc_resource_any(dev,
2134 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2135 if (adapter->res == NULL) {
2136 device_printf(dev, "Unable to allocate bus resource: "
2142 * Try allocating a fast interrupt and the associated deferred
2143 * processing contexts.
2145 TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, que);
2146 /* Make tasklet for deferred link handling */
2147 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2148 adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2149 taskqueue_thread_enqueue, &adapter->tq);
2150 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2151 device_get_nameunit(adapter->dev));
/* igb_irq_fast is installed as a filter; real work runs on the
 * taskqueue threads started above. */
2152 if ((error = bus_setup_intr(dev, adapter->res,
2153 INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2154 adapter, &adapter->tag)) != 0) {
2155 device_printf(dev, "Failed to register fast interrupt "
2156 "handler: %d\n", error);
2157 taskqueue_free(adapter->tq);
2166 /*********************************************************************
2168 * Setup the MSIX Queue Interrupt handlers:
2170 **********************************************************************/
2172 igb_allocate_msix(struct adapter *adapter)
2174 device_t dev = adapter->dev;
2175 struct igb_queue *que = adapter->queues;
2176 int error, rid, vector = 0;
/* One IRQ resource, handler, and taskqueue per queue vector. */
2179 for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2181 que->res = bus_alloc_resource_any(dev,
2182 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2183 if (que->res == NULL) {
2185 "Unable to allocate bus resource: "
2186 "MSIX Queue Interrupt\n");
2189 error = bus_setup_intr(dev, que->res,
2190 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2191 igb_msix_que, que, &que->tag);
2194 device_printf(dev, "Failed to register Queue handler");
/* 82575 EIMS uses per-queue TX bitmap bits; later MACs use 1<<vector. */
2198 if (adapter->hw.mac.type == e1000_82575)
2199 que->eims = E1000_EICR_TX_QUEUE0 << i;
2201 que->eims = 1 << vector;
2203 ** Bind the msix vector, and thus the
2204 ** rings to the corresponding cpu.
2206 if (adapter->num_queues > 1)
2207 bus_bind_intr(dev, que->res, i);
2208 /* Make tasklet for deferred handling */
2209 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2210 que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2211 taskqueue_thread_enqueue, &que->tq);
2212 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2213 device_get_nameunit(adapter->dev));
/* And one final vector dedicated to link-status interrupts. */
2218 adapter->res = bus_alloc_resource_any(dev,
2219 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2220 if (adapter->res == NULL) {
2222 "Unable to allocate bus resource: "
2223 "MSIX Link Interrupt\n");
2226 if ((error = bus_setup_intr(dev, adapter->res,
2227 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2228 igb_msix_link, adapter, &adapter->tag)) != 0) {
2229 device_printf(dev, "Failed to register Link handler");
2232 adapter->linkvec = vector;
2234 /* Make tasklet for deferred handling */
2235 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2236 adapter->tq = taskqueue_create_fast("igb_link", M_NOWAIT,
2237 taskqueue_thread_enqueue, &adapter->tq);
2238 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s link",
2239 device_get_nameunit(adapter->dev));
/* Program MSI-X vector routing: IVAR registers on newer MACs,
 * MSIXBM bitmap registers on the 82575, then the starting EITR
 * interrupt rate for every queue vector. */
2246 igb_configure_queues(struct adapter *adapter)
2248 struct e1000_hw *hw = &adapter->hw;
2249 struct igb_queue *que;
2251 u32 newitr = IGB_DEFAULT_ITR;
2253 /* First turn on RSS capability */
2254 if (adapter->hw.mac.type > e1000_82575)
2255 E1000_WRITE_REG(hw, E1000_GPIE,
2256 E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2257 E1000_GPIE_PBA | E1000_GPIE_NSICR);
2260 switch (adapter->hw.mac.type) {
/* NOTE(review): case labels elided in this extract. First layout:
 * RX entry in the low/high-16 halves, TX in bits 8/24 of each IVAR. */
2263 for (int i = 0; i < adapter->num_queues; i++) {
2265 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2266 que = &adapter->queues[i];
2269 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2272 ivar |= que->msix | E1000_IVAR_VALID;
2274 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2277 for (int i = 0; i < adapter->num_queues; i++) {
2279 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2280 que = &adapter->queues[i];
2283 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2286 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2288 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2289 adapter->eims_mask |= que->eims;
2292 /* And for the link interrupt */
2293 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2294 adapter->link_mask = 1 << adapter->linkvec;
2295 adapter->eims_mask |= adapter->link_mask;
2296 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
/* Second IVAR layout variant: two queue entries per IVAR register. */
2300 for (int i = 0; i < adapter->num_queues; i++) {
2301 u32 index = i & 0x7; /* Each IVAR has two entries */
2302 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2303 que = &adapter->queues[i];
2306 ivar |= que->msix | E1000_IVAR_VALID;
2309 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2311 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2312 adapter->eims_mask |= que->eims;
2315 for (int i = 0; i < adapter->num_queues; i++) {
2316 u32 index = i & 0x7; /* Each IVAR has two entries */
2317 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2318 que = &adapter->queues[i];
2321 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2324 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2326 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2327 adapter->eims_mask |= que->eims;
2330 /* And for the link interrupt */
2331 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2332 adapter->link_mask = 1 << adapter->linkvec;
2333 adapter->eims_mask |= adapter->link_mask;
2334 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
/* 82575 path: no IVAR — routing uses MSIXBM bitmap registers. */
2338 /* enable MSI-X support*/
2339 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2340 tmp |= E1000_CTRL_EXT_PBA_CLR;
2341 /* Auto-Mask interrupts upon ICR read. */
2342 tmp |= E1000_CTRL_EXT_EIAME;
2343 tmp |= E1000_CTRL_EXT_IRCA;
2344 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2347 for (int i = 0; i < adapter->num_queues; i++) {
2348 que = &adapter->queues[i];
2349 tmp = E1000_EICR_RX_QUEUE0 << i;
2350 tmp |= E1000_EICR_TX_QUEUE0 << i;
2352 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2354 adapter->eims_mask |= que->eims;
2358 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2360 adapter->link_mask |= E1000_EIMS_OTHER;
2361 adapter->eims_mask |= adapter->link_mask;
2366 /* Set the starting interrupt rate */
/* 82575 replicates the interval into the high half of EITR. */
2367 if (hw->mac.type == e1000_82575)
2368 newitr |= newitr << 16;
2370 newitr |= E1000_EITR_CNT_IGNR;
2372 for (int i = 0; i < adapter->num_queues; i++) {
2373 que = &adapter->queues[i];
2374 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
/* Tear down interrupt handlers and release IRQ, MSI-X table,
 * and register BAR resources. Safe to call on a partially
 * attached device (guarded by the res == NULL check). */
2382 igb_free_pci_resources(struct adapter *adapter)
2384 struct igb_queue *que = adapter->queues;
2385 device_t dev = adapter->dev;
2389 ** There is a slight possibility of a failure mode
2390 ** in attach that will result in entering this function
2391 ** before interrupt resources have been initialized, and
2392 ** in that case we do not want to execute the loops below
2393 ** We can detect this reliably by the state of the adapter
2396 if (adapter->res == NULL)
2400 * First release all the interrupt resources:
2402 for (int i = 0; i < adapter->num_queues; i++, que++) {
/* MSI-X resource rids are vector number + 1. */
2403 rid = que->msix + 1;
2404 if (que->tag != NULL) {
2405 bus_teardown_intr(dev, que->res, que->tag);
2408 if (que->res != NULL)
2409 bus_release_resource(dev,
2410 SYS_RES_IRQ, rid, que->res);
2413 /* Clean the Legacy or Link interrupt last */
2414 if (adapter->linkvec) /* we are doing MSIX */
2415 rid = adapter->linkvec + 1;
2417 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2419 if (adapter->tag != NULL) {
2420 bus_teardown_intr(dev, adapter->res, adapter->tag);
2421 adapter->tag = NULL;
2423 if (adapter->res != NULL)
2424 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2428 pci_release_msi(dev);
2430 if (adapter->msix_mem != NULL)
2431 bus_release_resource(dev, SYS_RES_MEMORY,
2432 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2434 if (adapter->pci_mem != NULL)
2435 bus_release_resource(dev, SYS_RES_MEMORY,
2436 PCIR_BAR(0), adapter->pci_mem);
2441 * Setup Either MSI/X or MSI
/* Tries MSI-X first (one vector per queue plus one for link), then
 * falls back to a single MSI vector. NOTE(review): return statements
 * are elided in this extract — presumably returns the number of
 * vectors allocated, 0 meaning legacy INTx; confirm in full source. */
2444 igb_setup_msix(struct adapter *adapter)
2446 device_t dev = adapter->dev;
2447 int rid, want, queues, msgs;
2449 /* tuneable override */
2450 if (igb_enable_msix == 0)
2453 /* First try MSI/X */
2454 rid = PCIR_BAR(IGB_MSIX_BAR);
2455 adapter->msix_mem = bus_alloc_resource_any(dev,
2456 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2457 if (!adapter->msix_mem) {
2458 /* May not be enabled */
2459 device_printf(adapter->dev,
2460 "Unable to map MSIX table \n");
2464 msgs = pci_msix_count(dev);
2465 if (msgs == 0) { /* system has msix disabled */
2466 bus_release_resource(dev, SYS_RES_MEMORY,
2467 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2468 adapter->msix_mem = NULL;
2472 /* Figure out a reasonable auto config value */
/* One queue per CPU, capped by available vectors minus the link one. */
2473 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2475 /* Manual override */
2476 if (igb_num_queues != 0)
2477 queues = igb_num_queues;
2479 /* Can have max of 4 queues on 82575 */
2480 if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2484 ** One vector (RX/TX pair) per queue
2485 ** plus an additional for Link interrupt
2491 device_printf(adapter->dev,
2492 "MSIX Configuration Problem, "
2493 "%d vectors configured, but %d queues wanted!\n",
2497 if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2498 device_printf(adapter->dev,
2499 "Using MSIX interrupts with %d vectors\n", msgs);
2500 adapter->num_queues = queues;
/* MSI-X unavailable: fall back to a single MSI vector. */
2504 msgs = pci_msi_count(dev);
2505 if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2506 device_printf(adapter->dev,"Using MSI interrupt\n");
2510 /*********************************************************************
2512  *  Set up an fresh starting state
2514  **********************************************************************/
/*
 * igb_reset - program the packet buffer split, flow-control watermarks
 * and DMA coalescing (82580), then issue a full hardware init via
 * e1000_init_hw().  Called from init paths to return the MAC to a
 * known state.
 */
2516 igb_reset(struct adapter *adapter)
2518 	device_t	dev = adapter->dev;
2519 	struct e1000_hw *hw = &adapter->hw;
2520 	struct e1000_fc_info *fc = &hw->fc;
2521 	struct ifnet	*ifp = adapter->ifp;
2525 	INIT_DEBUGOUT("igb_reset: begin");
2527 	/* Let the firmware know the OS is in control */
2528 	igb_get_hw_control(adapter);
2531 	 * Packet Buffer Allocation (PBA)
2532 	 * Writing PBA sets the receive portion of the buffer
2533 	 * the remainder is used for the transmit buffer.
2535 	switch (hw->mac.type) {
2537 		pba = E1000_PBA_32K;
2540 		pba = E1000_PBA_64K;
2543 		pba = E1000_PBA_35K;
2548 	/* Special needs in case of Jumbo frames */
2549 	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2550 		u32 tx_space, min_tx, min_rx;
2551 		pba = E1000_READ_REG(hw, E1000_PBA);
		/* upper 16 bits of PBA = current TX allocation (in KB) */
2552 		tx_space = pba >> 16;
2554 		min_tx = (adapter->max_frame_size +
2555 		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2556 		min_tx = roundup2(min_tx, 1024);
2558 		min_rx = adapter->max_frame_size;
2559 		min_rx = roundup2(min_rx, 1024);
2561 		if (tx_space < min_tx &&
2562 		    ((min_tx - tx_space) < pba)) {
			/* steal from RX to satisfy the TX minimum */
2563 			pba = pba - (min_tx - tx_space);
2565 			 * if short on rx space, rx wins
2566 			 * and must trump tx adjustment
2571 		E1000_WRITE_REG(hw, E1000_PBA, pba);
2574 	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2577 	 *   These parameters control the automatic generation (Tx) and
2578 	 *   response (Rx) to Ethernet PAUSE frames.
2579 	 * - High water mark should allow for at least two frames to be
2580 	 *   received after sending an XOFF.
2581 	 * - Low water mark works best when it is very near the high water mark.
2582 	 *   This allows the receiver to restart by sending XON when it has
	/* hwm = min(90% of RX buffer, buffer minus two max frames) */
2585 	hwm = min(((pba << 10) * 9 / 10),
2586 	    ((pba << 10) - 2 * adapter->max_frame_size));
2588 	if (hw->mac.type < e1000_82576) {
2589 		fc->high_water = hwm & 0xFFF8;	/* 8-byte granularity */
2590 		fc->low_water = fc->high_water - 8;
2592 		fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
2593 		fc->low_water = fc->high_water - 16;
2596 	fc->pause_time = IGB_FC_PAUSE_TIME;
2597 	fc->send_xon = TRUE;
2599 	/* Set Flow control, use the tunable location if sane */
	/* NOTE(review): '(>= 0) || (< 4)' is always true for any value;
	 * '&&' was almost certainly intended to range-check the tunable
	 * to the valid e1000_fc modes 0..3 -- confirm and fix. */
2600 	if ((igb_fc_setting >= 0) || (igb_fc_setting < 4))
2601 		fc->requested_mode = igb_fc_setting;
2603 		fc->requested_mode = e1000_fc_none;
2605 	fc->current_mode = fc->requested_mode;
2607 	/* Issue a global reset */
	/* clear wake-up control so WoL state does not persist */
2609 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2611 	if (e1000_init_hw(hw) < 0)
2612 		device_printf(dev, "Hardware Initialization Failed\n");
2614 	if (hw->mac.type == e1000_82580) {
2617 		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2619 		 * 0x80000000 - enable DMA COAL
2620 		 * 0x10000000 - use L0s as low power
2621 		 * 0x20000000 - use L1 as low power
2622 		 * X << 16 - exit dma coal when rx data exceeds X kB
2623 		 * Y - upper limit to stay in dma coal in units of 32usecs
2625 		E1000_WRITE_REG(hw, E1000_DMACR,
2626 		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
2628 		/* set hwm to PBA -  2 * max frame size */
2629 		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2631 		 * This sets the time to wait before requesting transition to
2632 		 * low power state to number of usecs needed to receive 1 512
2633 		 * byte frame at gigabit line rate
2635 		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2637 		/* free space in tx packet buffer to wake from DMA coal */
2638 		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2639 		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2641 		/* make low power state decision controlled by DMA coal */
2642 		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2643 		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2644 		    reg | E1000_PCIEMISC_LX_DECISION);
2647 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2648 	e1000_get_phy_info(hw);
2649 	e1000_check_for_link(hw);
2653 /*********************************************************************
2655  *  Setup networking device structure and register an interface.
2657  **********************************************************************/
/*
 * igb_setup_interface - allocate and initialize the ifnet, attach it
 * to the ethernet layer, advertise capabilities (csum/TSO4/jumbo,
 * optional LRO and polling, VLAN offloads) and populate the ifmedia
 * list according to the PHY/media type.
 */
2659 igb_setup_interface(device_t dev, struct adapter *adapter)
2663 	INIT_DEBUGOUT("igb_setup_interface: begin");
2665 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2667 		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2668 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2669 	ifp->if_mtu = ETHERMTU;
2670 	ifp->if_init =  igb_init;
2671 	ifp->if_softc = adapter;
2672 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2673 	ifp->if_ioctl = igb_ioctl;
2674 	ifp->if_start = igb_start;
2675 #if __FreeBSD_version >= 800000
	/* multiqueue transmit entry points (8.x and later) */
2676 	ifp->if_transmit = igb_mq_start;
2677 	ifp->if_qflush = igb_qflush;
2679 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2680 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2681 	IFQ_SET_READY(&ifp->if_snd);
2683 	ether_ifattach(ifp, adapter->hw.mac.addr);
2685 	ifp->if_capabilities = ifp->if_capenable = 0;
2687 	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2688 	ifp->if_capabilities |= IFCAP_TSO4;
2689 	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2690 	if (igb_header_split)
2691 		ifp->if_capabilities |= IFCAP_LRO;
2693 	ifp->if_capenable = ifp->if_capabilities;
2694 #ifdef DEVICE_POLLING
	/* advertised but not enabled by default */
2695 	ifp->if_capabilities |= IFCAP_POLLING;
2699 	 * Tell the upper layer(s) we
2700 	 * support full VLAN capability.
2702 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2703 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2704 	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2707 	** Dont turn this on by default, if vlans are
2708 	** created on another pseudo device (eg. lagg)
2709 	** then vlan events are not passed thru, breaking
2710 	** operation, but with HW FILTER off it works. If
2711 	** using vlans directly on the em driver you can
2712 	** enable this and get full hardware tag filtering.
2714 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2717 	 * Specify the media types supported by this adapter and register
2718 	 * callbacks to update media and link information
2720 	ifmedia_init(&adapter->media, IFM_IMASK,
2721 	    igb_media_change, igb_media_status);
2722 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2723 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2724 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2726 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2728 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2729 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2731 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2733 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
		/* IFE PHYs cannot do 1000baseT */
2735 		if (adapter->hw.phy.type != e1000_phy_ife) {
2736 			ifmedia_add(&adapter->media,
2737 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2738 			ifmedia_add(&adapter->media,
2739 				IFM_ETHER | IFM_1000_T, 0, NULL);
2742 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2743 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2748  *  Manage DMA'able memory.
/*
 * igb_dmamap_cb - bus_dmamap_load() callback: store the physical
 * address of the first (and only expected) segment through the
 * bus_addr_t pointer passed as the callback argument.
 */
2751 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2755 	*(bus_addr_t *) arg = segs[0].ds_addr;
/*
 * igb_dma_malloc - allocate a DMA-safe memory region of 'size' bytes:
 * create a single-segment tag aligned to IGB_DBA_ALIGN, allocate the
 * memory, and load the map, capturing the bus address in dma->dma_paddr
 * via igb_dmamap_cb.  On failure the partially-created resources are
 * torn down (cleanup labels are below the visible error paths).
 */
2759 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2760         struct igb_dma_alloc *dma, int mapflags)
2764 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2765 				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2766 				BUS_SPACE_MAXADDR,	/* lowaddr */
2767 				BUS_SPACE_MAXADDR,	/* highaddr */
2768 				NULL, NULL,		/* filter, filterarg */
2771 				size,			/* maxsegsize */
2773 				NULL,			/* lockfunc */
2777 		device_printf(adapter->dev,
2778 		    "%s: bus_dma_tag_create failed: %d\n",
2783 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2784 	    BUS_DMA_NOWAIT, &dma->dma_map);
2786 		device_printf(adapter->dev,
2787 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2788 		    __func__, (uintmax_t)size, error);
2793 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2794 	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
	/* a zero paddr from the callback also counts as failure */
2795 	if (error || dma->dma_paddr == 0) {
2796 		device_printf(adapter->dev,
2797 		    "%s: bus_dmamap_load failed: %d\n",
2805 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2807 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2808 	bus_dma_tag_destroy(dma->dma_tag);
2810 	dma->dma_map = NULL;
2811 	dma->dma_tag = NULL;
/*
 * igb_dma_free - release a region obtained from igb_dma_malloc():
 * sync, unload and free the map/memory, then destroy the tag.
 * A NULL tag means nothing was allocated; safe to call repeatedly
 * since the pointers are cleared after release.
 */
2817 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2819 	if (dma->dma_tag == NULL)
2821 	if (dma->dma_map != NULL) {
2822 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2823 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2824 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2825 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2826 		dma->dma_map = NULL;
2828 	bus_dma_tag_destroy(dma->dma_tag);
2829 	dma->dma_tag = NULL;
2833 /*********************************************************************
2835  *  Allocate memory for the transmit and receive rings, and then
2836  *  the descriptors associated with each, called only once at attach.
2838  **********************************************************************/
/*
 * igb_allocate_queues - attach-time allocation of the queue, tx_ring
 * and rx_ring arrays, their descriptor DMA memory, per-ring mutexes,
 * buffer structures and (8.x+) buf_rings.  txconf/rxconf count how
 * many rings were completed so the error path can unwind exactly the
 * rings that were set up.
 */
2840 igb_allocate_queues(struct adapter *adapter)
2842 	device_t dev = adapter->dev;
2843 	struct igb_queue	*que = NULL;
2844 	struct tx_ring		*txr = NULL;
2845 	struct rx_ring		*rxr = NULL;
2846 	int rsize, tsize, error = E1000_SUCCESS;
2847 	int txconf = 0, rxconf = 0;
2849 	/* First allocate the top level queue structs */
2850 	if (!(adapter->queues =
2851 	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2852 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2853 		device_printf(dev, "Unable to allocate queue memory\n");
2858 	/* Next allocate the TX ring struct memory */
2859 	if (!(adapter->tx_rings =
2860 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2861 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2862 		device_printf(dev, "Unable to allocate TX ring memory\n");
2867 	/* Now allocate the RX */
2868 	if (!(adapter->rx_rings =
2869 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2870 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2871 		device_printf(dev, "Unable to allocate RX ring memory\n");
	/* descriptor ring sizes must be IGB_DBA_ALIGN-aligned */
2876 	tsize = roundup2(adapter->num_tx_desc *
2877 	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2879 	 * Now set up the TX queues, txconf is needed to handle the
2880 	 * possibility that things fail midcourse and we need to
2881 	 * undo memory gracefully
2883 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2884 		/* Set up some basics */
2885 		txr = &adapter->tx_rings[i];
2886 		txr->adapter = adapter;
2889 		/* Initialize the TX lock */
2890 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2891 		    device_get_nameunit(dev), txr->me);
2892 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2894 		if (igb_dma_malloc(adapter, tsize,
2895 		    &txr->txdma, BUS_DMA_NOWAIT)) {
2897 			    "Unable to allocate TX Descriptor memory\n");
2901 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2902 		bzero((void *)txr->tx_base, tsize);
2904         	/* Now allocate transmit buffers for the ring */
2905         	if (igb_allocate_transmit_buffers(txr)) {
2907 			    "Critical Failure setting up transmit buffers\n");
2911 #if __FreeBSD_version >= 800000
2912 		/* Allocate a buf ring */
2913 		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2914 		    M_WAITOK, &txr->tx_mtx);
2919 	 * Next the RX queues...
2921 	rsize = roundup2(adapter->num_rx_desc *
2922 	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2923 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2924 		rxr = &adapter->rx_rings[i];
2925 		rxr->adapter = adapter;
2928 		/* Initialize the RX lock */
2929 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		/* NOTE(review): txr->me here looks like a copy/paste from
		 * the TX loop above; rxr->me was almost certainly intended
		 * for the RX lock name -- confirm and fix. */
2930 		    device_get_nameunit(dev), txr->me);
2931 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2933 		if (igb_dma_malloc(adapter, rsize,
2934 		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
2936 			    "Unable to allocate RxDescriptor memory\n");
2940 		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2941 		bzero((void *)rxr->rx_base, rsize);
2943         	/* Allocate receive buffers for the ring*/
2944         	if (igb_allocate_receive_buffers(rxr)) {
2946 			    "Critical Failure setting up receive buffers\n");
2953 	** Finally set up the queue holding structs
2955 	for (int i = 0; i < adapter->num_queues; i++) {
2956 		que = &adapter->queues[i];
2957 		que->adapter = adapter;
2958 		que->txr = &adapter->tx_rings[i];
2959 		que->rxr = &adapter->rx_rings[i];
	/* error unwind: free only the rings that completed setup */
2965 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2966 		igb_dma_free(adapter, &rxr->rxdma);
2968 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2969 		igb_dma_free(adapter, &txr->txdma);
2970 	free(adapter->rx_rings, M_DEVBUF);
2972 #if __FreeBSD_version >= 800000
2973 		buf_ring_free(txr->br, M_DEVBUF);
2975 	free(adapter->tx_rings, M_DEVBUF);
2977 	free(adapter->queues, M_DEVBUF);
2982 /*********************************************************************
2984  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2985  *  the information needed to transmit a packet on the wire. This is
2986  *  called only once at attach, setup is done every reset.
2988  **********************************************************************/
/*
 * igb_allocate_transmit_buffers - create the TX DMA tag (TSO-sized,
 * scatter/gather up to IGB_MAX_SCATTER segments), allocate the
 * tx_buffer array, and create one DMA map per descriptor.  On any
 * failure, igb_free_transmit_structures() unwinds partial state.
 */
2990 igb_allocate_transmit_buffers(struct tx_ring *txr)
2992 	struct adapter *adapter = txr->adapter;
2993 	device_t dev = adapter->dev;
2994 	struct igb_tx_buffer *txbuf;
2998 	 * Setup DMA descriptor areas.
3000 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3001 			       1, 0,			/* alignment, bounds */
3002 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3003 			       BUS_SPACE_MAXADDR,	/* highaddr */
3004 			       NULL, NULL,		/* filter, filterarg */
3005 			       IGB_TSO_SIZE,		/* maxsize */
3006 			       IGB_MAX_SCATTER,		/* nsegments */
3007 			       PAGE_SIZE,		/* maxsegsize */
3009 			       NULL,			/* lockfunc */
3010 			       NULL,			/* lockfuncarg */
3012 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3016 	if (!(txr->tx_buffers =
3017 	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3018 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3019 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3024         /* Create the descriptor buffer dma maps */
3025 	txbuf = txr->tx_buffers;
3026 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3027 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3029 			device_printf(dev, "Unable to create TX DMA map\n");
3036 	/* We free all, it handles case where we are in the middle */
3037 	igb_free_transmit_structures(adapter);
3041 /*********************************************************************
3043  *  Initialize a transmit ring.
3045  **********************************************************************/
/*
 * igb_setup_transmit_ring - reset one TX ring to its post-init state:
 * zero the descriptors, reset the avail/clean indices, free any mbufs
 * still attached to tx_buffers (unloading their DMA maps first), and
 * sync the descriptor memory for device access.  Run on every reset.
 */
3047 igb_setup_transmit_ring(struct tx_ring *txr)
3049 	struct adapter *adapter = txr->adapter;
3050 	struct igb_tx_buffer *txbuf;
3053 	/* Clear the old descriptor contents */
3055 	bzero((void *)txr->tx_base,
3056 	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3058 	txr->next_avail_desc = 0;
3059 	txr->next_to_clean = 0;
3061 	/* Free any existing tx buffers. */
3062         txbuf = txr->tx_buffers;
3063 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3064 		if (txbuf->m_head != NULL) {
3065 			bus_dmamap_sync(txr->txtag, txbuf->map,
3066 			    BUS_DMASYNC_POSTWRITE);
3067 			bus_dmamap_unload(txr->txtag, txbuf->map);
3068 			m_freem(txbuf->m_head);
3069 			txbuf->m_head = NULL;
3071 		/* clear the watch index */
3072 		txbuf->next_eop = -1;
3075 	/* Set number of descriptors available */
3076 	txr->tx_avail = adapter->num_tx_desc;
3078 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3079 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3083 /*********************************************************************
3085  *  Initialize all transmit rings.
3087  **********************************************************************/
/*
 * igb_setup_transmit_structures - run igb_setup_transmit_ring() on
 * every TX ring of the adapter.
 */
3089 igb_setup_transmit_structures(struct adapter *adapter)
3091 	struct tx_ring *txr = adapter->tx_rings;
3093 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3094 		igb_setup_transmit_ring(txr);
3099 /*********************************************************************
3101  *  Enable transmit unit.
3103  **********************************************************************/
/*
 * igb_initialize_transmit_units - program the hardware TX side: for
 * each ring write the descriptor base/length and reset head/tail,
 * set the prefetch/host/writeback thresholds in TXDCTL and enable the
 * queue; finally enable the transmitter via TCTL.
 */
3105 igb_initialize_transmit_units(struct adapter *adapter)
3107 	struct tx_ring	*txr = adapter->tx_rings;
3108 	struct e1000_hw *hw = &adapter->hw;
3111 	 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3113 	/* Setup the Tx Descriptor Rings */
3114 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3115 		u64 bus_addr = txr->txdma.dma_paddr;
3117 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3118 		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3119 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3120 		    (uint32_t)(bus_addr >> 32));
3121 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3122 		    (uint32_t)bus_addr);
3124 		/* Setup the HW Tx Head and Tail descriptor pointers */
3125 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3126 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3128 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3129 		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3130 		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3132 		txr->watchdog_check = FALSE;
		/* thresholds: prefetch | host<<8 | writeback<<16 */
3134 		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3135 		txdctl |= IGB_TX_PTHRESH;
3136 		txdctl |= IGB_TX_HTHRESH << 8;
3137 		txdctl |= IGB_TX_WTHRESH << 16;
3138 		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3139 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3142 	/* Program the Transmit Control Register */
3143 	tctl = E1000_READ_REG(hw, E1000_TCTL);
3144 	tctl &= ~E1000_TCTL_CT;
3145 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3146 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3148 	e1000_config_collision_dist(hw);
3150 	/* This write will effectively turn on the transmit unit. */
3151 	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3154 /*********************************************************************
3156  *  Free all transmit rings.
3158  **********************************************************************/
/*
 * igb_free_transmit_structures - detach-time teardown: for every TX
 * ring free its buffers and descriptor DMA memory, destroy its lock,
 * then free the ring array itself.
 */
3160 igb_free_transmit_structures(struct adapter *adapter)
3162 	struct tx_ring *txr = adapter->tx_rings;
3164 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3166 		igb_free_transmit_buffers(txr);
3167 		igb_dma_free(adapter, &txr->txdma);
3169 		IGB_TX_LOCK_DESTROY(txr);
3171 	free(adapter->tx_rings, M_DEVBUF);
3174 /*********************************************************************
3176  *  Free transmit ring related data structures.
3178  **********************************************************************/
/*
 * igb_free_transmit_buffers - release everything attached to one TX
 * ring: in-flight mbufs (with their DMA maps synced/unloaded first),
 * per-descriptor maps, the buf_ring (8.x+), the tx_buffer array and
 * the TX DMA tag.  Handles buffers with or without a pending mbuf.
 */
3180 igb_free_transmit_buffers(struct tx_ring *txr)
3182 	struct adapter *adapter = txr->adapter;
3183 	struct igb_tx_buffer *tx_buffer;
3186 	INIT_DEBUGOUT("free_transmit_ring: begin");
3188 	if (txr->tx_buffers == NULL)
3191 	tx_buffer = txr->tx_buffers;
3192 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3193 		if (tx_buffer->m_head != NULL) {
3194 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3195 			    BUS_DMASYNC_POSTWRITE);
3196 			bus_dmamap_unload(txr->txtag,
3198 			m_freem(tx_buffer->m_head);
3199 			tx_buffer->m_head = NULL;
3200 			if (tx_buffer->map != NULL) {
3201 				bus_dmamap_destroy(txr->txtag,
3203 				tx_buffer->map = NULL;
3205 		} else if (tx_buffer->map != NULL) {
			/* no mbuf pending: just unload and destroy the map */
3206 			bus_dmamap_unload(txr->txtag,
3208 			bus_dmamap_destroy(txr->txtag,
3210 			tx_buffer->map = NULL;
3213 #if __FreeBSD_version >= 800000
3214 	if (txr->br != NULL)
3215 		buf_ring_free(txr->br, M_DEVBUF);
3217 	if (txr->tx_buffers != NULL) {
3218 		free(txr->tx_buffers, M_DEVBUF);
3219 		txr->tx_buffers = NULL;
3221 	if (txr->txtag != NULL) {
3222 		bus_dma_tag_destroy(txr->txtag);
3228 /**********************************************************************
3230  *  Setup work for hardware segmentation offload (TSO)
3232  **********************************************************************/
/*
 * igb_tso_setup - build an advanced context descriptor for a TSO (IPv4
 * + TCP only) frame.  Parses the ethernet/VLAN, IP and TCP headers,
 * seeds the TCP pseudo-header checksum, fills in the VLAN/MAC/IP
 * lengths, TUCMD type bits and MSS/L4LEN fields, and reports the total
 * header length to the caller via *hdrlen for use in igb_xmit().
 * Consumes one descriptor slot; returns FALSE for non-TCP payloads.
 */
3234 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3236 	struct adapter *adapter = txr->adapter;
3237 	struct e1000_adv_tx_context_desc *TXD;
3238 	struct igb_tx_buffer        *tx_buffer;
3239 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3240 	u32 mss_l4len_idx = 0;
3242 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3243 	struct ether_vlan_header *eh;
3249 	 * Determine where frame payload starts.
3250 	 * Jump over vlan headers if already present
3252 	eh = mtod(mp, struct ether_vlan_header *);
3253 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3254 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3256 		ehdrlen = ETHER_HDR_LEN;
3258 	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3259 	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3262 	/* Only supports IPV4 for now */
3263 	ctxd = txr->next_avail_desc;
3264 	tx_buffer = &txr->tx_buffers[ctxd];
3265 	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3267 	ip = (struct ip *)(mp->m_data + ehdrlen);
3268 	if (ip->ip_p != IPPROTO_TCP)
3269                 return FALSE;   /* 0 */
3271 	ip_hlen = ip->ip_hl << 2;
3272 	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
	/* seed pseudo-header checksum; hardware finishes it per segment */
3273 	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3274 	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3275 	tcp_hlen = th->th_off << 2;
3277 	 * Calculate header length, this is used
3278 	 * in the transmit desc in igb_xmit
3280 	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3282 	/* VLAN MACLEN IPLEN */
3283 	if (mp->m_flags & M_VLANTAG) {
3284 		vtag = htole16(mp->m_pkthdr.ether_vtag);
3285 		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3288 	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3289 	vlan_macip_lens |= ip_hlen;
3290 	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3292 	/* ADV DTYPE TUCMD */
3293 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3294 	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3295 	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3296 	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3299 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3300 	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3301 	/* 82575 needs the queue index added */
3302 	if (adapter->hw.mac.type == e1000_82575)
3303 		mss_l4len_idx |= txr->me << 4;
3304 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3306 	TXD->seqnum_seed = htole32(0);
3307 	tx_buffer->m_head = NULL;
3308 	tx_buffer->next_eop = -1;
	/* advance past the consumed context descriptor (with wrap) */
3310 	if (++ctxd == adapter->num_tx_desc)
3314 	txr->next_avail_desc = ctxd;
3319 /*********************************************************************
3321  *  Context Descriptor setup for VLAN or CSUM
3323  **********************************************************************/
/*
 * igb_tx_ctx_setup - build an advanced context descriptor carrying the
 * VLAN tag and/or checksum-offload parameters for a non-TSO frame.
 * Parses ethernet/VLAN and IPv4/IPv6 headers, selects TCP/UDP/SCTP
 * L4 offload bits from csum_flags, and consumes one descriptor slot.
 * Bails out early when neither a VLAN tag nor offload work is needed.
 */
3326 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3328 	struct adapter *adapter = txr->adapter;
3329 	struct e1000_adv_tx_context_desc *TXD;
3330 	struct igb_tx_buffer        *tx_buffer;
3331 	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3332 	struct ether_vlan_header *eh;
3333 	struct ip *ip = NULL;
3334 	struct ip6_hdr *ip6;
3335 	int  ehdrlen, ctxd, ip_hlen = 0;
3336 	u16	etype, vtag = 0;
3338 	bool	offload = TRUE;
3340 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3343 	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3344 	ctxd = txr->next_avail_desc;
3345 	tx_buffer = &txr->tx_buffers[ctxd];
3346 	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3349 	** In advanced descriptors the vlan tag must
3350 	** be placed into the context descriptor, thus
3351 	** we need to be here just for that setup.
3353 	if (mp->m_flags & M_VLANTAG) {
3354 		vtag = htole16(mp->m_pkthdr.ether_vtag);
3355 		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3356 	} else if (offload == FALSE)
3360 	 * Determine where frame payload starts.
3361 	 * Jump over vlan headers if already present,
3362 	 * helpful for QinQ too.
3364 	eh = mtod(mp, struct ether_vlan_header *);
3365 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3366 		etype = ntohs(eh->evl_proto);
3367 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3369 		etype = ntohs(eh->evl_encap_proto);
3370 		ehdrlen = ETHER_HDR_LEN;
3373 	/* Set the ether header length */
3374 	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
	/* dispatch on ethertype (switch labels are on adjacent lines) */
3378 			ip = (struct ip *)(mp->m_data + ehdrlen);
3379 			ip_hlen = ip->ip_hl << 2;
3380 			if (mp->m_len < ehdrlen + ip_hlen) {
3385 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3387 		case ETHERTYPE_IPV6:
3388 			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3389 			ip_hlen = sizeof(struct ip6_hdr);
3390 			if (mp->m_len < ehdrlen + ip_hlen)
3392 			ipproto = ip6->ip6_nxt;
3393 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3400 	vlan_macip_lens |= ip_hlen;
3401 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	/* select the L4 checksum type requested by the stack */
3405 			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3406 				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3409 			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3410 				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3412 #if __FreeBSD_version >= 800000
3414 			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3415 				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3423 	/* 82575 needs the queue index added */
3424 	if (adapter->hw.mac.type == e1000_82575)
3425 		mss_l4len_idx = txr->me << 4;
3427 	/* Now copy bits into descriptor */
3428 	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3429 	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3430 	TXD->seqnum_seed = htole32(0);
3431 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3433 	tx_buffer->m_head = NULL;
3434 	tx_buffer->next_eop = -1;
3436 	/* We've consumed the first desc, adjust counters */
3437 	if (++ctxd == adapter->num_tx_desc)
3439 	txr->next_avail_desc = ctxd;
3446 /**********************************************************************
3448  *  Examine each tx_buffer in the used queue. If the hardware is done
3449  *  processing the packet then free associated resources. The
3450  *  tx_buffer is put back on the free queue.
3452  *  TRUE return means there's work in the ring to clean, FALSE its empty.
3453  **********************************************************************/
/*
 * igb_txeof - TX completion processing for one ring.  Walks completed
 * packets by checking the DD bit on each packet's EOP descriptor,
 * unloading maps and freeing mbufs for every descriptor in the packet,
 * then advances next_to_clean and refreshes tx_avail.  Must be called
 * with the TX lock held (asserted below).
 */
3455 igb_txeof(struct tx_ring *txr)
3457 	struct adapter	*adapter = txr->adapter;
3458         int first, last, done;
3459         struct igb_tx_buffer *tx_buffer;
3460         struct e1000_tx_desc   *tx_desc, *eop_desc;
3461 	struct ifnet   *ifp = adapter->ifp;
3463 	IGB_TX_LOCK_ASSERT(txr);
	/* nothing outstanding: ring already fully clean */
3465         if (txr->tx_avail == adapter->num_tx_desc)
3468         first = txr->next_to_clean;
3469         tx_desc = &txr->tx_base[first];
3470         tx_buffer = &txr->tx_buffers[first];
3471 	last = tx_buffer->next_eop;
3472         eop_desc = &txr->tx_base[last];
3475 	 * What this does is get the index of the
3476 	 * first descriptor AFTER the EOP of the
3477 	 * first packet, that way we can do the
3478 	 * simple comparison on the inner while loop.
3480 	if (++last == adapter->num_tx_desc)
3484         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3485             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3487         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3488 		/* We clean the range of the packet */
3489 		while (first != done) {
3490 			tx_desc->upper.data = 0;
3491 			tx_desc->lower.data = 0;
3492 			tx_desc->buffer_addr = 0;
3495 			if (tx_buffer->m_head) {
3497 				    tx_buffer->m_head->m_pkthdr.len;
3498 				bus_dmamap_sync(txr->txtag,
3500 				    BUS_DMASYNC_POSTWRITE);
3501 				bus_dmamap_unload(txr->txtag,
3504 				m_freem(tx_buffer->m_head);
3505 				tx_buffer->m_head = NULL;
3507 			tx_buffer->next_eop = -1;
			/* progress made: reset the watchdog timestamp */
3508 			txr->watchdog_time = ticks;
3510 	                if (++first == adapter->num_tx_desc)
3513 	                tx_buffer = &txr->tx_buffers[first];
3514 			tx_desc = &txr->tx_base[first];
3518 		/* See if we can continue to the next packet */
3519 		last = tx_buffer->next_eop;
3521 			eop_desc = &txr->tx_base[last];
3522 			/* Get new done point */
3523 			if (++last == adapter->num_tx_desc) last = 0;
3528         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3529             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3531         txr->next_to_clean = first;
3534          * If we have enough room, clear IFF_DRV_OACTIVE
3535          * to tell the stack that it is OK to send packets.
3537         if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3538                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3539 		/* All clean, turn off the watchdog */
3540                 if (txr->tx_avail == adapter->num_tx_desc) {
3541 			txr->watchdog_check = FALSE;
3550 /*********************************************************************
3552  *  Refresh mbuf buffers for RX descriptor rings
3553  *   - now keeps its own state so discards due to resource
3554  *     exhaustion are unnecessary, if an mbuf cannot be obtained
3555  *     it just returns, keeping its placeholder, thus it can simply
3556  *     be recalled to try again.
3558  **********************************************************************/
/*
 * igb_refresh_mbufs - walk the ring from next_to_refresh up to 'limit',
 * replacing any consumed header (m_head) and payload (m_pack) mbufs,
 * loading each into its DMA map and writing the new bus addresses into
 * the descriptors.  Allocation failure leaves the placeholder intact
 * for a retry.  If anything was refreshed, the RDT tail register is
 * bumped so the hardware can use the new buffers.
 */
3560 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3562 	struct adapter		*adapter = rxr->adapter;
3563 	bus_dma_segment_t	hseg[1];
3564 	bus_dma_segment_t	pseg[1];
3565 	struct igb_rx_buf	*rxbuf;
3566 	struct mbuf		*mh, *mp;
3567 	int			i, nsegs, error, cleaned;
3569 	i = rxr->next_to_refresh;
3570 	cleaned = -1; /* Signify no completions */
3571 	while (i != limit) {
3572 		rxbuf = &rxr->rx_buffers[i];
3573 		if (rxbuf->m_head == NULL) {
3574 			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3577 			mh->m_pkthdr.len = mh->m_len = MHLEN;
3579 			mh->m_flags |= M_PKTHDR;
			/* align the header payload for the IP stack */
3580 			m_adj(mh, ETHER_ALIGN);
3581 			/* Get the memory mapping */
3582 			error = bus_dmamap_load_mbuf_sg(rxr->htag,
3583 			    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3585 				printf("GET BUF: dmamap load"
3586 				    " failure - %d\n", error);
3591 			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3592 			    BUS_DMASYNC_PREREAD);
3593 			rxr->rx_base[i].read.hdr_addr =
3594 			    htole64(hseg[0].ds_addr);
3597 		if (rxbuf->m_pack == NULL) {
3598 			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3599 			    M_PKTHDR, adapter->rx_mbuf_sz);
3602 			mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3603 			/* Get the memory mapping */
3604 			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3605 			    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3607 				printf("GET BUF: dmamap load"
3608 				    " failure - %d\n", error);
3613 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3614 			    BUS_DMASYNC_PREREAD);
3615 			rxr->rx_base[i].read.pkt_addr =
3616 			    htole64(pseg[0].ds_addr);
3620 		/* Calculate next index */
3621 		if (++i == adapter->num_rx_desc)
3623 		/* This is the work marker for refresh */
3624 		rxr->next_to_refresh = i;
3627 	if (cleaned != -1) /* If we refreshed some, bump tail */
3628 		E1000_WRITE_REG(&adapter->hw,
3629 		    E1000_RDT(rxr->me), cleaned);
3634 /*********************************************************************
3636  *  Allocate memory for rx_buffer structures. Since we use one
3637  *  rx_buffer per received packet, the maximum number of rx_buffer's
3638  *  that we'll need is equal to the number of receive descriptors
3639  *  that we've allocated.
3641  **********************************************************************/
/*
 * igb_allocate_receive_buffers - attach-time RX buffer setup for one
 * ring: allocate the rx_buffer array, create two DMA tags (MSIZE-sized
 * for header-split head buffers, MJUMPAGESIZE for packet buffers), and
 * create a head map and a packet map per descriptor.  On any failure
 * igb_free_receive_structures() unwinds partial state.
 */
3643 igb_allocate_receive_buffers(struct rx_ring *rxr)
3645 	struct	adapter 	*adapter = rxr->adapter;
3646 	device_t 		dev = adapter->dev;
3647 	struct igb_rx_buf	*rxbuf;
3648 	int             	i, bsize, error;
3650 	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3651 	if (!(rxr->rx_buffers =
3652 	    (struct igb_rx_buf *) malloc(bsize,
3653 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3654 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
	/* tag for the small header-split buffers */
3659 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3660 				   1, 0,		/* alignment, bounds */
3661 				   BUS_SPACE_MAXADDR,	/* lowaddr */
3662 				   BUS_SPACE_MAXADDR,	/* highaddr */
3663 				   NULL, NULL,		/* filter, filterarg */
3664 				   MSIZE,		/* maxsize */
3666 				   MSIZE,		/* maxsegsize */
3668 				   NULL,		/* lockfunc */
3669 				   NULL,		/* lockfuncarg */
3671 		device_printf(dev, "Unable to create RX DMA tag\n");
	/* tag for the jumbo-page-sized packet buffers */
3675 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3676 				   1, 0,		/* alignment, bounds */
3677 				   BUS_SPACE_MAXADDR,	/* lowaddr */
3678 				   BUS_SPACE_MAXADDR,	/* highaddr */
3679 				   NULL, NULL,		/* filter, filterarg */
3680 				   MJUMPAGESIZE,	/* maxsize */
3682 				   MJUMPAGESIZE,	/* maxsegsize */
3684 				   NULL,		/* lockfunc */
3685 				   NULL,		/* lockfuncarg */
3687 		device_printf(dev, "Unable to create RX payload DMA tag\n");
3691 	for (i = 0; i < adapter->num_rx_desc; i++) {
3692 		rxbuf = &rxr->rx_buffers[i];
3693 		error = bus_dmamap_create(rxr->htag,
3694 		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3697 			    "Unable to create RX head DMA maps\n");
3700 		error = bus_dmamap_create(rxr->ptag,
3701 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3704 			    "Unable to create RX packet DMA maps\n");
3712 	/* Frees all, but can handle partial completion */
3713 	igb_free_receive_structures(adapter);
/*
 * Release every mbuf currently attached to a receive ring: sync and
 * unload each loaded DMA map, free both the header and payload mbufs,
 * and NULL the pointers so the ring can be repopulated.
 */
3719 igb_free_receive_ring(struct rx_ring *rxr)
3721 struct adapter *adapter;
3722 struct igb_rx_buf *rxbuf;
3725 adapter = rxr->adapter;
3726 for (i = 0; i < adapter->num_rx_desc; i++) {
3727 rxbuf = &rxr->rx_buffers[i];
3728 if (rxbuf->m_head != NULL) {
3729 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3730 BUS_DMASYNC_POSTREAD);
3731 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
/* re-mark as pkthdr so m_freem() releases the whole chain state */
3732 rxbuf->m_head->m_flags |= M_PKTHDR;
3733 m_freem(rxbuf->m_head);
3735 if (rxbuf->m_pack != NULL) {
3736 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3737 BUS_DMASYNC_POSTREAD);
3738 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3739 rxbuf->m_pack->m_flags |= M_PKTHDR;
3740 m_freem(rxbuf->m_pack);
3742 rxbuf->m_head = NULL;
3743 rxbuf->m_pack = NULL;
3748 /*********************************************************************
3750 * Initialize a receive ring and its buffers.
3752 **********************************************************************/
/*
 * (Re)initialize one receive ring: zero the descriptor area, free any
 * stale mbufs, attach a fresh header mbuf + payload cluster to every
 * descriptor, reset ring indices, and optionally set up LRO.
 * NOTE(review): elided extract -- error/"fail:" paths and some
 * assignments (e.g. where 'mh'/'mp' aliases are set) are not visible.
 */
3754 igb_setup_receive_ring(struct rx_ring *rxr)
3756 struct adapter *adapter;
3759 struct igb_rx_buf *rxbuf;
3760 bus_dma_segment_t pseg[1], hseg[1];
3761 struct lro_ctrl *lro = &rxr->lro;
3762 int rsize, nsegs, error = 0;
3764 adapter = rxr->adapter;
3768 /* Clear the ring contents */
3770 rsize = roundup2(adapter->num_rx_desc *
3771 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3772 bzero((void *)rxr->rx_base, rsize);
3775 ** Free current RX buffer structures and their mbufs
3777 igb_free_receive_ring(rxr);
3779 /* Now replenish the ring mbufs */
3780 for (int j = 0; j != adapter->num_rx_desc; ++j) {
3781 struct mbuf *mh, *mp;
3783 rxbuf = &rxr->rx_buffers[j];
3785 /* First the header */
3786 rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3787 if (rxbuf->m_head == NULL)
/* ETHER_ALIGN keeps the IP header 4-byte aligned after the 14-byte
   Ethernet header */
3789 m_adj(rxbuf->m_head, ETHER_ALIGN);
/* NOTE(review): 'mh' is used here but its assignment from
   rxbuf->m_head is on an elided line -- confirm against full source */
3791 mh->m_len = mh->m_pkthdr.len = MHLEN;
3792 mh->m_flags |= M_PKTHDR;
3793 /* Get the memory mapping */
3794 error = bus_dmamap_load_mbuf_sg(rxr->htag,
3795 rxbuf->hmap, rxbuf->m_head, hseg,
3796 &nsegs, BUS_DMA_NOWAIT);
3797 if (error != 0) /* Nothing elegant to do here */
3799 bus_dmamap_sync(rxr->htag,
3800 rxbuf->hmap, BUS_DMASYNC_PREREAD);
3801 /* Update descriptor */
3802 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3804 /* Now the payload cluster */
3805 rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3806 M_PKTHDR, adapter->rx_mbuf_sz);
3807 if (rxbuf->m_pack == NULL)
3810 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3811 /* Get the memory mapping */
3812 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3813 rxbuf->pmap, mp, pseg,
3814 &nsegs, BUS_DMA_NOWAIT);
3817 bus_dmamap_sync(rxr->ptag,
3818 rxbuf->pmap, BUS_DMASYNC_PREREAD);
3819 /* Update descriptor */
3820 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3823 /* Setup our descriptor indices */
3824 rxr->next_to_check = 0;
3825 rxr->next_to_refresh = 0;
3826 rxr->lro_enabled = FALSE;
3828 if (igb_header_split)
3829 rxr->hdr_split = TRUE;
3831 ifp->if_capabilities &= ~IFCAP_LRO;
3835 rxr->discard = FALSE;
/* push the freshly written descriptors out to the device */
3837 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3838 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3841 ** Now set up the LRO interface, we
3842 ** also only do head split when LRO
3843 ** is enabled, since so often they
3844 ** are undesirable in similar setups.
3846 if (ifp->if_capenable & IFCAP_LRO) {
3847 int err = tcp_lro_init(lro);
3849 device_printf(dev, "LRO Initialization failed!\n");
3852 INIT_DEBUGOUT("RX LRO Initialized\n");
3853 rxr->lro_enabled = TRUE;
3854 lro->ifp = adapter->ifp;
/* error path: tear down whatever we managed to attach */
3861 igb_free_receive_ring(rxr);
3866 /*********************************************************************
3868 * Initialize all receive rings.
3870 **********************************************************************/
/*
 * Initialize every receive ring; on failure, unwind the rings that
 * were set up before the failing one (the failing ring cleans itself).
 */
3872 igb_setup_receive_structures(struct adapter *adapter)
3874 struct rx_ring *rxr = adapter->rx_rings;
3877 for (i = 0; i < adapter->num_queues; i++, rxr++)
3878 if (igb_setup_receive_ring(rxr))
3884 * Free RX buffers allocated so far, we will only handle
3885 * the rings that completed, the failing case will have
3886 * cleaned up for itself. The value of 'i' will be the
3887 * failed ring so we must pre-decrement it.
3889 rxr = adapter->rx_rings;
/* NOTE(review): this unwind looks suspect: '--i; i > 0' skips ring 0,
   and the inner 'j' loop calls igb_free_receive_ring() (which frees
   the whole ring) once per descriptor.  Verify against the complete
   driver source before relying on this path. */
3890 for (--i; i > 0; i--, rxr++) {
3891 for (j = 0; j < adapter->num_rx_desc; j++)
3892 igb_free_receive_ring(rxr);
3898 /*********************************************************************
3900 * Enable receive unit.
3902 **********************************************************************/
/*
 * Program the hardware receive unit: RCTL, per-queue SRRCTL/RDBA/RDLEN/
 * RXDCTL, RSS (multiqueue) hashing, checksum offload, and finally the
 * head/tail pointers.  Receives are disabled for the duration and
 * re-enabled at the end.
 */
3904 igb_initialize_receive_units(struct adapter *adapter)
3906 struct rx_ring *rxr = adapter->rx_rings;
3907 struct ifnet *ifp = adapter->ifp;
3908 struct e1000_hw *hw = &adapter->hw;
3909 u32 rctl, rxcsum, psize, srrctl = 0;
3911 INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3914 * Make sure receives are disabled while setting
3915 * up the descriptor ring
3917 rctl = E1000_READ_REG(hw, E1000_RCTL);
3918 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3921 ** Set up for header split
3923 if (rxr->hdr_split) {
3924 /* Use a standard mbuf for the header */
3925 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3926 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3928 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3931 ** Set up for jumbo frames
3933 if (ifp->if_mtu > ETHERMTU) {
3934 rctl |= E1000_RCTL_LPE;
3935 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3936 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3938 /* Set maximum packet len */
3939 psize = adapter->max_frame_size;
3940 /* are we on a vlan? */
3941 if (adapter->ifp->if_vlantrunk != NULL)
3942 psize += VLAN_TAG_SIZE;
3943 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3945 rctl &= ~E1000_RCTL_LPE;
3946 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3947 rctl |= E1000_RCTL_SZ_2048;
3950 /* Setup the Base and Length of the Rx Descriptor Rings */
3951 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3952 u64 bus_addr = rxr->rxdma.dma_paddr;
/* NOTE(review): RDLEN uses sizeof(struct e1000_rx_desc) while the
   ring holds union e1000_adv_rx_desc; both are 16 bytes, so the value
   is the same -- confirm if descriptor sizes ever diverge */
3955 E1000_WRITE_REG(hw, E1000_RDLEN(i),
3956 adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3957 E1000_WRITE_REG(hw, E1000_RDBAH(i),
3958 (uint32_t)(bus_addr >> 32));
3959 E1000_WRITE_REG(hw, E1000_RDBAL(i),
3960 (uint32_t)bus_addr);
3961 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3962 /* Enable this Queue */
3963 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3964 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3965 rxdctl &= 0xFFF00000;
3966 rxdctl |= IGB_RX_PTHRESH;
3967 rxdctl |= IGB_RX_HTHRESH << 8;
3968 rxdctl |= IGB_RX_WTHRESH << 16;
3969 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3973 ** Setup for RX MultiQueue
3975 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3976 if (adapter->num_queues >1) {
3977 u32 random[10], mrqc, shift = 0;
/* random RSS key so hash distribution differs per boot */
3983 arc4rand(&random, sizeof(random), 0);
3984 if (adapter->hw.mac.type == e1000_82575)
3986 /* Warning FM follows */
3987 for (int i = 0; i < 128; i++) {
3989 (i % adapter->num_queues) << shift;
3992 E1000_RETA(i >> 2), reta.dword);
3994 /* Now fill in hash table */
3995 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3996 for (int i = 0; i < 10; i++)
3997 E1000_WRITE_REG_ARRAY(hw,
3998 E1000_RSSRK(0), i, random[i]);
4000 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4001 E1000_MRQC_RSS_FIELD_IPV4_TCP);
4002 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4003 E1000_MRQC_RSS_FIELD_IPV6_TCP);
4004 mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4005 E1000_MRQC_RSS_FIELD_IPV6_UDP);
4006 mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4007 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4009 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4012 ** NOTE: Receive Full-Packet Checksum Offload
4013 ** is mutually exclusive with Multiqueue. However
4014 ** this is not the same as TCP/IP checksums which
4017 rxcsum |= E1000_RXCSUM_PCSD;
4018 #if __FreeBSD_version >= 800000
4019 /* For SCTP Offload */
4020 if ((hw->mac.type == e1000_82576)
4021 && (ifp->if_capenable & IFCAP_RXCSUM))
4022 rxcsum |= E1000_RXCSUM_CRCOFL;
4026 if (ifp->if_capenable & IFCAP_RXCSUM) {
4027 rxcsum |= E1000_RXCSUM_IPPCSE;
4028 #if __FreeBSD_version >= 800000
4029 if (adapter->hw.mac.type == e1000_82576)
4030 rxcsum |= E1000_RXCSUM_CRCOFL;
4033 rxcsum &= ~E1000_RXCSUM_TUOFL;
4035 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4037 /* Setup the Receive Control Register */
4038 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4039 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4040 E1000_RCTL_RDMTS_HALF |
4041 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4042 /* Strip CRC bytes. */
4043 rctl |= E1000_RCTL_SECRC;
4044 /* Make sure VLAN Filters are off */
4045 rctl &= ~E1000_RCTL_VFE;
4046 /* Don't store bad packets */
4047 rctl &= ~E1000_RCTL_SBP;
4049 /* Enable Receives */
4050 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4053 * Setup the HW Rx Head and Tail Descriptor Pointers
4054 * - needs to be after enable
4056 for (int i = 0; i < adapter->num_queues; i++) {
4057 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4058 E1000_WRITE_REG(hw, E1000_RDT(i),
4059 adapter->num_rx_desc - 1);
4064 /*********************************************************************
4066 * Free receive rings.
4068 **********************************************************************/
/*
 * Tear down all receive rings: per-ring buffers and descriptor DMA
 * memory, then the rx_rings array itself.  (LRO teardown presumably
 * happens on an elided line using 'lro' -- confirm in full source.)
 */
4070 igb_free_receive_structures(struct adapter *adapter)
4072 struct rx_ring *rxr = adapter->rx_rings;
4074 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4075 struct lro_ctrl *lro = &rxr->lro;
4076 igb_free_receive_buffers(rxr);
4078 igb_dma_free(adapter, &rxr->rxdma);
4081 free(adapter->rx_rings, M_DEVBUF);
4084 /*********************************************************************
4086 * Free receive ring data structures.
4088 **********************************************************************/
/*
 * Free one ring's receive buffers: unload/free any attached mbufs,
 * destroy the per-descriptor DMA maps, free the rx_buffers array, and
 * destroy both DMA tags.  Safe against partially-initialized state
 * (every pointer is NULL-checked).
 */
4090 igb_free_receive_buffers(struct rx_ring *rxr)
4092 struct adapter *adapter = rxr->adapter;
4093 struct igb_rx_buf *rxbuf;
4096 INIT_DEBUGOUT("free_receive_structures: begin");
4098 /* Cleanup any existing buffers */
4099 if (rxr->rx_buffers != NULL) {
4100 for (i = 0; i < adapter->num_rx_desc; i++) {
4101 rxbuf = &rxr->rx_buffers[i];
4102 if (rxbuf->m_head != NULL) {
4103 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4104 BUS_DMASYNC_POSTREAD);
4105 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4106 rxbuf->m_head->m_flags |= M_PKTHDR;
4107 m_freem(rxbuf->m_head);
4109 if (rxbuf->m_pack != NULL) {
4110 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4111 BUS_DMASYNC_POSTREAD);
4112 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4113 rxbuf->m_pack->m_flags |= M_PKTHDR;
4114 m_freem(rxbuf->m_pack);
4116 rxbuf->m_head = NULL;
4117 rxbuf->m_pack = NULL;
4118 if (rxbuf->hmap != NULL) {
4119 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4122 if (rxbuf->pmap != NULL) {
4123 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4127 if (rxr->rx_buffers != NULL) {
4128 free(rxr->rx_buffers, M_DEVBUF);
4129 rxr->rx_buffers = NULL;
4133 if (rxr->htag != NULL) {
4134 bus_dma_tag_destroy(rxr->htag);
4137 if (rxr->ptag != NULL) {
4138 bus_dma_tag_destroy(rxr->ptag);
/*
 * Drop the frame at descriptor slot 'i': free any partially-assembled
 * chain (rxr->fmp) and reset the slot's mbufs for reuse -- the loaded
 * DMA maps are kept and only the mbuf fields are rewound.
 */
4143 static __inline void
4144 igb_rx_discard(struct rx_ring *rxr, int i)
4146 struct adapter *adapter = rxr->adapter;
4147 struct igb_rx_buf *rbuf;
4148 struct mbuf *mh, *mp;
4150 rbuf = &rxr->rx_buffers[i];
4151 if (rxr->fmp != NULL) {
4152 rxr->fmp->m_flags |= M_PKTHDR;
4161 /* Reuse loaded DMA map and just update mbuf chain */
4163 mh->m_flags |= M_PKTHDR;
/* rewind the payload cluster to full size / original data pointer */
4166 mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4167 mp->m_data = mp->m_ext.ext_buf;
/*
 * Hand a completed frame to the stack, attempting LRO first when the
 * frame qualifies (IPv4/TCP, hardware-verified checksum, no ETQF
 * match).  Falls through to if_input() when LRO is off, has no
 * resources, or rejects the frame.
 */
4172 static __inline void
4173 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4177 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
4178 * should be computed by hardware. Also it should not have VLAN tag in
4181 if (rxr->lro_enabled &&
4182 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4183 (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4184 (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4185 (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4186 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4187 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4189 * Send to the stack if:
4190 ** - LRO not enabled, or
4191 ** - no LRO resources, or
4192 ** - lro enqueue fails
4194 if (rxr->lro.lro_cnt != 0)
4195 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4198 (*ifp->if_input)(ifp, m);
4201 /*********************************************************************
4203 * This routine executes in interrupt context. It replenishes
4204 * the mbufs in the descriptor and sends data which has been
4205 * dma'ed into host memory to upper layer.
4207 * We loop at most count times if count is > 0, or until done if
4210 * Return TRUE if more to clean, FALSE otherwise
4211 *********************************************************************/
/*
 * Receive interrupt/taskqueue handler: walk the descriptor ring from
 * next_to_check, assemble (possibly header-split, possibly multi-
 * descriptor) frames, apply checksum/VLAN/flowid metadata, pass frames
 * up (LRO or if_input), and refresh consumed mbufs every 8 slots.
 * Returns TRUE when more descriptors remain to be cleaned.
 * NOTE(review): elided extract -- several assignments (sendmp, mh/mp
 * aliasing, hlen clamping) sit on lines not shown here.
 */
4213 igb_rxeof(struct igb_queue *que, int count)
4215 struct adapter *adapter = que->adapter;
4216 struct rx_ring *rxr = que->rxr;
4217 struct ifnet *ifp = adapter->ifp;
4218 struct lro_ctrl *lro = &rxr->lro;
4219 struct lro_entry *queued;
4220 int i, processed = 0;
4221 u32 ptype, staterr = 0;
4222 union e1000_adv_rx_desc *cur;
4225 /* Sync the ring. */
4226 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4227 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4229 /* Main clean loop */
4230 for (i = rxr->next_to_check; count != 0;) {
4231 struct mbuf *sendmp, *mh, *mp;
4232 struct igb_rx_buf *rxbuf;
4233 u16 hlen, plen, hdr, vtag;
4236 cur = &rxr->rx_base[i];
4237 staterr = le32toh(cur->wb.upper.status_error);
/* DD clear => hardware hasn't written this descriptor yet */
4238 if ((staterr & E1000_RXD_STAT_DD) == 0)
4240 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4243 sendmp = mh = mp = NULL;
4244 cur->wb.upper.status_error = 0;
4245 rxbuf = &rxr->rx_buffers[i];
4246 plen = le16toh(cur->wb.upper.length);
4247 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4248 vtag = le16toh(cur->wb.upper.vlan);
4249 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4250 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4252 /* Make sure all segments of a bad packet are discarded */
4253 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4256 ++rxr->rx_discarded;
4257 if (!eop) /* Catch subsequent segs */
4258 rxr->discard = TRUE;
4260 rxr->discard = FALSE;
4261 igb_rx_discard(rxr, i);
4266 ** The way the hardware is configured to
4267 ** split, it will ONLY use the header buffer
4268 ** when header split is enabled, otherwise we
4269 ** get normal behavior, ie, both header and
4270 ** payload are DMA'd into the payload buffer.
4272 ** The fmp test is to catch the case where a
4273 ** packet spans multiple descriptors, in that
4274 ** case only the first header is valid.
4276 if (rxr->hdr_split && rxr->fmp == NULL) {
4277 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4278 E1000_RXDADV_HDRBUFLEN_SHIFT;
4279 if (hlen > IGB_HDR_BUF)
4281 /* Handle the header mbuf */
4282 mh = rxr->rx_buffers[i].m_head;
4284 /* clear buf info for refresh */
4285 rxbuf->m_head = NULL;
4287 ** Get the payload length, this
4288 ** could be zero if its a small
4292 mp = rxr->rx_buffers[i].m_pack;
4295 /* clear buf info for refresh */
4296 rxbuf->m_pack = NULL;
4297 rxr->rx_split_packets++;
4301 ** Either no header split, or a
4302 ** secondary piece of a fragmented
4305 mh = rxr->rx_buffers[i].m_pack;
4307 /* clear buf info for refresh */
4308 rxbuf->m_pack = NULL;
4311 ++processed; /* So we know when to refresh */
4313 /* Initial frame - setup */
4314 if (rxr->fmp == NULL) {
4315 mh->m_pkthdr.len = mh->m_len;
4316 /* Store the first mbuf */
4320 /* Add payload if split */
4321 mh->m_pkthdr.len += mp->m_len;
4322 rxr->lmp = mh->m_next;
4325 /* Chain mbuf's together */
4326 rxr->lmp->m_next = mh;
4327 rxr->lmp = rxr->lmp->m_next;
4328 rxr->fmp->m_pkthdr.len += mh->m_len;
4332 rxr->fmp->m_pkthdr.rcvif = ifp;
4335 /* capture data for AIM */
4337 rxr->bytes += rxr->fmp->m_pkthdr.len;
4338 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4340 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4341 igb_rx_checksum(staterr, rxr->fmp, ptype);
4343 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4344 (staterr & E1000_RXD_STAT_VP) != 0) {
4345 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4346 rxr->fmp->m_flags |= M_VLANTAG;
4348 #if __FreeBSD_version >= 800000
/* tag with the MSI-X vector so the stack keeps per-queue ordering */
4349 rxr->fmp->m_pkthdr.flowid = que->msix;
4350 rxr->fmp->m_flags |= M_FLOWID;
4353 /* Make sure to set M_PKTHDR. */
4354 sendmp->m_flags |= M_PKTHDR;
4360 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4361 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4363 /* Advance our pointers to the next descriptor. */
4364 if (++i == adapter->num_rx_desc)
4367 ** Send to the stack or LRO
4370 igb_rx_input(rxr, ifp, sendmp, ptype);
4372 /* Every 8 descriptors we go to refresh mbufs */
4373 if (processed == 8) {
4374 igb_refresh_mbufs(rxr, i);
4379 /* Catch any remainders */
4380 if (processed != 0) {
4381 igb_refresh_mbufs(rxr, i);
4385 rxr->next_to_check = i;
4388 * Flush any outstanding LRO work
4390 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4391 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4392 tcp_lro_flush(lro, queued);
4398 ** We still have cleaning to do?
4399 ** Schedule another interrupt if so.
4401 if ((staterr & E1000_RXD_STAT_DD) != 0)
4407 /*********************************************************************
4409 * Verify that the hardware indicated that the checksum is valid.
4410 * Inform the stack about the status of checksum so that stack
4411 * doesn't spend time verifying the checksum.
4413 *********************************************************************/
/*
 * Translate hardware RX checksum status bits into mbuf csum_flags so
 * the stack skips software verification.  Handles IP, TCP/UDP, and
 * (82576, FreeBSD >= 8) SCTP CRC offload.
 */
4415 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4417 u16 status = (u16)staterr;
/* error bits live in byte 3 of the status/error word */
4418 u8 errors = (u8) (staterr >> 24);
4421 /* Ignore Checksum bit is set */
4422 if (status & E1000_RXD_STAT_IXSM) {
4423 mp->m_pkthdr.csum_flags = 0;
4427 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4428 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4432 if (status & E1000_RXD_STAT_IPCS) {
4434 if (!(errors & E1000_RXD_ERR_IPE)) {
4435 /* IP Checksum Good */
4436 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4437 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4439 mp->m_pkthdr.csum_flags = 0;
4442 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4443 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4444 #if __FreeBSD_version >= 800000
4445 if (sctp) /* reassign */
4446 type = CSUM_SCTP_VALID;
4449 if (!(errors & E1000_RXD_ERR_TCPE)) {
4450 mp->m_pkthdr.csum_flags |= type;
/* pseudo-header checksum of 0xffff means "fully verified" */
4452 mp->m_pkthdr.csum_data = htons(0xffff);
4459 * This routine is run via an vlan
/*
 * VLAN-config event hook: record a newly registered VLAN tag in the
 * shadow VFTA bitmap and re-init the adapter to push it to hardware.
 */
4463 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4465 struct adapter *adapter = ifp->if_softc;
4468 if (ifp->if_softc != arg) /* Not our event */
4471 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
/* VFTA is 128 x 32-bit words: word index = vtag>>5, bit = vtag&31 */
4474 index = (vtag >> 5) & 0x7F;
4476 igb_shadow_vfta[index] |= (1 << bit);
4477 ++adapter->num_vlans;
4478 /* Re-init to load the changes */
4483 * This routine is run via an vlan
/*
 * VLAN-unconfig event hook: clear the tag's bit in the shadow VFTA
 * and re-init so hardware filtering matches.
 */
4487 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4489 struct adapter *adapter = ifp->if_softc;
4492 if (ifp->if_softc != arg)
4495 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4498 index = (vtag >> 5) & 0x7F;
4500 igb_shadow_vfta[index] &= ~(1 << bit);
4501 --adapter->num_vlans;
4502 /* Re-init to load the changes */
/*
 * After a soft reset, re-program VLAN hardware state: repopulate the
 * VFTA from the shadow copy, enable VLAN tag stripping (CTRL.VME) and
 * the filter table (RCTL.VFE), and widen RLPML for the VLAN tag.
 * No-op when no VLANs are registered.
 */
4507 igb_setup_vlan_hw_support(struct adapter *adapter)
4509 struct e1000_hw *hw = &adapter->hw;
4513 ** We get here thru init_locked, meaning
4514 ** a soft reset, this has already cleared
4515 ** the VFTA and other state, so if there
4516 ** have been no vlan's registered do nothing.
4518 if (adapter->num_vlans == 0)
4522 ** A soft reset zero's out the VFTA, so
4523 ** we need to repopulate it now.
4525 for (int i = 0; i < IGB_VFTA_SIZE; i++)
4526 if (igb_shadow_vfta[i] != 0)
4527 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4528 i, igb_shadow_vfta[i]);
4530 reg = E1000_READ_REG(hw, E1000_CTRL);
4531 reg |= E1000_CTRL_VME;
4532 E1000_WRITE_REG(hw, E1000_CTRL, reg);
4534 /* Enable the Filter Table */
4535 reg = E1000_READ_REG(hw, E1000_RCTL);
4536 reg &= ~E1000_RCTL_CFIEN;
4537 reg |= E1000_RCTL_VFE;
4538 E1000_WRITE_REG(hw, E1000_RCTL, reg);
4540 /* Update the frame size */
4541 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4542 adapter->max_frame_size + VLAN_TAG_SIZE);
/*
 * Unmask interrupts.  In MSI-X mode, program auto-clear/auto-mask
 * (EIAC/EIAM) and set EIMS for the configured vectors; otherwise the
 * legacy IMS path (else branch, partially elided) is used.
 */
4546 igb_enable_intr(struct adapter *adapter)
4548 /* With RSS set up what to auto clear */
4549 if (adapter->msix_mem) {
4550 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4551 adapter->eims_mask);
4552 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4553 adapter->eims_mask);
4554 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4555 adapter->eims_mask);
4556 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4559 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
/* flush posted writes so the unmask takes effect immediately */
4562 E1000_WRITE_FLUSH(&adapter->hw);
/*
 * Mask all interrupts (MSI-X extended registers when present, plus the
 * legacy IMC) and flush the writes.
 */
4568 igb_disable_intr(struct adapter *adapter)
4570 if (adapter->msix_mem) {
4571 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4572 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4574 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4575 E1000_WRITE_FLUSH(&adapter->hw);
4580 * Bit of a misnomer, what this really means is
4581 * to enable OS management of the system... aka
4582 * to disable special hardware management features
/*
 * Hand management traffic to the host OS: stop the BMC intercepting
 * ARP and route management packets (ports 623/664) to the host.
 */
4585 igb_init_manageability(struct adapter *adapter)
4587 if (adapter->has_manage) {
4588 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4589 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4591 /* disable hardware interception of ARP */
4592 manc &= ~(E1000_MANC_ARP_EN);
4594 /* enable receiving management packets to the host */
4595 manc |= E1000_MANC_EN_MNG2HOST;
4596 manc2h |= 1 << 5; /* Mng Port 623 */
4597 manc2h |= 1 << 6; /* Mng Port 664 */
4598 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4599 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4604 * Give control back to hardware management
4605 * controller if there is one.
/*
 * Return management traffic to the firmware/BMC: re-enable ARP
 * interception and stop forwarding management packets to the host.
 */
4608 igb_release_manageability(struct adapter *adapter)
4610 if (adapter->has_manage) {
4611 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4613 /* re-enable hardware interception of ARP */
4614 manc |= E1000_MANC_ARP_EN;
4615 manc &= ~E1000_MANC_EN_MNG2HOST;
4617 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4622 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4623 * For ASF and Pass Through versions of f/w this means that
4624 * the driver is loaded.
/*
 * Set CTRL_EXT.DRV_LOAD to tell ASF/pass-through firmware the driver
 * has taken ownership of the hardware.
 */
4628 igb_get_hw_control(struct adapter *adapter)
4632 /* Let firmware know the driver has taken over */
4633 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4634 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4635 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4639 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4640 * For ASF and Pass Through versions of f/w this means that the
4641 * driver is no longer loaded.
/*
 * Clear CTRL_EXT.DRV_LOAD to return hardware ownership to the
 * firmware (driver unloading).
 */
4645 igb_release_hw_control(struct adapter *adapter)
4649 /* Let firmware taken over control of h/w */
4650 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4651 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4652 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
/*
 * Reject multicast (low bit of first octet set) and all-zero MAC
 * addresses; anything else is considered valid.
 */
4656 igb_is_valid_ether_addr(uint8_t *addr)
4658 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4660 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4669 * Enable PCI Wake On Lan capability
/*
 * Arm PCI Wake-On-LAN: locate the power-management capability and set
 * PME status + enable in its status register.
 * NOTE(review): only the first capability is checked here (visible
 * lines do not walk the capability list) -- confirm in full source.
 */
4672 igb_enable_wakeup(device_t dev)
4677 /* First find the capabilities pointer*/
4678 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4679 /* Read the PM Capabilities */
4680 id = pci_read_config(dev, cap, 1);
4681 if (id != PCIY_PMG) /* Something wrong */
4683 /* OK, we have the power capabilities, so
4684 now get the status register */
4685 cap += PCIR_POWER_STATUS;
4686 status = pci_read_config(dev, cap, 2);
4687 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4688 pci_write_config(dev, cap, status, 2);
/*
 * ifnet LED callback: turn the identify LED on (onoff != 0, the
 * partially elided branch) or off, under the core lock.
 */
4693 igb_led_func(void *arg, int onoff)
4695 struct adapter *adapter = arg;
4697 IGB_CORE_LOCK(adapter);
4699 e1000_setup_led(&adapter->hw);
4700 e1000_led_on(&adapter->hw);
4702 e1000_led_off(&adapter->hw);
4703 e1000_cleanup_led(&adapter->hw);
4705 IGB_CORE_UNLOCK(adapter);
4708 /**********************************************************************
4710 * Update the board statistics counters.
4712 **********************************************************************/
/*
 * Accumulate hardware statistics registers (clear-on-read) into the
 * adapter's soft counters and derive the ifnet error/collision totals.
 * Symbol/sequence errors are only meaningful with link up or copper
 * media, so they are gated on that condition.
 */
4714 igb_update_stats_counters(struct adapter *adapter)
4718 if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4719 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4720 adapter->stats.symerrs +=
4721 E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4722 adapter->stats.sec +=
4723 E1000_READ_REG(&adapter->hw, E1000_SEC);
4725 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4726 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4727 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4728 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4730 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4731 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4732 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4733 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4734 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4735 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4736 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4737 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4738 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4739 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4740 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4741 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4742 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4743 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4744 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4745 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4746 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4747 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4748 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4749 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4751 /* For the 64-bit byte counters the low dword must be read first. */
4752 /* Both registers clear on the read of the high dword */
4754 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4755 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4757 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4758 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4759 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4760 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4761 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4763 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4764 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4766 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4767 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4768 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4769 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4770 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4771 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4772 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4773 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4774 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4775 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4777 adapter->stats.algnerrc +=
4778 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4779 adapter->stats.rxerrc +=
4780 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4781 adapter->stats.tncrs +=
4782 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4783 adapter->stats.cexterr +=
4784 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4785 adapter->stats.tsctc +=
4786 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4787 adapter->stats.tsctfc +=
4788 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
/* Export aggregated counters to the ifnet layer */
4791 ifp->if_collisions = adapter->stats.colc;
4794 ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4795 adapter->stats.crcerrs + adapter->stats.algnerrc +
4796 adapter->stats.ruc + adapter->stats.roc +
4797 adapter->stats.mpc + adapter->stats.cexterr;
4800 ifp->if_oerrors = adapter->stats.ecol +
4801 adapter->stats.latecol + adapter->watchdog_events;
4805 /**********************************************************************
4807 * This routine is called only when igb_display_debug_stats is enabled.
4808 * This routine provides a way to take a look at important statistics
4809 * maintained by the driver and hardware.
4811 **********************************************************************/
/*
 * Dump driver/hardware debug state: control registers, packet buffer
 * split, flow-control watermarks, and per-queue TX/RX ring pointers
 * plus software counters.  Debug aid only; no hardware state changes.
 */
4813 igb_print_debug_info(struct adapter *adapter)
4815 device_t dev = adapter->dev;
4816 struct igb_queue *que = adapter->queues;
4817 struct rx_ring *rxr = adapter->rx_rings;
4818 struct tx_ring *txr = adapter->tx_rings;
4819 uint8_t *hw_addr = adapter->hw.hw_addr;
4821 device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4822 device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4823 E1000_READ_REG(&adapter->hw, E1000_CTRL),
4824 E1000_READ_REG(&adapter->hw, E1000_RCTL));
4826 #if (DEBUG_HW > 0) /* Dont output these errors normally */
4827 device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4828 E1000_READ_REG(&adapter->hw, E1000_IMS),
4829 E1000_READ_REG(&adapter->hw, E1000_EIMS));
4832 device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4833 ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
4834 (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4835 device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4836 adapter->hw.fc.high_water,
4837 adapter->hw.fc.low_water);
4839 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4840 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d ", i,
4841 E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4842 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4843 device_printf(dev, "rdh = %d, rdt = %d\n",
4844 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4845 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4846 device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4847 txr->me, (long long)txr->no_desc_avail);
4848 device_printf(dev, "TX(%d) Packets sent = %lld\n",
4849 txr->me, (long long)txr->tx_packets);
4850 device_printf(dev, "RX(%d) Packets received = %lld ",
4851 rxr->me, (long long)rxr->rx_packets);
/* NOTE(review): rxr is advanced past the ring array by the loop
   above; an elided line presumably resets it to adapter->rx_rings
   before this second loop -- verify in full source */
4854 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4855 struct lro_ctrl *lro = &rxr->lro;
4856 device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4857 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4858 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4859 device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4860 (long long)rxr->rx_packets);
4861 device_printf(dev, " Split Packets = %lld ",
4862 (long long)rxr->rx_split_packets);
4863 device_printf(dev, " Byte count = %lld\n",
4864 (long long)rxr->rx_bytes);
4865 device_printf(dev,"RX(%d) LRO Queued= %d ",
4866 i, lro->lro_queued);
4867 device_printf(dev,"LRO Flushed= %d\n",lro->lro_flushed);
4870 for (int i = 0; i < adapter->num_queues; i++, que++)
4871 device_printf(dev,"QUE(%d) IRQs = %llx\n",
4872 i, (long long)que->irqs);
4874 device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4875 device_printf(dev, "Mbuf defrag failed = %ld\n",
4876 adapter->mbuf_defrag_failed);
4877 device_printf(dev, "Std mbuf header failed = %ld\n",
4878 adapter->mbuf_header_failed);
4879 device_printf(dev, "Std mbuf packet failed = %ld\n",
4880 adapter->mbuf_packet_failed);
4881 device_printf(dev, "Driver dropped packets = %ld\n",
4882 adapter->dropped_pkts);
4883 device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4884 adapter->no_tx_dma_setup);
/*
 * igb_print_hw_stats: dump the accumulated MAC statistics counters kept
 * in adapter->stats (plus a few driver soft counters) to the console.
 * Invoked from the igb_sysctl_stats handler below.
 * NOTE(review): this excerpt elides the storage-class/return-type line
 * before 4888, the function braces, and the #endif that closes the
 * DEBUG_HW block opened at 4894 -- verify against the full file.
 */
4888 igb_print_hw_stats(struct adapter *adapter)
4890 device_t dev = adapter->dev;
4892 device_printf(dev, "Excessive collisions = %lld\n",
4893 (long long)adapter->stats.ecol);
/* Link-level error counters: noisy, so normally compiled out. */
4894 #if (DEBUG_HW > 0) /* Don't output these errors normally */
4895 device_printf(dev, "Symbol errors = %lld\n",
4896 (long long)adapter->stats.symerrs);
/* NOTE(review): matching #endif (original line ~4897) is elided here. */
4898 device_printf(dev, "Sequence errors = %lld\n",
4899 (long long)adapter->stats.sec);
4900 device_printf(dev, "Defer count = %lld\n",
4901 (long long)adapter->stats.dc);
4902 device_printf(dev, "Missed Packets = %lld\n",
4903 (long long)adapter->stats.mpc);
4904 device_printf(dev, "Receive No Buffers = %lld\n",
4905 (long long)adapter->stats.rnbc);
/* RLEC is inaccurate on some hardware, calculate our own. */
4907 device_printf(dev, "Receive Length Errors = %lld\n",
4908 ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4909 device_printf(dev, "Receive errors = %lld\n",
4910 (long long)adapter->stats.rxerrc);
4911 device_printf(dev, "Crc errors = %lld\n",
4912 (long long)adapter->stats.crcerrs);
4913 device_printf(dev, "Alignment errors = %lld\n",
4914 (long long)adapter->stats.algnerrc);
/* On 82575 these are collision counts */
4916 device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4917 (long long)adapter->stats.cexterr);
/* Driver-maintained soft counters (long, hence %ld). */
4918 device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4919 device_printf(dev, "watchdog timeouts = %ld\n",
4920 adapter->watchdog_events);
/* Flow-control pause frame counters. */
4921 device_printf(dev, "XON Rcvd = %lld\n",
4922 (long long)adapter->stats.xonrxc);
4923 device_printf(dev, "XON Xmtd = %lld\n",
4924 (long long)adapter->stats.xontxc);
4925 device_printf(dev, "XOFF Rcvd = %lld\n",
4926 (long long)adapter->stats.xoffrxc);
4927 device_printf(dev, "XOFF Xmtd = %lld\n",
4928 (long long)adapter->stats.xofftxc);
/* Good packet and TSO context counters. */
4929 device_printf(dev, "Good Packets Rcvd = %lld\n",
4930 (long long)adapter->stats.gprc);
4931 device_printf(dev, "Good Packets Xmtd = %lld\n",
4932 (long long)adapter->stats.gptc);
4933 device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4934 (long long)adapter->stats.tsctc);
4935 device_printf(dev, "TSO Contexts Failed = %lld\n",
4936 (long long)adapter->stats.tsctfc);
4939 /**********************************************************************
4941 * This routine provides a way to dump out the adapter EEPROM,
4942 * often a useful debug/service tool. It dumps only the first
4943 * 32 words, as the data that matters lies within that extent.
4945 **********************************************************************/
/*
 * igb_print_nvm_info: hex-dump the first 32 16-bit words of the adapter
 * EEPROM/NVM to the console, 8 words per row with an offset label.
 * NOTE(review): this excerpt elides the storage-class/return-type line
 * before 4947, the braces, the local declarations (i, j, row,
 * eeprom_data), and the line inside the if-block that resets j and
 * advances row (~line 4957) -- verify against the full file.
 */
4947 igb_print_nvm_info(struct adapter *adapter)
/* It's a bit crude, but it gets the job done */
4953 printf("\nInterface EEPROM Dump:\n");
4954 printf("Offset\n0x0000 ");
4955 for (i = 0, j = 0; i < 32; i++, j++) {
/* Every 8th word, start a new row with its offset label. */
4956 if (j == 8) { /* Make the offset block */
4958 printf("\n0x00%x0 ",row);
/* Read one word at offset i from the NVM, then print it. */
4960 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4961 printf("%04x ", eeprom_data);
/*
 * igb_sysctl_debug_info: sysctl handler. Writing a value to the node
 * triggers a debug dump: one path calls igb_print_debug_info(), the
 * other calls igb_print_nvm_info() for the EEPROM hex dump.
 * NOTE(review): this excerpt elides the local declarations (error,
 * result), the return after the newptr check, the dispatch logic
 * (presumably an if/switch on result) and the function braces -- verify
 * the control flow against the full file.
 */
4967 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4969 struct adapter *adapter;
/* Copy the user-supplied value in/out of the sysctl request. */
4974 error = sysctl_handle_int(oidp, &result, 0, req);
/* No new value written (read-only access) or copy failed: bail out. */
4976 if (error || !req->newptr)
/* arg1 was registered as the adapter softc pointer. */
4980 adapter = (struct adapter *)arg1;
4981 igb_print_debug_info(adapter);
4984 * This value will cause a hex dump of the
4985 * first 32 16-bit words of the EEPROM to
4989 adapter = (struct adapter *)arg1;
4990 igb_print_nvm_info(adapter);
/*
 * igb_sysctl_stats: sysctl handler. Writing a value to the node dumps
 * the hardware statistics counters via igb_print_hw_stats().
 * NOTE(review): this excerpt elides the local declarations (error,
 * result), the return after the newptr check, the trigger-value test
 * and the function braces -- verify against the full file.
 */
4998 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
5000 struct adapter *adapter;
/* Copy the user-supplied value in/out of the sysctl request. */
5005 error = sysctl_handle_int(oidp, &result, 0, req);
/* No new value written or copy failed: bail out. */
5007 if (error || !req->newptr)
/* arg1 was registered as the adapter softc pointer. */
5011 adapter = (struct adapter *)arg1;
5012 igb_print_hw_stats(adapter);
/*
 * igb_add_rx_process_limit: register a read/write integer sysctl node
 * under this device's tree that exposes the RX processing limit.
 *
 *   name        - sysctl leaf name
 *   description - sysctl description string
 *   limit       - pointer to the driver variable backing the node
 *   value       - value reported when *limit is NULL (SYSCTL_ADD_INT
 *                 arg2 convention)
 *
 * NOTE(review): the storage-class/return-type line before 5019 and the
 * function braces are elided in this excerpt.
 */
5019 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5020 const char *description, int *limit, int value)
/* Attach the node to the device's own sysctl context/tree. */
5023 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5024 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5025 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);