1 /******************************************************************************
3 Copyright (c) 2001-2009, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
36 #ifdef HAVE_KERNEL_OPTION_HEADERS
37 #include "opt_device_polling.h"
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #if __FreeBSD_version >= 800000
44 #include <sys/buf_ring.h>
47 #include <sys/endian.h>
48 #include <sys/kernel.h>
49 #include <sys/kthread.h>
50 #include <sys/malloc.h>
52 #include <sys/module.h>
54 #include <sys/socket.h>
55 #include <sys/sockio.h>
56 #include <sys/sysctl.h>
57 #include <sys/taskqueue.h>
58 #include <sys/eventhandler.h>
61 #include <machine/smp.h>
62 #include <machine/bus.h>
63 #include <machine/resource.h>
66 #include <sys/ieee1588.h>
70 #include <net/ethernet.h>
72 #include <net/if_arp.h>
73 #include <net/if_dl.h>
74 #include <net/if_media.h>
76 #include <net/if_types.h>
77 #include <net/if_vlan_var.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/in.h>
81 #include <netinet/if_ether.h>
82 #include <netinet/ip.h>
83 #include <netinet/ip6.h>
84 #include <netinet/tcp.h>
85 #include <netinet/tcp_lro.h>
86 #include <netinet/udp.h>
88 #include <machine/in_cksum.h>
89 #include <dev/pci/pcivar.h>
90 #include <dev/pci/pcireg.h>
92 #include "e1000_api.h"
93 #include "e1000_82575.h"
96 /*********************************************************************
97 * Set this to one to display debug statistics
98 *********************************************************************/
/* Global debug toggle; read elsewhere in the driver when printing stats. */
99 int igb_display_debug_stats = 0;
101 /*********************************************************************
103 *********************************************************************/
/* Driver version banner string (the comment text at original line 102
 * is not visible in this extract). */
104 char igb_driver_version[] = "version - 1.7.3";
107 /*********************************************************************
108 * PCI Device ID Table
110 * Used by probe to select devices to load on
111 * Last field stores an index into e1000_strings
112 * Last entry must be all 0s
114 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
115 *********************************************************************/
/* NOTE(review): the array's opening brace (orig. line 118) and the
 * all-zero terminator entry plus closing "};" (orig. lines 133-134)
 * are not visible in this extract -- confirm against the full file. */
117 static igb_vendor_info_t igb_vendor_info_array[] =
119 { 0x8086, E1000_DEV_ID_82575EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
120 { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
121 PCI_ANY_ID, PCI_ANY_ID, 0},
122 { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
123 PCI_ANY_ID, PCI_ANY_ID, 0},
124 { 0x8086, E1000_DEV_ID_82576, PCI_ANY_ID, PCI_ANY_ID, 0},
125 { 0x8086, E1000_DEV_ID_82576_NS, PCI_ANY_ID, PCI_ANY_ID, 0},
126 { 0x8086, E1000_DEV_ID_82576_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
127 { 0x8086, E1000_DEV_ID_82576_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
128 { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129 PCI_ANY_ID, PCI_ANY_ID, 0},
130 { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131 PCI_ANY_ID, PCI_ANY_ID, 0},
132 /* required last entry */
136 /*********************************************************************
137 * Table of branding strings for all supported NICs.
138 *********************************************************************/
/* Indexed by the String Index field of igb_vendor_info_array.
 * NOTE(review): closing "};" (orig. line 142) is not visible here. */
140 static char *igb_strings[] = {
141 "Intel(R) PRO/1000 Network Connection"
144 /*********************************************************************
145 * Function prototypes
146 *********************************************************************/
/* Device-method entry points (probe/attach/detach/power management). */
147 static int igb_probe(device_t);
148 static int igb_attach(device_t);
149 static int igb_detach(device_t);
150 static int igb_shutdown(device_t);
151 static int igb_suspend(device_t);
152 static int igb_resume(device_t);
/* Transmit entry points; the mq_* variants exist only on FreeBSD 8+. */
153 static void igb_start(struct ifnet *);
154 static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
155 #if __FreeBSD_version >= 800000
156 static int igb_mq_start(struct ifnet *, struct mbuf *);
157 static int igb_mq_start_locked(struct ifnet *,
158 struct tx_ring *, struct mbuf *);
159 static void igb_qflush(struct ifnet *);
/* NOTE(review): the matching #endif (orig. line 160) is not visible in
 * this extract -- confirm against the full file. */
161 static int igb_ioctl(struct ifnet *, u_long, caddr_t);
162 static void igb_watchdog(struct adapter *);
163 static void igb_init(void *);
164 static void igb_init_locked(struct adapter *);
165 static void igb_stop(void *);
166 static void igb_media_status(struct ifnet *, struct ifmediareq *);
167 static int igb_media_change(struct ifnet *);
168 static void igb_identify_hardware(struct adapter *);
169 static int igb_allocate_pci_resources(struct adapter *);
170 static int igb_allocate_msix(struct adapter *);
171 static int igb_allocate_legacy(struct adapter *);
172 static int igb_setup_msix(struct adapter *);
173 static void igb_free_pci_resources(struct adapter *);
174 static void igb_local_timer(void *);
175 static int igb_hardware_init(struct adapter *);
176 static void igb_setup_interface(device_t, struct adapter *);
177 static int igb_allocate_queues(struct adapter *);
178 static void igb_configure_queues(struct adapter *);
/* Transmit ring setup/teardown helpers. */
180 static int igb_allocate_transmit_buffers(struct tx_ring *);
181 static void igb_setup_transmit_structures(struct adapter *);
182 static void igb_setup_transmit_ring(struct tx_ring *);
183 static void igb_initialize_transmit_units(struct adapter *);
184 static void igb_free_transmit_structures(struct adapter *);
185 static void igb_free_transmit_buffers(struct tx_ring *);
/* Receive ring setup/teardown helpers. */
187 static int igb_allocate_receive_buffers(struct rx_ring *);
188 static int igb_setup_receive_structures(struct adapter *);
189 static int igb_setup_receive_ring(struct rx_ring *);
190 static void igb_initialize_receive_units(struct adapter *);
191 static void igb_free_receive_structures(struct adapter *);
192 static void igb_free_receive_buffers(struct rx_ring *);
194 static void igb_enable_intr(struct adapter *);
195 static void igb_disable_intr(struct adapter *);
196 static void igb_update_stats_counters(struct adapter *);
197 static bool igb_txeof(struct tx_ring *);
198 static bool igb_rxeof(struct rx_ring *, int);
199 static void igb_rx_checksum(u32, struct mbuf *, bool);
200 static int igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
201 static bool igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
202 static void igb_set_promisc(struct adapter *);
203 static void igb_disable_promisc(struct adapter *);
204 static void igb_set_multi(struct adapter *);
205 static void igb_print_hw_stats(struct adapter *);
206 static void igb_update_link_status(struct adapter *);
207 static int igb_get_buf(struct rx_ring *, int, u8);
/* VLAN hardware-filter event hooks. */
209 static void igb_register_vlan(void *, struct ifnet *, u16);
210 static void igb_unregister_vlan(void *, struct ifnet *, u16);
211 static void igb_setup_vlan_hw_support(struct adapter *);
213 static int igb_xmit(struct tx_ring *, struct mbuf **);
214 static int igb_dma_malloc(struct adapter *, bus_size_t,
215 struct igb_dma_alloc *, int);
216 static void igb_dma_free(struct adapter *, struct igb_dma_alloc *);
217 static void igb_print_debug_info(struct adapter *);
218 static void igb_print_nvm_info(struct adapter *);
219 static int igb_is_valid_ether_addr(u8 *);
220 static int igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
221 static int igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
222 /* Management and WOL Support */
223 static void igb_init_manageability(struct adapter *);
224 static void igb_release_manageability(struct adapter *);
225 static void igb_get_hw_control(struct adapter *);
226 static void igb_release_hw_control(struct adapter *);
227 static void igb_enable_wakeup(device_t);
/* Legacy/MSI interrupt filter and deferred taskqueue handlers. */
229 static int igb_irq_fast(void *);
230 static void igb_add_rx_process_limit(struct adapter *, const char *,
231 const char *, int *, int);
232 static void igb_handle_rxtx(void *context, int pending);
233 static void igb_handle_tx(void *context, int pending);
234 static void igb_handle_rx(void *context, int pending);
236 /* These are MSIX only irq handlers */
237 static void igb_msix_rx(void *);
238 static void igb_msix_tx(void *);
239 static void igb_msix_link(void *);
241 /* Adaptive Interrupt Moderation */
242 static void igb_update_aim(struct rx_ring *);
244 /*********************************************************************
245 * FreeBSD Device Interface Entry Points
246 *********************************************************************/
/* newbus method table wiring the static entry points above into the
 * device framework.
 * NOTE(review): the terminating {0, 0} entry and closing "};" of
 * igb_methods (orig. lines 256-257), and the closing "};" of igb_driver
 * (orig. line 261), are not visible in this extract. */
248 static device_method_t igb_methods[] = {
249 /* Device interface */
250 DEVMETHOD(device_probe, igb_probe),
251 DEVMETHOD(device_attach, igb_attach),
252 DEVMETHOD(device_detach, igb_detach),
253 DEVMETHOD(device_shutdown, igb_shutdown),
254 DEVMETHOD(device_suspend, igb_suspend),
255 DEVMETHOD(device_resume, igb_resume),
259 static driver_t igb_driver = {
260 "igb", igb_methods, sizeof(struct adapter),
263 static devclass_t igb_devclass;
/* Register the driver on the pci bus and declare its dependencies. */
264 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
265 MODULE_DEPEND(igb, pci, 1, 1, 1);
266 MODULE_DEPEND(igb, ether, 1, 1, 1);
268 /*********************************************************************
269 * Tunable default values.
270 *********************************************************************/
272 /* Descriptor defaults */
273 static int igb_rxd = IGB_DEFAULT_RXD;
274 static int igb_txd = IGB_DEFAULT_TXD;
275 TUNABLE_INT("hw.igb.rxd", &igb_rxd);
276 TUNABLE_INT("hw.igb.txd", &igb_txd);
/* NOTE(review): the comment block below lost its opening and closing
 * delimiters (orig. lines 278/288) in this extract. */
279 ** These parameters are used in Adaptive
280 ** Interrupt Moderation. The value is set
281 ** into EITR and controls the interrupt
282 ** frequency. A variable static scheme can
283 ** be created by changing the assigned value
284 ** of igb_ave_latency to the desired value,
285 ** and then set igb_enable_aim to FALSE.
286 ** This will result in all EITR registers
287 ** getting set to that value statically.
289 static int igb_enable_aim = TRUE;
290 TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
291 static int igb_low_latency = IGB_LOW_LATENCY;
292 TUNABLE_INT("hw.igb.low_latency", &igb_low_latency);
293 static int igb_ave_latency = IGB_AVE_LATENCY;
294 TUNABLE_INT("hw.igb.ave_latency", &igb_ave_latency);
295 static int igb_bulk_latency = IGB_BULK_LATENCY;
296 TUNABLE_INT("hw.igb.bulk_latency", &igb_bulk_latency);
/* Number of TX/RX queue pairs; 0 means auto-size from CPU count. */
299 ** This will autoconfigure based on the number
300 ** of CPUs if set to 0. Only a matched pair of
301 ** TX and RX rings are allowed.
303 static int igb_num_queues = 1;
304 TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
306 /* How many packets rxeof tries to clean at a time */
307 static int igb_rx_process_limit = 100;
308 TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
310 /* Flow control setting - default to FULL */
311 static int igb_fc_setting = e1000_fc_full;
312 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
/* Software copy of the VLAN filter table, reloaded after soft reset. */
315 ** Shadow VFTA table, this is needed because
316 ** the real filter table gets cleared during
317 ** a soft reset and the driver needs to be able
320 static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
323 /*********************************************************************
324 * Device identification routine
326 * igb_probe determines if the driver should be loaded on
327 * adapter based on PCI vendor/device id of the adapter.
329 * return BUS_PROBE_DEFAULT on success, positive on failure
330 *********************************************************************/
/* NOTE(review): this extract drops the "static int" line and opening
 * brace (orig. lines 332/334), the early-return for non-Intel vendors
 * (orig. lines 346-347), and the loop tail / ENXIO return / closing
 * brace (orig. lines 367-372) -- confirm against the full file. */
333 igb_probe(device_t dev)
335 char adapter_name[60];
336 uint16_t pci_vendor_id = 0;
337 uint16_t pci_device_id = 0;
338 uint16_t pci_subvendor_id = 0;
339 uint16_t pci_subdevice_id = 0;
340 igb_vendor_info_t *ent;
342 INIT_DEBUGOUT("igb_probe: begin");
344 pci_vendor_id = pci_get_vendor(dev);
345 if (pci_vendor_id != IGB_VENDOR_ID)
/* Gather the remaining PCI IDs used for table matching. */
348 pci_device_id = pci_get_device(dev);
349 pci_subvendor_id = pci_get_subvendor(dev);
350 pci_subdevice_id = pci_get_subdevice(dev);
/* Walk igb_vendor_info_array until the all-zero terminator entry. */
352 ent = igb_vendor_info_array;
353 while (ent->vendor_id != 0) {
354 if ((pci_vendor_id == ent->vendor_id) &&
355 (pci_device_id == ent->device_id) &&
357 ((pci_subvendor_id == ent->subvendor_id) ||
358 (ent->subvendor_id == PCI_ANY_ID)) &&
360 ((pci_subdevice_id == ent->subdevice_id) ||
361 (ent->subdevice_id == PCI_ANY_ID))) {
/* Build the description from the branding-string table. */
362 sprintf(adapter_name, "%s %s",
363 igb_strings[ent->index],
365 device_set_desc_copy(dev, adapter_name);
366 return (BUS_PROBE_DEFAULT);
374 /*********************************************************************
375 * Device initialization routine
377 * The attach entry point is called when the driver is being loaded.
378 * This routine identifies the type of hardware, allocates all resources
379 * and initializes the hardware.
381 * return 0 on success, positive on failure
382 *********************************************************************/
/* NOTE(review): many interior lines of igb_attach (error gotos, the
 * "err_*" labels preceding the cleanup tail, the final return, and the
 * closing brace) are missing from this extract -- the visible text is
 * not the complete function; confirm against the full file. */
385 igb_attach(device_t dev)
387 struct adapter *adapter;
391 INIT_DEBUGOUT("igb_attach: begin");
393 adapter = device_get_softc(dev);
394 adapter->dev = adapter->osdep.dev = dev;
395 IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
/* Per-device sysctl nodes: debug/stats handlers plus the moderation
 * tunables exported read-write. */
398 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
399 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
400 OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
401 igb_sysctl_debug_info, "I", "Debug Information");
403 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
404 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
405 OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
406 igb_sysctl_stats, "I", "Statistics");
408 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
409 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
410 OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
411 &igb_fc_setting, 0, "Flow Control");
413 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
414 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
415 OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
416 &igb_enable_aim, 1, "Interrupt Moderation");
418 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
419 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
420 OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
421 &igb_low_latency, 1, "Low Latency");
423 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
424 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
425 OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
426 &igb_ave_latency, 1, "Average Latency");
428 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
429 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
430 OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
431 &igb_bulk_latency, 1, "Bulk Latency");
433 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
435 /* Determine hardware and mac info */
436 igb_identify_hardware(adapter);
438 /* Setup PCI resources */
439 if (igb_allocate_pci_resources(adapter)) {
440 device_printf(dev, "Allocation of PCI resources failed\n");
445 /* Do Shared Code initialization */
446 if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
447 device_printf(dev, "Setup of Shared code failed\n");
452 e1000_get_bus_info(&adapter->hw);
454 /* Sysctls for limiting the amount of work done in the taskqueue */
455 igb_add_rx_process_limit(adapter, "rx_processing_limit",
456 "max number of rx packets to process", &adapter->rx_process_limit,
457 igb_rx_process_limit);
/* Fall back to defaults when the tunables are out of range or not a
 * multiple of the descriptor alignment. */
460 * Validate number of transmit and receive descriptors. It
461 * must not exceed hardware maximum, and must be multiple
462 * of E1000_DBA_ALIGN.
464 if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
465 (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
466 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
467 IGB_DEFAULT_TXD, igb_txd);
468 adapter->num_tx_desc = IGB_DEFAULT_TXD;
470 adapter->num_tx_desc = igb_txd;
471 if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
472 (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
473 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
474 IGB_DEFAULT_RXD, igb_rxd);
475 adapter->num_rx_desc = IGB_DEFAULT_RXD;
477 adapter->num_rx_desc = igb_rxd;
479 adapter->hw.mac.autoneg = DO_AUTO_NEG;
480 adapter->hw.phy.autoneg_wait_to_complete = FALSE;
481 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
/* Copper-specific PHY settings. */
484 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
485 adapter->hw.phy.mdix = AUTO_ALL_MODES;
486 adapter->hw.phy.disable_polarity_correction = FALSE;
487 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
491 * Set the frame limits assuming
492 * standard ethernet sized frames.
494 adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
495 adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
498 ** Allocate and Setup Queues
500 if (igb_allocate_queues(adapter)) {
506 ** Start from a known state, this is
507 ** important in reading the nvm and
510 e1000_reset_hw(&adapter->hw);
512 /* Make sure we have a good EEPROM before we read from it */
513 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
515 ** Some PCI-E parts fail the first check due to
516 ** the link being in sleep state, call it again,
517 ** if it fails a second time its a real issue.
519 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
521 "The EEPROM Checksum Is Not Valid\n");
528 ** Copy the permanent MAC address out of the EEPROM
530 if (e1000_read_mac_addr(&adapter->hw) < 0) {
531 device_printf(dev, "EEPROM read error while reading MAC"
536 /* Check its sanity */
537 if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
538 device_printf(dev, "Invalid MAC address\n");
543 /* Now Initialize the hardware */
544 if (igb_hardware_init(adapter)) {
545 device_printf(dev, "Unable to initialize the hardware\n");
551 ** Configure Interrupts
553 if (adapter->msix > 1) /* MSIX */
554 error = igb_allocate_msix(adapter);
555 else /* MSI or Legacy */
556 error = igb_allocate_legacy(adapter);
560 /* Setup OS specific network interface */
561 igb_setup_interface(dev, adapter);
/* IEEE 1588 timecounter setup: program the increment register and
 * zero the SYSTIM clock. */
565 ** Setup the timer: IEEE 1588 support
567 adapter->cycles.read = igb_read_clock;
568 adapter->cycles.mask = (u64)-1;
569 adapter->cycles.mult = 1;
570 adapter->cycles.shift = IGB_TSYNC_SHIFT;
571 E1000_WRITE_REG(&adapter->hw, E1000_TIMINCA, (1<<24) |
572 IGB_TSYNC_CYCLE_TIME * IGB_TSYNC_SHIFT);
573 E1000_WRITE_REG(&adapter->hw, E1000_SYSTIML, 0x00000000);
574 E1000_WRITE_REG(&adapter->hw, E1000_SYSTIMH, 0xFF800000);
576 // JFV - this is not complete yet
579 /* Initialize statistics */
580 igb_update_stats_counters(adapter);
582 adapter->hw.mac.get_link_status = 1;
583 igb_update_link_status(adapter);
585 /* Indicate SOL/IDER usage */
586 if (e1000_check_reset_block(&adapter->hw))
588 "PHY reset is blocked due to SOL/IDER session.\n");
590 /* Determine if we have to control management hardware */
591 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
596 /* APME bit in EEPROM is mapped to WUC.APME */
597 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
599 adapter->wol = E1000_WUFC_MAG;
601 /* Register for VLAN events */
602 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
603 igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
604 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
605 igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
607 /* Tell the stack that the interface is not active */
608 adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
610 INIT_DEBUGOUT("igb_attach: end");
/* Error-unwind tail: release queues, hardware control, PCI resources
 * and the core lock (the "err_*" labels themselves are not visible in
 * this extract). */
615 igb_free_transmit_structures(adapter);
616 igb_free_receive_structures(adapter);
617 igb_release_hw_control(adapter);
619 igb_free_pci_resources(adapter);
620 IGB_CORE_LOCK_DESTROY(adapter);
625 /*********************************************************************
626 * Device removal routine
628 * The detach entry point is called when the driver is being removed.
629 * This routine stops the adapter and deallocates all the resources
630 * that were allocated for driver operation.
632 * return 0 on success, positive on failure
633 *********************************************************************/
/* NOTE(review): the function's opening lines, the igb_stop() call
 * while holding the core lock, the WOL conditional around the WUC/WUFC
 * writes, and the final return/closing brace are not visible in this
 * extract -- confirm against the full file. */
636 igb_detach(device_t dev)
638 struct adapter *adapter = device_get_softc(dev);
639 struct ifnet *ifp = adapter->ifp;
641 INIT_DEBUGOUT("igb_detach: begin");
643 /* Make sure VLANS are not using driver */
644 if (adapter->ifp->if_vlantrunk != NULL) {
645 device_printf(dev,"Vlan in use, detach first\n");
/* Mark the softc so concurrent ioctls bail out during teardown. */
649 IGB_CORE_LOCK(adapter);
650 adapter->in_detach = 1;
652 IGB_CORE_UNLOCK(adapter);
654 e1000_phy_hw_reset(&adapter->hw);
656 /* Give control back to firmware */
657 igb_release_manageability(adapter);
658 igb_release_hw_control(adapter);
/* Arm wake-on-LAN: enable PME and program the wake filter. */
661 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
662 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
663 igb_enable_wakeup(dev);
666 /* Unregister VLAN events */
667 if (adapter->vlan_attach != NULL)
668 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
669 if (adapter->vlan_detach != NULL)
670 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
672 ether_ifdetach(adapter->ifp);
674 callout_drain(&adapter->timer);
676 igb_free_pci_resources(adapter);
677 bus_generic_detach(dev);
680 igb_free_transmit_structures(adapter);
681 igb_free_receive_structures(adapter);
683 IGB_CORE_LOCK_DESTROY(adapter);
688 /*********************************************************************
690 * Shutdown entry point
692 **********************************************************************/
/* Shutdown simply delegates to suspend: quiesce + arm wakeup.
 * NOTE(review): the "static int" line, braces, and surrounding lines
 * are not visible in this extract. */
695 igb_shutdown(device_t dev)
697 return igb_suspend(dev);
701 * Suspend/resume device methods.
/* Suspend: stop the adapter (igb_stop call at orig. line 710 is not
 * visible here), release manageability/hw control, arm wake-on-LAN,
 * then hand off to the generic bus suspend. */
704 igb_suspend(device_t dev)
706 struct adapter *adapter = device_get_softc(dev);
708 IGB_CORE_LOCK(adapter);
712 igb_release_manageability(adapter);
713 igb_release_hw_control(adapter);
/* Enable PME and program the wake filter before sleeping. */
716 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
717 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
718 igb_enable_wakeup(dev);
721 IGB_CORE_UNLOCK(adapter);
723 return bus_generic_suspend(dev);
/* Resume: re-init hardware/manageability and, if the interface was up
 * and running, restart transmit (the igb_start call at orig. line 738
 * is not visible in this extract). */
727 igb_resume(device_t dev)
729 struct adapter *adapter = device_get_softc(dev);
730 struct ifnet *ifp = adapter->ifp;
732 IGB_CORE_LOCK(adapter);
733 igb_init_locked(adapter);
734 igb_init_manageability(adapter);
736 if ((ifp->if_flags & IFF_UP) &&
737 (ifp->if_drv_flags & IFF_DRV_RUNNING))
740 IGB_CORE_UNLOCK(adapter);
742 return bus_generic_resume(dev);
746 /*********************************************************************
747 * Transmit entry point
749 * igb_start is called by the stack to initiate a transmit.
750 * The driver will remain in this routine as long as there are
751 * packets to transmit and transmit resources are available.
752 * In case resources are not available stack is notified and
753 * the packet is requeued.
754 **********************************************************************/
/* Caller must hold the TX ring lock (asserted below).
 * NOTE(review): the m_head declaration, the NULL-dequeue break, and
 * the loop/function closing braces are not visible in this extract. */
757 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
759 struct adapter *adapter = ifp->if_softc;
762 IGB_TX_LOCK_ASSERT(txr);
/* Bail unless the interface is RUNNING and not OACTIVE, and the link
 * is up. */
764 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
767 if (!adapter->link_active)
770 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
772 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
776 * Encapsulation can modify our pointer, and or make it
777 * NULL on failure. In that event, we can't requeue.
779 if (igb_xmit(txr, &m_head)) {
/* On xmit failure: mark OACTIVE and push the mbuf back to the head
 * of the send queue. */
782 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
783 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
787 /* Send a copy of the frame to the BPF listener */
788 ETHER_BPF_MTAP(ifp, m_head);
790 /* Set timeout in case hardware has problems transmitting. */
791 txr->watchdog_timer = IGB_TX_TIMEOUT;
796 * Legacy TX driver routine, called from the
797 * stack, always uses tx[0], and spins for it.
798 * Should not be used with multiqueue tx
/* Thin wrapper: take the first TX ring's lock (orig. line 807 not
 * visible here) and run the locked start routine. */
801 igb_start(struct ifnet *ifp)
803 struct adapter *adapter = ifp->if_softc;
804 struct tx_ring *txr = adapter->tx_rings;
806 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
808 igb_start_locked(txr, ifp);
814 #if __FreeBSD_version >= 800000
816 ** Multiqueue Transmit driver
/* Pick a TX ring from the mbuf flow id (falls back to a default queue
 * index whose assignment, orig. line ~828, is partly missing here);
 * try the ring lock, transmit directly if we get it, otherwise just
 * enqueue on the ring's buf_ring. */
820 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
822 struct adapter *adapter = ifp->if_softc;
826 /* Which queue to use */
827 if ((m->m_flags & M_FLOWID) != 0)
828 i = m->m_pkthdr.flowid % adapter->num_queues;
829 txr = &adapter->tx_rings[i];
831 if (IGB_TX_TRYLOCK(txr)) {
832 err = igb_mq_start_locked(ifp, txr, m);
835 err = drbr_enqueue(ifp, txr->br, m);
/* Locked multiqueue transmit: fast-path a single frame when the
 * buf_ring is empty, otherwise enqueue and drain the ring.
 * NOTE(review): several interior lines (returns, braces, the drain
 * loop head) are missing from this extract. */
841 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
843 struct adapter *adapter = txr->adapter;
847 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
848 err = drbr_enqueue(ifp, txr->br, m);
852 if (m == NULL) /* Called by tasklet */
855 /* If nothing queued go right to xmit */
856 if (drbr_empty(ifp, txr->br)) {
857 if (igb_xmit(txr, &m)) {
858 if (m && (err = drbr_enqueue(ifp, txr->br, m)) != 0)
861 /* Success, update stats */
862 drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags);
863 /* Send a copy of the frame to the BPF listener */
864 ETHER_BPF_MTAP(ifp, m);
865 /* Set the watchdog */
866 txr->watchdog_timer = IGB_TX_TIMEOUT;
869 } else if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
873 if (drbr_empty(ifp, txr->br))
876 /* Process the queue */
878 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
880 next = drbr_dequeue(ifp, txr->br);
883 if (igb_xmit(txr, &next))
885 ETHER_BPF_MTAP(ifp, next);
886 /* Set the watchdog */
887 txr->watchdog_timer = IGB_TX_TIMEOUT;
/* Throttle: mark OACTIVE when descriptors run low. */
890 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD)
891 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
897 ** Flush all ring buffers
/* Drain every queue's buf_ring, freeing the mbufs (m_freef call at
 * orig. line ~909 not visible here), then flush the ifnet send queue. */
900 igb_qflush(struct ifnet *ifp)
902 struct adapter *adapter = ifp->if_softc;
903 struct tx_ring *txr = adapter->tx_rings;
906 for (int i = 0; i < adapter->num_queues; i++, txr++) {
908 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
914 #endif /* __FreeBSD_version >= 800000 */
916 /*********************************************************************
919 * igb_ioctl is called when the user wants to configure the
922 * return 0 on success, positive on failure
923 **********************************************************************/
/* NOTE(review): this extract drops the switch(command) statement, the
 * "case SIOCxxx:" labels, the break statements, local declarations
 * (error, mask, max_frame_size, reinit), and the closing brace -- the
 * visible bodies below correspond to the SIOCSIFADDR, SIOCSIFMTU,
 * SIOCSIFFLAGS, SIOC(ADD|DEL)MULTI, SIOCxIFMEDIA, SIOCSIFCAP and
 * timestamp cases; confirm against the full file. */
926 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
928 struct adapter *adapter = ifp->if_softc;
929 struct ifreq *ifr = (struct ifreq *)data;
931 struct ifaddr *ifa = (struct ifaddr *)data;
/* Refuse configuration while detach is tearing the device down. */
935 if (adapter->in_detach)
/* SIOCSIFADDR: bring the interface up lazily for AF_INET. */
941 if (ifa->ifa_addr->sa_family == AF_INET) {
944 * Since resetting hardware takes a very long time
945 * and results in link renegotiation we only
946 * initialize the hardware only when it is absolutely
949 ifp->if_flags |= IFF_UP;
950 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
951 IGB_CORE_LOCK(adapter);
952 igb_init_locked(adapter);
953 IGB_CORE_UNLOCK(adapter);
955 arp_ifinit(ifp, ifa);
957 /* Non-INET addresses fall through to the generic handler. */
958 error = ether_ioctl(ifp, command, data);
/* SIOCSIFMTU: validate against the 9234-byte jumbo limit, then
 * re-init with the new frame size. */
964 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
966 IGB_CORE_LOCK(adapter);
967 max_frame_size = 9234;
968 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
970 IGB_CORE_UNLOCK(adapter);
975 ifp->if_mtu = ifr->ifr_mtu;
976 adapter->max_frame_size =
977 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
978 igb_init_locked(adapter);
979 IGB_CORE_UNLOCK(adapter);
/* SIOCSIFFLAGS: reprogram promisc/allmulti on flag changes without a
 * full re-init when already running. */
983 IOCTL_DEBUGOUT("ioctl rcv'd:\
984 SIOCSIFFLAGS (Set Interface Flags)");
985 IGB_CORE_LOCK(adapter);
986 if (ifp->if_flags & IFF_UP) {
987 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
988 if ((ifp->if_flags ^ adapter->if_flags) &
989 (IFF_PROMISC | IFF_ALLMULTI)) {
990 igb_disable_promisc(adapter);
991 igb_set_promisc(adapter);
994 igb_init_locked(adapter);
996 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
998 adapter->if_flags = ifp->if_flags;
999 IGB_CORE_UNLOCK(adapter);
/* SIOC(ADD|DEL)MULTI: reload the multicast filter with interrupts
 * masked. */
1003 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1004 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1005 IGB_CORE_LOCK(adapter);
1006 igb_disable_intr(adapter);
1007 igb_set_multi(adapter);
1008 igb_enable_intr(adapter);
1009 IGB_CORE_UNLOCK(adapter);
/* SIOCxIFMEDIA: media changes are refused while SOL/IDER holds the
 * PHY in reset-block. */
1013 /* Check SOL/IDER usage */
1014 IGB_CORE_LOCK(adapter);
1015 if (e1000_check_reset_block(&adapter->hw)) {
1016 IGB_CORE_UNLOCK(adapter);
1017 device_printf(adapter->dev, "Media change is"
1018 " blocked due to SOL/IDER session.\n");
1021 IGB_CORE_UNLOCK(adapter);
1023 IOCTL_DEBUGOUT("ioctl rcv'd: \
1024 SIOCxIFMEDIA (Get/Set Interface Media)");
1025 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
/* SIOCSIFCAP: toggle offload capabilities; a re-init follows if the
 * interface is running and a toggled capability requires it. */
1031 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1033 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1034 if (mask & IFCAP_HWCSUM) {
1035 ifp->if_capenable ^= IFCAP_HWCSUM;
1038 if (mask & IFCAP_TSO4) {
1039 ifp->if_capenable ^= IFCAP_TSO4;
1042 if (mask & IFCAP_VLAN_HWTAGGING) {
1043 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1046 if (mask & IFCAP_LRO) {
1047 ifp->if_capenable ^= IFCAP_LRO;
1050 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1052 VLAN_CAPABILITIES(ifp);
1058 ** IOCTL support for Precision Time (IEEE 1588) Support
1061 error = igb_hwtstamp_ioctl(adapter, ifp);
/* default: hand anything unrecognized to the generic ether layer. */
1066 error = ether_ioctl(ifp, command, data);
1073 /*********************************************************************
1076 * This routine is called from the local timer every second.
1077 * As long as transmit descriptors are being cleaned the value
1078 * is non-zero and we do nothing. Reaching 0 indicates a tx hang
1079 * and we then reset the device.
1081 **********************************************************************/
/* Caller must hold the core lock (asserted below).
 * NOTE(review): interior lines -- the tx_hang = TRUE assignment inside
 * the scan loop, returns, and closing braces -- are not visible in
 * this extract; confirm against the full file. */
1084 igb_watchdog(struct adapter *adapter)
1086 struct tx_ring *txr = adapter->tx_rings;
1087 bool tx_hang = FALSE;
1089 IGB_CORE_LOCK_ASSERT(adapter);
1092 ** The timer is set to 5 every time start() queues a packet.
1093 ** Then txeof keeps resetting it as long as it cleans at
1094 ** least one descriptor.
1095 ** Finally, anytime all descriptors are clean the timer is
1098 ** With TX Multiqueue we need to check every queue's timer,
1099 ** if any time out we do the reset.
/* Scan every queue: a timer that is already zero or still counting
 * down means that queue has not (yet) hung. */
1101 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1103 if (txr->watchdog_timer == 0 ||
1104 (--txr->watchdog_timer)) {
1113 if (tx_hang == FALSE)
1116 /* If we are in this routine because of pause frames, then
1117 * don't reset the hardware.
/* TXOFF set means the link partner paused us; rearm the timers and
 * skip the reset. */
1119 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
1120 E1000_STATUS_TXOFF) {
1121 txr = adapter->tx_rings; /* reset pointer */
1122 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1124 txr->watchdog_timer = IGB_TX_TIMEOUT;
1130 if (e1000_check_for_link(&adapter->hw) == 0)
1131 device_printf(adapter->dev, "watchdog timeout -- resetting\n");
/* Dump per-queue head/tail and cleanup state for post-mortem. */
1133 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1134 device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
1135 i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
1136 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
1137 device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
1138 " Next Desc to Clean = %d\n", i, txr->tx_avail,
1139 txr->next_to_clean);
/* Declare the hang: mark not-running, count the event, re-init. */
1142 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1143 adapter->watchdog_events++;
1145 igb_init_locked(adapter);
1148 /*********************************************************************
1151 * This routine is used in two ways. It is used by the stack as
1152 * init entry point in network interface structure. It is also used
1153 * by the driver as a hw/sw initialization routine to get to a
1156 * return 0 on success, positive on failure
1157 **********************************************************************/
/*
 * igb_init_locked:
 * Hardware/software (re)initialization, with the core lock held.  Used
 * both as the stack's if_init entry (via igb_init) and internally after
 * a watchdog reset.  Visible steps: PBA sizing (82575 only), MAC address
 * refresh (LAA support), igb_hardware_init(), offload capability setup,
 * TX/RX ring setup, multicast/promisc restore, timer restart, queue
 * routing for MSI-X, VLAN offload, EITR moderation defaults, and finally
 * interrupt enable.
 */
1160 igb_init_locked(struct adapter *adapter)
1162 struct rx_ring *rxr = adapter->rx_rings;
1163 struct tx_ring *txr = adapter->tx_rings;
1164 struct ifnet *ifp = adapter->ifp;
1165 device_t dev = adapter->dev;
1168 INIT_DEBUGOUT("igb_init: begin");
1170 IGB_CORE_LOCK_ASSERT(adapter);
1175 * Packet Buffer Allocation (PBA)
1176 * Writing PBA sets the receive portion of the buffer
1177 * the remainder is used for the transmit buffer.
1179 if (adapter->hw.mac.type == e1000_82575) {
1180 INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
1181 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1182 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1185 /* Get the latest mac address, User can use a LAA */
1186 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1189 /* Put the address into the Receive Address Array */
1190 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1192 /* Initialize the hardware */
1193 if (igb_hardware_init(adapter)) {
1194 device_printf(dev, "Unable to initialize the hardware\n");
1197 igb_update_link_status(adapter);
1199 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1201 /* Set hardware offload abilities */
1202 ifp->if_hwassist = 0;
1203 if (ifp->if_capenable & IFCAP_TXCSUM) {
1204 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1205 #if __FreeBSD_version >= 800000
/* SCTP checksum offload only exists on 82576 hardware. */
1206 if (adapter->hw.mac.type == e1000_82576)
1207 ifp->if_hwassist |= CSUM_SCTP;
1211 if (ifp->if_capenable & IFCAP_TSO4)
1212 ifp->if_hwassist |= CSUM_TSO;
1214 /* Configure for OS presence */
1215 igb_init_manageability(adapter);
1217 /* Prepare transmit descriptors and buffers */
1218 igb_setup_transmit_structures(adapter);
1219 igb_initialize_transmit_units(adapter);
1221 /* Setup Multicast table */
1222 igb_set_multi(adapter);
1225 ** Figure out the desired mbuf pool
1226 ** for doing jumbo/packetsplit
1228 if (ifp->if_mtu > ETHERMTU)
1229 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1231 adapter->rx_mbuf_sz = MCLBYTES;
1233 /* Prepare receive descriptors and buffers */
1234 if (igb_setup_receive_structures(adapter)) {
1235 device_printf(dev, "Could not setup receive structures\n");
1239 igb_initialize_receive_units(adapter);
1241 /* Don't lose promiscuous settings */
1242 igb_set_promisc(adapter);
1244 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1245 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1247 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1248 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1250 if (adapter->msix > 1) /* Set up queue routing */
1251 igb_configure_queues(adapter);
1253 /* Set up VLAN tag offload and filter */
1254 igb_setup_vlan_hw_support(adapter);
1256 /* Set default RX interrupt moderation */
1257 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1258 E1000_WRITE_REG(&adapter->hw,
1259 E1000_EITR(rxr->msix), igb_ave_latency);
1260 rxr->eitr_setting = igb_ave_latency;
1263 /* Set TX interrupt rate & reset TX watchdog */
1264 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1265 E1000_WRITE_REG(&adapter->hw,
1266 E1000_EITR(txr->msix), igb_ave_latency);
1267 txr->watchdog_timer = FALSE;
1271 /* this clears any pending interrupts */
1272 E1000_READ_REG(&adapter->hw, E1000_ICR);
1273 igb_enable_intr(adapter);
/* Fire a software link-status-change interrupt to kick things off. */
1274 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1277 /* Don't reset the phy next time init gets called */
1278 adapter->hw.phy.reset_disable = TRUE;
/*
 * Body of igb_init (the function signature line is not present in this
 * listing): thin locked wrapper around igb_init_locked(), used as the
 * ifnet if_init entry point.
 */
1284 struct adapter *adapter = arg;
1286 IGB_CORE_LOCK(adapter);
1287 igb_init_locked(adapter);
1288 IGB_CORE_UNLOCK(adapter);
/*
 * igb_handle_rxtx:
 * Taskqueue handler for the legacy/MSI deferred path (queued by
 * igb_irq_fast).  Cleans RX (re-queuing itself if more work remains),
 * restarts TX if the send queue is non-empty, then re-enables interrupts
 * that igb_irq_fast masked.  The `pending` taskqueue argument is unused.
 */
1293 igb_handle_rxtx(void *context, int pending)
1295 struct adapter *adapter = context;
1296 struct tx_ring *txr = adapter->tx_rings;
1297 struct rx_ring *rxr = adapter->rx_rings;
1302 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1303 if (igb_rxeof(rxr, adapter->rx_process_limit))
1304 taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1308 #if __FreeBSD_version >= 800000
/* FreeBSD 8+: multiqueue buf_ring transmit path. */
1309 if (!drbr_empty(ifp, txr->br))
1310 igb_mq_start_locked(ifp, txr, NULL);
1312 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1313 igb_start_locked(txr, ifp);
1318 igb_enable_intr(adapter);
/*
 * igb_handle_rx:
 * Per-RX-ring taskqueue handler (MSI-X path).  Cleans up to
 * rx_process_limit descriptors; if igb_rxeof reports more work, it
 * re-queues itself rather than looping indefinitely in the task.
 */
1322 igb_handle_rx(void *context, int pending)
1324 struct rx_ring *rxr = context;
1325 struct adapter *adapter = rxr->adapter;
1326 struct ifnet *ifp = adapter->ifp;
1328 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1329 if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
1330 /* More to clean, schedule another task */
1331 taskqueue_enqueue(adapter->tq, &rxr->rx_task);
/*
 * igb_handle_tx:
 * Per-TX-ring taskqueue handler (MSI-X path).  If the interface is
 * running, restarts transmission from the buf_ring (FreeBSD 8+) or the
 * legacy if_snd queue.
 */
1336 igb_handle_tx(void *context, int pending)
1338 struct tx_ring *txr = context;
1339 struct adapter *adapter = txr->adapter;
1340 struct ifnet *ifp = adapter->ifp;
1342 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1345 #if __FreeBSD_version >= 800000
1346 if (!drbr_empty(ifp, txr->br))
1347 igb_mq_start_locked(ifp, txr, NULL);
1349 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1350 igb_start_locked(txr, ifp);
1357 /*********************************************************************
1359 * MSI/Legacy Deferred
1360 * Interrupt Service routine
1362 *********************************************************************/
/*
 * igb_irq_fast:
 * Fast interrupt filter for the legacy/MSI case.  Returns FILTER_STRAY
 * for interrupts that are not ours (all-ones read, or INT_ASSERTED
 * clear); otherwise masks interrupts, defers RX/TX work to the
 * rxtx taskqueue, handles link-status and RX-overrun causes inline, and
 * returns FILTER_HANDLED.
 */
1364 igb_irq_fast(void *arg)
1366 struct adapter *adapter = arg;
1370 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
/* All-ones means the device is gone (e.g. hot-unplugged). */
1373 if (reg_icr == 0xffffffff)
1374 return FILTER_STRAY;
1376 /* Definitely not our interrupt. */
1378 return FILTER_STRAY;
1380 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1381 return FILTER_STRAY;
1384 * Mask interrupts until the taskqueue is finished running. This is
1385 * cheap, just assume that it is needed. This also works around the
1386 * MSI message reordering errata on certain systems.
1388 igb_disable_intr(adapter);
1389 taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1391 /* Link status change */
1392 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1393 adapter->hw.mac.get_link_status = 1;
1394 igb_update_link_status(adapter);
1397 if (reg_icr & E1000_ICR_RXO)
1398 adapter->rx_overruns++;
1399 return FILTER_HANDLED;
1403 /*********************************************************************
1405 * MSIX TX Interrupt Service routine
1407 **********************************************************************/
/*
 * igb_msix_tx:
 * MSI-X TX interrupt handler for one ring.  Runs igb_txeof up to
 * IGB_MAX_LOOP times while work remains, defers any leftover cleanup to
 * the ring's taskqueue, then re-arms this ring's EIMS bit.
 */
1409 igb_msix_tx(void *arg)
1411 struct tx_ring *txr = arg;
1412 struct adapter *adapter = txr->adapter;
1413 u32 loop = IGB_MAX_LOOP;
1420 more = igb_txeof(txr);
1421 } while (loop-- && more);
1425 /* Schedule a clean task */
1426 taskqueue_enqueue(adapter->tq, &txr->tx_task);
1428 /* Reenable this interrupt */
1429 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
1433 /*********************************************************************
1435 * MSIX RX Interrupt Service routine
1437 **********************************************************************/
/*
 * igb_msix_rx:
 * MSI-X RX interrupt handler for one ring.  Bounded igb_rxeof loop,
 * optional adaptive-moderation update (igb_update_aim, gated by the
 * igb_enable_aim tunable), deferred follow-up clean via taskqueue, then
 * re-arms this ring's EIMS bit.
 */
1440 igb_msix_rx(void *arg)
1442 struct rx_ring *rxr = arg;
1443 struct adapter *adapter = rxr->adapter;
1444 u32 loop = IGB_MAX_LOOP;
1449 more = igb_rxeof(rxr, adapter->rx_process_limit);
1450 } while (loop-- && more);
1452 /* Update interrupt rate */
1453 if (igb_enable_aim == TRUE)
1454 igb_update_aim(rxr);
1456 /* Schedule another clean */
1457 taskqueue_enqueue(adapter->tq, &rxr->rx_task);
1459 /* Reenable this interrupt */
1460 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
1465 /*********************************************************************
1467 * MSIX Link Interrupt Service routine
1469 **********************************************************************/
/*
 * igb_msix_link:
 * MSI-X link interrupt handler.  Counts the IRQ, reads ICR, and on an
 * LSC (link status change) cause refreshes the link state; always
 * re-arms the LSC mask and the link EIMS vector before returning.
 */
1472 igb_msix_link(void *arg)
1474 struct adapter *adapter = arg;
1477 ++adapter->link_irq;
1478 icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1479 if (!(icr & E1000_ICR_LSC))
1481 adapter->hw.mac.get_link_status = 1;
1482 igb_update_link_status(adapter);
1486 E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1487 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1493 ** Routine to adjust the RX EITR value based on traffic,
1494 ** its a simple three state model, but seems to help.
1496 ** Note that the three EITR values are tuneable using
1497 ** sysctl in real time. The feature can be effectively
1498 ** nullified by setting them equal.
/* Byte-count thresholds (per interval) for the three-state moderation
 * model below: low <-> average at AVE_THRESHOLD, average <-> bulk at
 * BULK_THRESHOLD. */
1500 #define BULK_THRESHOLD 10000
1501 #define AVE_THRESHOLD 1600
/*
 * igb_update_aim:
 * Adaptive interrupt moderation for one RX ring.  Moves eitr_setting
 * between igb_low_latency / igb_ave_latency / igb_bulk_latency based on
 * rxr->bytes seen since the last update, and writes the new EITR value
 * only when the state actually changes.  A zero byte count (idle) leaves
 * the setting untouched.
 */
1504 igb_update_aim(struct rx_ring *rxr)
1506 struct adapter *adapter = rxr->adapter;
1509 /* Update interrupt moderation based on traffic */
1510 olditr = rxr->eitr_setting;
1513 /* Idle, don't change setting */
1514 if (rxr->bytes == 0)
1517 if (olditr == igb_low_latency) {
1518 if (rxr->bytes > AVE_THRESHOLD)
1519 newitr = igb_ave_latency;
1520 } else if (olditr == igb_ave_latency) {
1521 if (rxr->bytes < AVE_THRESHOLD)
1522 newitr = igb_low_latency;
1523 else if (rxr->bytes > BULK_THRESHOLD)
1524 newitr = igb_bulk_latency;
1525 } else if (olditr == igb_bulk_latency) {
1526 if (rxr->bytes < BULK_THRESHOLD)
1527 newitr = igb_ave_latency;
1530 if (olditr != newitr) {
1531 /* Change interrupt rate */
1532 rxr->eitr_setting = newitr;
/* NOTE(review): value is written to both halves of EITR; the reason is
 * not visible here — presumably a hardware register-layout requirement. */
1533 E1000_WRITE_REG(&adapter->hw, E1000_EITR(rxr->me),
1534 newitr | (newitr << 16));
1542 /*********************************************************************
1544 * Media Ioctl callback
1546 * This routine is called whenever the user queries the status of
1547 * the interface using ifconfig.
1549 **********************************************************************/
/*
 * igb_media_status:
 * ifmedia status callback (ifconfig queries).  Refreshes link state
 * under the core lock, then reports media type (fiber/serdes are mapped
 * to 1000_SX full-duplex), link speed, and duplex in *ifmr.  Returns
 * early with only IFM_AVALID set when there is no active link.
 */
1551 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1553 struct adapter *adapter = ifp->if_softc;
1554 u_char fiber_type = IFM_1000_SX;
1556 INIT_DEBUGOUT("igb_media_status: begin");
1558 IGB_CORE_LOCK(adapter);
1559 igb_update_link_status(adapter);
1561 ifmr->ifm_status = IFM_AVALID;
1562 ifmr->ifm_active = IFM_ETHER;
1564 if (!adapter->link_active) {
1565 IGB_CORE_UNLOCK(adapter);
1569 ifmr->ifm_status |= IFM_ACTIVE;
1571 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1572 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1573 ifmr->ifm_active |= fiber_type | IFM_FDX;
/* Copper: map link_speed to the matching IFM subtype. */
1575 switch (adapter->link_speed) {
1577 ifmr->ifm_active |= IFM_10_T;
1580 ifmr->ifm_active |= IFM_100_TX;
1583 ifmr->ifm_active |= IFM_1000_T;
1586 if (adapter->link_duplex == FULL_DUPLEX)
1587 ifmr->ifm_active |= IFM_FDX;
1589 ifmr->ifm_active |= IFM_HDX;
1591 IGB_CORE_UNLOCK(adapter);
1594 /*********************************************************************
1596 * Media Ioctl callback
1598 * This routine is called when the user changes speed/duplex using
1599 * media/mediopt option with ifconfig.
1601 **********************************************************************/
/*
 * igb_media_change:
 * ifmedia change callback (ifconfig media/mediaopt).  Validates the
 * request is Ethernet, translates the requested subtype into
 * autoneg/forced speed-duplex settings on the shared-code hw struct,
 * then re-runs igb_init_locked() with phy reset re-enabled so the new
 * settings take effect.
 */
1603 igb_media_change(struct ifnet *ifp)
1605 struct adapter *adapter = ifp->if_softc;
1606 struct ifmedia *ifm = &adapter->media;
1608 INIT_DEBUGOUT("igb_media_change: begin");
1610 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1613 IGB_CORE_LOCK(adapter);
1614 switch (IFM_SUBTYPE(ifm->ifm_media)) {
/* Auto: advertise everything. */
1616 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1617 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
/* 1000baseT: gigabit requires autoneg, full-duplex only. */
1622 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1623 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
/* 100baseTX: forced speed, duplex from the GMASK bits. */
1626 adapter->hw.mac.autoneg = FALSE;
1627 adapter->hw.phy.autoneg_advertised = 0;
1628 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1629 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1631 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
/* 10baseT: forced speed, duplex from the GMASK bits. */
1634 adapter->hw.mac.autoneg = FALSE;
1635 adapter->hw.phy.autoneg_advertised = 0;
1636 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1637 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1639 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1642 device_printf(adapter->dev, "Unsupported media type\n");
1645 /* As the speed/duplex settings my have changed we need to
1648 adapter->hw.phy.reset_disable = FALSE;
1650 igb_init_locked(adapter);
1651 IGB_CORE_UNLOCK(adapter);
1657 /*********************************************************************
1659 * This routine maps the mbufs to Advanced TX descriptors.
1660 * used by the 82575 adapter.
1662 **********************************************************************/
/*
 * igb_xmit:
 * Maps one mbuf chain onto Advanced TX descriptors for the given ring.
 * Visible flow: descriptor-availability checks, DMA map (with one
 * m_defrag retry on EFBIG), optional context descriptor for
 * TSO/checksum/VLAN offload, per-segment descriptor fill, EOP|RS on the
 * last descriptor, and a TDT write to hand the frame to hardware.
 * On any failure the mbuf ownership/cleanup follows the (partially
 * elided) error paths; *m_headp may be replaced by m_defrag.
 */
1665 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1667 struct adapter *adapter = txr->adapter;
1668 bus_dma_segment_t segs[IGB_MAX_SCATTER];
1670 struct igb_tx_buffer *tx_buffer, *tx_buffer_mapped;
1671 union e1000_adv_tx_desc *txd = NULL;
1672 struct mbuf *m_head;
1673 u32 olinfo_status = 0, cmd_type_len = 0;
1674 int nsegs, i, j, error, first, last = 0;
1680 /* Set basic descriptor constants */
1681 cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1682 cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1683 if (m_head->m_flags & M_VLANTAG)
1684 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1687 * Force a cleanup if number of TX descriptors
1688 * available hits the threshold
1690 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1692 /* Now do we at least have a minimal? */
1693 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1694 txr->no_desc_avail++;
1700 * Map the packet for DMA.
1702 * Capture the first descriptor index,
1703 * this descriptor will have the index
1704 * of the EOP which is the only one that
1705 * now gets a DONE bit writeback.
1707 first = txr->next_avail_desc;
1708 tx_buffer = &txr->tx_buffers[first];
1709 tx_buffer_mapped = tx_buffer;
1710 map = tx_buffer->map;
1712 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1713 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
/* EFBIG: too many scatter segments — defragment once and retry. */
1715 if (error == EFBIG) {
1718 m = m_defrag(*m_headp, M_DONTWAIT);
1720 adapter->mbuf_defrag_failed++;
1728 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1729 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1731 if (error == ENOMEM) {
1732 adapter->no_tx_dma_setup++;
1734 } else if (error != 0) {
1735 adapter->no_tx_dma_setup++;
1740 } else if (error == ENOMEM) {
1741 adapter->no_tx_dma_setup++;
1743 } else if (error != 0) {
1744 adapter->no_tx_dma_setup++;
1750 /* Check again to be sure we have enough descriptors */
1751 if (nsegs > (txr->tx_avail - 2)) {
1752 txr->no_desc_avail++;
1753 bus_dmamap_unload(txr->txtag, map);
1759 * Set up the context descriptor:
1760 * used when any hardware offload is done.
1761 * This includes CSUM, VLAN, and TSO. It
1762 * will use the first descriptor.
1764 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1765 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1766 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1767 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1768 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1771 } else if (igb_tx_ctx_setup(txr, m_head))
1772 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1775 /* This is changing soon to an mtag detection */
/* NOTE(review): the next line is placeholder pseudocode, not valid C.
 * In the upstream driver it sits inside disabled/conditional timesync
 * code; the surrounding #ifdef is not visible in this listing — confirm
 * against the full file before touching it. */
1776 if (we detect this mbuf has a TSTAMP mtag)
1777 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
1779 /* Calculate payload length */
1780 olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1781 << E1000_ADVTXD_PAYLEN_SHIFT);
1783 /* Set up our transmit descriptors */
1784 i = txr->next_avail_desc;
1785 for (j = 0; j < nsegs; j++) {
1787 bus_addr_t seg_addr;
1789 tx_buffer = &txr->tx_buffers[i];
1790 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1791 seg_addr = segs[j].ds_addr;
1792 seg_len = segs[j].ds_len;
1794 txd->read.buffer_addr = htole64(seg_addr);
1795 txd->read.cmd_type_len = htole32(
1796 adapter->txd_cmd | cmd_type_len | seg_len);
1797 txd->read.olinfo_status = htole32(olinfo_status);
/* Ring wrap-around on the descriptor index. */
1799 if (++i == adapter->num_tx_desc)
1801 tx_buffer->m_head = NULL;
1802 tx_buffer->next_eop = -1;
1805 txr->next_avail_desc = i;
1806 txr->tx_avail -= nsegs;
1808 tx_buffer->m_head = m_head;
/* Swap maps so the mbuf stays bound to the map actually loaded above. */
1809 tx_buffer_mapped->map = tx_buffer->map;
1810 tx_buffer->map = map;
1811 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1814 * Last Descriptor of Packet
1815 * needs End Of Packet (EOP)
1816 * and Report Status (RS)
1818 txd->read.cmd_type_len |=
1819 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1821 * Keep track in the first buffer which
1822 * descriptor will be written back
1824 tx_buffer = &txr->tx_buffers[first];
1825 tx_buffer->next_eop = last;
1828 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1829 * that this frame is available to transmit.
1831 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1832 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1833 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
/*
 * igb_set_promisc:
 * Mirrors the interface's IFF_PROMISC / IFF_ALLMULTI flags into the
 * RCTL unicast/multicast-promiscuous bits.  If neither flag is set,
 * RCTL is left unchanged (clearing is done by igb_disable_promisc).
 */
1841 igb_set_promisc(struct adapter *adapter)
1843 struct ifnet *ifp = adapter->ifp;
1846 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1848 if (ifp->if_flags & IFF_PROMISC) {
1849 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1850 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1851 } else if (ifp->if_flags & IFF_ALLMULTI) {
1852 reg_rctl |= E1000_RCTL_MPE;
1853 reg_rctl &= ~E1000_RCTL_UPE;
1854 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
/*
 * igb_disable_promisc:
 * Clears both unicast- and multicast-promiscuous bits in RCTL.
 */
1859 igb_disable_promisc(struct adapter *adapter)
1863 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1865 reg_rctl &= (~E1000_RCTL_UPE);
1866 reg_rctl &= (~E1000_RCTL_MPE);
1867 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1871 /*********************************************************************
1874 * This routine is called whenever multicast address list is updated.
1876 **********************************************************************/
/*
 * igb_set_multi:
 * Rebuilds the hardware multicast filter from the interface's multicast
 * address list (walked under if_maddr_rlock).  If the list overflows
 * MAX_NUM_MULTICAST_ADDRESSES, falls back to multicast-promiscuous
 * (RCTL_MPE); otherwise programs the exact list via the shared code.
 */
1879 igb_set_multi(struct adapter *adapter)
1881 struct ifnet *ifp = adapter->ifp;
1882 struct ifmultiaddr *ifma;
/* Flat on-stack array of 6-byte addresses handed to the shared code. */
1884 u8 mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1888 IOCTL_DEBUGOUT("igb_set_multi: begin");
1890 if_maddr_rlock(ifp);
1891 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1892 if (ifma->ifma_addr->sa_family != AF_LINK)
1895 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1898 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1899 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1902 if_maddr_runlock(ifp);
1904 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1905 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1906 reg_rctl |= E1000_RCTL_MPE;
1907 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1909 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1913 /*********************************************************************
1916 * This routine checks for link status and updates statistics.
1918 **********************************************************************/
/*
 * igb_local_timer:
 * Once-per-second callout (re-armed at the bottom).  Refreshes link
 * state and statistics, optionally prints debug stats, runs the TX
 * watchdog, and fires a software RX interrupt on all queues via EICS
 * (presumably to guard against lost RX interrupts — not stated here).
 */
1921 igb_local_timer(void *arg)
1923 struct adapter *adapter = arg;
1924 struct ifnet *ifp = adapter->ifp;
1926 IGB_CORE_LOCK_ASSERT(adapter);
1928 igb_update_link_status(adapter);
1929 igb_update_stats_counters(adapter);
1931 if (igb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
1932 igb_print_hw_stats(adapter);
1935 * Each second we check the watchdog to
1936 * protect against hardware hangs.
1938 igb_watchdog(adapter);
1940 /* Trigger an RX interrupt on all queues */
1941 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1943 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
/*
 * igb_update_link_status:
 * Determines current link state per media type (copper polls the PHY
 * only when get_link_status is set; fiber reads STATUS; serdes uses
 * serdes_has_link), then handles up/down transitions: updates speed,
 * duplex, if_baudrate, notifies the stack via if_link_state_change, and
 * on link-down disarms every TX ring's watchdog timer.
 */
1948 igb_update_link_status(struct adapter *adapter)
1950 struct e1000_hw *hw = &adapter->hw;
1951 struct ifnet *ifp = adapter->ifp;
1952 device_t dev = adapter->dev;
1953 struct tx_ring *txr = adapter->tx_rings;
1956 /* Get the cached link value or read for real */
1957 switch (hw->phy.media_type) {
1958 case e1000_media_type_copper:
1959 if (hw->mac.get_link_status) {
1960 /* Do the work to read phy */
1961 e1000_check_for_link(hw);
/* get_link_status is cleared by the shared code once link is up. */
1962 link_check = !hw->mac.get_link_status;
1966 case e1000_media_type_fiber:
1967 e1000_check_for_link(hw);
1968 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1971 case e1000_media_type_internal_serdes:
1972 e1000_check_for_link(hw);
1973 link_check = adapter->hw.mac.serdes_has_link;
1976 case e1000_media_type_unknown:
1980 /* Now we check if a transition has happened */
1981 if (link_check && (adapter->link_active == 0)) {
1982 e1000_get_speed_and_duplex(&adapter->hw,
1983 &adapter->link_speed, &adapter->link_duplex);
1985 device_printf(dev, "Link is up %d Mbps %s\n",
1986 adapter->link_speed,
1987 ((adapter->link_duplex == FULL_DUPLEX) ?
1988 "Full Duplex" : "Half Duplex"));
1989 adapter->link_active = 1;
1990 ifp->if_baudrate = adapter->link_speed * 1000000;
1991 if_link_state_change(ifp, LINK_STATE_UP);
1992 } else if (!link_check && (adapter->link_active == 1)) {
1993 ifp->if_baudrate = adapter->link_speed = 0;
1994 adapter->link_duplex = 0;
1996 device_printf(dev, "Link is Down\n");
1997 adapter->link_active = 0;
1998 if_link_state_change(ifp, LINK_STATE_DOWN);
1999 /* Turn off watchdogs */
2000 for (int i = 0; i < adapter->num_queues; i++, txr++)
2001 txr->watchdog_timer = FALSE;
2005 /*********************************************************************
2007 * This routine disables all traffic on the adapter by issuing a
2008 * global reset on the MAC and deallocates TX/RX buffers.
2010 **********************************************************************/
/*
 * Body of igb_stop (the function signature line is not present in this
 * listing): halts all traffic with the core lock held — disables
 * interrupts, stops the timer callout, clears RUNNING/OACTIVE so the
 * stack stops queuing, resets the MAC, and clears wake-up control.
 */
2015 struct adapter *adapter = arg;
2016 struct ifnet *ifp = adapter->ifp;
2018 IGB_CORE_LOCK_ASSERT(adapter);
2020 INIT_DEBUGOUT("igb_stop: begin");
2022 igb_disable_intr(adapter);
2024 callout_stop(&adapter->timer);
2026 /* Tell the stack that the interface is no longer active */
2027 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2029 e1000_reset_hw(&adapter->hw);
2030 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2034 /*********************************************************************
2036 * Determine hardware revision.
2038 **********************************************************************/
/*
 * igb_identify_hardware:
 * Ensures bus-master and memory-space bits are set in PCI_COMMAND
 * (re-enabling them if firmware left them off), snapshots the PCI
 * vendor/device/revision/subsystem IDs into the shared-code hw struct,
 * and runs e1000_set_mac_type() to classify the MAC.
 */
2040 igb_identify_hardware(struct adapter *adapter)
2042 device_t dev = adapter->dev;
2044 /* Make sure our PCI config space has the necessary stuff set */
2045 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2046 if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2047 (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2048 device_printf(dev, "Memory Access and/or Bus Master bits "
2050 adapter->hw.bus.pci_cmd_word |=
2051 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2052 pci_write_config(dev, PCIR_COMMAND,
2053 adapter->hw.bus.pci_cmd_word, 2);
2056 /* Save off the information about this board */
2057 adapter->hw.vendor_id = pci_get_vendor(dev);
2058 adapter->hw.device_id = pci_get_device(dev);
2059 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2060 adapter->hw.subsystem_vendor_id =
2061 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2062 adapter->hw.subsystem_device_id =
2063 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2065 /* Do Shared Code Init and Setup */
2066 if (e1000_set_mac_type(&adapter->hw)) {
2067 device_printf(dev, "Setup init failure\n");
/*
 * igb_allocate_pci_resources:
 * Maps the device's register BAR (SYS_RES_MEMORY), records the bus
 * tag/handle in the osdep glue the shared code uses, defaults to a
 * single queue, then probes for MSI-X/MSI via igb_setup_msix().
 */
2073 igb_allocate_pci_resources(struct adapter *adapter)
2075 device_t dev = adapter->dev;
2079 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2081 if (adapter->pci_mem == NULL) {
2082 device_printf(dev, "Unable to allocate bus resource: memory\n");
2085 adapter->osdep.mem_bus_space_tag =
2086 rman_get_bustag(adapter->pci_mem);
2087 adapter->osdep.mem_bus_space_handle =
2088 rman_get_bushandle(adapter->pci_mem);
2089 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2091 adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2093 /* This will setup either MSI/X or MSI */
2094 adapter->msix = igb_setup_msix(adapter);
2095 adapter->hw.back = &adapter->osdep;
2100 /*********************************************************************
2102 * Setup the Legacy or MSI Interrupt handler
2104 **********************************************************************/
/*
 * igb_allocate_legacy:
 * Sets up the single-vector (legacy INTx or MSI) interrupt path: masks
 * all interrupts via IMC, allocates one shareable IRQ resource, creates
 * the rxtx deferred task + fast taskqueue, and installs igb_irq_fast as
 * a filter handler.  On bus_setup_intr failure the taskqueue is freed.
 */
2106 igb_allocate_legacy(struct adapter *adapter)
2108 device_t dev = adapter->dev;
2111 /* Turn off all interrupts */
2112 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2115 if (adapter->msix == 1)
2118 /* We allocate a single interrupt resource */
2119 adapter->res = bus_alloc_resource_any(dev,
2120 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2121 if (adapter->res == NULL) {
2122 device_printf(dev, "Unable to allocate bus resource: "
2128 * Try allocating a fast interrupt and the associated deferred
2129 * processing contexts.
2131 TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2132 adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2133 taskqueue_thread_enqueue, &adapter->tq);
2134 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2135 device_get_nameunit(adapter->dev));
2136 if ((error = bus_setup_intr(dev, adapter->res,
2137 INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2138 adapter, &adapter->tag)) != 0) {
2139 device_printf(dev, "Failed to register fast interrupt "
2140 "handler: %d\n", error);
2141 taskqueue_free(adapter->tq);
2150 /*********************************************************************
2152 * Setup the MSIX Interrupt handlers:
2154 **********************************************************************/
/*
 * igb_allocate_msix:
 * Allocates and wires up one MSI-X vector per TX ring, one per RX ring,
 * and a final vector for link interrupts.  Each queue vector gets an
 * IRQ resource, an ithread handler (igb_msix_tx / igb_msix_rx), a
 * deferred task, an EIMS bit (82575 uses fixed queue bits, later parts
 * use 1 << vector), and — with multiple queues — a CPU binding so each
 * RX/TX pair lands on the same CPU.  Finishes by creating the shared
 * fast taskqueue.
 */
2156 igb_allocate_msix(struct adapter *adapter)
2158 device_t dev = adapter->dev;
2159 struct tx_ring *txr = adapter->tx_rings;
2160 struct rx_ring *rxr = adapter->rx_rings;
2161 int error, rid, vector = 0;
2164 * Setup the interrupt handlers
2168 for (int i = 0; i < adapter->num_queues; i++, vector++, txr++) {
2170 txr->res = bus_alloc_resource_any(dev,
2171 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2172 if (txr->res == NULL) {
2174 "Unable to allocate bus resource: "
2175 "MSIX TX Interrupt\n");
2178 error = bus_setup_intr(dev, txr->res,
2179 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2180 igb_msix_tx, txr, &txr->tag);
2183 device_printf(dev, "Failed to register TX handler");
2186 /* Make tasklet for deferred handling - one per queue */
2187 TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
2189 if (adapter->hw.mac.type == e1000_82575)
2190 txr->eims = E1000_EICR_TX_QUEUE0 << i;
2192 txr->eims = 1 << vector;
2194 ** Bind the msix vector, and thus the
2195 ** ring to the corresponding cpu.
2197 if (adapter->num_queues > 1)
2198 bus_bind_intr(dev, txr->res, i);
2202 for (int i = 0; i < adapter->num_queues; i++, vector++, rxr++) {
2204 rxr->res = bus_alloc_resource_any(dev,
2205 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2206 if (rxr->res == NULL) {
2208 "Unable to allocate bus resource: "
2209 "MSIX RX Interrupt\n");
2212 error = bus_setup_intr(dev, rxr->res,
2213 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2214 igb_msix_rx, rxr, &rxr->tag);
2217 device_printf(dev, "Failed to register RX handler");
2220 /* Make tasklet for deferred handling - one per queue */
2221 TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2223 if (adapter->hw.mac.type == e1000_82575)
2224 rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2226 rxr->eims = 1 << vector;
2227 /* Get a mask for local timer */
2228 adapter->rx_mask |= rxr->eims;
2230 ** Bind the msix vector, and thus the
2231 ** ring to the corresponding cpu.
2232 ** Notice that this makes an RX/TX pair
2233 ** bound to each CPU, limited by the MSIX
2236 if (adapter->num_queues > 1)
2237 bus_bind_intr(dev, rxr->res, i);
/* Final vector: link-status interrupt. */
2242 adapter->res = bus_alloc_resource_any(dev,
2243 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2244 if (adapter->res == NULL) {
2246 "Unable to allocate bus resource: "
2247 "MSIX Link Interrupt\n");
2250 if ((error = bus_setup_intr(dev, adapter->res,
2251 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2252 igb_msix_link, adapter, &adapter->tag)) != 0) {
2253 device_printf(dev, "Failed to register Link handler");
2256 adapter->linkvec = vector;
2257 adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2258 taskqueue_thread_enqueue, &adapter->tq);
2259 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2260 device_get_nameunit(adapter->dev));
/*
 * igb_configure_queues:
 * Programs per-queue MSI-X routing.  On 82576 this uses the IVAR
 * register array (two queue entries per IVAR; RX in the low bytes, TX
 * shifted into the high bytes) plus IVAR_MISC for the link vector, with
 * GPIE enabling MSI-X mode.  Older parts (the non-82576 branch) use the
 * MSIXBM bitmap registers per vector instead.  eims_mask / link_mask
 * accumulate the enable bits used elsewhere.
 */
2267 igb_configure_queues(struct adapter *adapter)
2269 struct e1000_hw *hw = &adapter->hw;
2270 struct tx_ring *txr;
2271 struct rx_ring *rxr;
2275 ** 82576 uses IVARs to route MSI/X
2276 ** interrupts, its not very intuitive,
2277 ** study the code carefully :)
2279 if (adapter->hw.mac.type == e1000_82576) {
2281 /* First turn on the capability */
2282 E1000_WRITE_REG(hw, E1000_GPIE,
2283 E1000_GPIE_MSIX_MODE |
2285 E1000_GPIE_PBA | E1000_GPIE_NSICR);
/* RX entries: read-modify-write each IVAR so the TX half is preserved. */
2287 for (int i = 0; i < adapter->num_queues; i++) {
2288 u32 index = i & 0x7; /* Each IVAR has two entries */
2289 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2290 rxr = &adapter->rx_rings[i];
2293 ivar |= rxr->msix | E1000_IVAR_VALID;
2296 ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
2298 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2299 adapter->eims_mask |= rxr->eims;
/* TX entries occupy the upper byte positions of the same IVARs. */
2302 for (int i = 0; i < adapter->num_queues; i++) {
2303 u32 index = i & 0x7; /* Each IVAR has two entries */
2304 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2305 txr = &adapter->tx_rings[i];
2308 ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
2311 ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
2313 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2314 adapter->eims_mask |= txr->eims;
2317 /* And for the link interrupt */
2318 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2319 adapter->link_mask = 1 << adapter->linkvec;
2320 adapter->eims_mask |= adapter->link_mask;
2321 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
/* Non-82576 path: classic MSIXBM vector-to-cause bitmap routing. */
2326 /* enable MSI-X PBA support*/
2327 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2328 tmp |= E1000_CTRL_EXT_PBA_CLR;
2329 /* Auto-Mask interrupts upon ICR read. */
2330 tmp |= E1000_CTRL_EXT_EIAME;
2331 tmp |= E1000_CTRL_EXT_IRCA;
2332 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2335 for (int i = 0; i < adapter->num_queues; i++) {
2336 txr = &adapter->tx_rings[i];
2337 E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2339 adapter->eims_mask |= txr->eims;
2343 for (int i = 0; i < adapter->num_queues; i++) {
2344 rxr = &adapter->rx_rings[i];
2345 E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2347 adapter->eims_mask |= rxr->eims;
2351 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2353 adapter->link_mask |= E1000_EIMS_OTHER;
2354 adapter->eims_mask |= adapter->link_mask;
/*
 * igb_free_pci_resources:
 * Tears down everything igb_allocate_pci_resources / *_legacy / *_msix
 * set up: per-ring interrupt tags and IRQ resources (rid = msix + 1),
 * the link/legacy interrupt (rid depends on linkvec / msix mode), any
 * allocated MSI vectors, the MSI-X table BAR, and the register BAR.
 * Bails out early if attach failed before adapter->res existed.
 */
2361 igb_free_pci_resources(struct adapter *adapter)
2363 struct tx_ring *txr = adapter->tx_rings;
2364 struct rx_ring *rxr = adapter->rx_rings;
2365 device_t dev = adapter->dev;
2369 ** There is a slight possibility of a failure mode
2370 ** in attach that will result in entering this function
2371 ** before interrupt resources have been initialized, and
2372 ** in that case we do not want to execute the loops below
2373 ** We can detect this reliably by the state of the adapter
2376 if (adapter->res == NULL)
2380 * First release all the TX/RX interrupt resources:
2382 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2383 rid = txr->msix + 1;
2384 if (txr->tag != NULL) {
2385 bus_teardown_intr(dev, txr->res, txr->tag);
2388 if (txr->res != NULL)
2389 bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res);
2392 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
2393 rid = rxr->msix + 1;
2394 if (rxr->tag != NULL) {
2395 bus_teardown_intr(dev, rxr->res, rxr->tag);
2398 if (rxr->res != NULL)
2399 bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res);
2402 /* Clean the Legacy or Link interrupt last */
2403 if (adapter->linkvec) /* we are doing MSIX */
2404 rid = adapter->linkvec + 1;
2406 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2408 if (adapter->tag != NULL) {
2409 bus_teardown_intr(dev, adapter->res, adapter->tag);
2410 adapter->tag = NULL;
2412 if (adapter->res != NULL)
2413 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2417 pci_release_msi(dev);
2419 if (adapter->msix_mem != NULL)
2420 bus_release_resource(dev, SYS_RES_MEMORY,
2421 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2423 if (adapter->pci_mem != NULL)
2424 bus_release_resource(dev, SYS_RES_MEMORY,
2425 PCIR_BAR(0), adapter->pci_mem);
2430 * Setup Either MSI/X or MSI
/*
 * igb_setup_msix:
 * Probes interrupt capabilities.  Tries MSI-X first: maps the MSI-X
 * table BAR, checks the vector count, sizes the queue count from
 * mp_ncpus (two vectors — RX+TX — per queue, plus one link vector;
 * overridable via the igb_num_queues tunable), and allocates the
 * vectors.  Falls back to plain MSI if any step fails.  The return
 * value (number of vectors / MSI indicator) is stored by the caller in
 * adapter->msix; the return statements themselves are elided in this
 * listing.
 */
2433 igb_setup_msix(struct adapter *adapter)
2435 device_t dev = adapter->dev;
2436 int rid, want, queues, msgs;
2438 /* First try MSI/X */
2439 rid = PCIR_BAR(IGB_MSIX_BAR);
2440 adapter->msix_mem = bus_alloc_resource_any(dev,
2441 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2442 if (!adapter->msix_mem) {
2443 /* May not be enabled */
2444 device_printf(adapter->dev,
2445 "Unable to map MSIX table \n");
2449 msgs = pci_msix_count(dev);
2450 if (msgs == 0) { /* system has msix disabled */
2451 bus_release_resource(dev, SYS_RES_MEMORY,
2452 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2453 adapter->msix_mem = NULL;
2457 /* Figure out a reasonable auto config value */
2458 queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
2460 if (igb_num_queues == 0)
2461 igb_num_queues = queues;
2463 ** Two vectors (RX/TX pair) per queue
2464 ** plus an additional for Link interrupt
2466 want = (igb_num_queues * 2) + 1;
2470 device_printf(adapter->dev,
2471 "MSIX Configuration Problem, "
2472 "%d vectors configured, but %d queues wanted!\n",
2476 if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2477 device_printf(adapter->dev,
2478 "Using MSIX interrupts with %d vectors\n", msgs);
2479 adapter->num_queues = igb_num_queues;
2483 msgs = pci_msi_count(dev);
2484 if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2485 device_printf(adapter->dev,"Using MSI interrupt\n");
2489 /*********************************************************************
2491  * Initialize the hardware to a configuration
2492  * as specified by the adapter structure.
2494  **********************************************************************/
/*
 * Resets the MAC, computes flow-control watermarks from the RX packet
 * buffer size, applies the flow-control tunable, and runs e1000_init_hw().
 */
2496 igb_hardware_init(struct adapter *adapter)
2498 device_t dev = adapter->dev;
2501 INIT_DEBUGOUT("igb_hardware_init: begin");
2503 /* Issue a global reset */
2504 e1000_reset_hw(&adapter->hw);
2506 /* Let the firmware know the OS is in control */
2507 igb_get_hw_control(adapter);
2510 * These parameters control the automatic generation (Tx) and
2511 * response (Rx) to Ethernet PAUSE frames.
2512 * - High water mark should allow for at least two frames to be
2513 * received after sending an XOFF.
2514 * - Low water mark works best when it is very near the high water mark.
2515 * This allows the receiver to restart by sending XON when it has
2516 * drained a bit. Here we use an arbitary value of 1500 which will
2517 * restart after one full frame is pulled from the buffer. There
2518 * could be several smaller frames in the buffer and if so they will
2519 * not trigger the XON until their total number reduces the buffer
2521 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
/* 82576 reports its RX packet buffer size in RXPBS; older parts in PBA */
2523 if (adapter->hw.mac.type == e1000_82576)
2524 rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2525 E1000_RXPBS) & 0xffff) << 10 );
2527 rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2528 E1000_PBA) & 0xffff) << 10 );
2530 adapter->hw.fc.high_water = rx_buffer_size -
2531 roundup2(adapter->max_frame_size, 1024);
2532 adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2534 adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2535 adapter->hw.fc.send_xon = TRUE;
2537 /* Set Flow control, use the tunable location if sane */
/*
 * NOTE(review): this condition is true for EVERY value of the tunable
 * (any int is either >= 0 or < 4), so the e1000_fc_none fallback below
 * is unreachable. Almost certainly intended to be
 * (igb_fc_setting >= 0) && (igb_fc_setting < 4) -- confirm and fix.
 */
2538 if ((igb_fc_setting >= 0) || (igb_fc_setting < 4))
2539 adapter->hw.fc.requested_mode = igb_fc_setting;
2541 adapter->hw.fc.requested_mode = e1000_fc_none;
2543 if (e1000_init_hw(&adapter->hw) < 0) {
2544 device_printf(dev, "Hardware Initialization Failed\n");
/* Prime link state so the first link interrupt reports correctly */
2548 e1000_check_for_link(&adapter->hw);
2553 /*********************************************************************
2555  * Setup networking device structure and register an interface.
2557  **********************************************************************/
/*
 * Allocates the ifnet, wires up driver entry points (init/ioctl/start,
 * and multiqueue transmit on FreeBSD 8+), advertises capabilities
 * (checksum offload, TSO4, jumbo, VLAN tagging), attaches the interface,
 * and registers the supported ifmedia types.
 */
2559 igb_setup_interface(device_t dev, struct adapter *adapter)
2563 INIT_DEBUGOUT("igb_setup_interface: begin");
2565 ifp = adapter->ifp = if_alloc(IFT_ETHER);
2567 panic("%s: can not if_alloc()", device_get_nameunit(dev));
2568 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2569 ifp->if_mtu = ETHERMTU;
2570 ifp->if_init = igb_init;
2571 ifp->if_softc = adapter;
2572 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2573 ifp->if_ioctl = igb_ioctl;
2574 ifp->if_start = igb_start;
2575 #if __FreeBSD_version >= 800000
/* Multiqueue stack interface: per-queue transmit and flush */
2576 ifp->if_transmit = igb_mq_start;
2577 ifp->if_qflush = igb_qflush;
2579 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2580 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2581 IFQ_SET_READY(&ifp->if_snd);
2583 ether_ifattach(ifp, adapter->hw.mac.addr);
2585 ifp->if_capabilities = ifp->if_capenable = 0;
2587 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2588 ifp->if_capabilities |= IFCAP_TSO4;
2589 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2590 ifp->if_capenable = ifp->if_capabilities;
2593 * Tell the upper layer(s) we support long frames.
2595 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2596 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2597 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2600 * Specify the media types supported by this adapter and register
2601 * callbacks to update media and link information
2603 ifmedia_init(&adapter->media, IFM_IMASK,
2604 igb_media_change, igb_media_status);
2605 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2606 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
/* Fiber/SerDes: only 1000ated SX modes apply */
2607 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2609 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
/* Copper: full 10/100/1000 set */
2611 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2612 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2614 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2616 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
/* The IFE PHY cannot do 1000BASE-T */
2618 if (adapter->hw.phy.type != e1000_phy_ife) {
2619 ifmedia_add(&adapter->media,
2620 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2621 ifmedia_add(&adapter->media,
2622 IFM_ETHER | IFM_1000_T, 0, NULL);
2625 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2626 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2631  * Manage DMA'able memory.
/*
 * igb_dmamap_cb - bus_dmamap_load() callback: copy the bus address of
 * the first DMA segment out to the caller via *arg.
 * NOTE(review): handling of 'error'/'nseg' is in lines not visible here.
 */
2634 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2638 *(bus_addr_t *) arg = segs[0].ds_addr;
/*
 * igb_dma_malloc - allocate a physically-contiguous DMA area of 'size'
 * bytes: create a single-segment tag, allocate/map the memory, and load
 * it to obtain dma->dma_paddr. On any failure the partially-created
 * resources are torn down (unload/free/destroy) and the struct is
 * cleared so igb_dma_free() is safe to call afterwards.
 */
2642 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2643 struct igb_dma_alloc *dma, int mapflags)
2647 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2648 IGB_DBA_ALIGN, 0, /* alignment, bounds */
2649 BUS_SPACE_MAXADDR, /* lowaddr */
2650 BUS_SPACE_MAXADDR, /* highaddr */
2651 NULL, NULL, /* filter, filterarg */
2654 size, /* maxsegsize */
2656 NULL, /* lockfunc */
2660 device_printf(adapter->dev,
2661 "%s: bus_dma_tag_create failed: %d\n",
2666 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2667 BUS_DMA_NOWAIT, &dma->dma_map);
2669 device_printf(adapter->dev,
2670 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2671 __func__, (uintmax_t)size, error);
/* Load the map; igb_dmamap_cb() deposits the bus address in dma_paddr */
2676 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2677 size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2678 if (error || dma->dma_paddr == 0) {
2679 device_printf(adapter->dev,
2680 "%s: bus_dmamap_load failed: %d\n",
/* Unwind in reverse order of creation */
2688 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2690 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2691 bus_dma_tag_destroy(dma->dma_tag);
2693 dma->dma_map = NULL;
2694 dma->dma_tag = NULL;
/*
 * igb_dma_free - release a DMA area created by igb_dma_malloc().
 * Safe to call on a partially-initialized or already-freed struct:
 * a NULL tag means nothing to do, and pointers are cleared as freed.
 */
2700 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2702 if (dma->dma_tag == NULL)
2704 if (dma->dma_map != NULL) {
/* Finish any in-flight DMA before unmapping */
2705 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2706 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2707 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2708 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2709 dma->dma_map = NULL;
2711 bus_dma_tag_destroy(dma->dma_tag);
2712 dma->dma_tag = NULL;
2716 /*********************************************************************
2718  * Allocate memory for the transmit and receive rings, and then
2719  * the descriptors associated with each, called only once at attach.
2721  **********************************************************************/
2723 igb_allocate_queues(struct adapter *adapter)
2725 device_t dev = adapter->dev;
2726 struct tx_ring *txr;
2727 struct rx_ring *rxr;
2728 int rsize, tsize, error = E1000_SUCCESS;
2729 int txconf = 0, rxconf = 0;
2731 /* First allocate the TX ring struct memory */
2732 if (!(adapter->tx_rings =
2733 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2734 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2735 device_printf(dev, "Unable to allocate TX ring memory\n");
2739 txr = adapter->tx_rings;
2741 /* Next allocate the RX */
2742 if (!(adapter->rx_rings =
2743 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2744 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2745 device_printf(dev, "Unable to allocate RX ring memory\n");
2749 rxr = adapter->rx_rings;
/* Descriptor area must be IGB_DBA_ALIGN-aligned for the hardware */
2751 tsize = roundup2(adapter->num_tx_desc *
2752 sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2754 * Now set up the TX queues, txconf is needed to handle the
2755 * possibility that things fail midcourse and we need to
2756 * undo memory gracefully
2758 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2759 /* Set up some basics */
2760 txr = &adapter->tx_rings[i];
2761 txr->adapter = adapter;
2764 /* Initialize the TX lock */
2765 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2766 device_get_nameunit(dev), txr->me);
2767 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2769 if (igb_dma_malloc(adapter, tsize,
2770 &txr->txdma, BUS_DMA_NOWAIT)) {
2772 "Unable to allocate TX Descriptor memory\n");
2776 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2777 bzero((void *)txr->tx_base, tsize);
2779 /* Now allocate transmit buffers for the ring */
2780 if (igb_allocate_transmit_buffers(txr)) {
2782 "Critical Failure setting up transmit buffers\n");
2786 #if __FreeBSD_version >= 800000
2787 /* Allocate a buf ring */
2788 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2789 M_WAITOK, &txr->tx_mtx);
2794 * Next the RX queues...
2796 rsize = roundup2(adapter->num_rx_desc *
2797 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2798 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2799 rxr = &adapter->rx_rings[i];
2800 rxr->adapter = adapter;
2803 /* Initialize the RX lock */
/*
 * NOTE(review): the RX lock name is built with txr->me (the last TX
 * ring's index), not rxr->me -- every RX lock gets the same wrong
 * queue number in its name. Likely should be rxr->me; confirm and fix.
 */
2804 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2805 device_get_nameunit(dev), txr->me);
2806 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2808 if (igb_dma_malloc(adapter, rsize,
2809 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2811 "Unable to allocate RxDescriptor memory\n");
2815 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2816 bzero((void *)rxr->rx_base, rsize);
2818 /* Allocate receive buffers for the ring*/
2819 if (igb_allocate_receive_buffers(rxr)) {
2821 "Critical Failure setting up receive buffers\n");
/* Error unwind: free only the rings that were fully configured */
2830 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2831 igb_dma_free(adapter, &rxr->rxdma);
2833 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2834 igb_dma_free(adapter, &txr->txdma);
2835 free(adapter->rx_rings, M_DEVBUF);
2837 free(adapter->tx_rings, M_DEVBUF);
2842 /*********************************************************************
2844  * Allocate memory for tx_buffer structures. The tx_buffer stores all
2845  * the information needed to transmit a packet on the wire. This is
2846  * called only once at attach, setup is done every reset.
2848  **********************************************************************/
2850 igb_allocate_transmit_buffers(struct tx_ring *txr)
2852 struct adapter *adapter = txr->adapter;
2853 device_t dev = adapter->dev;
2854 struct igb_tx_buffer *txbuf;
2858 * Setup DMA descriptor areas.
/* NULL parent tag: inherits the platform's root DMA tag */
2860 if ((error = bus_dma_tag_create(NULL, /* parent */
2861 PAGE_SIZE, 0, /* alignment, bounds */
2862 BUS_SPACE_MAXADDR, /* lowaddr */
2863 BUS_SPACE_MAXADDR, /* highaddr */
2864 NULL, NULL, /* filter, filterarg */
2865 IGB_TSO_SIZE, /* maxsize */
2866 IGB_MAX_SCATTER, /* nsegments */
2867 PAGE_SIZE, /* maxsegsize */
2869 NULL, /* lockfunc */
2870 NULL, /* lockfuncarg */
2872 device_printf(dev,"Unable to allocate TX DMA tag\n");
2876 if (!(txr->tx_buffers =
2877 (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
2878 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2879 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2884 /* Create the descriptor buffer dma maps */
2885 txbuf = txr->tx_buffers;
2886 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2887 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2889 device_printf(dev, "Unable to create TX DMA map\n");
2896 /* We free all, it handles case where we are in the middle */
2897 igb_free_transmit_structures(adapter);
2901 /*********************************************************************
2903  * Initialize a transmit ring.
2905  **********************************************************************/
/*
 * Resets one TX ring to its pristine state: zeroes the descriptor area,
 * resets ring indices, frees any leftover mbufs from a previous run,
 * and restores the full descriptor-available count. Called on every
 * reset/reinit, not just at attach.
 */
2907 igb_setup_transmit_ring(struct tx_ring *txr)
2909 struct adapter *adapter = txr->adapter;
2910 struct igb_tx_buffer *txbuf;
2913 /* Clear the old descriptor contents */
2914 bzero((void *)txr->tx_base,
2915 (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc;
2917 txr->next_avail_desc = 0;
2918 txr->next_to_clean = 0;
2920 /* Free any existing tx buffers. */
2921 txbuf = txr->tx_buffers;
2922 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2923 if (txbuf->m_head != NULL) {
2924 bus_dmamap_sync(txr->txtag, txbuf->map,
2925 BUS_DMASYNC_POSTWRITE);
2926 bus_dmamap_unload(txr->txtag, txbuf->map);
2927 m_freem(txbuf->m_head);
2928 txbuf->m_head = NULL;
2930 /* clear the watch index */
2931 txbuf->next_eop = -1;
2934 /* Set number of descriptors available */
2935 txr->tx_avail = adapter->num_tx_desc;
/* Push the cleared descriptor ring out to the device */
2937 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2938 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2942 /*********************************************************************
2944  * Initialize all transmit rings.
2946  **********************************************************************/
/* Iterates every TX ring and re-initializes it via igb_setup_transmit_ring() */
2948 igb_setup_transmit_structures(struct adapter *adapter)
2950 struct tx_ring *txr = adapter->tx_rings;
2952 for (int i = 0; i < adapter->num_queues; i++, txr++)
2953 igb_setup_transmit_ring(txr);
2958 /*********************************************************************
2960  * Enable transmit unit.
2962  **********************************************************************/
/*
 * Programs the hardware TX side: per-queue descriptor base/length and
 * head/tail pointers, per-queue TXDCTL enable, then the global TCTL
 * register write that turns the transmit unit on.
 */
2964 igb_initialize_transmit_units(struct adapter *adapter)
2966 struct tx_ring *txr = adapter->tx_rings;
2969 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2971 /* Setup the Base and Length of the Tx Descriptor Rings */
2972 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2973 u64 bus_addr = txr->txdma.dma_paddr;
2975 E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2976 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
/* 64-bit base address split across the high/low register pair */
2977 E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2978 (uint32_t)(bus_addr >> 32));
2979 E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2980 (uint32_t)bus_addr);
2982 /* Setup the HW Tx Head and Tail descriptor pointers */
2983 E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2984 E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2986 HW_DEBUGOUT2("Base = %x, Length = %x\n",
2987 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2988 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2990 /* Setup Transmit Descriptor Base Settings */
2991 adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2993 txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
2994 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2995 E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
2998 /* Program the Transmit Control Register */
2999 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3000 tctl &= ~E1000_TCTL_CT;
3001 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3002 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3004 e1000_config_collision_dist(&adapter->hw);
3006 /* This write will effectively turn on the transmit unit. */
3007 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3011 /*********************************************************************
3013  * Free all transmit rings.
3015  **********************************************************************/
/*
 * Tears down every TX ring: per-ring buffers, the descriptor DMA area,
 * and the ring lock, then frees the ring array itself. Also used as the
 * unwind path by igb_allocate_transmit_buffers().
 */
3017 igb_free_transmit_structures(struct adapter *adapter)
3019 struct tx_ring *txr = adapter->tx_rings;
3021 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3023 igb_free_transmit_buffers(txr);
3024 igb_dma_free(adapter, &txr->txdma);
3026 IGB_TX_LOCK_DESTROY(txr);
3028 free(adapter->tx_rings, M_DEVBUF);
3031 /*********************************************************************
3033  * Free transmit ring related data structures.
3035  **********************************************************************/
/*
 * Releases one TX ring's per-descriptor state: in-flight mbufs and
 * their DMA maps, the buf_ring (FreeBSD 8+), the tx_buffer array, and
 * the TX DMA tag. Tolerates partially-initialized rings (NULL checks
 * throughout) so it can serve as an error-path cleanup too.
 */
3037 igb_free_transmit_buffers(struct tx_ring *txr)
3039 struct adapter *adapter = txr->adapter;
3040 struct igb_tx_buffer *tx_buffer;
3043 INIT_DEBUGOUT("free_transmit_ring: begin");
3045 if (txr->tx_buffers == NULL)
3048 tx_buffer = txr->tx_buffers;
3049 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3050 if (tx_buffer->m_head != NULL) {
/* Slot holds a live packet: finish DMA, unmap, free the chain */
3051 bus_dmamap_sync(txr->txtag, tx_buffer->map,
3052 BUS_DMASYNC_POSTWRITE);
3053 bus_dmamap_unload(txr->txtag,
3055 m_freem(tx_buffer->m_head);
3056 tx_buffer->m_head = NULL;
3057 if (tx_buffer->map != NULL) {
3058 bus_dmamap_destroy(txr->txtag,
3060 tx_buffer->map = NULL;
3062 } else if (tx_buffer->map != NULL) {
/* Empty slot but a map still exists: just tear the map down */
3063 bus_dmamap_unload(txr->txtag,
3065 bus_dmamap_destroy(txr->txtag,
3067 tx_buffer->map = NULL;
3070 #if __FreeBSD_version >= 800000
3071 if (txr->br != NULL)
3072 buf_ring_free(txr->br, M_DEVBUF);
3074 if (txr->tx_buffers != NULL) {
3075 free(txr->tx_buffers, M_DEVBUF);
3076 txr->tx_buffers = NULL;
3078 if (txr->txtag != NULL) {
3079 bus_dma_tag_destroy(txr->txtag);
3085 /**********************************************************************
3087  * Setup work for hardware segmentation offload (TSO) on
3088  * adapters using advanced tx descriptors (82575)
3090  **********************************************************************/
/*
 * Builds the advanced context descriptor for an IPv4/TCP TSO frame and
 * returns the total L2+L3+L4 header length via *hdrlen. Returns FALSE
 * if the frame is not TCP (or fails the visible sanity checks), TRUE
 * once the context descriptor has been written and the ring advanced.
 */
3092 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3094 struct adapter *adapter = txr->adapter;
3095 struct e1000_adv_tx_context_desc *TXD;
3096 struct igb_tx_buffer *tx_buffer;
3097 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3098 u32 mss_l4len_idx = 0;
3100 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3101 struct ether_vlan_header *eh;
3107 * Determine where frame payload starts.
3108 * Jump over vlan headers if already present
3110 eh = mtod(mp, struct ether_vlan_header *);
3111 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3112 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3114 ehdrlen = ETHER_HDR_LEN;
3116 /* Ensure we have at least the IP+TCP header in the first mbuf. */
3117 if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3120 /* Only supports IPV4 for now */
3121 ctxd = txr->next_avail_desc;
3122 tx_buffer = &txr->tx_buffers[ctxd];
3123 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3125 ip = (struct ip *)(mp->m_data + ehdrlen);
3126 if (ip->ip_p != IPPROTO_TCP)
3127 return FALSE; /* 0 */
3129 ip_hlen = ip->ip_hl << 2;
3130 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
/*
 * Seed the TCP checksum with the pseudo-header sum (addresses +
 * protocol, zero length); the hardware fills in the rest per segment.
 */
3131 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3132 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3133 tcp_hlen = th->th_off << 2;
3135 * Calculate header length, this is used
3136 * in the transmit desc in igb_xmit
3138 *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3140 /* VLAN MACLEN IPLEN */
3141 if (mp->m_flags & M_VLANTAG) {
3142 vtag = htole16(mp->m_pkthdr.ether_vtag);
3143 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3146 vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3147 vlan_macip_lens |= ip_hlen;
3148 TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3150 /* ADV DTYPE TUCMD */
3151 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3152 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3153 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3154 TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
/* MSS and L4 header length for the segmentation engine */
3157 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3158 mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3159 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3161 TXD->seqnum_seed = htole32(0);
/* The context slot carries no mbuf; mark it so txeof skips it */
3162 tx_buffer->m_head = NULL;
3163 tx_buffer->next_eop = -1;
/* Consume the context descriptor slot, wrapping at ring end */
3165 if (++ctxd == adapter->num_tx_desc)
3169 txr->next_avail_desc = ctxd;
3174 /*********************************************************************
3176  * Context Descriptor setup for VLAN or CSUM
3178  **********************************************************************/
/*
 * Writes an advanced context descriptor describing header lengths, VLAN
 * tag, and L3/L4 checksum offload flags for the non-TSO transmit path.
 * Returns whether a context descriptor was actually consumed; bails out
 * early when neither checksum offload nor a VLAN tag is requested.
 */
3181 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3183 struct adapter *adapter = txr->adapter;
3184 struct e1000_adv_tx_context_desc *TXD;
3185 struct igb_tx_buffer *tx_buffer;
3186 uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3187 struct ether_vlan_header *eh;
3188 struct ip *ip = NULL;
3189 struct ip6_hdr *ip6;
3190 int ehdrlen, ctxd, ip_hlen = 0;
3191 u16 etype, vtag = 0;
3193 bool offload = TRUE;
/* No checksum work requested: offload stays needed only for VLAN */
3195 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3198 ctxd = txr->next_avail_desc;
3199 tx_buffer = &txr->tx_buffers[ctxd];
3200 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3203 ** In advanced descriptors the vlan tag must
3204 ** be placed into the context descriptor, thus
3205 ** we need to be here just for that setup.
3207 if (mp->m_flags & M_VLANTAG) {
3208 vtag = htole16(mp->m_pkthdr.ether_vtag);
3209 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3210 } else if (offload == FALSE)
3214 * Determine where frame payload starts.
3215 * Jump over vlan headers if already present,
3216 * helpful for QinQ too.
3218 eh = mtod(mp, struct ether_vlan_header *);
3219 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3220 etype = ntohs(eh->evl_proto);
3221 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3223 etype = ntohs(eh->evl_encap_proto);
3224 ehdrlen = ETHER_HDR_LEN;
3227 /* Set the ether header length */
3228 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
/* Dispatch on EtherType (switch header is in lines not visible here) */
3232 ip = (struct ip *)(mp->m_data + ehdrlen);
3233 ip_hlen = ip->ip_hl << 2;
3234 if (mp->m_len < ehdrlen + ip_hlen) {
3239 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3241 case ETHERTYPE_IPV6:
3242 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3243 ip_hlen = sizeof(struct ip6_hdr);
3244 if (mp->m_len < ehdrlen + ip_hlen)
3246 ipproto = ip6->ip6_nxt;
3247 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3254 vlan_macip_lens |= ip_hlen;
3255 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
/* L4 protocol selection (enclosing switch on ipproto not fully visible) */
3259 if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3260 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3263 if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3264 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3266 #if __FreeBSD_version >= 800000
3268 if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3269 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3277 /* Now copy bits into descriptor */
3278 TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3279 TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3280 TXD->seqnum_seed = htole32(0);
3281 TXD->mss_l4len_idx = htole32(0);
/* Context slot carries no mbuf; txeof must skip it */
3283 tx_buffer->m_head = NULL;
3284 tx_buffer->next_eop = -1;
3286 /* We've consumed the first desc, adjust counters */
3287 if (++ctxd == adapter->num_tx_desc)
3289 txr->next_avail_desc = ctxd;
3296 /**********************************************************************
3298  * Examine each tx_buffer in the used queue. If the hardware is done
3299  * processing the packet then free associated resources. The
3300  * tx_buffer is put back on the free queue.
3302  * TRUE return means there's work in the ring to clean, FALSE its empty.
3303  **********************************************************************/
3305 igb_txeof(struct tx_ring *txr)
3307 struct adapter *adapter = txr->adapter;
3308 int first, last, done, num_avail;
3310 struct igb_tx_buffer *tx_buffer;
3311 struct e1000_tx_desc *tx_desc, *eop_desc;
3312 struct ifnet *ifp = adapter->ifp;
/* Caller must hold the TX ring lock */
3314 IGB_TX_LOCK_ASSERT(txr);
/* Fast exit: every descriptor is already free */
3316 if (txr->tx_avail == adapter->num_tx_desc)
3319 num_avail = txr->tx_avail;
3320 first = txr->next_to_clean;
3321 tx_desc = &txr->tx_base[first];
3322 tx_buffer = &txr->tx_buffers[first];
/* next_eop is the index of the packet's End-Of-Packet descriptor */
3323 last = tx_buffer->next_eop;
3324 eop_desc = &txr->tx_base[last];
3327 * What this does is get the index of the
3328 * first descriptor AFTER the EOP of the
3329 * first packet, that way we can do the
3330 * simple comparison on the inner while loop.
3332 if (++last == adapter->num_tx_desc)
3336 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3337 BUS_DMASYNC_POSTREAD);
/* Outer loop: one iteration per completed (DD-set) packet */
3339 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3340 /* We clean the range of the packet */
3341 while (first != done) {
3342 tx_desc->upper.data = 0;
3343 tx_desc->lower.data = 0;
3344 tx_desc->buffer_addr = 0;
3345 ++num_avail; ++cleaned;
3347 if (tx_buffer->m_head) {
3349 bus_dmamap_sync(txr->txtag,
3351 BUS_DMASYNC_POSTWRITE);
3352 bus_dmamap_unload(txr->txtag,
3355 m_freem(tx_buffer->m_head);
3356 tx_buffer->m_head = NULL;
3358 tx_buffer->next_eop = -1;
3360 if (++first == adapter->num_tx_desc)
3363 tx_buffer = &txr->tx_buffers[first];
3364 tx_desc = &txr->tx_base[first];
3366 /* See if we can continue to the next packet */
3367 last = tx_buffer->next_eop;
3369 eop_desc = &txr->tx_base[last];
3370 /* Get new done point */
3371 if (++last == adapter->num_tx_desc) last = 0;
3376 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3377 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3379 txr->next_to_clean = first;
3382 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3383 * that it is OK to send packets.
3384 * If there are no pending descriptors, clear the timeout. Otherwise,
3385 * if some descriptors have been freed, restart the timeout.
3387 if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {
3388 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3389 /* All clean, turn off the timer */
3390 if (num_avail == adapter->num_tx_desc) {
3391 txr->watchdog_timer = 0;
3392 txr->tx_avail = num_avail;
3397 /* Some cleaned, reset the timer */
3399 txr->watchdog_timer = IGB_TX_TIMEOUT;
3400 txr->tx_avail = num_avail;
3405 /*********************************************************************
3407  * Setup descriptor buffer(s) from system mbuf buffer pools.
3408  * i - designates the ring index
3409  * clean - tells the function whether to update
3410  * the header, the packet buffer, or both.
3412  **********************************************************************/
/*
 * Header-split receive: each RX slot uses a two-segment mapping -- a
 * small header mbuf (mh) and a jumbo-cluster payload mbuf (mp). The
 * fresh chain is loaded into the spare map, then the spare and slot
 * maps are swapped so the old mapping can be recycled next time.
 */
3414 igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3416 struct adapter *adapter = rxr->adapter;
3417 struct mbuf *mh, *mp;
3418 bus_dma_segment_t seg[2];
3420 struct igb_rx_buffer *rx_buffer;
3425 rx_buffer = &rxr->rx_buffers[i];
3427 /* First get our header and payload mbuf */
3428 if (clean & IGB_CLEAN_HEADER) {
3429 mh = m_gethdr(M_DONTWAIT, MT_DATA);
/* Reuse the existing header mbuf when not replacing it */
3433 mh = rxr->rx_buffers[i].m_head;
3436 mh->m_flags |= M_PKTHDR;
3438 if (clean & IGB_CLEAN_PAYLOAD) {
3439 mp = m_getjcl(M_DONTWAIT, MT_DATA,
3440 M_PKTHDR, adapter->rx_mbuf_sz);
3443 mp->m_len = adapter->rx_mbuf_sz;
3444 mp->m_flags &= ~M_PKTHDR;
3445 } else { /* reusing */
3446 mp = rxr->rx_buffers[i].m_pack;
3447 mp->m_len = adapter->rx_mbuf_sz;
3448 mp->m_flags &= ~M_PKTHDR;
3451 ** Need to create a chain for the following
3452 ** dmamap call at this point.
3455 mh->m_pkthdr.len = mh->m_len + mp->m_len;
3457 /* Get the memory mapping */
3458 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3459 rxr->rx_spare_map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3461 printf("GET BUF: dmamap load failure - %d\n", error);
3466 /* Unload old mapping and update buffer struct */
3467 if (rx_buffer->m_head != NULL)
3468 bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
/* Swap: the just-loaded spare map becomes this slot's map */
3469 map = rx_buffer->map;
3470 rx_buffer->map = rxr->rx_spare_map;
3471 rxr->rx_spare_map = map;
3472 rx_buffer->m_head = mh;
3473 rx_buffer->m_pack = mp;
3474 bus_dmamap_sync(rxr->rxtag,
3475 rx_buffer->map, BUS_DMASYNC_PREREAD);
3477 /* Update descriptor */
/* seg[0] = header buffer, seg[1] = packet buffer (split descriptor) */
3478 rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3479 rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3484 ** If we get here, we have an mbuf resource
3485 ** issue, so we discard the incoming packet
3486 ** and attempt to reuse existing mbufs next
3487 ** pass thru the ring, but to do so we must
3488 ** fix up the descriptor which had the address
3489 ** clobbered with writeback info.
3492 adapter->mbuf_header_failed++;
3494 /* Is there a reusable buffer? */
3495 mh = rxr->rx_buffers[i].m_head;
3496 if (mh == NULL) /* Nope, init error */
3498 mp = rxr->rx_buffers[i].m_pack;
3499 if (mp == NULL) /* Nope, init error */
3501 /* Get our old mapping */
3502 rx_buffer = &rxr->rx_buffers[i];
3503 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3504 rx_buffer->map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3506 /* We really have a problem */
3510 /* Now fix the descriptor as needed */
3511 rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3512 rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3517 /*********************************************************************
3519  * Allocate memory for rx_buffer structures. Since we use one
3520  * rx_buffer per received packet, the maximum number of rx_buffer's
3521  * that we'll need is equal to the number of receive descriptors
3522  * that we've allocated.
3524  **********************************************************************/
3526 igb_allocate_receive_buffers(struct rx_ring *rxr)
3528 struct adapter *adapter = rxr->adapter;
3529 device_t dev = adapter->dev;
3530 struct igb_rx_buffer *rxbuf;
3531 int i, bsize, error;
3533 bsize = sizeof(struct igb_rx_buffer) * adapter->num_rx_desc;
3534 if (!(rxr->rx_buffers =
3535 (struct igb_rx_buffer *) malloc(bsize,
3536 M_DEVBUF, M_NOWAIT | M_ZERO))) {
3537 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3543 ** The tag is made to accomodate the largest buffer size
3544 ** with packet split (hence the two segments, even though
3545 ** it may not always use this.
/* NULL parent tag: inherits the platform's root DMA tag */
3547 if ((error = bus_dma_tag_create(NULL, /* parent */
3548 PAGE_SIZE, 0, /* alignment, bounds */
3549 BUS_SPACE_MAXADDR, /* lowaddr */
3550 BUS_SPACE_MAXADDR, /* highaddr */
3551 NULL, NULL, /* filter, filterarg */
3552 MJUM16BYTES, /* maxsize */
3554 MJUMPAGESIZE, /* maxsegsize */
3556 NULL, /* lockfunc */
3557 NULL, /* lockfuncarg */
3559 device_printf(dev, "Unable to create RX DMA tag\n");
3563 /* Create the spare map (used by getbuf) */
3564 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3565 &rxr->rx_spare_map);
3568 "%s: bus_dmamap_create header spare failed: %d\n",
3573 for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3574 rxbuf = &rxr->rx_buffers[i];
3575 error = bus_dmamap_create(rxr->rxtag,
3576 BUS_DMA_NOWAIT, &rxbuf->map);
3578 device_printf(dev, "Unable to create RX DMA maps\n");
3586 /* Frees all, but can handle partial completion */
3587 igb_free_receive_structures(adapter);
3591 /*********************************************************************
3593  * Initialize a receive ring and its buffers.
3595  **********************************************************************/
/*
 * Resets one RX ring: zeroes descriptors, frees any stale mbufs,
 * replenishes every slot via igb_get_buf(), resets indices, and -- when
 * IFCAP_LRO is enabled -- initializes LRO and turns on header split.
 * On a replenish failure, buffers allocated so far are unwound.
 */
3597 igb_setup_receive_ring(struct rx_ring *rxr)
3599 struct adapter *adapter;
3602 struct igb_rx_buffer *rxbuf;
3603 struct lro_ctrl *lro = &rxr->lro;
3606 adapter = rxr->adapter;
/* Defaults off; both may be enabled below when LRO is configured */
3609 rxr->lro_enabled = FALSE;
3610 rxr->hdr_split = FALSE;
3612 /* Clear the ring contents */
3613 rsize = roundup2(adapter->num_rx_desc *
3614 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3615 bzero((void *)rxr->rx_base, rsize);
3618 ** Free current RX buffer structures and their mbufs
3620 for (int i = 0; i < adapter->num_rx_desc; i++) {
3621 rxbuf = &rxr->rx_buffers[i];
3622 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3623 BUS_DMASYNC_POSTREAD);
3624 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3625 if (rxbuf->m_head) {
/* Chain header and payload so one m_freem releases both */
3626 rxbuf->m_head->m_next = rxbuf->m_pack;
3627 m_freem(rxbuf->m_head);
3629 rxbuf->m_head = NULL;
3630 rxbuf->m_pack = NULL;
3633 /* Next replenish the ring */
3634 for (j = 0; j < adapter->num_rx_desc; j++) {
3635 if (igb_get_buf(rxr, j, IGB_CLEAN_BOTH) == ENOBUFS) {
3636 rxr->rx_buffers[j].m_head = NULL;
3637 rxr->rx_buffers[j].m_pack = NULL;
3638 rxr->rx_base[j].read.hdr_addr = 0;
3639 rxr->rx_base[j].read.pkt_addr = 0;
3644 /* Setup our descriptor indices */
3645 rxr->next_to_check = 0;
3646 rxr->last_cleaned = 0;
3648 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3649 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3652 ** Now set up the LRO interface, we
3653 ** also only do head split when LRO
3654 ** is enabled, since so often they
3655 ** are undesireable in similar setups.
3657 if (ifp->if_capenable & IFCAP_LRO) {
3658 int err = tcp_lro_init(lro);
3660 device_printf(dev,"LRO Initialization failed!\n");
3663 INIT_DEBUGOUT("RX LRO Initialized\n");
3664 rxr->lro_enabled = TRUE;
3665 rxr->hdr_split = TRUE;
3666 lro->ifp = adapter->ifp;
3672 * We need to clean up any buffers allocated
3673 * so far, 'j' is the failing index.
3675 for (int i = 0; i < j; i++) {
3676 rxbuf = &rxr->rx_buffers[i];
3677 if (rxbuf->m_head != NULL) {
3678 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3679 BUS_DMASYNC_POSTREAD);
3680 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3681 m_freem(rxbuf->m_head);
3682 rxbuf->m_head = NULL;
3688 /*********************************************************************
3690 * Initialize all receive rings.
3692 **********************************************************************/
/*
 * Set up every RX ring in turn via igb_setup_receive_ring(). On the
 * first failure, unwind the rings that completed (the failing ring has
 * already cleaned up after itself) and return the error.
 */
3694 igb_setup_receive_structures(struct adapter *adapter)
3696 struct rx_ring *rxr = adapter->rx_rings;
3699 for (i = 0; i < adapter->num_queues; i++, rxr++)
3700 if (igb_setup_receive_ring(rxr))
3706 * Free RX buffers allocated so far, we will only handle
3707 * the rings that completed, the failing case will have
3708 * cleaned up for itself. The value of 'i' will be the
3709 * failed ring so we must pre-decrement it.
3711 rxr = adapter->rx_rings;
/* NOTE(review): loop condition 'i > 0' skips ring 0 — the comment above
 * says only the failed ring should be excluded, so ring 0's buffers
 * appear to be leaked on failure; verify against upstream driver. */
3712 for (--i; i > 0; i--, rxr++) {
3713 for (j = 0; j < adapter->num_rx_desc; j++) {
3714 struct igb_rx_buffer *rxbuf;
3715 rxbuf = &rxr->rx_buffers[j];
3716 if (rxbuf->m_head != NULL) {
3717 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3718 BUS_DMASYNC_POSTREAD);
3719 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3720 m_freem(rxbuf->m_head);
3721 rxbuf->m_head = NULL;
3729 /*********************************************************************
3731 * Enable receive unit.
3733 **********************************************************************/
/*
 * Program the hardware receive unit: buffer sizing / header split
 * (SRRCTL), per-queue descriptor ring base/length (RDBA/RDLEN) and
 * thresholds (RXDCTL), RSS multiqueue hashing (RETA/RSSRK/MRQC),
 * checksum offload (RXCSUM), and finally RCTL to enable receives.
 */
3735 igb_initialize_receive_units(struct adapter *adapter)
3737 struct rx_ring *rxr = adapter->rx_rings;
3738 struct ifnet *ifp = adapter->ifp;
3739 u32 rctl, rxcsum, psize, srrctl = 0;
3741 INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3744 * Make sure receives are disabled while setting
3745 * up the descriptor ring
3747 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3748 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3751 ** Set up for header split
3753 if (rxr->hdr_split) {
3754 /* Use a standard mbuf for the header */
3755 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3756 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3758 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3761 ** Set up for jumbo frames
3763 if (ifp->if_mtu > ETHERMTU) {
/* jumbo: 4K packet buffers and long-packet enable */
3764 rctl |= E1000_RCTL_LPE;
3765 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3766 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3768 /* Set maximum packet len */
3769 psize = adapter->max_frame_size;
3770 /* are we on a vlan? */
3771 if (adapter->ifp->if_vlantrunk != NULL)
3772 psize += VLAN_TAG_SIZE;
3773 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
/* standard MTU: 2K packet buffers */
3775 rctl &= ~E1000_RCTL_LPE;
3776 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3777 rctl |= E1000_RCTL_SZ_2048;
3780 /* Setup the Base and Length of the Rx Descriptor Rings */
3781 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3782 u64 bus_addr = rxr->rxdma.dma_paddr;
/* NOTE(review): RDLEN is computed from sizeof(struct e1000_rx_desc),
 * the legacy descriptor, while the rings hold union e1000_adv_rx_desc;
 * confirm the two have the same size (both 16 bytes) upstream. */
3785 E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3786 adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3787 E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3788 (uint32_t)(bus_addr >> 32));
3789 E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3790 (uint32_t)bus_addr);
3791 E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3792 /* Enable this Queue */
3793 rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3794 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3795 rxdctl &= 0xFFF00000;
3796 rxdctl |= IGB_RX_PTHRESH;
3797 rxdctl |= IGB_RX_HTHRESH << 8;
3798 rxdctl |= IGB_RX_WTHRESH << 16;
3799 E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3803 ** Setup for RX MultiQueue
3805 rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3806 if (adapter->num_queues >1) {
3807 u32 random[10], mrqc, shift = 0;
/* seed the RSS hash key with random data */
3813 arc4rand(&random, sizeof(random), 0);
3814 if (adapter->hw.mac.type == e1000_82575)
3816 /* Warning FM follows */
/* populate the 128-entry redirection table round-robin over queues */
3817 for (int i = 0; i < 128; i++) {
3819 (i % adapter->num_queues) << shift;
3821 E1000_WRITE_REG(&adapter->hw,
3822 E1000_RETA(i >> 2), reta.dword);
3824 /* Now fill in hash table */
3825 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3826 for (int i = 0; i < 10; i++)
3827 E1000_WRITE_REG_ARRAY(&adapter->hw,
3828 E1000_RSSRK(0), i, random[i]);
3830 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3831 E1000_MRQC_RSS_FIELD_IPV4_TCP);
3832 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3833 E1000_MRQC_RSS_FIELD_IPV6_TCP);
3834 mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
3835 E1000_MRQC_RSS_FIELD_IPV6_UDP);
3836 mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3837 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3839 E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3842 ** NOTE: Receive Full-Packet Checksum Offload
3843 ** is mutually exclusive with Multiqueue. However
3844 ** this is not the same as TCP/IP checksums which
3847 rxcsum |= E1000_RXCSUM_PCSD;
3848 #if __FreeBSD_version >= 800000
3849 /* For SCTP Offload */
3850 if ((adapter->hw.mac.type == e1000_82576)
3851 && (ifp->if_capenable & IFCAP_RXCSUM))
3852 rxcsum |= E1000_RXCSUM_CRCOFL;
3856 if (ifp->if_capenable & IFCAP_RXCSUM) {
3857 rxcsum |= E1000_RXCSUM_IPPCSE;
3858 #if __FreeBSD_version >= 800000
3859 if (adapter->hw.mac.type == e1000_82576)
3860 rxcsum |= E1000_RXCSUM_CRCOFL;
3863 rxcsum &= ~E1000_RXCSUM_TUOFL;
3865 E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3867 /* Setup the Receive Control Register */
3868 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3869 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3870 E1000_RCTL_RDMTS_HALF |
3871 (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3873 /* Make sure VLAN Filters are off */
3874 rctl &= ~E1000_RCTL_VFE;
3875 /* Don't store bad packets */
3876 rctl &= ~E1000_RCTL_SBP;
3878 /* Enable Receives */
3879 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3882 * Setup the HW Rx Head and Tail Descriptor Pointers
3883 * - needs to be after enable
3885 for (int i = 0; i < adapter->num_queues; i++) {
3886 E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3887 E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3888 adapter->num_rx_desc - 1);
3893 /*********************************************************************
3895 * Free receive rings.
3897 **********************************************************************/
/*
 * Tear down all RX rings: free each ring's buffers, release its
 * descriptor DMA memory, then free the ring array itself.
 * (LRO teardown presumably occurs in the lines not visible here.)
 */
3899 igb_free_receive_structures(struct adapter *adapter)
3901 struct rx_ring *rxr = adapter->rx_rings;
3903 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3904 struct lro_ctrl *lro = &rxr->lro;
3905 igb_free_receive_buffers(rxr);
3907 igb_dma_free(adapter, &rxr->rxdma);
3910 free(adapter->rx_rings, M_DEVBUF);
3913 /*********************************************************************
3915 * Free receive ring data structures.
3917 **********************************************************************/
/*
 * Release one ring's per-packet resources: the spare DMA map, every
 * slot's mbuf + DMA map, the rx_buffers array, and finally the DMA tag.
 */
3919 igb_free_receive_buffers(struct rx_ring *rxr)
3921 struct adapter *adapter = rxr->adapter;
3922 struct igb_rx_buffer *rx_buffer;
3924 INIT_DEBUGOUT("free_receive_structures: begin");
3926 if (rxr->rx_spare_map) {
3927 bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3928 rxr->rx_spare_map = NULL;
3931 /* Cleanup any existing buffers */
3932 if (rxr->rx_buffers != NULL) {
3933 rx_buffer = &rxr->rx_buffers[0];
3934 for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3935 if (rx_buffer->m_head != NULL) {
3936 bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3937 BUS_DMASYNC_POSTREAD);
3938 bus_dmamap_unload(rxr->rxtag,
3940 m_freem(rx_buffer->m_head);
3941 rx_buffer->m_head = NULL;
3942 } else if (rx_buffer->map != NULL)
/* no mbuf attached — still unload any lingering mapping */
3943 bus_dmamap_unload(rxr->rxtag,
3945 if (rx_buffer->map != NULL) {
3946 bus_dmamap_destroy(rxr->rxtag,
3948 rx_buffer->map = NULL;
3953 if (rxr->rx_buffers != NULL) {
3954 free(rxr->rx_buffers, M_DEVBUF);
3955 rxr->rx_buffers = NULL;
3958 if (rxr->rxtag != NULL) {
3959 bus_dma_tag_destroy(rxr->rxtag);
3963 /*********************************************************************
3965 * This routine executes in interrupt context. It replenishes
3966 * the mbufs in the descriptor and sends data which has been
3967 * dma'ed into host memory to upper layer.
3969 * We loop at most count times if count is > 0, or until done if
3972 * Return TRUE if more to clean, FALSE otherwise
3973 *********************************************************************/
/*
 * RX completion routine: walk descriptors with DD set, reassemble
 * header-split and multi-descriptor frames into an mbuf chain, strip
 * the CRC, replenish the slot via igb_get_buf(), and hand completed
 * packets to LRO or ifp->if_input. Caller holds the RX lock (see the
 * note before the if_input call below).
 */
3975 igb_rxeof(struct rx_ring *rxr, int count)
3977 struct adapter *adapter = rxr->adapter;
3979 struct lro_ctrl *lro = &rxr->lro;
3980 struct lro_entry *queued;
3983 union e1000_adv_rx_desc *cur;
3988 i = rxr->next_to_check;
3989 cur = &rxr->rx_base[i];
3990 staterr = cur->wb.upper.status_error;
/* nothing ready: bail out early (DD = descriptor done) */
3992 if (!(staterr & E1000_RXD_STAT_DD)) {
3998 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3999 BUS_DMASYNC_POSTREAD);
4001 /* Main clean loop */
4002 while ((staterr & E1000_RXD_STAT_DD) &&
4004 (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4005 struct mbuf *sendmp, *mh, *mp;
4006 u16 hlen, plen, hdr, ptype, len_adj, vtag;
4008 u8 dopayload, accept_frame, eop;
4010 hlen = plen = len_adj = vtag = 0;
4011 sendmp = mh = mp = NULL;
4012 ptype = (u16)(cur->wb.lower.lo_dword.data >> 4);
4014 /* Sync the buffers */
4015 bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
4016 BUS_DMASYNC_POSTREAD);
4019 ** The way the hardware is configured to
4020 ** split, it will ONLY use the header buffer
4021 ** when header split is enabled, otherwise we
4022 ** get normal behavior, ie, both header and
4023 ** payload are DMA'd into the payload buffer.
4025 ** The fmp test is to catch the case where a
4026 ** packet spans multiple descriptors, in that
4027 ** case only the first header is valid.
4029 if ((rxr->hdr_split) && (rxr->fmp == NULL)){
4031 wb.lower.lo_dword.hs_rss.hdr_info);
4032 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4033 E1000_RXDADV_HDRBUFLEN_SHIFT;
4034 if (hlen > IGB_HDR_BUF)
4036 plen = le16toh(cur->wb.upper.length);
4037 /* Handle the header mbuf */
4038 mh = rxr->rx_buffers[i].m_head;
4040 dopayload = IGB_CLEAN_HEADER;
4042 ** Get the payload length, this
4043 ** could be zero if its a small
4047 mp = rxr->rx_buffers[i].m_pack;
4050 mp->m_flags &= ~M_PKTHDR;
4052 mh->m_flags |= M_PKTHDR;
4053 dopayload = IGB_CLEAN_BOTH;
4054 rxr->rx_split_packets++;
4055 } else { /* small packets */
4056 mh->m_flags &= ~M_PKTHDR;
4061 ** Either no header split, or a
4062 ** secondary piece of a fragmented
4065 mh = rxr->rx_buffers[i].m_pack;
4066 mh->m_flags |= M_PKTHDR;
4067 mh->m_len = le16toh(cur->wb.upper.length);
4068 dopayload = IGB_CLEAN_PAYLOAD;
4071 if (staterr & E1000_RXD_STAT_EOP) {
4075 ** Strip CRC and account for frag
4078 if (mp->m_len < ETHER_CRC_LEN) {
4079 /* a frag, how much is left? */
/* CRC straddles descriptors: remember how much to trim
 * from the previous mbuf (applied via len_adj below) */
4080 len_adj = ETHER_CRC_LEN - mp->m_len;
4083 mp->m_len -= ETHER_CRC_LEN;
4084 } else { /* not split */
4085 if (mh->m_len < ETHER_CRC_LEN) {
4086 len_adj = ETHER_CRC_LEN - mh->m_len;
4089 mh->m_len -= ETHER_CRC_LEN;
4094 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
/* Dormant reference code, kept by the author for a future
 * FreeBSD port of RX hardware timestamping: */
4097 This linux code needs to be converted to work here
4098 -----------------------------------------------------
4099 if (unlikely(staterr & E1000_RXD_STAT_TS)) {
4102 // Create an mtag and set it up
4103 struct skb_shared_hwtstamps *shhwtstamps =
4106 rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
4107 "igb: no RX time stamp available for time stamped packet");
4108 regval = rd32(E1000_RXSTMPL);
4109 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4110 // Do time conversion from the register
4111 ns = timecounter_cyc2time(&adapter->clock, regval);
4112 clocksync_update(&adapter->sync, ns);
4113 memset(shhwtstamps, 0, sizeof(*shhwtstamps));
4114 shhwtstamps->hwtstamp = ns_to_ktime(ns);
4115 shhwtstamps->syststamp =
4116 clocksync_hw2sys(&adapter->sync, ns);
4121 ** get_buf will overwrite the writeback
4122 ** descriptor so save the VLAN tag now.
4124 vtag = le16toh(cur->wb.upper.vlan);
4125 if (igb_get_buf(rxr, i, dopayload) != 0) {
4129 /* Initial frame - setup */
4130 if (rxr->fmp == NULL) {
4131 mh->m_flags |= M_PKTHDR;
4132 mh->m_pkthdr.len = mh->m_len;
4133 rxr->fmp = mh; /* Store the first mbuf */
4135 if (mp) { /* Add payload if split */
4136 mh->m_pkthdr.len += mp->m_len;
4137 rxr->lmp = mh->m_next;
4140 /* Chain mbuf's together */
4141 mh->m_flags &= ~M_PKTHDR;
4142 rxr->lmp->m_next = mh;
4143 rxr->lmp = rxr->lmp->m_next;
4144 rxr->fmp->m_pkthdr.len += mh->m_len;
4145 /* Adjust for CRC frag */
4147 rxr->lmp->m_len -= len_adj;
4148 rxr->fmp->m_pkthdr.len -= len_adj;
/* End of packet: finalize metadata and stats, then hand off */
4153 bool sctp = ((ptype & 0x40) != 0);
4154 rxr->fmp->m_pkthdr.rcvif = ifp;
4157 /* capture data for AIM */
4158 rxr->bytes += rxr->fmp->m_pkthdr.len;
4159 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4161 igb_rx_checksum(staterr, rxr->fmp, sctp);
4162 if (staterr & E1000_RXD_STAT_VP) {
4163 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4164 rxr->fmp->m_flags |= M_VLANTAG;
4166 #if __FreeBSD_version >= 800000
4167 rxr->fmp->m_pkthdr.flowid = curcpu;
4168 rxr->fmp->m_flags |= M_FLOWID;
/* Error/drop path: igb_get_buf failed or frame rejected —
 * reuse the already-loaded DMA maps and recycle the mbufs */
4177 /* Reuse loaded DMA map and just update mbuf chain */
4179 mh = rxr->rx_buffers[i].m_head;
4183 mp = rxr->rx_buffers[i].m_pack;
4184 mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4185 mp->m_data = mp->m_ext.ext_buf;
4187 if (adapter->max_frame_size <=
4188 (MCLBYTES - ETHER_ALIGN))
4189 m_adj(mp, ETHER_ALIGN);
4190 if (rxr->fmp != NULL) {
4191 /* handles the whole chain */
4199 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4200 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4202 rxr->last_cleaned = i; /* For updating tail */
4204 /* Advance our pointers to the next descriptor. */
4205 if (++i == adapter->num_rx_desc)
4209 ** Note that we hold the RX lock thru
4210 ** the following call so this ring's
4211 ** next_to_check is not gonna change.
4213 if (sendmp != NULL) {
4215 ** Send to the stack if:
4216 ** - LRO not enabled, or
4217 ** - no LRO resources, or
4218 ** - lro enqueue fails
4220 if ((!rxr->lro_enabled) ||
4221 ((!lro->lro_cnt) || (tcp_lro_rx(lro, sendmp, 0))))
4222 (*ifp->if_input)(ifp, sendmp);
4225 /* Get the next descriptor */
4226 cur = &rxr->rx_base[i];
4227 staterr = cur->wb.upper.status_error;
4229 rxr->next_to_check = i;
4231 /* Advance the E1000's Receive Queue #0 "Tail Pointer". */
4232 E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4235 * Flush any outstanding LRO work
4237 while (!SLIST_EMPTY(&lro->lro_active)) {
4238 queued = SLIST_FIRST(&lro->lro_active);
4239 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4240 tcp_lro_flush(lro, queued);
4246 ** We still have cleaning to do?
4247 ** Schedule another interrupt if so.
4249 if (staterr & E1000_RXD_STAT_DD) {
4250 E1000_WRITE_REG(&adapter->hw, E1000_EICS, rxr->eims);
4258 /*********************************************************************
4260 * Verify that the hardware indicated that the checksum is valid.
4261 * Inform the stack about the status of checksum so that stack
4262 * doesn't spend time verifying the checksum.
4264 *********************************************************************/
/*
 * Translate RX descriptor status/error bits into mbuf csum_flags:
 * staterr low 16 bits = status, bits 24-31 = error bits. 'sctp'
 * selects CSUM_SCTP_VALID instead of the TCP/UDP pseudo-header flags.
 */
4266 igb_rx_checksum(u32 staterr, struct mbuf *mp, bool sctp)
4268 u16 status = (u16)staterr;
4269 u8 errors = (u8) (staterr >> 24);
4271 /* Ignore Checksum bit is set */
4272 if (status & E1000_RXD_STAT_IXSM) {
/* hardware says don't trust its checksum result — report nothing */
4273 mp->m_pkthdr.csum_flags = 0;
4277 if (status & E1000_RXD_STAT_IPCS) {
4279 if (!(errors & E1000_RXD_ERR_IPE)) {
4280 /* IP Checksum Good */
4281 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4282 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4284 mp->m_pkthdr.csum_flags = 0;
4287 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4288 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4289 #if __FreeBSD_version >= 800000
4290 if (sctp) /* reassign */
4291 type = CSUM_SCTP_VALID;
4294 if (!(errors & E1000_RXD_ERR_TCPE)) {
4295 mp->m_pkthdr.csum_flags |= type;
/* stack expects 0xffff in csum_data for a fully-verified checksum */
4297 mp->m_pkthdr.csum_data = htons(0xffff);
4304 * This routine is run via a vlan
/*
 * VLAN registration event handler: record vtag in the shadow VFTA
 * bitmap and re-init the adapter so the hardware filter table is
 * reloaded. Ignores events for other interfaces and invalid tags.
 */
4308 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4310 struct adapter *adapter = ifp->if_softc;
4313 if (ifp->if_softc != arg) /* Not our event */
4316 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
/* VFTA is a bitmap: word index = vtag/32, bit = vtag%32 (bit taken
 * from 'vtag & 0x1F' in lines not visible here — TODO confirm) */
4319 index = (vtag >> 5) & 0x7F;
4321 igb_shadow_vfta[index] |= (1 << bit);
4322 ++adapter->num_vlans;
4323 /* Re-init to load the changes */
4328 * This routine is run via a vlan
/*
 * VLAN unregistration event handler: clear vtag's bit in the shadow
 * VFTA and re-init so the hardware filter table is reloaded.
 * Mirror image of igb_register_vlan().
 */
4332 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4334 struct adapter *adapter = ifp->if_softc;
4337 if (ifp->if_softc != arg)
4340 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4343 index = (vtag >> 5) & 0x7F;
4345 igb_shadow_vfta[index] &= ~(1 << bit);
4346 --adapter->num_vlans;
4347 /* Re-init to load the changes */
/*
 * Reprogram hardware VLAN state after a soft reset: repopulate the
 * VFTA from the shadow copy, enable VLAN tag stripping (CTRL.VME) and
 * the VLAN filter table (RCTL.VFE), and bump RLPML for the tag bytes.
 * No-op when no VLANs are registered.
 */
4352 igb_setup_vlan_hw_support(struct adapter *adapter)
4354 struct e1000_hw *hw = &adapter->hw;
4358 ** We get here thru init_locked, meaning
4359 ** a soft reset, this has already cleared
4360 ** the VFTA and other state, so if there
4361 ** have been no vlan's registered do nothing.
4363 if (adapter->num_vlans == 0)
4367 ** A soft reset zero's out the VFTA, so
4368 ** we need to repopulate it now.
4370 for (int i = 0; i < IGB_VFTA_SIZE; i++)
4371 if (igb_shadow_vfta[i] != 0)
4372 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4373 i, igb_shadow_vfta[i]);
/* VME = VLAN Mode Enable (hardware tag stripping/insertion) */
4375 reg = E1000_READ_REG(hw, E1000_CTRL);
4376 reg |= E1000_CTRL_VME;
4377 E1000_WRITE_REG(hw, E1000_CTRL, reg);
4379 /* Enable the Filter Table */
4380 reg = E1000_READ_REG(hw, E1000_RCTL);
4381 reg &= ~E1000_RCTL_CFIEN;
4382 reg |= E1000_RCTL_VFE;
4383 E1000_WRITE_REG(hw, E1000_RCTL, reg);
4385 /* Update the frame size */
4386 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4387 adapter->max_frame_size + VLAN_TAG_SIZE);
/*
 * Unmask interrupts. MSI-X path programs the extended auto-clear /
 * auto-mask / set registers (EIAC/EIAM/EIMS) with the per-queue mask;
 * otherwise only the legacy IMS write (in the lines not shown) is
 * used. Ends with a posted-write flush.
 */
4391 igb_enable_intr(struct adapter *adapter)
4393 /* With RSS set up what to auto clear */
4394 if (adapter->msix_mem) {
4395 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4396 adapter->eims_mask);
4397 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4398 adapter->eims_mask);
4399 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4400 adapter->eims_mask);
4401 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4404 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4407 E1000_WRITE_FLUSH(&adapter->hw);
/*
 * Mask all interrupts: clear the MSI-X extended masks (EIMC/EIAC)
 * when MSI-X is active, always clear the legacy mask (IMC), then
 * flush so the writes take effect before returning.
 */
4413 igb_disable_intr(struct adapter *adapter)
4415 if (adapter->msix_mem) {
4416 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4417 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4419 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4420 E1000_WRITE_FLUSH(&adapter->hw);
4425 * Bit of a misnomer, what this really means is
4426 * to enable OS management of the system... aka
4427 * to disable special hardware management features
/*
 * Hand network management traffic to the OS: stop the firmware from
 * intercepting ARP, and route management packets (ports 623/664) to
 * the host instead. Only applies when the part has a management engine.
 */
4430 igb_init_manageability(struct adapter *adapter)
4432 if (adapter->has_manage) {
4433 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4434 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4436 /* disable hardware interception of ARP */
4437 manc &= ~(E1000_MANC_ARP_EN);
4439 /* enable receiving management packets to the host */
4440 manc |= E1000_MANC_EN_MNG2HOST;
4441 manc2h |= 1 << 5; /* Mng Port 623 */
4442 manc2h |= 1 << 6; /* Mng Port 664 */
4443 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4444 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4449 * Give control back to hardware management
4450 * controller if there is one.
/*
 * Inverse of igb_init_manageability(): re-enable firmware ARP
 * interception and stop forwarding management packets to the host.
 * Called on detach/shutdown paths (not visible here — TODO confirm).
 */
4453 igb_release_manageability(struct adapter *adapter)
4455 if (adapter->has_manage) {
4456 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4458 /* re-enable hardware interception of ARP */
4459 manc |= E1000_MANC_ARP_EN;
4460 manc &= ~E1000_MANC_EN_MNG2HOST;
4462 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4467 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4468 * For ASF and Pass Through versions of f/w this means that
4469 * the driver is loaded.
/* Read-modify-write CTRL_EXT to assert DRV_LOAD (driver owns the NIC). */
4473 igb_get_hw_control(struct adapter *adapter)
4477 /* Let firmware know the driver has taken over */
4478 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4479 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4480 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4484 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4485 * For ASF and Pass Through versions of f/w this means that the
4486 * driver is no longer loaded.
/* Read-modify-write CTRL_EXT to clear DRV_LOAD (firmware owns the NIC). */
4490 igb_release_hw_control(struct adapter *adapter)
4494 /* Let firmware taken over control of h/w */
4495 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4496 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4497 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
/*
 * Reject multicast/broadcast (low bit of first octet set) and the
 * all-zero address; the return statements fall outside the visible
 * lines. Note zero_addr is 6 bytes, matching ETHER_ADDR_LEN.
 */
4501 igb_is_valid_ether_addr(uint8_t *addr)
4503 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4505 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4514 * Enable PCI Wake On Lan capability
/*
 * Walk to the PCI power-management capability and set the PME status
 * + enable bits so the device can wake the system. Bails out (in a
 * line not shown) if the capability ID does not match PCIY_PMG.
 * NOTE(review): only the first capability is examined here; the full
 * capability-list walk, if any, is not visible.
 */
4517 igb_enable_wakeup(device_t dev)
4522 /* First find the capabilities pointer*/
4523 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4524 /* Read the PM Capabilities */
4525 id = pci_read_config(dev, cap, 1);
4526 if (id != PCIY_PMG) /* Something wrong */
4528 /* OK, we have the power capabilities, so
4529 now get the status register */
4530 cap += PCIR_POWER_STATUS;
4531 status = pci_read_config(dev, cap, 2);
4532 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4533 pci_write_config(dev, cap, status, 2);
4538 /**********************************************************************
4540 * Update the board statistics counters.
4542 **********************************************************************/
/*
 * Accumulate the hardware's clear-on-read statistics registers into
 * adapter->stats, then derive the ifnet error/collision counters.
 * Symbol/sequence errors are only meaningful on copper or when link
 * is up, hence the guard below.
 */
4544 igb_update_stats_counters(struct adapter *adapter)
4548 if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4549 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4550 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4551 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4553 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4554 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4555 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4556 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4558 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4559 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4560 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4561 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4562 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4563 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4564 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4565 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4566 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4567 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4568 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4569 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4570 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4571 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4572 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4573 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4574 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4575 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4576 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4577 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4579 /* For the 64-bit byte counters the low dword must be read first. */
4580 /* Both registers clear on the read of the high dword */
4582 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4583 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4585 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4586 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4587 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4588 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4589 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4591 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4592 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4594 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4595 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4596 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4597 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4598 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4599 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4600 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4601 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4602 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4603 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4605 adapter->stats.algnerrc +=
4606 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4607 adapter->stats.rxerrc +=
4608 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4609 adapter->stats.tncrs +=
4610 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4611 adapter->stats.cexterr +=
4612 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4613 adapter->stats.tsctc +=
4614 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4615 adapter->stats.tsctfc +=
4616 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
/* Fold the accumulated counters into the ifnet statistics */
4619 ifp->if_collisions = adapter->stats.colc;
4622 ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4623 adapter->stats.crcerrs + adapter->stats.algnerrc +
4624 adapter->stats.ruc + adapter->stats.roc +
4625 adapter->stats.mpc + adapter->stats.cexterr;
4628 ifp->if_oerrors = adapter->stats.ecol +
4629 adapter->stats.latecol + adapter->watchdog_events;
4633 /**********************************************************************
4635 * This routine is called only when igb_display_debug_stats is enabled.
4636 * This routine provides a way to take a look at important statistics
4637 * maintained by the driver and hardware.
4639 **********************************************************************/
/*
 * Dump driver/hardware debug state to the console: control registers,
 * packet-buffer split, flow-control watermarks, then per-TX-queue and
 * per-RX-queue head/tail positions and soft counters. Triggered from
 * the igb_sysctl_debug_info sysctl handler below.
 */
4641 igb_print_debug_info(struct adapter *adapter)
4643 device_t dev = adapter->dev;
4644 struct rx_ring *rxr = adapter->rx_rings;
4645 struct tx_ring *txr = adapter->tx_rings;
4646 uint8_t *hw_addr = adapter->hw.hw_addr;
4648 device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4649 device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4650 E1000_READ_REG(&adapter->hw, E1000_CTRL),
4651 E1000_READ_REG(&adapter->hw, E1000_RCTL));
4653 #if (DEBUG_HW > 0) /* Dont output these errors normally */
4654 device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4655 E1000_READ_REG(&adapter->hw, E1000_IMS),
4656 E1000_READ_REG(&adapter->hw, E1000_EIMS));
4659 device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4660 ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
4661 (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4662 device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4663 adapter->hw.fc.high_water,
4664 adapter->hw.fc.low_water);
4666 for (int i = 0; i < adapter->num_queues; i++, txr++) {
4667 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4668 E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4669 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4670 device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4671 txr->me, (long long)txr->no_desc_avail);
4672 device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me,
4673 (long long)txr->tx_irq);
4674 device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
4675 (long long)txr->tx_packets);
4678 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4679 struct lro_ctrl *lro = &rxr->lro;
4680 device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4681 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4682 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4683 device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4684 (long long)rxr->rx_packets);
4685 device_printf(dev, "RX(%d) Split Packets = %lld\n", rxr->me,
4686 (long long)rxr->rx_split_packets);
4687 device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
4688 (long long)rxr->rx_bytes);
4689 device_printf(dev, "RX(%d) MSIX IRQ Handled = %lld\n", rxr->me,
4690 (long long)rxr->rx_irq);
4691 device_printf(dev,"RX(%d) LRO Queued= %d\n",
4692 rxr->me, lro->lro_queued);
4693 device_printf(dev,"RX(%d) LRO Flushed= %d\n",
4694 rxr->me, lro->lro_flushed);
4697 device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4699 device_printf(dev, "Mbuf defrag failed = %ld\n",
4700 adapter->mbuf_defrag_failed);
4701 device_printf(dev, "Std mbuf header failed = %ld\n",
4702 adapter->mbuf_header_failed);
4703 device_printf(dev, "Std mbuf packet failed = %ld\n",
4704 adapter->mbuf_packet_failed);
4705 device_printf(dev, "Driver dropped packets = %ld\n",
4706 adapter->dropped_pkts);
4707 device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4708 adapter->no_tx_dma_setup);
/*
 * Print the accumulated hardware statistics (adapter->stats, filled
 * by igb_update_stats_counters) to the console. Triggered from the
 * igb_sysctl_stats sysctl handler below.
 */
4712 igb_print_hw_stats(struct adapter *adapter)
4714 device_t dev = adapter->dev;
4716 device_printf(dev, "Excessive collisions = %lld\n",
4717 (long long)adapter->stats.ecol);
4718 #if (DEBUG_HW > 0) /* Dont output these errors normally */
4719 device_printf(dev, "Symbol errors = %lld\n",
4720 (long long)adapter->stats.symerrs);
4722 device_printf(dev, "Sequence errors = %lld\n",
4723 (long long)adapter->stats.sec);
4724 device_printf(dev, "Defer count = %lld\n",
4725 (long long)adapter->stats.dc);
4726 device_printf(dev, "Missed Packets = %lld\n",
4727 (long long)adapter->stats.mpc);
4728 device_printf(dev, "Receive No Buffers = %lld\n",
4729 (long long)adapter->stats.rnbc);
4730 /* RLEC is inaccurate on some hardware, calculate our own. */
4731 device_printf(dev, "Receive Length Errors = %lld\n",
4732 ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4733 device_printf(dev, "Receive errors = %lld\n",
4734 (long long)adapter->stats.rxerrc);
4735 device_printf(dev, "Crc errors = %lld\n",
4736 (long long)adapter->stats.crcerrs);
4737 device_printf(dev, "Alignment errors = %lld\n",
4738 (long long)adapter->stats.algnerrc);
4739 /* On 82575 these are collision counts */
4740 device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4741 (long long)adapter->stats.cexterr);
4742 device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4743 device_printf(dev, "watchdog timeouts = %ld\n",
4744 adapter->watchdog_events);
4745 device_printf(dev, "XON Rcvd = %lld\n",
4746 (long long)adapter->stats.xonrxc);
4747 device_printf(dev, "XON Xmtd = %lld\n",
4748 (long long)adapter->stats.xontxc);
4749 device_printf(dev, "XOFF Rcvd = %lld\n",
4750 (long long)adapter->stats.xoffrxc);
4751 device_printf(dev, "XOFF Xmtd = %lld\n",
4752 (long long)adapter->stats.xofftxc);
4753 device_printf(dev, "Good Packets Rcvd = %lld\n",
4754 (long long)adapter->stats.gprc);
4755 device_printf(dev, "Good Packets Xmtd = %lld\n",
4756 (long long)adapter->stats.gptc);
4757 device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4758 (long long)adapter->stats.tsctc);
4759 device_printf(dev, "TSO Contexts Failed = %lld\n",
4760 (long long)adapter->stats.tsctfc);
4763 /**********************************************************************
4765 * This routine provides a way to dump out the adapter eeprom,
4766 * often a useful debug/service tool. This only dumps the first
4767 * 32 words, stuff that matters is in that extent.
4769 **********************************************************************/
/*
 * Hex-dump the first 32 16-bit NVM words, 8 words per line with a
 * leading offset column. 'row' is presumably incremented in the lines
 * not visible here — TODO confirm.
 */
4771 igb_print_nvm_info(struct adapter *adapter)
4776 /* Its a bit crude, but it gets the job done */
4777 printf("\nInterface EEPROM Dump:\n");
4778 printf("Offset\n0x0000 ");
4779 for (i = 0, j = 0; i < 32; i++, j++) {
4780 if (j == 8) { /* Make the offset block */
4782 printf("\n0x00%x0 ",row);
4784 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4785 printf("%04x ", eeprom_data);
/*
 * Sysctl handler: writing 1 dumps driver debug info, another value
 * (the comment below suggests a second trigger value) dumps the first
 * 32 EEPROM words. Reads are a no-op (handled by the early return on
 * !req->newptr).
 */
4791 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4793 struct adapter *adapter;
4798 error = sysctl_handle_int(oidp, &result, 0, req);
4800 if (error || !req->newptr)
4804 adapter = (struct adapter *)arg1;
4805 igb_print_debug_info(adapter);
4808 * This value will cause a hex dump of the
4809 * first 32 16-bit words of the EEPROM to
4813 adapter = (struct adapter *)arg1;
4814 igb_print_nvm_info(adapter);
4822 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4824 struct adapter *adapter;
4829 error = sysctl_handle_int(oidp, &result, 0, req);
4831 if (error || !req->newptr)
4835 adapter = (struct adapter *)arg1;
4836 igb_print_hw_stats(adapter);
4843 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4844 const char *description, int *limit, int value)
4847 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4848 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4849 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4854 ** igb_hwtstamp_ioctl - control hardware time stamping
4856 ** Outgoing time stamping can be enabled and disabled. Play nice and
4857 ** disable it when requested, although it shouldn't cause any overhead
4858 ** when no packet needs it. At most one packet in the queue may be
4859 ** marked for time stamping, otherwise it would be impossible to tell
4860 ** for sure to which packet the hardware time stamp belongs.
4862 ** Incoming time stamping has to be configured via the hardware
4863 ** filters. Not all combinations are supported, in particular event
4864 ** type has to be specified. Matching the kind of event packet is
4865 ** not supported, with the exception of "all V2 events regardless of
/*
 * igb_hwtstamp_ioctl - configure IEEE 1588 (PTP) hardware time stamping
 *
 * Decodes the hwtstamp_ctrl request carried in ifr->ifr_data, then
 * programs the TSYNC TX/RX control registers and the L2 ethertype /
 * L4 UDP-port filters so the hardware time stamps the requested
 * packet classes.  The accepted configuration is stored back into
 * adapter->hwtstamp_ctrl.
 *
 * NOTE(review): this listing is lossy -- each line carries its
 * original source line number, and intervening lines (braces,
 * `break` statements, default cases, the is_l2/is_l4 assignments,
 * and the return path) are missing between the numbered lines.
 * Restore the full text from the original driver before compiling.
 */
4870 igb_hwtstamp_ioctl(struct adapter *adapter, struct ifreq *ifr)
4872 struct e1000_hw *hw = &adapter->hw;
4873 struct hwtstamp_ctrl *config;
/* Default both directions to enabled; the switches below may clear them. */
4874 u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4875 u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
4876 u32 tsync_rx_ctl_type = 0;
4877 u32 tsync_rx_cfg = 0;
4880 u16 port = 319; /* PTP */
4883 config = (struct hwtstamp_ctrl *) ifr->ifr_data;
4885 /* reserved for future extensions */
/* Map the requested TX time stamping mode onto the TSYNCTXCTL enable bit. */
4889 switch (config->tx_type) {
4890 case HWTSTAMP_TX_OFF:
4891 tsync_tx_ctl_bit = 0;
4893 case HWTSTAMP_TX_ON:
4894 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
/* Map the requested RX filter onto a TSYNCRXCTL type and TSYNCRXCFG value. */
4900 switch (config->rx_filter) {
4901 case HWTSTAMP_FILTER_NONE:
4902 tsync_rx_ctl_bit = 0;
4904 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
4905 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
4906 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
4907 case HWTSTAMP_FILTER_ALL:
4909 * register TSYNCRXCFG must be set, therefore it is not
4910 * possible to time stamp both Sync and Delay_Req messages
4911 * => fall back to time stamping all packets
4913 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
4914 config->rx_filter = HWTSTAMP_FILTER_ALL;
4916 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
4917 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4918 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
4921 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
4922 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4923 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
4926 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
4927 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
4928 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4929 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
/* Hardware cannot report exactly this class; advertise FILTER_SOME back. */
4932 config->rx_filter = HWTSTAMP_FILTER_SOME;
4934 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
4935 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
4936 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4937 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
4940 config->rx_filter = HWTSTAMP_FILTER_SOME;
4942 case HWTSTAMP_FILTER_PTP_V2_EVENT:
4943 case HWTSTAMP_FILTER_PTP_V2_SYNC:
4944 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
4945 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
4946 config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
4953 /* enable/disable TX */
/* Read-modify-write: only touch the ENABLED bit, preserve the rest. */
4954 regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
4955 regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
4956 E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
4958 /* enable/disable RX, define which PTP packets are time stamped */
4959 regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
4960 regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
/* 0xE masks the TYPE field of TSYNCRXCTL before inserting the new type. */
4961 regval = (regval & ~0xE) | tsync_rx_ctl_type;
4962 E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
4963 E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
4966 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
4967 * (Ethertype to filter on)
4968 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
4969 * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
/* NOTE(review): is_l2/is_l4 are set in lines missing from this listing. */
4971 E1000_WRITE_REG(hw, E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
4973 /* L4 Queue Filter[0]: only filter by source and destination port */
4974 E1000_WRITE_REG(hw, E1000_SPQF0, htons(port));
4975 E1000_WRITE_REG(hw, E1000_IMIREXT(0), is_l4 ?
4976 ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
4977 E1000_WRITE_REG(hw, E1000_IMIR(0), is_l4 ?
4979 | (0<<16) /* immediate interrupt disabled */
4980 | 0 /* (1<<17) bit cleared: do not bypass
4981 destination port check */)
4983 E1000_WRITE_REG(hw, E1000_FTQF0, is_l4 ?
4985 | (1<<15) /* VF not compared */
4986 | (1<<27) /* Enable Timestamping */
4987 | (7<<28) /* only source port filter enabled,
4988 source/target address and protocol
4990 : ((1<<15) | (15<<28) /* all mask bits set = filter not
/* Remember the accepted configuration for later HWTSTAMP queries. */
4995 adapter->hwtstamp_ctrl = config;
4997 /* clear TX/RX time stamp registers, just to be sure */
/* Reading the high halves discards any stale latched time stamps. */
4998 regval = E1000_READ_REG(hw, E1000_TXSTMPH);
4999 regval = E1000_READ_REG(hw, E1000_RXSTMPH);
5005 ** igb_read_clock - read raw cycle counter (to be used by time counter)
5007 static cycle_t igb_read_clock(const struct cyclecounter *tc)
5009 struct igb_adapter *adapter =
5010 container_of(tc, struct igb_adapter, cycles);
5011 struct e1000_hw *hw = &adapter->hw;
5014 stamp = E1000_READ_REG(hw, E1000_SYSTIML);
5015 stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIMH) << 32ULL;
5020 #endif /* IGB_IEEE1588 */