/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include "if_igb.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "2.5.3-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load the driver on.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, 0, 0, 0},
        /* required last entry */
        {0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static uint64_t igb_get_counter(if_t, ift_counter);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static int      igb_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      igb_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(igb, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");

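/*
 * Usage note (illustrative, not in the original sources): because these
 * are CTLFLAG_RDTUN tunables, they can be set at boot from
 * /boot/loader.conf, e.g.:
 *
 *     hw.igb.rxd="2048"
 *     hw.igb.txd="2048"
 *
 * igb_attach() validates the values against IGB_MIN_TXD/IGB_MAX_TXD (and
 * the RX equivalents) and falls back to the defaults if out of range.
 */
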
/*
** AIM: Adaptive Interrupt Moderation.
** The interrupt rate is varied over time
** based on the traffic seen on that
** interrupt vector.
*/
static int igb_enable_aim = TRUE;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

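/*
 * Usage note (illustrative): CTLFLAG_RWTUN means this knob can be set both
 * as a boot-time tunable and at runtime, e.g. "sysctl hw.igb.enable_aim=0".
 */
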
/*
 * MSI-X should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be DMA'd into a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf, using no cluster at all. It's a very
** workload-dependent feature.
*/
static int igb_header_split = FALSE;
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and the maximum supported
** MSI-X messages if left at 0.
*/
static int igb_num_queues = 0;
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

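/*
 * Usage note (illustrative): to force four queues rather than
 * autoconfiguring from the CPU count, set hw.igb.num_queues="4"
 * in /boot/loader.conf.
 */
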
/*
** Global variable to store the last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments
** when a queue is bound to a CPU.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int igb_tx_process_limit = -1;
SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &igb_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines whether the driver should be loaded on
 *  an adapter based on its PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[256];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == 0)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTLs */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctls for limiting the amount of work done in the taskqueues */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        igb_set_sysctl_value(adapter, "tx_processing_limit",
            "max number of tx packets to process",
            &adapter->tx_process_limit, igb_tx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  They
         * must not exceed the hardware maximum and must be a multiple
         * of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard Ethernet-sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats = malloc(sizeof(struct e1000_vf_stats),
                    M_DEVBUF, M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats = malloc(sizeof(struct e1000_hw_stats),
                    M_DEVBUF, M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                        if (adapter->hw.mac.type == e1000_i354)
                                e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                        else
                                e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
                }
        }

        /*
        ** Start from a known state: this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */
                return (error);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}

#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if the number of free TX descriptors is low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack; it always uses tx[0] and spins for its lock.
 * It should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;
#ifdef  RSS
        uint32_t                bucket_id;
#endif

        /*
         * Select the transmit queue.  When doing RSS, map the flow to
         * the same outbound queue that the incoming flow would be
         * mapped to.  If everything is set up correctly, it should
         * match the bucket of the CPU we are currently running on.
         */
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef  RSS
                if (rss_hash2bucket(m->m_pkthdr.flowid,
                    M_HASHTYPE_GET(m), &bucket_id) == 0) {
                        /* XXX TODO: spit out something if bucket_id > num_queues? */
                        i = bucket_id % adapter->num_queues;
                } else {
#endif
                        i = m->m_pkthdr.flowid % adapter->num_queues;
#ifdef  RSS
                }
#endif
        } else {
                i = curcpu % adapter->num_queues;
        }
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IGB_TX_TRYLOCK(txr)) {
                igb_mq_start_locked(ifp, txr);
                IGB_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (0);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left; it may not be
                                 * the same mbuf, since the transmit
                                 * function may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                if (next->m_flags & M_MCAST && adapter->vf_ifp)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
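                /*
                 * 9234 bytes = a 9216-byte jumbo MTU plus the Ethernet
                 * header (14) and CRC (4), so MTUs above 9216 are
                 * rejected below.
                 */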
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                        igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
                /* FALLTHROUGH */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
#if __FreeBSD_version >= 1000000
                /* HW cannot turn these on/off separately */
                if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
                        ifp->if_capenable ^= IFCAP_RXCSUM;
                        ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
                        reinit = 1;
                }
                if (mask & IFCAP_TXCSUM) {
                        ifp->if_capenable ^= IFCAP_TXCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TXCSUM_IPV6) {
                        ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
                        reinit = 1;
                }
#else
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
#endif
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO6) {
                        ifp->if_capenable ^= IFCAP_TSO6;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure, and it
 *  is also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest MAC address; the user may have set a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
#if __FreeBSD_version >= 1000000
                ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_IP_SCTP;
#else
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
#endif
        }

#if __FreeBSD_version >= 1000000
        if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
                ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_IP6_SCTP;
        }
#endif
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;

        /* Clear bad data from Rx FIFOs */
        e1000_rx_fifo_flush_82575(&adapter->hw);

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packet-split RX.
        */
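        /*
         * For reference: MCLBYTES is a standard 2K cluster, MJUMPAGESIZE
         * is a PAGE_SIZE (typically 4K) cluster, and MJUM9BYTES is a 9K
         * jumbo cluster.
         */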
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* This clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling; make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                if (adapter->hw.mac.type == e1000_i354)
                        e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                else
                        e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
        }
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}

1394 static void
1395 igb_handle_que(void *context, int pending)
1396 {
1397         struct igb_queue *que = context;
1398         struct adapter *adapter = que->adapter;
1399         struct tx_ring *txr = que->txr;
1400         struct ifnet    *ifp = adapter->ifp;
1401
1402         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1403                 bool    more;
1404
1405                 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1406
1407                 IGB_TX_LOCK(txr);
1408                 igb_txeof(txr);
1409 #ifndef IGB_LEGACY_TX
1410                 /* Process the stack queue only if not depleted */
1411                 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1412                     !drbr_empty(ifp, txr->br))
1413                         igb_mq_start_locked(ifp, txr);
1414 #else
1415                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1416                         igb_start_locked(txr, ifp);
1417 #endif
1418                 IGB_TX_UNLOCK(txr);
1419                 /* Do we need another? */
1420                 if (more) {
1421                         taskqueue_enqueue(que->tq, &que->que_task);
1422                         return;
1423                 }
1424         }
1425
1426 #ifdef DEVICE_POLLING
1427         if (ifp->if_capenable & IFCAP_POLLING)
1428                 return;
1429 #endif
1430         /* Reenable this interrupt */
1431         if (que->eims)
1432                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1433         else
1434                 igb_enable_intr(adapter);
1435 }
1436
1437 /* Deal with link in a sleepable context */
1438 static void
1439 igb_handle_link(void *context, int pending)
1440 {
1441         struct adapter *adapter = context;
1442
1443         IGB_CORE_LOCK(adapter);
1444         igb_handle_link_locked(adapter);
1445         IGB_CORE_UNLOCK(adapter);
1446 }
1447
1448 static void
1449 igb_handle_link_locked(struct adapter *adapter)
1450 {
1451         struct tx_ring  *txr = adapter->tx_rings;
1452         struct ifnet *ifp = adapter->ifp;
1453
1454         IGB_CORE_LOCK_ASSERT(adapter);
1455         adapter->hw.mac.get_link_status = 1;
1456         igb_update_link_status(adapter);
1457         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1458                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1459                         IGB_TX_LOCK(txr);
1460 #ifndef IGB_LEGACY_TX
1461                         /* Process the stack queue only if not depleted */
1462                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1463                             !drbr_empty(ifp, txr->br))
1464                                 igb_mq_start_locked(ifp, txr);
1465 #else
1466                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1467                                 igb_start_locked(txr, ifp);
1468 #endif
1469                         IGB_TX_UNLOCK(txr);
1470                 }
1471         }
1472 }
1473
1474 /*********************************************************************
1475  *
1476  *  MSI/Legacy Deferred
1477  *  Interrupt Service routine  
1478  *
1479  *********************************************************************/
1480 static int
1481 igb_irq_fast(void *arg)
1482 {
1483         struct adapter          *adapter = arg;
1484         struct igb_queue        *que = adapter->queues;
1485         u32                     reg_icr;
1486
1487
1488         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1489
1490         /* Hot eject?  */
1491         if (reg_icr == 0xffffffff)
1492                 return FILTER_STRAY;
1493
1494         /* Definitely not our interrupt.  */
1495         if (reg_icr == 0x0)
1496                 return FILTER_STRAY;
1497
1498         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1499                 return FILTER_STRAY;
1500
1501         /*
1502          * Mask interrupts until the taskqueue is finished running.  This is
1503          * cheap, just assume that it is needed.  This also works around the
1504          * MSI message reordering errata on certain systems.
1505          */
1506         igb_disable_intr(adapter);
1507         taskqueue_enqueue(que->tq, &que->que_task);
1508
1509         /* Link status change */
1510         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1511                 taskqueue_enqueue(que->tq, &adapter->link_task);
1512
1513         if (reg_icr & E1000_ICR_RXO)
1514                 adapter->rx_overruns++;
1515         return FILTER_HANDLED;
1516 }
1517
1518 #ifdef DEVICE_POLLING
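/*
** FreeBSD 8 changed polling handlers to return the number of packets
** processed; the macro pair below lets one function body serve both
** the old void-returning and the new int-returning signatures.
*/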
1519 #if __FreeBSD_version >= 800000
1520 #define POLL_RETURN_COUNT(a) (a)
1521 static int
1522 #else
1523 #define POLL_RETURN_COUNT(a)
1524 static void
1525 #endif
1526 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1527 {
1528         struct adapter          *adapter = ifp->if_softc;
1529         struct igb_queue        *que;
1530         struct tx_ring          *txr;
1531         u32                     reg_icr, rx_done = 0;
1532         u32                     loop = IGB_MAX_LOOP;
1533         bool                    more;
1534
1535         IGB_CORE_LOCK(adapter);
1536         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1537                 IGB_CORE_UNLOCK(adapter);
1538                 return POLL_RETURN_COUNT(rx_done);
1539         }
1540
1541         if (cmd == POLL_AND_CHECK_STATUS) {
1542                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1543                 /* Link status change */
1544                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1545                         igb_handle_link_locked(adapter);
1546
1547                 if (reg_icr & E1000_ICR_RXO)
1548                         adapter->rx_overruns++;
1549         }
1550         IGB_CORE_UNLOCK(adapter);
1551
1552         for (int i = 0; i < adapter->num_queues; i++) {
1553                 que = &adapter->queues[i];
1554                 txr = que->txr;
1555
1556                 igb_rxeof(que, count, &rx_done);
1557
1558                 IGB_TX_LOCK(txr);
1559                 do {
1560                         more = igb_txeof(txr);
1561                 } while (loop-- && more);
1562 #ifndef IGB_LEGACY_TX
1563                 if (!drbr_empty(ifp, txr->br))
1564                         igb_mq_start_locked(ifp, txr);
1565 #else
1566                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1567                         igb_start_locked(txr, ifp);
1568 #endif
1569                 IGB_TX_UNLOCK(txr);
1570         }
1571
1572         return POLL_RETURN_COUNT(rx_done);
1573 }
1574 #endif /* DEVICE_POLLING */
1575
1576 /*********************************************************************
1577  *
1578  *  MSIX Que Interrupt Service routine
1579  *
1580  **********************************************************************/
1581 static void
1582 igb_msix_que(void *arg)
1583 {
1584         struct igb_queue *que = arg;
1585         struct adapter *adapter = que->adapter;
1586         struct ifnet   *ifp = adapter->ifp;
1587         struct tx_ring *txr = que->txr;
1588         struct rx_ring *rxr = que->rxr;
1589         u32             newitr = 0;
1590         bool            more_rx;
1591
1592         /* Ignore spurious interrupts */
1593         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1594                 return;
1595
1596         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1597         ++que->irqs;
1598
1599         IGB_TX_LOCK(txr);
1600         igb_txeof(txr);
1601 #ifndef IGB_LEGACY_TX
1602         /* Process the stack queue only if not depleted */
1603         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1604             !drbr_empty(ifp, txr->br))
1605                 igb_mq_start_locked(ifp, txr);
1606 #else
1607         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1608                 igb_start_locked(txr, ifp);
1609 #endif
1610         IGB_TX_UNLOCK(txr);
1611
1612         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1613
1614         if (adapter->enable_aim == FALSE)
1615                 goto no_calc;
1616         /*
1617         ** Do Adaptive Interrupt Moderation:
1618         **  - Write out last calculated setting
1619         **  - Calculate based on average size over
1620         **    the last interval.
1621         */
1622         if (que->eitr_setting)
1623                 E1000_WRITE_REG(&adapter->hw,
1624                     E1000_EITR(que->msix), que->eitr_setting);
1625  
1626         que->eitr_setting = 0;
1627
1628         /* Idle, do nothing */
1629         if ((txr->bytes == 0) && (rxr->bytes == 0))
1630                 goto no_calc;
1631                                 
1632         /* Use half the default if the link is sub-gigabit */
1633         if (adapter->link_speed != 1000)
1634                 newitr = IGB_DEFAULT_ITR / 2;
1635         else {
1636                 if ((txr->bytes) && (txr->packets))
1637                         newitr = txr->bytes/txr->packets;
1638                 if ((rxr->bytes) && (rxr->packets))
1639                         newitr = max(newitr,
1640                             (rxr->bytes / rxr->packets));
1641                 newitr += 24; /* account for hardware frame, crc */
1642                 /* set an upper boundary */
1643                 newitr = min(newitr, 3000);
1644                 /* Be nice to the mid range */
1645                 if ((newitr > 300) && (newitr < 1200))
1646                         newitr = (newitr / 3);
1647                 else
1648                         newitr = (newitr / 2);
1649         }
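        /*
        ** Worked example: a gigabit link averaging 1500-byte frames
        ** gives 1500 + 24 = 1524, which is outside the mid range and
        ** is halved to 762, then masked below to 760.
        */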
1650         newitr &= 0x7FFC;  /* Mask invalid bits */
1651         if (adapter->hw.mac.type == e1000_82575)
1652                 newitr |= newitr << 16;
1653         else
1654                 newitr |= E1000_EITR_CNT_IGNR;
1655                  
1656         /* save for next interrupt */
1657         que->eitr_setting = newitr;
1658
1659         /* Reset state */
1660         txr->bytes = 0;
1661         txr->packets = 0;
1662         rxr->bytes = 0;
1663         rxr->packets = 0;
1664
1665 no_calc:
1666         /* Schedule a clean task if needed */
1667         if (more_rx)
1668                 taskqueue_enqueue(que->tq, &que->que_task);
1669         else
1670                 /* Reenable this interrupt */
1671                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1672         return;
1673 }
1674
1675
1676 /*********************************************************************
1677  *
1678  *  MSIX Link Interrupt Service routine
1679  *
1680  **********************************************************************/
1681
1682 static void
1683 igb_msix_link(void *arg)
1684 {
1685         struct adapter  *adapter = arg;
1686         u32             icr;
1687
1688         ++adapter->link_irq;
1689         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1690         if (!(icr & E1000_ICR_LSC))
1691                 goto spurious;
1692         igb_handle_link(adapter, 0);
1693
1694 spurious:
1695         /* Rearm */
1696         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1697         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1698         return;
1699 }
1700
1701
1702 /*********************************************************************
1703  *
1704  *  Media Ioctl callback
1705  *
1706  *  This routine is called whenever the user queries the status of
1707  *  the interface using ifconfig.
1708  *
1709  **********************************************************************/
1710 static void
1711 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1712 {
1713         struct adapter *adapter = ifp->if_softc;
1714
1715         INIT_DEBUGOUT("igb_media_status: begin");
1716
1717         IGB_CORE_LOCK(adapter);
1718         igb_update_link_status(adapter);
1719
1720         ifmr->ifm_status = IFM_AVALID;
1721         ifmr->ifm_active = IFM_ETHER;
1722
1723         if (!adapter->link_active) {
1724                 IGB_CORE_UNLOCK(adapter);
1725                 return;
1726         }
1727
1728         ifmr->ifm_status |= IFM_ACTIVE;
1729
1730         switch (adapter->link_speed) {
1731         case 10:
1732                 ifmr->ifm_active |= IFM_10_T;
1733                 break;
1734         case 100:
1735                 /*
1736                 ** Support for 100Mb SFP - these are Fiber 
1737                 ** but the media type appears as serdes
1738                 */
1739                 if (adapter->hw.phy.media_type ==
1740                     e1000_media_type_internal_serdes)
1741                         ifmr->ifm_active |= IFM_100_FX;
1742                 else
1743                         ifmr->ifm_active |= IFM_100_TX;
1744                 break;
1745         case 1000:
1746                 ifmr->ifm_active |= IFM_1000_T;
1747                 break;
1748         case 2500:
1749                 ifmr->ifm_active |= IFM_2500_SX;
1750                 break;
1751         }
1752
1753         if (adapter->link_duplex == FULL_DUPLEX)
1754                 ifmr->ifm_active |= IFM_FDX;
1755         else
1756                 ifmr->ifm_active |= IFM_HDX;
1757
1758         IGB_CORE_UNLOCK(adapter);
1759 }
1760
1761 /*********************************************************************
1762  *
1763  *  Media Ioctl callback
1764  *
1765  *  This routine is called when the user changes speed/duplex using
1766  *  media/mediaopt options with ifconfig.
1767  *
1768  **********************************************************************/
1769 static int
1770 igb_media_change(struct ifnet *ifp)
1771 {
1772         struct adapter *adapter = ifp->if_softc;
1773         struct ifmedia  *ifm = &adapter->media;
1774
1775         INIT_DEBUGOUT("igb_media_change: begin");
1776
1777         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1778                 return (EINVAL);
1779
1780         IGB_CORE_LOCK(adapter);
1781         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1782         case IFM_AUTO:
1783                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1784                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1785                 break;
1786         case IFM_1000_LX:
1787         case IFM_1000_SX:
1788         case IFM_1000_T:
1789                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1790                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1791                 break;
1792         case IFM_100_TX:
1793                 adapter->hw.mac.autoneg = FALSE;
1794                 adapter->hw.phy.autoneg_advertised = 0;
1795                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1796                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1797                 else
1798                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1799                 break;
1800         case IFM_10_T:
1801                 adapter->hw.mac.autoneg = FALSE;
1802                 adapter->hw.phy.autoneg_advertised = 0;
1803                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1804                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1805                 else
1806                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1807                 break;
1808         default:
1809                 device_printf(adapter->dev, "Unsupported media type\n");
1810         }
1811
1812         igb_init_locked(adapter);
1813         IGB_CORE_UNLOCK(adapter);
1814
1815         return (0);
1816 }
1817
1818
1819 /*********************************************************************
1820  *
1821  *  This routine maps the mbufs to Advanced TX descriptors.
1822  *  
1823  **********************************************************************/
1824 static int
1825 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1826 {
1827         struct adapter  *adapter = txr->adapter;
1828         u32             olinfo_status = 0, cmd_type_len;
1829         int             i, j, error, nsegs;
1830         int             first;
1831         bool            remap = TRUE;
1832         struct mbuf     *m_head;
1833         bus_dma_segment_t segs[IGB_MAX_SCATTER];
1834         bus_dmamap_t    map;
1835         struct igb_tx_buf *txbuf;
1836         union e1000_adv_tx_desc *txd = NULL;
1837
1838         m_head = *m_headp;
1839
1840         /* Basic descriptor defines */
1841         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1842             E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1843
1844         if (m_head->m_flags & M_VLANTAG)
1845                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1846
1847         /*
1848          * Important to capture the first descriptor
1849          * used because it will contain the index of
1850          * the one we tell the hardware to report back
1851          */
1852         first = txr->next_avail_desc;
1853         txbuf = &txr->tx_buffers[first];
1854         map = txbuf->map;
1855
1856         /*
1857          * Map the packet for DMA.
1858          */
1859 retry:
1860         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1861             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1862
1863         if (__predict_false(error)) {
1864                 struct mbuf *m;
1865
1866                 switch (error) {
1867                 case EFBIG:
1868                         /* Try it again? - one try */
1869                         if (remap == TRUE) {
1870                                 remap = FALSE;
1871                                 m = m_collapse(*m_headp, M_NOWAIT,
1872                                     IGB_MAX_SCATTER);
1873                                 if (m == NULL) {
1874                                         adapter->mbuf_defrag_failed++;
1875                                         m_freem(*m_headp);
1876                                         *m_headp = NULL;
1877                                         return (ENOBUFS);
1878                                 }
1879                                 *m_headp = m;
1880                                 goto retry;
1881                         } else
1882                                 return (error);
1883                 default:
1884                         txr->no_tx_dma_setup++;
1885                         m_freem(*m_headp);
1886                         *m_headp = NULL;
1887                         return (error);
1888                 }
1889         }
1890
1891         /* Make certain there are enough descriptors */
1892         if (txr->tx_avail < (nsegs + 2)) {
1893                 txr->no_desc_avail++;
1894                 bus_dmamap_unload(txr->txtag, map);
1895                 return (ENOBUFS);
1896         }
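        /*
        ** Note: nsegs + 2 reserves a descriptor for the offload
        ** context set up below plus, presumably, one of slack so the
        ** ring never fills completely.
        */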
1897         m_head = *m_headp;
1898
1899         /*
1900         ** Set up the appropriate offload context
1901         ** this will consume the first descriptor
1902         */
1903         error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1904         if (__predict_false(error)) {
1905                 m_freem(*m_headp);
1906                 *m_headp = NULL;
1907                 return (error);
1908         }
1909
1910         /* 82575 needs the queue index added */
1911         if (adapter->hw.mac.type == e1000_82575)
1912                 olinfo_status |= txr->me << 4;
1913
1914         i = txr->next_avail_desc;
1915         for (j = 0; j < nsegs; j++) {
1916                 bus_size_t seglen;
1917                 bus_addr_t segaddr;
1918
1919                 txbuf = &txr->tx_buffers[i];
1920                 txd = &txr->tx_base[i];
1921                 seglen = segs[j].ds_len;
1922                 segaddr = htole64(segs[j].ds_addr);
1923
1924                 txd->read.buffer_addr = segaddr;
1925                 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1926                     cmd_type_len | seglen);
1927                 txd->read.olinfo_status = htole32(olinfo_status);
1928
1929                 if (++i == txr->num_desc)
1930                         i = 0;
1931         }
1932
1933         txd->read.cmd_type_len |=
1934             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
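        /*
        ** EOP marks the final descriptor of the frame; RS asks the
        ** hardware to write back completion status, which igb_txeof()
        ** later checks through the eop pointer saved below.
        */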
1935         txr->tx_avail -= nsegs;
1936         txr->next_avail_desc = i;
1937
1938         txbuf->m_head = m_head;
1939         /*
1940         ** Here we swap the map so the last descriptor,
1941         ** which gets the completion interrupt has the
1942         ** real map, and the first descriptor gets the
1943         ** unused map from this descriptor.
1944         */
1945         txr->tx_buffers[first].map = txbuf->map;
1946         txbuf->map = map;
1947         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1948
1949         /* Set the EOP descriptor that will be marked done */
1950         txbuf = &txr->tx_buffers[first];
1951         txbuf->eop = txd;
1952
1953         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1954             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1955         /*
1956          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1957          * hardware that this frame is available to transmit.
1958          */
1959         ++txr->total_packets;
1960         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1961
1962         return (0);
1963 }

1964 static void
1965 igb_set_promisc(struct adapter *adapter)
1966 {
1967         struct ifnet    *ifp = adapter->ifp;
1968         struct e1000_hw *hw = &adapter->hw;
1969         u32             reg;
1970
1971         if (adapter->vf_ifp) {
1972                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1973                 return;
1974         }
1975
1976         reg = E1000_READ_REG(hw, E1000_RCTL);
1977         if (ifp->if_flags & IFF_PROMISC) {
1978                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1979                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1980         } else if (ifp->if_flags & IFF_ALLMULTI) {
1981                 reg |= E1000_RCTL_MPE;
1982                 reg &= ~E1000_RCTL_UPE;
1983                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1984         }
1985 }
1986
1987 static void
1988 igb_disable_promisc(struct adapter *adapter)
1989 {
1990         struct e1000_hw *hw = &adapter->hw;
1991         struct ifnet    *ifp = adapter->ifp;
1992         u32             reg;
1993         int             mcnt = 0;
1994
1995         if (adapter->vf_ifp) {
1996                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1997                 return;
1998         }
1999         reg = E1000_READ_REG(hw, E1000_RCTL);
2000         reg &= ~E1000_RCTL_UPE;
2001         if (ifp->if_flags & IFF_ALLMULTI)
2002                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2003         else {
2004                 struct  ifmultiaddr *ifma;
2005 #if __FreeBSD_version < 800000
2006                 IF_ADDR_LOCK(ifp);
2007 #else   
2008                 if_maddr_rlock(ifp);
2009 #endif
2010                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2011                         if (ifma->ifma_addr->sa_family != AF_LINK)
2012                                 continue;
2013                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2014                                 break;
2015                         mcnt++;
2016                 }
2017 #if __FreeBSD_version < 800000
2018                 IF_ADDR_UNLOCK(ifp);
2019 #else
2020                 if_maddr_runlock(ifp);
2021 #endif
2022         }
2023         /* Leave multicast promisc set if the list is at the MAX */
2024         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2025                 reg &= ~E1000_RCTL_MPE;
2026         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2027 }
2028
2029
2030 /*********************************************************************
2031  *  Multicast Update
2032  *
2033  *  This routine is called whenever multicast address list is updated.
2034  *
2035  **********************************************************************/
2036
2037 static void
2038 igb_set_multi(struct adapter *adapter)
2039 {
2040         struct ifnet    *ifp = adapter->ifp;
2041         struct ifmultiaddr *ifma;
2042         u32 reg_rctl = 0;
2043         u8  *mta;
2044
2045         int mcnt = 0;
2046
2047         IOCTL_DEBUGOUT("igb_set_multi: begin");
2048
2049         mta = adapter->mta;
2050         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2051             MAX_NUM_MULTICAST_ADDRESSES);
2052
2053 #if __FreeBSD_version < 800000
2054         IF_ADDR_LOCK(ifp);
2055 #else
2056         if_maddr_rlock(ifp);
2057 #endif
2058         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2059                 if (ifma->ifma_addr->sa_family != AF_LINK)
2060                         continue;
2061
2062                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2063                         break;
2064
2065                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2066                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2067                 mcnt++;
2068         }
2069 #if __FreeBSD_version < 800000
2070         IF_ADDR_UNLOCK(ifp);
2071 #else
2072         if_maddr_runlock(ifp);
2073 #endif
2074
2075         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2076                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2077                 reg_rctl |= E1000_RCTL_MPE;
2078                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2079         } else
2080                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2081 }
2082
2083
2084 /*********************************************************************
2085  *  Timer routine:
2086  *      This routine checks for link status,
2087  *      updates statistics, and does the watchdog.
2088  *
2089  **********************************************************************/
2090
2091 static void
2092 igb_local_timer(void *arg)
2093 {
2094         struct adapter          *adapter = arg;
2095         device_t                dev = adapter->dev;
2096         struct ifnet            *ifp = adapter->ifp;
2097         struct tx_ring          *txr = adapter->tx_rings;
2098         struct igb_queue        *que = adapter->queues;
2099         int                     hung = 0, busy = 0;
2100
2101
2102         IGB_CORE_LOCK_ASSERT(adapter);
2103
2104         igb_update_link_status(adapter);
2105         igb_update_stats_counters(adapter);
2106
2107         /*
2108         ** Check the TX queues status
2109         **      - central locked handling of OACTIVE
2110         **      - watchdog only if all queues show hung
2111         */
2112         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2113                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2114                     (adapter->pause_frames == 0))
2115                         ++hung;
2116                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2117                         ++busy;
2118                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2119                         taskqueue_enqueue(que->tq, &que->que_task);
2120         }
2121         if (hung == adapter->num_queues)
2122                 goto timeout;
2123         if (busy == adapter->num_queues)
2124                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2125         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2126             (busy < adapter->num_queues))
2127                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2128
2129         adapter->pause_frames = 0;
2130         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2131 #ifndef DEVICE_POLLING
2132         /* Schedule all queue interrupts - deadlock protection */
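        /*
        ** Writing que_mask into EICS raises those interrupt causes in
        ** software, so a queue whose interrupt was lost still gets
        ** serviced on the next timer tick.
        */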
2133         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2134 #endif
2135         return;
2136
2137 timeout:
2138         device_printf(dev, "Watchdog timeout -- resetting\n");
2139         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2140             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2141             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2142         device_printf(dev, "TX(%d) desc avail = %d, "
2143             "Next TX to Clean = %d\n",
2144             txr->me, txr->tx_avail, txr->next_to_clean);
2145         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2146         adapter->watchdog_events++;
2147         igb_init_locked(adapter);
2148 }
2149
2150 static void
2151 igb_update_link_status(struct adapter *adapter)
2152 {
2153         struct e1000_hw         *hw = &adapter->hw;
2154         struct e1000_fc_info    *fc = &hw->fc;
2155         struct ifnet            *ifp = adapter->ifp;
2156         device_t                dev = adapter->dev;
2157         struct tx_ring          *txr = adapter->tx_rings;
2158         u32                     link_check, thstat, ctrl;
2159         char                    *flowctl = NULL;
2160
2161         link_check = thstat = ctrl = 0;
2162
2163         /* Get the cached link value or read for real */
2164         switch (hw->phy.media_type) {
2165         case e1000_media_type_copper:
2166                 if (hw->mac.get_link_status) {
2167                         /* Do the work to read phy */
2168                         e1000_check_for_link(hw);
2169                         link_check = !hw->mac.get_link_status;
2170                 } else
2171                         link_check = TRUE;
2172                 break;
2173         case e1000_media_type_fiber:
2174                 e1000_check_for_link(hw);
2175                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2176                                  E1000_STATUS_LU);
2177                 break;
2178         case e1000_media_type_internal_serdes:
2179                 e1000_check_for_link(hw);
2180                 link_check = adapter->hw.mac.serdes_has_link;
2181                 break;
2182         /* VF device is type_unknown */
2183         case e1000_media_type_unknown:
2184                 e1000_check_for_link(hw);
2185                 link_check = !hw->mac.get_link_status;
2186                 /* Fall thru */
2187         default:
2188                 break;
2189         }
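        /*
        ** Note the copper path treats a cleared get_link_status as
        ** link-up: e1000_check_for_link() clears the flag once it has
        ** confirmed the link, so the flag doubles as "state is current".
        */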
2190
2191         /* Check for thermal downshift or shutdown */
2192         if (hw->mac.type == e1000_i350) {
2193                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2194                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2195         }
2196
2197         /* Get the flow control for display */
2198         switch (fc->current_mode) {
2199         case e1000_fc_rx_pause:
2200                 flowctl = "RX";
2201                 break;  
2202         case e1000_fc_tx_pause:
2203                 flowctl = "TX";
2204                 break;  
2205         case e1000_fc_full:
2206                 flowctl = "Full";
2207                 break;  
2208         case e1000_fc_none:
2209         default:
2210                 flowctl = "None";
2211                 break;  
2212         }
2213
2214         /* Now we check if a transition has happened */
2215         if (link_check && (adapter->link_active == 0)) {
2216                 e1000_get_speed_and_duplex(&adapter->hw, 
2217                     &adapter->link_speed, &adapter->link_duplex);
2218                 if (bootverbose)
2219                         device_printf(dev, "Link is up %d Mbps %s,"
2220                             " Flow Control: %s\n",
2221                             adapter->link_speed,
2222                             ((adapter->link_duplex == FULL_DUPLEX) ?
2223                             "Full Duplex" : "Half Duplex"), flowctl);
2224                 adapter->link_active = 1;
2225                 ifp->if_baudrate = adapter->link_speed * 1000000;
2226                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2227                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2228                         device_printf(dev, "Link: thermal downshift\n");
2229                 /* Delay Link Up for Phy update */
2230                 if (((hw->mac.type == e1000_i210) ||
2231                     (hw->mac.type == e1000_i211)) &&
2232                     (hw->phy.id == I210_I_PHY_ID))
2233                         msec_delay(I210_LINK_DELAY);
2234                 /* Reset if the media type changed. */
2235                 if (hw->dev_spec._82575.media_changed) {
2236                         hw->dev_spec._82575.media_changed = false;
2237                         adapter->flags |= IGB_MEDIA_RESET;
2238                         igb_reset(adapter);
2239                 }       
2240                 /* This can sleep */
2241                 if_link_state_change(ifp, LINK_STATE_UP);
2242         } else if (!link_check && (adapter->link_active == 1)) {
2243                 ifp->if_baudrate = adapter->link_speed = 0;
2244                 adapter->link_duplex = 0;
2245                 if (bootverbose)
2246                         device_printf(dev, "Link is Down\n");
2247                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2248                     (thstat & E1000_THSTAT_PWR_DOWN))
2249                         device_printf(dev, "Link: thermal shutdown\n");
2250                 adapter->link_active = 0;
2251                 /* This can sleep */
2252                 if_link_state_change(ifp, LINK_STATE_DOWN);
2253                 /* Reset queue state */
2254                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2255                         txr->queue_status = IGB_QUEUE_IDLE;
2256         }
2257 }
2258
2259 /*********************************************************************
2260  *
2261  *  This routine disables all traffic on the adapter by issuing a
2262  *  global reset on the MAC and deallocates TX/RX buffers.
2263  *
2264  **********************************************************************/
2265
2266 static void
2267 igb_stop(void *arg)
2268 {
2269         struct adapter  *adapter = arg;
2270         struct ifnet    *ifp = adapter->ifp;
2271         struct tx_ring *txr = adapter->tx_rings;
2272
2273         IGB_CORE_LOCK_ASSERT(adapter);
2274
2275         INIT_DEBUGOUT("igb_stop: begin");
2276
2277         igb_disable_intr(adapter);
2278
2279         callout_stop(&adapter->timer);
2280
2281         /* Tell the stack that the interface is no longer active */
2282         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2283         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2284
2285         /* Disarm watchdog timer. */
2286         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2287                 IGB_TX_LOCK(txr);
2288                 txr->queue_status = IGB_QUEUE_IDLE;
2289                 IGB_TX_UNLOCK(txr);
2290         }
2291
2292         e1000_reset_hw(&adapter->hw);
2293         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2294
2295         e1000_led_off(&adapter->hw);
2296         e1000_cleanup_led(&adapter->hw);
2297 }
2298
2299
2300 /*********************************************************************
2301  *
2302  *  Determine hardware revision.
2303  *
2304  **********************************************************************/
2305 static void
2306 igb_identify_hardware(struct adapter *adapter)
2307 {
2308         device_t dev = adapter->dev;
2309
2310         /* Make sure our PCI config space has the necessary stuff set */
2311         pci_enable_busmaster(dev);
2312         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2313
2314         /* Save off the information about this board */
2315         adapter->hw.vendor_id = pci_get_vendor(dev);
2316         adapter->hw.device_id = pci_get_device(dev);
2317         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2318         adapter->hw.subsystem_vendor_id =
2319             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2320         adapter->hw.subsystem_device_id =
2321             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2322
2323         /* Set MAC type early for PCI setup */
2324         e1000_set_mac_type(&adapter->hw);
2325
2326         /* Are we a VF device? */
2327         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2328             (adapter->hw.mac.type == e1000_vfadapt_i350))
2329                 adapter->vf_ifp = 1;
2330         else
2331                 adapter->vf_ifp = 0;
2332 }
2333
2334 static int
2335 igb_allocate_pci_resources(struct adapter *adapter)
2336 {
2337         device_t        dev = adapter->dev;
2338         int             rid;
2339
2340         rid = PCIR_BAR(0);
2341         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2342             &rid, RF_ACTIVE);
2343         if (adapter->pci_mem == NULL) {
2344                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2345                 return (ENXIO);
2346         }
2347         adapter->osdep.mem_bus_space_tag =
2348             rman_get_bustag(adapter->pci_mem);
2349         adapter->osdep.mem_bus_space_handle =
2350             rman_get_bushandle(adapter->pci_mem);
2351         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2352
2353         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2354
2355         /* This will setup either MSI/X or MSI */
2356         adapter->msix = igb_setup_msix(adapter);
2357         adapter->hw.back = &adapter->osdep;
2358
2359         return (0);
2360 }
2361
2362 /*********************************************************************
2363  *
2364  *  Setup the Legacy or MSI Interrupt handler
2365  *
2366  **********************************************************************/
2367 static int
2368 igb_allocate_legacy(struct adapter *adapter)
2369 {
2370         device_t                dev = adapter->dev;
2371         struct igb_queue        *que = adapter->queues;
2372 #ifndef IGB_LEGACY_TX
2373         struct tx_ring          *txr = adapter->tx_rings;
2374 #endif
2375         int                     error, rid = 0;
2376
2377         /* Turn off all interrupts */
2378         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2379
2380         /* MSI RID is 1 */
2381         if (adapter->msix == 1)
2382                 rid = 1;
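        /*
        ** With rid 0, bus_alloc_resource_any() returns the legacy
        ** INTx line; MSI messages are numbered starting at rid 1.
        */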
2383
2384         /* We allocate a single interrupt resource */
2385         adapter->res = bus_alloc_resource_any(dev,
2386             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2387         if (adapter->res == NULL) {
2388                 device_printf(dev, "Unable to allocate bus resource: "
2389                     "interrupt\n");
2390                 return (ENXIO);
2391         }
2392
2393 #ifndef IGB_LEGACY_TX
2394         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2395 #endif
2396
2397         /*
2398          * Try allocating a fast interrupt and the associated deferred
2399          * processing contexts.
2400          */
2401         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2402         /* Make tasklet for deferred link handling */
2403         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2404         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2405             taskqueue_thread_enqueue, &que->tq);
2406         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2407             device_get_nameunit(adapter->dev));
2408         if ((error = bus_setup_intr(dev, adapter->res,
2409             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2410             adapter, &adapter->tag)) != 0) {
2411                 device_printf(dev, "Failed to register fast interrupt "
2412                             "handler: %d\n", error);
2413                 taskqueue_free(que->tq);
2414                 que->tq = NULL;
2415                 return (error);
2416         }
2417
2418         return (0);
2419 }
2420
2421
2422 /*********************************************************************
2423  *
2424  *  Setup the MSIX Queue Interrupt handlers
2425  *
2426  **********************************************************************/
2427 static int
2428 igb_allocate_msix(struct adapter *adapter)
2429 {
2430         device_t                dev = adapter->dev;
2431         struct igb_queue        *que = adapter->queues;
2432         int                     error, rid, vector = 0;
2433         int                     cpu_id = 0;
2434 #ifdef  RSS
2435         cpuset_t cpu_mask;
2436 #endif
2437
2438         /* Be sure to start with all interrupts disabled */
2439         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2440         E1000_WRITE_FLUSH(&adapter->hw);
2441
2442 #ifdef  RSS
2443         /*
2444          * If we're doing RSS, the number of queues needs to
2445          * match the number of RSS buckets that are configured.
2446          *
2447          * + If there's more queues than RSS buckets, we'll end
2448          *   up with queues that get no traffic.
2449          *
2450          * + If there's more RSS buckets than queues, we'll end
2451          *   up having multiple RSS buckets map to the same queue,
2452          *   so there'll be some contention.
2453          */
2454         if (adapter->num_queues != rss_getnumbuckets()) {
2455                 device_printf(dev,
2456                     "%s: number of queues (%d) != number of RSS buckets (%d)"
2457                     "; performance will be impacted.\n",
2458                     __func__,
2459                     adapter->num_queues,
2460                     rss_getnumbuckets());
2461         }
2462 #endif
2463
2464         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2465                 rid = vector + 1;
2466                 que->res = bus_alloc_resource_any(dev,
2467                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2468                 if (que->res == NULL) {
2469                         device_printf(dev,
2470                             "Unable to allocate bus resource: "
2471                             "MSIX Queue Interrupt\n");
2472                         return (ENXIO);
2473                 }
2474                 error = bus_setup_intr(dev, que->res,
2475                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2476                     igb_msix_que, que, &que->tag);
2477                 if (error) {
2478                         que->res = NULL;
2479                         device_printf(dev, "Failed to register Queue handler\n");
2480                         return (error);
2481                 }
2482 #if __FreeBSD_version >= 800504
2483                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2484 #endif
2485                 que->msix = vector;
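                /*
                ** eims is this queue's bit in the EIMS/EIMC mask
                ** registers: the 82575 uses its fixed per-queue cause
                ** bits (widened to RX|TX in igb_configure_queues()),
                ** while later MACs use one bit per MSIX vector.
                */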
2486                 if (adapter->hw.mac.type == e1000_82575)
2487                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2488                 else
2489                         que->eims = 1 << vector;
2490
2491 #ifdef  RSS
2492                 /*
2493                  * The queue ID is used as the RSS layer bucket ID.
2494                  * We look up the queue ID -> RSS CPU ID and select
2495                  * that.
2496                  */
2497                 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2498 #else
2499                 /*
2500                  * Bind the msix vector, and thus the
2501                  * rings to the corresponding cpu.
2502                  *
2503                  * This just happens to match the default RSS round-robin
2504                  * bucket -> queue -> CPU allocation.
2505                  */
2506                 if (adapter->num_queues > 1) {
2507                         if (igb_last_bind_cpu < 0)
2508                                 igb_last_bind_cpu = CPU_FIRST();
2509                         cpu_id = igb_last_bind_cpu;
2510                 }
2511 #endif
2512
2513                 if (adapter->num_queues > 1) {
2514                         bus_bind_intr(dev, que->res, cpu_id);
2515 #ifdef  RSS
2516                         device_printf(dev,
2517                                 "Bound queue %d to RSS bucket %d\n",
2518                                 i, cpu_id);
2519 #else
2520                         device_printf(dev,
2521                                 "Bound queue %d to cpu %d\n",
2522                                 i, cpu_id);
2523 #endif
2524                 }
2525
2526 #ifndef IGB_LEGACY_TX
2527                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2528                     que->txr);
2529 #endif
2530                 /* Make tasklet for deferred handling */
2531                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2532                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2533                     taskqueue_thread_enqueue, &que->tq);
2534                 if (adapter->num_queues > 1) {
2535                         /*
2536                          * Only pin the taskqueue thread to a CPU if
2537                          * RSS is in use.
2538                          *
2539                          * This again just happens to match the default RSS
2540                          * round-robin bucket -> queue -> CPU allocation.
2541                          */
2542 #ifdef  RSS
2543                         CPU_SETOF(cpu_id, &cpu_mask);
2544                         taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2545                             &cpu_mask,
2546                             "%s que (bucket %d)",
2547                             device_get_nameunit(adapter->dev),
2548                             cpu_id);
2549 #else
2550                         taskqueue_start_threads(&que->tq, 1, PI_NET,
2551                             "%s que (qid %d)",
2552                             device_get_nameunit(adapter->dev),
2553                             cpu_id);
2554 #endif
2555                 } else {
2556                         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2557                             device_get_nameunit(adapter->dev));
2558                 }
2559
2560                 /* Finally update the last bound CPU id */
2561                 if (adapter->num_queues > 1)
2562                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2563         }
2564
2565         /* And Link */
2566         rid = vector + 1;
2567         adapter->res = bus_alloc_resource_any(dev,
2568             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2569         if (adapter->res == NULL) {
2570                 device_printf(dev,
2571                     "Unable to allocate bus resource: "
2572                     "MSIX Link Interrupt\n");
2573                 return (ENXIO);
2574         }
2575         if ((error = bus_setup_intr(dev, adapter->res,
2576             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2577             igb_msix_link, adapter, &adapter->tag)) != 0) {
2578                 device_printf(dev, "Failed to register Link handler\n");
2579                 return (error);
2580         }
2581 #if __FreeBSD_version >= 800504
2582         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2583 #endif
2584         adapter->linkvec = vector;
2585
2586         return (0);
2587 }
2588
2589
2590 static void
2591 igb_configure_queues(struct adapter *adapter)
2592 {
2593         struct  e1000_hw        *hw = &adapter->hw;
2594         struct  igb_queue       *que;
2595         u32                     tmp, ivar = 0, newitr = 0;
2596
2597         /* First turn on RSS capability */
2598         if (adapter->hw.mac.type != e1000_82575)
2599                 E1000_WRITE_REG(hw, E1000_GPIE,
2600                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2601                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2602
2603         /* Turn on MSIX */
2604         switch (adapter->hw.mac.type) {
2605         case e1000_82580:
2606         case e1000_i350:
2607         case e1000_i354:
2608         case e1000_i210:
2609         case e1000_i211:
2610         case e1000_vfadapt:
2611         case e1000_vfadapt_i350:
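                /*
                ** On these MACs each 32-bit IVAR register carries the
                ** assignments for two queues, one byte apiece: byte 0
                ** is RX(2n), byte 1 TX(2n), byte 2 RX(2n+1) and byte 3
                ** TX(2n+1), with E1000_IVAR_VALID arming each entry --
                ** hence index = i >> 1 and the even/odd shifts below.
                */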
2612                 /* RX entries */
2613                 for (int i = 0; i < adapter->num_queues; i++) {
2614                         u32 index = i >> 1;
2615                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2616                         que = &adapter->queues[i];
2617                         if (i & 1) {
2618                                 ivar &= 0xFF00FFFF;
2619                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2620                         } else {
2621                                 ivar &= 0xFFFFFF00;
2622                                 ivar |= que->msix | E1000_IVAR_VALID;
2623                         }
2624                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2625                 }
2626                 /* TX entries */
2627                 for (int i = 0; i < adapter->num_queues; i++) {
2628                         u32 index = i >> 1;
2629                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2630                         que = &adapter->queues[i];
2631                         if (i & 1) {
2632                                 ivar &= 0x00FFFFFF;
2633                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2634                         } else {
2635                                 ivar &= 0xFFFF00FF;
2636                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2637                         }
2638                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2639                         adapter->que_mask |= que->eims;
2640                 }
2641
2642                 /* And for the link interrupt */
2643                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2644                 adapter->link_mask = 1 << adapter->linkvec;
2645                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2646                 break;
2647         case e1000_82576:
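                /*
                ** The 82576 lays its IVARs out differently: queue i
                ** and queue i + 8 share the register selected by
                ** (i & 0x7), with RX in bytes 0/2 and TX in bytes 1/3.
                */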
2648                 /* RX entries */
2649                 for (int i = 0; i < adapter->num_queues; i++) {
2650                         u32 index = i & 0x7; /* Each IVAR has two entries */
2651                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2652                         que = &adapter->queues[i];
2653                         if (i < 8) {
2654                                 ivar &= 0xFFFFFF00;
2655                                 ivar |= que->msix | E1000_IVAR_VALID;
2656                         } else {
2657                                 ivar &= 0xFF00FFFF;
2658                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2659                         }
2660                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2661                         adapter->que_mask |= que->eims;
2662                 }
2663                 /* TX entries */
2664                 for (int i = 0; i < adapter->num_queues; i++) {
2665                         u32 index = i & 0x7; /* Each IVAR has two entries */
2666                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2667                         que = &adapter->queues[i];
2668                         if (i < 8) {
2669                                 ivar &= 0xFFFF00FF;
2670                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2671                         } else {
2672                                 ivar &= 0x00FFFFFF;
2673                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2674                         }
2675                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2676                         adapter->que_mask |= que->eims;
2677                 }
2678
2679                 /* And for the link interrupt */
2680                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2681                 adapter->link_mask = 1 << adapter->linkvec;
2682                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2683                 break;
2684
2685         case e1000_82575:
2686                 /* Enable MSI-X support */
2687                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2688                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2689                 /* Auto-Mask interrupts upon ICR read. */
2690                 tmp |= E1000_CTRL_EXT_EIAME;
2691                 tmp |= E1000_CTRL_EXT_IRCA;
2692                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2693
2694                 /* Queues */
2695                 for (int i = 0; i < adapter->num_queues; i++) {
2696                         que = &adapter->queues[i];
2697                         tmp = E1000_EICR_RX_QUEUE0 << i;
2698                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2699                         que->eims = tmp;
2700                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2701                             i, que->eims);
2702                         adapter->que_mask |= que->eims;
2703                 }
2704
2705                 /* Link */
2706                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2707                     E1000_EIMS_OTHER);
2708                 adapter->link_mask |= E1000_EIMS_OTHER;
                break;
2709         default:
2710                 break;
2711         }
2712
2713         /* Set the starting interrupt rate */
2714         if (igb_max_interrupt_rate > 0)
2715                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
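        /*
        ** 4000000 is one second expressed in EITR interval units
        ** (about 250 ns apiece, if the usual datasheet granularity
        ** applies); e.g. a cap of 8000 interrupts/s yields an
        ** interval value of 500.
        */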
2716
2717         if (hw->mac.type == e1000_82575)
2718                 newitr |= newitr << 16;
2719         else
2720                 newitr |= E1000_EITR_CNT_IGNR;
2721
2722         for (int i = 0; i < adapter->num_queues; i++) {
2723                 que = &adapter->queues[i];
2724                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2725         }
2726
2727         return;
2728 }
2729
2730
2731 static void
2732 igb_free_pci_resources(struct adapter *adapter)
2733 {
2734         struct          igb_queue *que = adapter->queues;
2735         device_t        dev = adapter->dev;
2736         int             rid;
2737
2738         /*
2739         ** There is a slight possibility of a failure mode
2740         ** in attach that results in entering this function
2741         ** before the interrupt resources have been initialized;
2742         ** in that case we do not want to execute the loops below.
2743         ** We can detect this reliably by the state of the adapter
2744         ** res pointer.
2745         */
2746         if (adapter->res == NULL)
2747                 goto mem;
2748
2749         /*
2750          * First release all the interrupt resources:
2751          */
2752         for (int i = 0; i < adapter->num_queues; i++, que++) {
2753                 rid = que->msix + 1;
2754                 if (que->tag != NULL) {
2755                         bus_teardown_intr(dev, que->res, que->tag);
2756                         que->tag = NULL;
2757                 }
2758                 if (que->res != NULL)
2759                         bus_release_resource(dev,
2760                             SYS_RES_IRQ, rid, que->res);
2761         }
2762
2763         /* Clean the Legacy or Link interrupt last */
2764         if (adapter->linkvec) /* we are doing MSIX */
2765                 rid = adapter->linkvec + 1;
2766         else
2767                 rid = (adapter->msix != 0) ? 1 : 0;
2768
2769         que = adapter->queues;
2770         if (adapter->tag != NULL) {
2771                 taskqueue_drain(que->tq, &adapter->link_task);
2772                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2773                 adapter->tag = NULL;
2774         }
2775         if (adapter->res != NULL)
2776                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2777
2778         for (int i = 0; i < adapter->num_queues; i++, que++) {
2779                 if (que->tq != NULL) {
2780 #ifndef IGB_LEGACY_TX
2781                         taskqueue_drain(que->tq, &que->txr->txq_task);
2782 #endif
2783                         taskqueue_drain(que->tq, &que->que_task);
2784                         taskqueue_free(que->tq);
2785                 }
2786         }
2787 mem:
2788         if (adapter->msix)
2789                 pci_release_msi(dev);
2790
2791         if (adapter->msix_mem != NULL)
2792                 bus_release_resource(dev, SYS_RES_MEMORY,
2793                     adapter->memrid, adapter->msix_mem);
2794
2795         if (adapter->pci_mem != NULL)
2796                 bus_release_resource(dev, SYS_RES_MEMORY,
2797                     PCIR_BAR(0), adapter->pci_mem);
2798
2799 }
2800
2801 /*
2802  * Setup either MSI/X or MSI
2803  */
2804 static int
2805 igb_setup_msix(struct adapter *adapter)
2806 {
2807         device_t        dev = adapter->dev;
2808         int             bar, want, queues, msgs, maxqueues;
2809
2810         /* tuneable override */
2811         if (igb_enable_msix == 0)
2812                 goto msi;
2813
2814         /* First try MSI/X */
2815         msgs = pci_msix_count(dev); 
2816         if (msgs == 0)
2817                 goto msi;
2818         /*
2819         ** Some newer devices, as with ixgbe, may
2820         ** use a different BAR, so we need to keep
2821         ** track of which one is used.
2822         */
2823         adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2824         bar = pci_read_config(dev, adapter->memrid, 4);
2825         if (bar == 0) /* use next bar */
2826                 adapter->memrid += 4;
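        /*
         * Illustrative note: PCIR_BAR(n) is the config-space offset of
         * BAR n, and consecutive 32-bit BARs are 4 bytes apart, so
         * advancing memrid by 4 simply points at the next BAR register
         * when the expected one reads back as zero.
         */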
2827         adapter->msix_mem = bus_alloc_resource_any(dev,
2828             SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2829         if (adapter->msix_mem == NULL) {
2830                 /* May not be enabled */
2831                 device_printf(adapter->dev,
2832                     "Unable to map MSIX table\n");
2833                 goto msi;
2834         }
2835
2836         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
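        /*
         * For example (illustrative): with 8 CPUs and 10 MSIX messages
         * this yields 8 queues; with 16 CPUs and the same 10 messages
         * it yields 9, reserving one message for the link interrupt.
         */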
2837
2838         /* Override via tuneable */
2839         if (igb_num_queues != 0)
2840                 queues = igb_num_queues;
2841
2842 #ifdef  RSS
2843         /* If we're doing RSS, clamp at the number of RSS buckets */
2844         if (queues > rss_getnumbuckets())
2845                 queues = rss_getnumbuckets();
2846 #endif
2847
2848
2849         /* Sanity check based on HW */
2850         switch (adapter->hw.mac.type) {
2851                 case e1000_82575:
2852                         maxqueues = 4;
2853                         break;
2854                 case e1000_82576:
2855                 case e1000_82580:
2856                 case e1000_i350:
2857                 case e1000_i354:
2858                         maxqueues = 8;
2859                         break;
2860                 case e1000_i210:
2861                         maxqueues = 4;
2862                         break;
2863                 case e1000_i211:
2864                         maxqueues = 2;
2865                         break;
2866                 default:  /* VF interfaces */
2867                         maxqueues = 1;
2868                         break;
2869         }
2870
2871         /* Final clamp on the actual hardware capability */
2872         if (queues > maxqueues)
2873                 queues = maxqueues;
2874
2875         /*
2876         ** One vector (RX/TX pair) per queue
2877         ** plus an additional one for the link interrupt
2878         */
2879         want = queues + 1;
2880         if (msgs >= want)
2881                 msgs = want;
2882         else {
2883                 device_printf(adapter->dev,
2884                     "MSIX Configuration Problem, "
2885                     "%d vectors available, but %d vectors wanted!\n",
2886                     msgs, want);
2887                 goto msi;
2888         }
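        /*
         * Worked example (illustrative): with 4 queues this asks for
         * want = 5 vectors, one per RX/TX queue pair plus one for the
         * link interrupt; if the device exposes fewer than 5 MSIX
         * messages we print the warning above and fall back to MSI.
         */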
2889         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2890                 device_printf(adapter->dev,
2891                     "Using MSIX interrupts with %d vectors\n", msgs);
2892                 adapter->num_queues = queues;
2893                 return (msgs);
2894         }
2895         /*
2896         ** If MSIX alloc failed or provided us with
2897         ** less than needed, free and fall through to MSI
2898         */
2899         pci_release_msi(dev);
2900
2901 msi:
2902         if (adapter->msix_mem != NULL) {
2903                 bus_release_resource(dev, SYS_RES_MEMORY,
2904                     adapter->memrid, adapter->msix_mem);
2905                 adapter->msix_mem = NULL;
2906         }
2907         msgs = 1;
2908         if (pci_alloc_msi(dev, &msgs) == 0) {
2909                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2910                 return (msgs);
2911         }
2912         device_printf(adapter->dev, "Using a Legacy interrupt\n");
2913         return (0);
2914 }
2915
2916 /*********************************************************************
2917  *
2918  *  Initialize the DMA Coalescing feature
2919  *
2920  **********************************************************************/
2921 static void
2922 igb_init_dmac(struct adapter *adapter, u32 pba)
2923 {
2924         device_t        dev = adapter->dev;
2925         struct e1000_hw *hw = &adapter->hw;
2926         u32             dmac, reg = ~E1000_DMACR_DMAC_EN;
2927         u16             hwm;
2928
2929         if (hw->mac.type == e1000_i211)
2930                 return;
2931
2932         if (hw->mac.type > e1000_82580) {
2933
2934                 if (adapter->dmac == 0) { /* Disabling it */
2935                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2936                         return;
2937                 } else
2938                         device_printf(dev, "DMA Coalescing enabled\n");
2939
2940                 /* Set starting threshold */
2941                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2942
2943                 hwm = 64 * pba - adapter->max_frame_size / 16;
2944                 if (hwm < 64 * (pba - 6))
2945                         hwm = 64 * (pba - 6);
2946                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2947                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2948                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2949                     & E1000_FCRTC_RTH_COAL_MASK);
2950                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2951
2952
2953                 dmac = pba - adapter->max_frame_size / 512;
2954                 if (dmac < pba - 10)
2955                         dmac = pba - 10;
2956                 reg = E1000_READ_REG(hw, E1000_DMACR);
2957                 reg &= ~E1000_DMACR_DMACTHR_MASK;
2958                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2959                     & E1000_DMACR_DMACTHR_MASK);
2960
2961                 /* transition to L0s or L1 if available.. */
2962                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2963
2964                 /* Check for a 2.5Gb backplane connection before
2965                 * configuring the watchdog timer: the timer value is
2966                 * programmed in 12.8usec units on a 2.5Gb connection
2967                 * and in 32usec units otherwise.
2968                 */
2970                 if (hw->mac.type == e1000_i354) {
2971                         int status = E1000_READ_REG(hw, E1000_STATUS);
2972                         if ((status & E1000_STATUS_2P5_SKU) &&
2973                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2974                                 reg |= ((adapter->dmac * 5) >> 6);
2975                         else
2976                                 reg |= (adapter->dmac >> 5);
2977                 } else {
2978                         reg |= (adapter->dmac >> 5);
2979                 }
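                /*
                 * Illustrative arithmetic, assuming adapter->dmac is in
                 * usec: dmac = 1000 gives 1000 >> 5 = 31 units of 32usec
                 * (~992usec) on a regular link, and (1000 * 5) >> 6 = 78
                 * units of 12.8usec (~998usec) on a 2.5Gb link.
                 */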
2980
2981                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2982
2983                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2984
2985                 /* Set the interval before transition */
2986                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2987                 if (hw->mac.type == e1000_i350)
2988                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
2989                 /*
2990                 ** On a 2.5Gb connection the TTLX unit is 0.4 usec,
2991                 ** so the same 4 usec delay is 4 / 0.4 = 10 = 0xA units.
2992                 */
2993                 if (hw->mac.type == e1000_i354) {
2994                         int status = E1000_READ_REG(hw, E1000_STATUS);
2995                         if ((status & E1000_STATUS_2P5_SKU) &&
2996                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2997                                 reg |= 0xA;
2998                         else
2999                                 reg |= 0x4;
3000                 } else {
3001                         reg |= 0x4;
3002                 }
3003
3004                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3005
3006                 /* free space in tx packet buffer to wake from DMA coal */
3007                 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
3008                     (2 * adapter->max_frame_size)) >> 6);
3009
3010                 /* make low power state decision controlled by DMA coal */
3011                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3012                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3013                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3014
3015         } else if (hw->mac.type == e1000_82580) {
3016                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3017                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3018                     reg & ~E1000_PCIEMISC_LX_DECISION);
3019                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3020         }
3021 }
3022
3023
3024 /*********************************************************************
3025  *
3026  *  Set up a fresh starting state
3027  *
3028  **********************************************************************/
3029 static void
3030 igb_reset(struct adapter *adapter)
3031 {
3032         device_t        dev = adapter->dev;
3033         struct e1000_hw *hw = &adapter->hw;
3034         struct e1000_fc_info *fc = &hw->fc;
3035         struct ifnet    *ifp = adapter->ifp;
3036         u32             pba = 0;
3037         u16             hwm;
3038
3039         INIT_DEBUGOUT("igb_reset: begin");
3040
3041         /* Let the firmware know the OS is in control */
3042         igb_get_hw_control(adapter);
3043
3044         /*
3045          * Packet Buffer Allocation (PBA)
3046          * Writing PBA sets the receive portion of the buffer;
3047          * the remainder is used for the transmit buffer.
3048          */
3049         switch (hw->mac.type) {
3050         case e1000_82575:
3051                 pba = E1000_PBA_32K;
3052                 break;
3053         case e1000_82576:
3054         case e1000_vfadapt:
3055                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3056                 pba &= E1000_RXPBS_SIZE_MASK_82576;
3057                 break;
3058         case e1000_82580:
3059         case e1000_i350:
3060         case e1000_i354:
3061         case e1000_vfadapt_i350:
3062                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3063                 pba = e1000_rxpbs_adjust_82580(pba);
3064                 break;
3065         case e1000_i210:
3066         case e1000_i211:
3067                 pba = E1000_PBA_34K;    /* FALLTHROUGH to default */
3068         default:
3069                 break;
3070         }
3071
3072         /* Special needs in case of Jumbo frames */
3073         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3074                 u32 tx_space, min_tx, min_rx;
3075                 pba = E1000_READ_REG(hw, E1000_PBA);
3076                 tx_space = pba >> 16;
3077                 pba &= 0xffff;
3078                 min_tx = (adapter->max_frame_size +
3079                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3080                 min_tx = roundup2(min_tx, 1024);
3081                 min_tx >>= 10;
3082                 min_rx = adapter->max_frame_size;
3083                 min_rx = roundup2(min_rx, 1024);
3084                 min_rx >>= 10;
3085                 if (tx_space < min_tx &&
3086                     ((min_tx - tx_space) < pba)) {
3087                         pba = pba - (min_tx - tx_space);
3088                         /*
3089                          * if short on rx space, rx wins
3090                          * and must trump tx adjustment
3091                          */
3092                         if (pba < min_rx)
3093                                 pba = min_rx;
3094                 }
3095                 E1000_WRITE_REG(hw, E1000_PBA, pba);
3096         }
3097
3098         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3099
3100         /*
3101          * These parameters control the automatic generation (Tx) and
3102          * response (Rx) to Ethernet PAUSE frames.
3103          * - High water mark should allow for at least two frames to be
3104          *   received after sending an XOFF.
3105          * - Low water mark works best when it is very near the high water mark.
3106          *   This allows the receiver to restart by sending XON when it has
3107          *   drained a bit.
3108          */
3109         hwm = min(((pba << 10) * 9 / 10),
3110             ((pba << 10) - 2 * adapter->max_frame_size));
3111
3112         if (hw->mac.type < e1000_82576) {
3113                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3114                 fc->low_water = fc->high_water - 8;
3115         } else {
3116                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3117                 fc->low_water = fc->high_water - 16;
3118         }
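        /*
         * Illustrative example: with pba = 34 (KB) and a 1518-byte max
         * frame, hwm = min(34816 * 9 / 10, 34816 - 2 * 1518) = 31334;
         * on 82576 and later that rounds down to a high water mark of
         * 31328 bytes, with the low water mark 16 bytes beneath it.
         */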
3119
3120         fc->pause_time = IGB_FC_PAUSE_TIME;
3121         fc->send_xon = TRUE;
3122         if (adapter->fc)
3123                 fc->requested_mode = adapter->fc;
3124         else
3125                 fc->requested_mode = e1000_fc_default;
3126
3127         /* Issue a global reset */
3128         e1000_reset_hw(hw);
3129         E1000_WRITE_REG(hw, E1000_WUC, 0);
3130
3131         /* Reset for AutoMediaDetect */
3132         if (adapter->flags & IGB_MEDIA_RESET) {
3133                 e1000_setup_init_funcs(hw, TRUE);
3134                 e1000_get_bus_info(hw);
3135                 adapter->flags &= ~IGB_MEDIA_RESET;
3136         }
3137
3138         if (e1000_init_hw(hw) < 0)
3139                 device_printf(dev, "Hardware Initialization Failed\n");
3140
3141         /* Setup DMA Coalescing */
3142         igb_init_dmac(adapter, pba);
3143
3144         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3145         e1000_get_phy_info(hw);
3146         e1000_check_for_link(hw);
3147         return;
3148 }
3149
3150 /*********************************************************************
3151  *
3152  *  Setup networking device structure and register an interface.
3153  *
3154  **********************************************************************/
3155 static int
3156 igb_setup_interface(device_t dev, struct adapter *adapter)
3157 {
3158         struct ifnet   *ifp;
3159
3160         INIT_DEBUGOUT("igb_setup_interface: begin");
3161
3162         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3163         if (ifp == NULL) {
3164                 device_printf(dev, "can not allocate ifnet structure\n");
3165                 return (-1);
3166         }
3167         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3168         ifp->if_init =  igb_init;
3169         ifp->if_softc = adapter;
3170         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3171         ifp->if_ioctl = igb_ioctl;
3172         ifp->if_get_counter = igb_get_counter;
3173
3174         /* TSO parameters */
3175         ifp->if_hw_tsomax = IP_MAXPACKET;
3176         ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
3177         ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
3178
3179 #ifndef IGB_LEGACY_TX
3180         ifp->if_transmit = igb_mq_start;
3181         ifp->if_qflush = igb_qflush;
3182 #else
3183         ifp->if_start = igb_start;
3184         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3185         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3186         IFQ_SET_READY(&ifp->if_snd);
3187 #endif
3188
3189         ether_ifattach(ifp, adapter->hw.mac.addr);
3190
3191         ifp->if_capabilities = ifp->if_capenable = 0;
3192
3193         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3194 #if __FreeBSD_version >= 1000000
3195         ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
3196 #endif
3197         ifp->if_capabilities |= IFCAP_TSO;
3198         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3199         ifp->if_capenable = ifp->if_capabilities;
3200
3201         /* Advertise LRO capability, but don't enable it by default */
3202         ifp->if_capabilities |= IFCAP_LRO;
3203
3204 #ifdef DEVICE_POLLING
3205         ifp->if_capabilities |= IFCAP_POLLING;
3206 #endif
3207
3208         /*
3209          * Tell the upper layer(s) we
3210          * support full VLAN capability.
3211          */
3212         ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3213         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3214                              |  IFCAP_VLAN_HWTSO
3215                              |  IFCAP_VLAN_MTU;
3216         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3217                           |  IFCAP_VLAN_HWTSO
3218                           |  IFCAP_VLAN_MTU;
3219
3220         /*
3221         ** Don't turn this on by default: if vlans are
3222         ** created on another pseudo device (e.g. lagg),
3223         ** vlan events are not passed through, breaking
3224         ** operation, but with HW FILTER off it works. If
3225         ** you use vlans directly on the igb driver you can
3226         ** enable this and get full hardware tag filtering.
3227         */
3228         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
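        /*
         * Note: the filter capability can typically be toggled at
         * runtime with ifconfig(8), e.g. "ifconfig igb0 vlanhwfilter"
         * or "-vlanhwfilter" (hypothetical interface name).
         */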
3229
3230         /*
3231          * Specify the media types supported by this adapter and register
3232          * callbacks to update media and link information
3233          */
3234         ifmedia_init(&adapter->media, IFM_IMASK,
3235             igb_media_change, igb_media_status);
3236         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3237             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3238                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3239                             0, NULL);
3240                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3241         } else {
3242                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3243                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3244                             0, NULL);
3245                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3246                             0, NULL);
3247                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3248                             0, NULL);
3249                 if (adapter->hw.phy.type != e1000_phy_ife) {
3250                         ifmedia_add(&adapter->media,
3251                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3252                         ifmedia_add(&adapter->media,
3253                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3254                 }
3255         }
3256         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3257         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3258         return (0);
3259 }
3260
3261
3262 /*
3263  * Manage DMA'able memory.
3264  */
3265 static void
3266 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3267 {
3268         if (error)
3269                 return;
3270         *(bus_addr_t *) arg = segs[0].ds_addr;
3271 }
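/*
 * Note: bus_dmamap_load() hands the resulting physical segment list to
 * this callback; the tag created in igb_dma_malloc() below is built with
 * nsegments = 1, so only segs[0] is meaningful and its bus address is
 * recorded for the caller.
 */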
3272
3273 static int
3274 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3275         struct igb_dma_alloc *dma, int mapflags)
3276 {
3277         int error;
3278
3279         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3280                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3281                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3282                                 BUS_SPACE_MAXADDR,      /* highaddr */
3283                                 NULL, NULL,             /* filter, filterarg */
3284                                 size,                   /* maxsize */
3285                                 1,                      /* nsegments */
3286                                 size,                   /* maxsegsize */
3287                                 0,                      /* flags */
3288                                 NULL,                   /* lockfunc */
3289                                 NULL,                   /* lockarg */
3290                                 &dma->dma_tag);
3291         if (error) {
3292                 device_printf(adapter->dev,
3293                     "%s: bus_dma_tag_create failed: %d\n",
3294                     __func__, error);
3295                 goto fail_0;
3296         }
3297
3298         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3299             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3300         if (error) {
3301                 device_printf(adapter->dev,
3302                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3303                     __func__, (uintmax_t)size, error);
3304                 goto fail_2;
3305         }
3306
3307         dma->dma_paddr = 0;
3308         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3309             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3310         if (error || dma->dma_paddr == 0) {
3311                 device_printf(adapter->dev,
3312                     "%s: bus_dmamap_load failed: %d\n",
3313                     __func__, error);
3314                 goto fail_3;
3315         }
3316
3317         return (0);
3318
3319 fail_3:
3320         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3321 fail_2:
3322         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3323         bus_dma_tag_destroy(dma->dma_tag);
3324 fail_0:
3325         dma->dma_tag = NULL;
3326
3327         return (error);
3328 }
3329
3330 static void
3331 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3332 {
3333         if (dma->dma_tag == NULL)
3334                 return;
3335         if (dma->dma_paddr != 0) {
3336                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3337                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3338                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3339                 dma->dma_paddr = 0;
3340         }
3341         if (dma->dma_vaddr != NULL) {
3342                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3343                 dma->dma_vaddr = NULL;
3344         }
3345         bus_dma_tag_destroy(dma->dma_tag);
3346         dma->dma_tag = NULL;
3347 }
3348
3349
3350 /*********************************************************************
3351  *
3352  *  Allocate memory for the transmit and receive rings, and then
3353  *  the descriptors associated with each, called only once at attach.
3354  *
3355  **********************************************************************/
3356 static int
3357 igb_allocate_queues(struct adapter *adapter)
3358 {
3359         device_t dev = adapter->dev;
3360         struct igb_queue        *que = NULL;
3361         struct tx_ring          *txr = NULL;
3362         struct rx_ring          *rxr = NULL;
3363         int rsize, tsize, error = E1000_SUCCESS;
3364         int txconf = 0, rxconf = 0;
3365
3366         /* First allocate the top level queue structs */
3367         if (!(adapter->queues =
3368             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3369             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3370                 device_printf(dev, "Unable to allocate queue memory\n");
3371                 error = ENOMEM;
3372                 goto fail;
3373         }
3374
3375         /* Next allocate the TX ring struct memory */
3376         if (!(adapter->tx_rings =
3377             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3378             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3379                 device_printf(dev, "Unable to allocate TX ring memory\n");
3380                 error = ENOMEM;
3381                 goto tx_fail;
3382         }
3383
3384         /* Now allocate the RX */
3385         if (!(adapter->rx_rings =
3386             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3387             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3388                 device_printf(dev, "Unable to allocate RX ring memory\n");
3389                 error = ENOMEM;
3390                 goto rx_fail;
3391         }
3392
3393         tsize = roundup2(adapter->num_tx_desc *
3394             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3395         /*
3396          * Now set up the TX queues; txconf is needed to handle the
3397          * possibility that things fail midcourse so we can
3398          * unwind the allocations gracefully
3399          */
3400         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3401                 /* Set up some basics */
3402                 txr = &adapter->tx_rings[i];
3403                 txr->adapter = adapter;
3404                 txr->me = i;
3405                 txr->num_desc = adapter->num_tx_desc;
3406
3407                 /* Initialize the TX lock */
3408                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3409                     device_get_nameunit(dev), txr->me);
3410                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3411
3412                 if (igb_dma_malloc(adapter, tsize,
3413                         &txr->txdma, BUS_DMA_NOWAIT)) {
3414                         device_printf(dev,
3415                             "Unable to allocate TX Descriptor memory\n");
3416                         error = ENOMEM;
3417                         goto err_tx_desc;
3418                 }
3419                 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3420                 bzero((void *)txr->tx_base, tsize);
3421
3422                 /* Now allocate transmit buffers for the ring */
3423                 if (igb_allocate_transmit_buffers(txr)) {
3424                         device_printf(dev,
3425                             "Critical Failure setting up transmit buffers\n");
3426                         error = ENOMEM;
3427                         goto err_tx_desc;
3428                 }
3429 #ifndef IGB_LEGACY_TX
3430                 /* Allocate a buf ring */
3431                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3432                     M_WAITOK, &txr->tx_mtx);
3433 #endif
3434         }
3435
3436         /*
3437          * Next the RX queues...
3438          */ 
3439         rsize = roundup2(adapter->num_rx_desc *
3440             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3441         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3442                 rxr = &adapter->rx_rings[i];
3443                 rxr->adapter = adapter;
3444                 rxr->me = i;
3445
3446                 /* Initialize the RX lock */
3447                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3448                     device_get_nameunit(dev), rxr->me);
3449                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3450
3451                 if (igb_dma_malloc(adapter, rsize,
3452                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3453                         device_printf(dev,
3454                             "Unable to allocate RxDescriptor memory\n");
3455                         error = ENOMEM;
3456                         goto err_rx_desc;
3457                 }
3458                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3459                 bzero((void *)rxr->rx_base, rsize);
3460
3461                 /* Allocate receive buffers for the ring*/
3462                 if (igb_allocate_receive_buffers(rxr)) {
3463                         device_printf(dev,
3464                             "Critical Failure setting up receive buffers\n");
3465                         error = ENOMEM;
3466                         goto err_rx_desc;
3467                 }
3468         }
3469
3470         /*
3471         ** Finally set up the queue holding structs
3472         */
3473         for (int i = 0; i < adapter->num_queues; i++) {
3474                 que = &adapter->queues[i];
3475                 que->adapter = adapter;
3476                 que->txr = &adapter->tx_rings[i];
3477                 que->rxr = &adapter->rx_rings[i];
3478         }
3479
3480         return (0);
3481
3482 err_rx_desc:
3483         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3484                 igb_dma_free(adapter, &rxr->rxdma);
3485 err_tx_desc:
3486         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3487                 igb_dma_free(adapter, &txr->txdma);
3488         free(adapter->rx_rings, M_DEVBUF);
3489 rx_fail:
3490 #ifndef IGB_LEGACY_TX
3491         buf_ring_free(txr->br, M_DEVBUF);
3492 #endif
3493         free(adapter->tx_rings, M_DEVBUF);
3494 tx_fail:
3495         free(adapter->queues, M_DEVBUF);
3496 fail:
3497         return (error);
3498 }
3499
3500 /*********************************************************************
3501  *
3502  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3503  *  the information needed to transmit a packet on the wire. This is
3504  *  called only once at attach, setup is done every reset.
3505  *
3506  **********************************************************************/
3507 static int
3508 igb_allocate_transmit_buffers(struct tx_ring *txr)
3509 {
3510         struct adapter *adapter = txr->adapter;
3511         device_t dev = adapter->dev;
3512         struct igb_tx_buf *txbuf;
3513         int error, i;
3514
3515         /*
3516          * Setup DMA descriptor areas.
3517          */
3518         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3519                                1, 0,                    /* alignment, bounds */
3520                                BUS_SPACE_MAXADDR,       /* lowaddr */
3521                                BUS_SPACE_MAXADDR,       /* highaddr */
3522                                NULL, NULL,              /* filter, filterarg */
3523                                IGB_TSO_SIZE,            /* maxsize */
3524                                IGB_MAX_SCATTER,         /* nsegments */
3525                                PAGE_SIZE,               /* maxsegsize */
3526                                0,                       /* flags */
3527                                NULL,                    /* lockfunc */
3528                                NULL,                    /* lockfuncarg */
3529                                &txr->txtag))) {
3530                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3531                 goto fail;
3532         }
3533
3534         if (!(txr->tx_buffers =
3535             (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3536             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3537                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3538                 error = ENOMEM;
3539                 goto fail;
3540         }
3541
3542         /* Create the descriptor buffer dma maps */
3543         txbuf = txr->tx_buffers;
3544         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3545                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3546                 if (error != 0) {
3547                         device_printf(dev, "Unable to create TX DMA map\n");
3548                         goto fail;
3549                 }
3550         }
3551
3552         return 0;
3553 fail:
3554         /* Free everything; this handles the case where we failed partway through */
3555         igb_free_transmit_structures(adapter);
3556         return (error);
3557 }
3558
3559 /*********************************************************************
3560  *
3561  *  Initialize a transmit ring.
3562  *
3563  **********************************************************************/
3564 static void
3565 igb_setup_transmit_ring(struct tx_ring *txr)
3566 {
3567         struct adapter *adapter = txr->adapter;
3568         struct igb_tx_buf *txbuf;
3569         int i;
3570 #ifdef DEV_NETMAP
3571         struct netmap_adapter *na = NA(adapter->ifp);
3572         struct netmap_slot *slot;
3573 #endif /* DEV_NETMAP */
3574
3575         /* Clear the old descriptor contents */
3576         IGB_TX_LOCK(txr);
3577 #ifdef DEV_NETMAP
3578         slot = netmap_reset(na, NR_TX, txr->me, 0);
3579 #endif /* DEV_NETMAP */
3580         bzero((void *)txr->tx_base,
3581               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3582         /* Reset indices */
3583         txr->next_avail_desc = 0;
3584         txr->next_to_clean = 0;
3585
3586         /* Free any existing tx buffers. */
3587         txbuf = txr->tx_buffers;
3588         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3589                 if (txbuf->m_head != NULL) {
3590                         bus_dmamap_sync(txr->txtag, txbuf->map,
3591                             BUS_DMASYNC_POSTWRITE);
3592                         bus_dmamap_unload(txr->txtag, txbuf->map);
3593                         m_freem(txbuf->m_head);
3594                         txbuf->m_head = NULL;
3595                 }
3596 #ifdef DEV_NETMAP
3597                 if (slot) {
3598                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3599                         /* no need to set the address */
3600                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3601                 }
3602 #endif /* DEV_NETMAP */
3603                 /* clear the watch index */
3604                 txbuf->eop = NULL;
3605         }
3606
3607         /* Set number of descriptors available */
3608         txr->tx_avail = adapter->num_tx_desc;
3609
3610         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3611             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3612         IGB_TX_UNLOCK(txr);
3613 }
3614
3615 /*********************************************************************
3616  *
3617  *  Initialize all transmit rings.
3618  *
3619  **********************************************************************/
3620 static void
3621 igb_setup_transmit_structures(struct adapter *adapter)
3622 {
3623         struct tx_ring *txr = adapter->tx_rings;
3624
3625         for (int i = 0; i < adapter->num_queues; i++, txr++)
3626                 igb_setup_transmit_ring(txr);
3627
3628         return;
3629 }
3630
3631 /*********************************************************************
3632  *
3633  *  Enable transmit unit.
3634  *
3635  **********************************************************************/
3636 static void
3637 igb_initialize_transmit_units(struct adapter *adapter)
3638 {
3639         struct tx_ring  *txr = adapter->tx_rings;
3640         struct e1000_hw *hw = &adapter->hw;
3641         u32             tctl, txdctl;
3642
3643         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3644         tctl = txdctl = 0;
3645
3646         /* Setup the Tx Descriptor Rings */
3647         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3648                 u64 bus_addr = txr->txdma.dma_paddr;
3649
3650                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3651                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3652                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3653                     (uint32_t)(bus_addr >> 32));
3654                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3655                     (uint32_t)bus_addr);
3656
3657                 /* Setup the HW Tx Head and Tail descriptor pointers */
3658                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3659                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3660
3661                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3662                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3663                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3664
3665                 txr->queue_status = IGB_QUEUE_IDLE;
3666
3667                 txdctl |= IGB_TX_PTHRESH;
3668                 txdctl |= IGB_TX_HTHRESH << 8;
3669                 txdctl |= IGB_TX_WTHRESH << 16;
3670                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3671                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3672         }
3673
3674         if (adapter->vf_ifp)
3675                 return;
3676
3677         e1000_config_collision_dist(hw);
3678
3679         /* Program the Transmit Control Register */
3680         tctl = E1000_READ_REG(hw, E1000_TCTL);
3681         tctl &= ~E1000_TCTL_CT;
3682         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3683                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3684
3685         /* This write will effectively turn on the transmit unit. */
3686         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3687 }
3688
3689 /*********************************************************************
3690  *
3691  *  Free all transmit rings.
3692  *
3693  **********************************************************************/
3694 static void
3695 igb_free_transmit_structures(struct adapter *adapter)
3696 {
3697         struct tx_ring *txr = adapter->tx_rings;
3698
3699         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3700                 IGB_TX_LOCK(txr);
3701                 igb_free_transmit_buffers(txr);
3702                 igb_dma_free(adapter, &txr->txdma);
3703                 IGB_TX_UNLOCK(txr);
3704                 IGB_TX_LOCK_DESTROY(txr);
3705         }
3706         free(adapter->tx_rings, M_DEVBUF);
3707 }
3708
3709 /*********************************************************************
3710  *
3711  *  Free transmit ring related data structures.
3712  *
3713  **********************************************************************/
3714 static void
3715 igb_free_transmit_buffers(struct tx_ring *txr)
3716 {
3717         struct adapter *adapter = txr->adapter;
3718         struct igb_tx_buf *tx_buffer;
3719         int             i;
3720
3721         INIT_DEBUGOUT("free_transmit_ring: begin");
3722
3723         if (txr->tx_buffers == NULL)
3724                 return;
3725
3726         tx_buffer = txr->tx_buffers;
3727         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3728                 if (tx_buffer->m_head != NULL) {
3729                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3730                             BUS_DMASYNC_POSTWRITE);
3731                         bus_dmamap_unload(txr->txtag,
3732                             tx_buffer->map);
3733                         m_freem(tx_buffer->m_head);
3734                         tx_buffer->m_head = NULL;
3735                         if (tx_buffer->map != NULL) {
3736                                 bus_dmamap_destroy(txr->txtag,
3737                                     tx_buffer->map);
3738                                 tx_buffer->map = NULL;
3739                         }
3740                 } else if (tx_buffer->map != NULL) {
3741                         bus_dmamap_unload(txr->txtag,
3742                             tx_buffer->map);
3743                         bus_dmamap_destroy(txr->txtag,
3744                             tx_buffer->map);
3745                         tx_buffer->map = NULL;
3746                 }
3747         }
3748 #ifndef IGB_LEGACY_TX
3749         if (txr->br != NULL)
3750                 buf_ring_free(txr->br, M_DEVBUF);
3751 #endif
3752         if (txr->tx_buffers != NULL) {
3753                 free(txr->tx_buffers, M_DEVBUF);
3754                 txr->tx_buffers = NULL;
3755         }
3756         if (txr->txtag != NULL) {
3757                 bus_dma_tag_destroy(txr->txtag);
3758                 txr->txtag = NULL;
3759         }
3760         return;
3761 }
3762
3763 /**********************************************************************
3764  *
3765  *  Setup work for hardware segmentation offload (TSO) on
3766  *  adapters using advanced tx descriptors
3767  *
3768  **********************************************************************/
3769 static int
3770 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3771     u32 *cmd_type_len, u32 *olinfo_status)
3772 {
3773         struct adapter *adapter = txr->adapter;
3774         struct e1000_adv_tx_context_desc *TXD;
3775         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3776         u32 mss_l4len_idx = 0, paylen;
3777         u16 vtag = 0, eh_type;
3778         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3779         struct ether_vlan_header *eh;
3780 #ifdef INET6
3781         struct ip6_hdr *ip6;
3782 #endif
3783 #ifdef INET
3784         struct ip *ip;
3785 #endif
3786         struct tcphdr *th;
3787
3788
3789         /*
3790          * Determine where frame payload starts.
3791          * Jump over vlan headers if already present
3792          */
3793         eh = mtod(mp, struct ether_vlan_header *);
3794         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3795                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3796                 eh_type = eh->evl_proto;
3797         } else {
3798                 ehdrlen = ETHER_HDR_LEN;
3799                 eh_type = eh->evl_encap_proto;
3800         }
3801
3802         switch (ntohs(eh_type)) {
3803 #ifdef INET6
3804         case ETHERTYPE_IPV6:
3805                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3806                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3807                 if (ip6->ip6_nxt != IPPROTO_TCP)
3808                         return (ENXIO);
3809                 ip_hlen = sizeof(struct ip6_hdr);
3811                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3812                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3813                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3814                 break;
3815 #endif
3816 #ifdef INET
3817         case ETHERTYPE_IP:
3818                 ip = (struct ip *)(mp->m_data + ehdrlen);
3819                 if (ip->ip_p != IPPROTO_TCP)
3820                         return (ENXIO);
3821                 ip->ip_sum = 0;
3822                 ip_hlen = ip->ip_hl << 2;
3823                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3824                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3825                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3826                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3827                 /* Tell transmit desc to also do IPv4 checksum. */
3828                 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3829                 break;
3830 #endif
3831         default:
3832                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3833                     __func__, ntohs(eh_type));
3834                 break;
3835         }
3836
3837         ctxd = txr->next_avail_desc;
3838         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3839
3840         tcp_hlen = th->th_off << 2;
3841
3842         /* This is used in the transmit desc in encap */
3843         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
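        /*
         * For example (illustrative): a 64000-byte TSO chain with a
         * 14-byte Ethernet header, 20-byte IP header and 20-byte TCP
         * header yields paylen = 64000 - 54 = 63946, the TCP payload
         * the hardware will segment.
         */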
3844
3845         /* VLAN MACLEN IPLEN */
3846         if (mp->m_flags & M_VLANTAG) {
3847                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3848                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3849         }
3850
3851         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3852         vlan_macip_lens |= ip_hlen;
3853         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3854
3855         /* ADV DTYPE TUCMD */
3856         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3857         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3858         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3859
3860         /* MSS L4LEN IDX */
3861         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3862         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3863         /* 82575 needs the queue index added */
3864         if (adapter->hw.mac.type == e1000_82575)
3865                 mss_l4len_idx |= txr->me << 4;
3866         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3867
3868         TXD->seqnum_seed = htole32(0);
3869
3870         if (++ctxd == txr->num_desc)
3871                 ctxd = 0;
3872
3873         txr->tx_avail--;
3874         txr->next_avail_desc = ctxd;
3875         *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3876         *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3877         *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3878         ++txr->tso_tx;
3879         return (0);
3880 }
3881
3882 /*********************************************************************
3883  *
3884  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3885  *
3886  **********************************************************************/
3887
3888 static int
3889 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3890     u32 *cmd_type_len, u32 *olinfo_status)
3891 {
3892         struct e1000_adv_tx_context_desc *TXD;
3893         struct adapter *adapter = txr->adapter;
3894         struct ether_vlan_header *eh;
3895         struct ip *ip;
3896         struct ip6_hdr *ip6;
3897         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3898         int     ehdrlen, ip_hlen = 0;
3899         u16     etype;
3900         u8      ipproto = 0;
3901         int     offload = TRUE;
3902         int     ctxd = txr->next_avail_desc;
3903         u16     vtag = 0;
3904
3905         /* First check if TSO is to be used */
3906         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3907                 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3908
3909         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3910                 offload = FALSE;
3911
3912         /* Indicate the whole packet as payload when not doing TSO */
3913         *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3914
3915         /* Now ready a context descriptor */
3916         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3917
3918         /*
3919         ** In advanced descriptors the vlan tag must 
3920         ** be placed into the context descriptor. Hence
3921         ** we need to make one even if not doing offloads.
3922         */
3923         if (mp->m_flags & M_VLANTAG) {
3924                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3925                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3926         } else if (offload == FALSE) /* ... no offload to do */
3927                 return (0);
3928
3929         /*
3930          * Determine where frame payload starts.
3931          * Jump over vlan headers if already present,
3932          * helpful for QinQ too.
3933          */
3934         eh = mtod(mp, struct ether_vlan_header *);
3935         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3936                 etype = ntohs(eh->evl_proto);
3937                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3938         } else {
3939                 etype = ntohs(eh->evl_encap_proto);
3940                 ehdrlen = ETHER_HDR_LEN;
3941         }
3942
3943         /* Set the ether header length */
3944         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3945
3946         switch (etype) {
3947                 case ETHERTYPE_IP:
3948                         ip = (struct ip *)(mp->m_data + ehdrlen);
3949                         ip_hlen = ip->ip_hl << 2;
3950                         ipproto = ip->ip_p;
3951                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3952                         break;
3953                 case ETHERTYPE_IPV6:
3954                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3955                         ip_hlen = sizeof(struct ip6_hdr);
3956                         /* XXX-BZ this will go badly in case of ext hdrs. */
3957                         ipproto = ip6->ip6_nxt;
3958                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3959                         break;
3960                 default:
3961                         offload = FALSE;
3962                         break;
3963         }
3964
3965         vlan_macip_lens |= ip_hlen;
3966         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3967
3968         switch (ipproto) {
3969                 case IPPROTO_TCP:
3970 #if __FreeBSD_version >= 1000000
3971                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
3972 #else
3973                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3974 #endif
3975                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3976                         break;
3977                 case IPPROTO_UDP:
3978 #if __FreeBSD_version >= 1000000
3979                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
3980 #else
3981                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3982 #endif
3983                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3984                         break;
3985
3986 #if __FreeBSD_version >= 800000
3987                 case IPPROTO_SCTP:
3988 #if __FreeBSD_version >= 1000000
3989                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
3990 #else
3991                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3992 #endif
3993                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3994                         break;
3995 #endif
3996                 default:
3997                         offload = FALSE;
3998                         break;
3999         }
4000
4001         if (offload) /* For the TX descriptor setup */
4002                 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4003
4004         /* 82575 needs the queue index added */
4005         if (adapter->hw.mac.type == e1000_82575)
4006                 mss_l4len_idx = txr->me << 4;
4007
4008         /* Now copy bits into descriptor */
4009         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
4010         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
4011         TXD->seqnum_seed = htole32(0);
4012         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
4013
4014         /* We've consumed the first desc, adjust counters */
4015         if (++ctxd == txr->num_desc)
4016                 ctxd = 0;
4017         txr->next_avail_desc = ctxd;
4018         --txr->tx_avail;
4019
4020         return (0);
4021 }
4022
4023 /**********************************************************************
4024  *
4025  *  Examine each tx_buffer in the used queue. If the hardware is done
4026  *  processing the packet then free associated resources. The
4027  *  tx_buffer is put back on the free queue.
4028  *
4029  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
4030  **********************************************************************/
4031 static bool
4032 igb_txeof(struct tx_ring *txr)
4033 {
4034         struct adapter          *adapter = txr->adapter;
4035 #ifdef DEV_NETMAP
4036         struct ifnet            *ifp = adapter->ifp;
4037 #endif /* DEV_NETMAP */
4038         u32                     work, processed = 0;
4039         int                     limit = adapter->tx_process_limit;
4040         struct igb_tx_buf       *buf;
4041         union e1000_adv_tx_desc *txd;
4042
4043         mtx_assert(&txr->tx_mtx, MA_OWNED);
4044
4045 #ifdef DEV_NETMAP
4046         if (netmap_tx_irq(ifp, txr->me))
4047                 return (FALSE);
4048 #endif /* DEV_NETMAP */
4049
4050         if (txr->tx_avail == txr->num_desc) {
4051                 txr->queue_status = IGB_QUEUE_IDLE;
4052                 return FALSE;
4053         }
4054
4055         /* Get work starting point */
4056         work = txr->next_to_clean;
4057         buf = &txr->tx_buffers[work];
4058         txd = &txr->tx_base[work];
4059         work -= txr->num_desc; /* The distance to ring end */
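        /*
         * Note: "work" is unsigned, so it now holds next_to_clean minus
         * num_desc modulo 2^32; incrementing it reaches zero exactly
         * when the index walks off the end of the ring, letting the
         * "!work" tests below detect the wrap without a comparison.
         */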
4060         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4061             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4062         do {
4063                 union e1000_adv_tx_desc *eop = buf->eop;
4064                 if (eop == NULL) /* No work */
4065                         break;
4066
4067                 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4068                         break;  /* I/O not complete */
4069
4070                 if (buf->m_head) {
4071                         txr->bytes +=
4072                             buf->m_head->m_pkthdr.len;
4073                         bus_dmamap_sync(txr->txtag,
4074                             buf->map,
4075                             BUS_DMASYNC_POSTWRITE);
4076                         bus_dmamap_unload(txr->txtag,
4077                             buf->map);
4078                         m_freem(buf->m_head);
4079                         buf->m_head = NULL;
4080                 }
4081                 buf->eop = NULL;
4082                 ++txr->tx_avail;
4083
4084                 /* Clean the entire range if the packet spans multiple descriptors */
4085                 while (txd != eop) {
4086                         ++txd;
4087                         ++buf;
4088                         ++work;
4089                         /* wrap the ring? */
4090                         if (__predict_false(!work)) {
4091                                 work -= txr->num_desc;
4092                                 buf = txr->tx_buffers;
4093                                 txd = txr->tx_base;
4094                         }
4095                         if (buf->m_head) {
4096                                 txr->bytes +=
4097                                     buf->m_head->m_pkthdr.len;
4098                                 bus_dmamap_sync(txr->txtag,
4099                                     buf->map,
4100                                     BUS_DMASYNC_POSTWRITE);
4101                                 bus_dmamap_unload(txr->txtag,
4102                                     buf->map);
4103                                 m_freem(buf->m_head);
4104                                 buf->m_head = NULL;
4105                         }
4106                         ++txr->tx_avail;
4107                         buf->eop = NULL;
4108
4109                 }
4110                 ++txr->packets;
4111                 ++processed;
4112                 txr->watchdog_time = ticks;
4113
4114                 /* Try the next packet */
4115                 ++txd;
4116                 ++buf;
4117                 ++work;
4118                 /* reset with a wrap */
4119                 if (__predict_false(!work)) {
4120                         work -= txr->num_desc;
4121                         buf = txr->tx_buffers;
4122                         txd = txr->tx_base;
4123                 }
4124                 prefetch(txd);
4125         } while (__predict_true(--limit));
4126
4127         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4128             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4129
4130         work += txr->num_desc;
4131         txr->next_to_clean = work;
4132
4133         /*
4134         ** Watchdog calculation: we know there's work
4135         ** outstanding or the first return above would
4136         ** have been taken, so no progress
4137         ** for too long indicates a hang.
4138         */
4139         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4140                 txr->queue_status |= IGB_QUEUE_HUNG;
4141
4142         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4143                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;       
4144
4145         if (txr->tx_avail == txr->num_desc) {
4146                 txr->queue_status = IGB_QUEUE_IDLE;
4147                 return (FALSE);
4148         }
4149
4150         return (TRUE);
4151 }
4152
4153 /*********************************************************************
4154  *
4155  *  Refresh mbuf buffers for RX descriptor rings
4156  *   - keeps its own state, so discards due to resource
4157  *     exhaustion are unnecessary; if an mbuf cannot be obtained
4158  *     it just returns, keeping its placeholder, and can simply
4159  *     be called again later to retry.
4160  *
4161  **********************************************************************/
4162 static void
4163 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4164 {
4165         struct adapter          *adapter = rxr->adapter;
4166         bus_dma_segment_t       hseg[1];
4167         bus_dma_segment_t       pseg[1];
4168         struct igb_rx_buf       *rxbuf;
4169         struct mbuf             *mh, *mp;
4170         int                     i, j, nsegs, error;
4171         bool                    refreshed = FALSE;
4172
4173         i = j = rxr->next_to_refresh;
4174         /*
4175         ** Get one descriptor beyond
4176         ** our work mark to control
4177         ** the loop.
4178         */
4179         if (++j == adapter->num_rx_desc)
4180                 j = 0;
4181
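        /*
         * From here "i" is the slot being refreshed while "j" runs one
         * slot ahead to bound the loop, so on exit next_to_refresh is
         * left one descriptor short of the caller's limit.
         */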
4182         while (j != limit) {
4183                 rxbuf = &rxr->rx_buffers[i];
4184                 /* No hdr mbuf used with header split off */
4185                 if (rxr->hdr_split == FALSE)
4186                         goto no_split;
4187                 if (rxbuf->m_head == NULL) {
4188                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4189                         if (mh == NULL)
4190                                 goto update;
4191                 } else
4192                         mh = rxbuf->m_head;
4193
4194                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4196                 mh->m_flags |= M_PKTHDR;
4197                 /* Get the memory mapping */
4198                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4199                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4200                 if (error != 0) {
4201                         printf("Refresh mbufs: hdr dmamap load"
4202                             " failure - %d\n", error);
4203                         m_free(mh);
4204                         rxbuf->m_head = NULL;
4205                         goto update;
4206                 }
4207                 rxbuf->m_head = mh;
4208                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4209                     BUS_DMASYNC_PREREAD);
4210                 rxr->rx_base[i].read.hdr_addr =
4211                     htole64(hseg[0].ds_addr);
4212 no_split:
4213                 if (rxbuf->m_pack == NULL) {
4214                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4215                             M_PKTHDR, adapter->rx_mbuf_sz);
4216                         if (mp == NULL)
4217                                 goto update;
4218                 } else
4219                         mp = rxbuf->m_pack;
4220
4221                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4222                 /* Get the memory mapping */
4223                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4224                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4225                 if (error != 0) {
4226                         printf("Refresh mbufs: payload dmamap load"
4227                             " failure - %d\n", error);
4228                         m_free(mp);
4229                         rxbuf->m_pack = NULL;
4230                         goto update;
4231                 }
4232                 rxbuf->m_pack = mp;
4233                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4234                     BUS_DMASYNC_PREREAD);
4235                 rxr->rx_base[i].read.pkt_addr =
4236                     htole64(pseg[0].ds_addr);
4237                 refreshed = TRUE; /* at least one buffer was refreshed */
4238
4239                 i = j; /* our next is precalculated */
4240                 rxr->next_to_refresh = i;
4241                 if (++j == adapter->num_rx_desc)
4242                         j = 0;
4243         }
4244 update:
4245         if (refreshed) /* update tail */
4246                 E1000_WRITE_REG(&adapter->hw,
4247                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4248         return;
4249 }
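
/*
 * Editor's note (worked example): with num_rx_desc = 4 and
 * next_to_refresh = 2, the loop above visits (i, j) pairs
 * (2,3), (3,0), (0,1), ... and stops as soon as j == limit,
 * so the descriptor at 'limit' itself is never refreshed; it
 * stays as the gap between hardware and software ownership.
 */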
4250
4251
4252 /*********************************************************************
4253  *
4254  *  Allocate memory for rx_buffer structures. Since we use one
4255  *  rx_buffer per received packet, the maximum number of rx_buffer's
4256  *  that we'll need is equal to the number of receive descriptors
4257  *  that we've allocated.
4258  *
4259  **********************************************************************/
4260 static int
4261 igb_allocate_receive_buffers(struct rx_ring *rxr)
4262 {
4263         struct  adapter         *adapter = rxr->adapter;
4264         device_t                dev = adapter->dev;
4265         struct igb_rx_buf       *rxbuf;
4266         int                     i, bsize, error;
4267
4268         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4269         if (!(rxr->rx_buffers =
4270             (struct igb_rx_buf *) malloc(bsize,
4271             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4272                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4273                 error = ENOMEM;
4274                 goto fail;
4275         }
4276
4277         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4278                                    1, 0,                /* alignment, bounds */
4279                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4280                                    BUS_SPACE_MAXADDR,   /* highaddr */
4281                                    NULL, NULL,          /* filter, filterarg */
4282                                    MSIZE,               /* maxsize */
4283                                    1,                   /* nsegments */
4284                                    MSIZE,               /* maxsegsize */
4285                                    0,                   /* flags */
4286                                    NULL,                /* lockfunc */
4287                                    NULL,                /* lockfuncarg */
4288                                    &rxr->htag))) {
4289                 device_printf(dev, "Unable to create RX DMA tag\n");
4290                 goto fail;
4291         }
4292
4293         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4294                                    1, 0,                /* alignment, bounds */
4295                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4296                                    BUS_SPACE_MAXADDR,   /* highaddr */
4297                                    NULL, NULL,          /* filter, filterarg */
4298                                    MJUM9BYTES,          /* maxsize */
4299                                    1,                   /* nsegments */
4300                                    MJUM9BYTES,          /* maxsegsize */
4301                                    0,                   /* flags */
4302                                    NULL,                /* lockfunc */
4303                                    NULL,                /* lockfuncarg */
4304                                    &rxr->ptag))) {
4305                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4306                 goto fail;
4307         }
4308
4309         for (i = 0; i < adapter->num_rx_desc; i++) {
4310                 rxbuf = &rxr->rx_buffers[i];
4311                 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4312                 if (error) {
4313                         device_printf(dev,
4314                             "Unable to create RX head DMA maps\n");
4315                         goto fail;
4316                 }
4317                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4318                 if (error) {
4319                         device_printf(dev,
4320                             "Unable to create RX packet DMA maps\n");
4321                         goto fail;
4322                 }
4323         }
4324
4325         return (0);
4326
4327 fail:
4328         /* Frees all, but can handle partial completion */
4329         igb_free_receive_structures(adapter);
4330         return (error);
4331 }
4332
4333
4334 static void
4335 igb_free_receive_ring(struct rx_ring *rxr)
4336 {
4337         struct  adapter         *adapter = rxr->adapter;
4338         struct igb_rx_buf       *rxbuf;
4339
4340
4341         for (int i = 0; i < adapter->num_rx_desc; i++) {
4342                 rxbuf = &rxr->rx_buffers[i];
4343                 if (rxbuf->m_head != NULL) {
4344                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4345                             BUS_DMASYNC_POSTREAD);
4346                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4347                         rxbuf->m_head->m_flags |= M_PKTHDR;
4348                         m_freem(rxbuf->m_head);
4349                 }
4350                 if (rxbuf->m_pack != NULL) {
4351                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4352                             BUS_DMASYNC_POSTREAD);
4353                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4354                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4355                         m_freem(rxbuf->m_pack);
4356                 }
4357                 rxbuf->m_head = NULL;
4358                 rxbuf->m_pack = NULL;
4359         }
4360 }
4361
4362
4363 /*********************************************************************
4364  *
4365  *  Initialize a receive ring and its buffers.
4366  *
4367  **********************************************************************/
4368 static int
4369 igb_setup_receive_ring(struct rx_ring *rxr)
4370 {
4371         struct  adapter         *adapter;
4372         struct  ifnet           *ifp;
4373         device_t                dev;
4374         struct igb_rx_buf       *rxbuf;
4375         bus_dma_segment_t       pseg[1], hseg[1];
4376         struct lro_ctrl         *lro = &rxr->lro;
4377         int                     rsize, nsegs, error = 0;
4378 #ifdef DEV_NETMAP
4379         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4380         struct netmap_slot *slot;
4381 #endif /* DEV_NETMAP */
4382
4383         adapter = rxr->adapter;
4384         dev = adapter->dev;
4385         ifp = adapter->ifp;
4386
4387         /* Clear the ring contents */
4388         IGB_RX_LOCK(rxr);
4389 #ifdef DEV_NETMAP
4390         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4391 #endif /* DEV_NETMAP */
4392         rsize = roundup2(adapter->num_rx_desc *
4393             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4394         bzero((void *)rxr->rx_base, rsize);
4395
4396         /*
4397         ** Free current RX buffer structures and their mbufs
4398         */
4399         igb_free_receive_ring(rxr);
4400
4401         /* Configure for header split? */
4402         if (igb_header_split)
4403                 rxr->hdr_split = TRUE;
4404
4405         /* Now replenish the ring mbufs */
4406         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4407                 struct mbuf     *mh, *mp;
4408
4409                 rxbuf = &rxr->rx_buffers[j];
4410 #ifdef DEV_NETMAP
4411                 if (slot) {
4412                         /* slot sj is mapped to the j-th NIC-ring entry */
4413                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4414                         uint64_t paddr;
4415                         void *addr;
4416
4417                         addr = PNMB(na, slot + sj, &paddr);
4418                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4419                         /* Update descriptor */
4420                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4421                         continue;
4422                 }
4423 #endif /* DEV_NETMAP */
4424                 if (rxr->hdr_split == FALSE)
4425                         goto skip_head;
4426
4427                 /* First the header */
4428                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4429                 if (rxbuf->m_head == NULL) {
4430                         error = ENOBUFS;
4431                         goto fail;
4432                 }
4433                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4434                 mh = rxbuf->m_head;
4435                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4436                 mh->m_flags |= M_PKTHDR;
4437                 /* Get the memory mapping */
4438                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4439                     rxbuf->hmap, rxbuf->m_head, hseg,
4440                     &nsegs, BUS_DMA_NOWAIT);
4441                 if (error != 0) /* Nothing elegant to do here */
4442                         goto fail;
4443                 bus_dmamap_sync(rxr->htag,
4444                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4445                 /* Update descriptor */
4446                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4447
4448 skip_head:
4449                 /* Now the payload cluster */
4450                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4451                     M_PKTHDR, adapter->rx_mbuf_sz);
4452                 if (rxbuf->m_pack == NULL) {
4453                         error = ENOBUFS;
4454                         goto fail;
4455                 }
4456                 mp = rxbuf->m_pack;
4457                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4458                 /* Get the memory mapping */
4459                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4460                     rxbuf->pmap, mp, pseg,
4461                     &nsegs, BUS_DMA_NOWAIT);
4462                 if (error != 0)
4463                         goto fail;
4464                 bus_dmamap_sync(rxr->ptag,
4465                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4466                 /* Update descriptor */
4467                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4468         }
4469
4470         /* Setup our descriptor indices */
4471         rxr->next_to_check = 0;
4472         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4473         rxr->lro_enabled = FALSE;
4474         rxr->rx_split_packets = 0;
4475         rxr->rx_bytes = 0;
4476
4477         rxr->fmp = NULL;
4478         rxr->lmp = NULL;
4479
4480         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4481             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4482
4483         /*
4484         ** Now set up the LRO interface; note that
4485         ** header split is normally only used when
4486         ** LRO is enabled, as one is of little
4487         ** value without the other.
4488         */
4489         if (ifp->if_capenable & IFCAP_LRO) {
4490                 error = tcp_lro_init(lro);
4491                 if (error) {
4492                         device_printf(dev, "LRO Initialization failed!\n");
4493                         goto fail;
4494                 }
4495                 INIT_DEBUGOUT("RX LRO Initialized\n");
4496                 rxr->lro_enabled = TRUE;
4497                 lro->ifp = adapter->ifp;
4498         }
4499
4500         IGB_RX_UNLOCK(rxr);
4501         return (0);
4502
4503 fail:
4504         igb_free_receive_ring(rxr);
4505         IGB_RX_UNLOCK(rxr);
4506         return (error);
4507 }
4508
4509
4510 /*********************************************************************
4511  *
4512  *  Initialize all receive rings.
4513  *
4514  **********************************************************************/
4515 static int
4516 igb_setup_receive_structures(struct adapter *adapter)
4517 {
4518         struct rx_ring *rxr = adapter->rx_rings;
4519         int i;
4520
4521         for (i = 0; i < adapter->num_queues; i++, rxr++)
4522                 if (igb_setup_receive_ring(rxr))
4523                         goto fail;
4524
4525         return (0);
4526 fail:
4527         /*
4528          * Free RX buffers allocated so far; we will only handle
4529          * the rings that completed, as the failing case will have
4530          * cleaned up for itself. 'i' is the endpoint.
4531          */
4532         for (int j = 0; j < i; ++j) {
4533                 rxr = &adapter->rx_rings[j];
4534                 IGB_RX_LOCK(rxr);
4535                 igb_free_receive_ring(rxr);
4536                 IGB_RX_UNLOCK(rxr);
4537         }
4538
4539         return (ENOBUFS);
4540 }
4541
4542 /*
4543  * Initialise the RSS mapping for NICs that support multiple transmit/
4544  * receive rings.
4545  */
4546 static void
4547 igb_initialise_rss_mapping(struct adapter *adapter)
4548 {
4549         struct e1000_hw *hw = &adapter->hw;
4550         int i;
4551         int queue_id;
4552         u32 reta;
4553         u32 rss_key[10], mrqc, shift = 0;
4554
4555         /* The 82575 expects the queue id in the upper bits of each RETA entry */
4556         if (adapter->hw.mac.type == e1000_82575)
4557                 shift = 6;
4558
4559         /*
4560          * The redirection table controls which destination
4561          * queue each bucket redirects traffic to.
4562          * Each DWORD represents four queues, with the LSB
4563          * being the first queue in the DWORD.
4564          *
4565          * This just allocates buckets to queues using round-robin
4566          * allocation.
4567          *
4568          * NOTE: It Just Happens to line up with the default
4569          * RSS allocation method.
4570          */
4571
4572         /* Populate the 128-entry redirection table */
4573         reta = 0;
4574         for (i = 0; i < 128; i++) {
4575 #ifdef  RSS
4576                 queue_id = rss_get_indirection_to_bucket(i);
4577                 /*
4578                  * If we have more queues than buckets, we'll
4579                  * end up mapping buckets to a subset of the
4580                  * queues.
4581                  *
4582                  * If we have more buckets than queues, we'll
4583                  * end up instead assigning multiple buckets
4584                  * to queues.
4585                  *
4586                  * Both are suboptimal, but we need to handle
4587                  * the case so we don't go out of bounds
4588                  * indexing arrays and such.
4589                  */
4590                 queue_id = queue_id % adapter->num_queues;
4591 #else
4592                 queue_id = (i % adapter->num_queues);
4593 #endif
4594                 /* Adjust if required */
4595                 queue_id = queue_id << shift;
4596
4597                 /*
4598                  * The low 8 bits are for hash value (n+0);
4599                  * The next 8 bits are for hash value (n+1), etc.
4600                  */
4601                 reta = reta >> 8;
4602                 reta = reta | (((uint32_t)queue_id) << 24);
4603                 if ((i & 3) == 3) {
4604                         E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4605                         reta = 0;
4606                 }
4607         }
4608
4609         /* Now fill in hash table */
4610
4611         /*
4612          * MRQC: Multiple Receive Queues Command
4613          * Set queuing to RSS control, number depends on the device.
4614          */
4615         mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4616
4617 #ifdef  RSS
4618         /* XXX ew typecasting */
4619         rss_getkey((uint8_t *) &rss_key);
4620 #else
4621         arc4rand(&rss_key, sizeof(rss_key), 0);
4622 #endif
4623         for (i = 0; i < 10; i++)
4624                 E1000_WRITE_REG_ARRAY(hw,
4625                     E1000_RSSRK(0), i, rss_key[i]);
4626
4627         /*
4628          * Configure the RSS fields to hash upon.
4629          */
4630         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4631             E1000_MRQC_RSS_FIELD_IPV4_TCP);
4632         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4633             E1000_MRQC_RSS_FIELD_IPV6_TCP);
4634         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4635             E1000_MRQC_RSS_FIELD_IPV6_UDP);
4636         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4637             E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4638
4639         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4640 }
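
/*
 * Editor's sketch (not part of the driver): the RETA loop above shifts
 * each queue id in from the top, so after four buckets the register
 * holds the ids for hash values n+0..n+3 with n+0 in the low byte.
 * A self-contained model, assuming shift == 0 (i.e. not an 82575);
 * the helper name is hypothetical:
 */
#if 0
#include <stdint.h>

static uint32_t
reta_pack4(const uint8_t queue_id[4])
{
	uint32_t reta = 0;

	for (int i = 0; i < 4; i++) {
		reta >>= 8;			/* make room in the top byte */
		reta |= (uint32_t)queue_id[i] << 24;
	}
	return (reta);	/* {0,1,2,3} packs to 0x03020100 */
}
#endif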
4641
4642 /*********************************************************************
4643  *
4644  *  Enable receive unit.
4645  *
4646  **********************************************************************/
4647 static void
4648 igb_initialize_receive_units(struct adapter *adapter)
4649 {
4650         struct rx_ring  *rxr = adapter->rx_rings;
4651         struct ifnet    *ifp = adapter->ifp;
4652         struct e1000_hw *hw = &adapter->hw;
4653         u32             rctl, rxcsum, psize, srrctl = 0;
4654
4655         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4656
4657         /*
4658          * Make sure receives are disabled while setting
4659          * up the descriptor ring
4660          */
4661         rctl = E1000_READ_REG(hw, E1000_RCTL);
4662         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4663
4664         /*
4665         ** Set up for header split
4666         */
4667         if (igb_header_split) {
4668                 /* Use a standard mbuf for the header */
4669                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4670                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4671         } else
4672                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4673
4674         /*
4675         ** Set up for jumbo frames
4676         */
4677         if (ifp->if_mtu > ETHERMTU) {
4678                 rctl |= E1000_RCTL_LPE;
4679                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4680                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4681                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4682                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4683                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4684                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4685                 }
4686                 /* Set maximum packet len */
4687                 psize = adapter->max_frame_size;
4688                 /* are we on a vlan? */
4689                 if (adapter->ifp->if_vlantrunk != NULL)
4690                         psize += VLAN_TAG_SIZE;
4691                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4692         } else {
4693                 rctl &= ~E1000_RCTL_LPE;
4694                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4695                 rctl |= E1000_RCTL_SZ_2048;
4696         }
4697
4698         /*
4699          * If TX flow control is disabled and there's >1 queue defined,
4700          * enable DROP.
4701          *
4702          * This drops frames rather than hanging the RX MAC for all queues.
4703          */
4704         if ((adapter->num_queues > 1) &&
4705             (adapter->fc == e1000_fc_none ||
4706              adapter->fc == e1000_fc_rx_pause)) {
4707                 srrctl |= E1000_SRRCTL_DROP_EN;
4708         }
4709
4710         /* Setup the Base and Length of the Rx Descriptor Rings */
4711         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4712                 u64 bus_addr = rxr->rxdma.dma_paddr;
4713                 u32 rxdctl;
4714
4715                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4716                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4717                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4718                     (uint32_t)(bus_addr >> 32));
4719                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4720                     (uint32_t)bus_addr);
4721                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4722                 /* Enable this Queue */
4723                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4724                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4725                 rxdctl &= 0xFFF00000;
4726                 rxdctl |= IGB_RX_PTHRESH;
4727                 rxdctl |= IGB_RX_HTHRESH << 8;
4728                 rxdctl |= IGB_RX_WTHRESH << 16;
4729                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4730         }
4731
4732         /*
4733         ** Setup for RX MultiQueue
4734         */
4735         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4736         if (adapter->num_queues > 1) {
4737
4738                 /* rss setup */
4739                 igb_initialise_rss_mapping(adapter);
4740
4741                 /*
4742                 ** NOTE: Receive Full-Packet Checksum Offload
4743                 ** is mutually exclusive with Multiqueue. However,
4744                 ** this is not the same as TCP/IP checksum offload,
4745                 ** which still works.
4746                 */
4747                 rxcsum |= E1000_RXCSUM_PCSD;
4748 #if __FreeBSD_version >= 800000
4749                 /* For SCTP Offload */
4750                 if ((hw->mac.type != e1000_82575) &&
4751                     (ifp->if_capenable & IFCAP_RXCSUM))
4752                         rxcsum |= E1000_RXCSUM_CRCOFL;
4753 #endif
4754         } else {
4755                 /* Non RSS setup */
4756                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4757                         rxcsum |= E1000_RXCSUM_IPPCSE;
4758 #if __FreeBSD_version >= 800000
4759                         if (adapter->hw.mac.type != e1000_82575)
4760                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4761 #endif
4762                 } else
4763                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4764         }
4765         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4766
4767         /* Setup the Receive Control Register */
4768         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4769         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4770                    E1000_RCTL_RDMTS_HALF |
4771                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4772         /* Strip CRC bytes. */
4773         rctl |= E1000_RCTL_SECRC;
4774         /* Make sure VLAN Filters are off */
4775         rctl &= ~E1000_RCTL_VFE;
4776         /* Don't store bad packets */
4777         rctl &= ~E1000_RCTL_SBP;
4778
4779         /* Enable Receives */
4780         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4781
4782         /*
4783          * Setup the HW Rx Head and Tail Descriptor Pointers
4784          *   - needs to be after enable
4785          */
4786         for (int i = 0; i < adapter->num_queues; i++) {
4787                 rxr = &adapter->rx_rings[i];
4788                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4789 #ifdef DEV_NETMAP
4790                 /*
4791                  * an init() while a netmap client is active must
4792                  * preserve the rx buffers passed to userspace.
4793                  * In this driver it means we adjust RDT to
4794                  * something different from next_to_refresh
4795                  * (which is not used in netmap mode).
4796                  */
4797                 if (ifp->if_capenable & IFCAP_NETMAP) {
4798                         struct netmap_adapter *na = NA(adapter->ifp);
4799                         struct netmap_kring *kring = &na->rx_rings[i];
4800                         int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4801
4802                         if (t >= adapter->num_rx_desc)
4803                                 t -= adapter->num_rx_desc;
4804                         else if (t < 0)
4805                                 t += adapter->num_rx_desc;
4806                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4807                 } else
4808 #endif /* DEV_NETMAP */
4809                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4810         }
4811         return;
4812 }
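
/*
 * Editor's note (worked example): SRRCTL.BSIZEPKT is expressed in
 * 1 KB units (E1000_SRRCTL_BSIZEPKT_SHIFT is 10 in the e1000
 * headers), so "4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT" above programs
 * a 4 KB receive buffer, "8192 >> ..." an 8 KB one, and
 * "2048 >> ..." the default 2 KB.
 */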
4813
4814 /*********************************************************************
4815  *
4816  *  Free receive rings.
4817  *
4818  **********************************************************************/
4819 static void
4820 igb_free_receive_structures(struct adapter *adapter)
4821 {
4822         struct rx_ring *rxr = adapter->rx_rings;
4823
4824         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4825                 struct lro_ctrl *lro = &rxr->lro;
4826                 igb_free_receive_buffers(rxr);
4827                 tcp_lro_free(lro);
4828                 igb_dma_free(adapter, &rxr->rxdma);
4829         }
4830
4831         free(adapter->rx_rings, M_DEVBUF);
4832 }
4833
4834 /*********************************************************************
4835  *
4836  *  Free receive ring data structures.
4837  *
4838  **********************************************************************/
4839 static void
4840 igb_free_receive_buffers(struct rx_ring *rxr)
4841 {
4842         struct adapter          *adapter = rxr->adapter;
4843         struct igb_rx_buf       *rxbuf;
4844         int i;
4845
4846         INIT_DEBUGOUT("free_receive_structures: begin");
4847
4848         /* Cleanup any existing buffers */
4849         if (rxr->rx_buffers != NULL) {
4850                 for (i = 0; i < adapter->num_rx_desc; i++) {
4851                         rxbuf = &rxr->rx_buffers[i];
4852                         if (rxbuf->m_head != NULL) {
4853                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4854                                     BUS_DMASYNC_POSTREAD);
4855                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4856                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4857                                 m_freem(rxbuf->m_head);
4858                         }
4859                         if (rxbuf->m_pack != NULL) {
4860                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4861                                     BUS_DMASYNC_POSTREAD);
4862                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4863                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4864                                 m_freem(rxbuf->m_pack);
4865                         }
4866                         rxbuf->m_head = NULL;
4867                         rxbuf->m_pack = NULL;
4868                         if (rxbuf->hmap != NULL) {
4869                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4870                                 rxbuf->hmap = NULL;
4871                         }
4872                         if (rxbuf->pmap != NULL) {
4873                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4874                                 rxbuf->pmap = NULL;
4875                         }
4876                 }
4877                 if (rxr->rx_buffers != NULL) {
4878                         free(rxr->rx_buffers, M_DEVBUF);
4879                         rxr->rx_buffers = NULL;
4880                 }
4881         }
4882
4883         if (rxr->htag != NULL) {
4884                 bus_dma_tag_destroy(rxr->htag);
4885                 rxr->htag = NULL;
4886         }
4887         if (rxr->ptag != NULL) {
4888                 bus_dma_tag_destroy(rxr->ptag);
4889                 rxr->ptag = NULL;
4890         }
4891 }
4892
4893 static __inline void
4894 igb_rx_discard(struct rx_ring *rxr, int i)
4895 {
4896         struct igb_rx_buf       *rbuf;
4897
4898         rbuf = &rxr->rx_buffers[i];
4899
4900         /* Partially received? Free the chain */
4901         if (rxr->fmp != NULL) {
4902                 rxr->fmp->m_flags |= M_PKTHDR;
4903                 m_freem(rxr->fmp);
4904                 rxr->fmp = NULL;
4905                 rxr->lmp = NULL;
4906         }
4907
4908         /*
4909         ** With advanced descriptors the writeback
4910         ** clobbers the buffer addrs, so it's easier
4911         ** to just free the existing mbufs and take
4912         ** the normal refresh path to get new buffers
4913         ** and mapping.
4914         */
4915         if (rbuf->m_head) {
4916                 m_free(rbuf->m_head);
4917                 rbuf->m_head = NULL;
4918                 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4919         }
4920
4921         if (rbuf->m_pack) {
4922                 m_free(rbuf->m_pack);
4923                 rbuf->m_pack = NULL;
4924                 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4925         }
4926
4927         return;
4928 }
4929
4930 static __inline void
4931 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4932 {
4933
4934         /*
4935          * At the moment LRO is only for IPv4/TCP packets whose TCP
4936          * checksum has been computed by hardware, and which carry no
4937          * VLAN tag in the Ethernet header.
4938          */
4939         if (rxr->lro_enabled &&
4940             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4941             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4942             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4943             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4944             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4945             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4946                 /*
4947                  * Send to the stack if:
4948                  *  - LRO not enabled, or
4949                  *  - no LRO resources, or
4950                  *  - lro enqueue fails
4951                  */
4952                 if (rxr->lro.lro_cnt != 0)
4953                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4954                                 return;
4955         }
4956         IGB_RX_UNLOCK(rxr);
4957         (*ifp->if_input)(ifp, m);
4958         IGB_RX_LOCK(rxr);
4959 }
4960
4961 /*********************************************************************
4962  *
4963  *  This routine executes in interrupt context. It replenishes
4964  *  the mbufs in the descriptor ring and passes data that has
4965  *  been DMA'd into host memory up to the stack.
4966  *
4967  *  We loop at most count times if count is > 0, or until done if
4968  *  count < 0.
4969  *
4970  *  Return TRUE if more to clean, FALSE otherwise
4971  *********************************************************************/
4972 static bool
4973 igb_rxeof(struct igb_queue *que, int count, int *done)
4974 {
4975         struct adapter          *adapter = que->adapter;
4976         struct rx_ring          *rxr = que->rxr;
4977         struct ifnet            *ifp = adapter->ifp;
4978         struct lro_ctrl         *lro = &rxr->lro;
4979         int                     i, processed = 0, rxdone = 0;
4980         u32                     ptype, staterr = 0;
4981         union e1000_adv_rx_desc *cur;
4982
4983         IGB_RX_LOCK(rxr);
4984         /* Sync the ring. */
4985         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4986             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4987
4988 #ifdef DEV_NETMAP
4989         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4990                 IGB_RX_UNLOCK(rxr);
4991                 return (FALSE);
4992         }
4993 #endif /* DEV_NETMAP */
4994
4995         /* Main clean loop */
4996         for (i = rxr->next_to_check; count != 0;) {
4997                 struct mbuf             *sendmp, *mh, *mp;
4998                 struct igb_rx_buf       *rxbuf;
4999                 u16                     hlen, plen, hdr, vtag, pkt_info;
5000                 bool                    eop = FALSE;
5001  
5002                 cur = &rxr->rx_base[i];
5003                 staterr = le32toh(cur->wb.upper.status_error);
5004                 if ((staterr & E1000_RXD_STAT_DD) == 0)
5005                         break;
5006                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
5007                         break;
5008                 count--;
5009                 sendmp = mh = mp = NULL;
5010                 cur->wb.upper.status_error = 0;
5011                 rxbuf = &rxr->rx_buffers[i];
5012                 plen = le16toh(cur->wb.upper.length);
5013                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
5014                 if (((adapter->hw.mac.type == e1000_i350) ||
5015                     (adapter->hw.mac.type == e1000_i354)) &&
5016                     (staterr & E1000_RXDEXT_STATERR_LB))
5017                         vtag = be16toh(cur->wb.upper.vlan);
5018                 else
5019                         vtag = le16toh(cur->wb.upper.vlan);
5020                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
5021                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
5022                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
5023
5024                 /*
5025                  * Free the frame (all segments) if we're at EOP and
5026                  * it's an error.
5027                  *
5028                  * The datasheet states that EOP + status is only valid for
5029                  * the final segment in a multi-segment frame.
5030                  */
5031                 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
5032                         adapter->dropped_pkts++;
5033                         ++rxr->rx_discarded;
5034                         igb_rx_discard(rxr, i);
5035                         goto next_desc;
5036                 }
5037
5038                 /*
5039                 ** The way the hardware is configured to
5040                 ** split, it will ONLY use the header buffer
5041                 ** when header split is enabled, otherwise we
5042                 ** get normal behavior, i.e., both header and
5043                 ** payload are DMA'd into the payload buffer.
5044                 **
5045                 ** The fmp test is to catch the case where a
5046                 ** packet spans multiple descriptors; in that
5047                 ** case only the first header is valid.
5048                 */
5049                 if (rxr->hdr_split && rxr->fmp == NULL) {
5050                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5051                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5052                             E1000_RXDADV_HDRBUFLEN_SHIFT;
5053                         if (hlen > IGB_HDR_BUF)
5054                                 hlen = IGB_HDR_BUF;
5055                         mh = rxr->rx_buffers[i].m_head;
5056                         mh->m_len = hlen;
5057                         /* clear buf pointer for refresh */
5058                         rxbuf->m_head = NULL;
5059                         /*
5060                         ** Get the payload length, this
5061                         ** could be zero if it's a small
5062                         ** packet.
5063                         */
5064                         if (plen > 0) {
5065                                 mp = rxr->rx_buffers[i].m_pack;
5066                                 mp->m_len = plen;
5067                                 mh->m_next = mp;
5068                                 /* clear buf pointer */
5069                                 rxbuf->m_pack = NULL;
5070                                 rxr->rx_split_packets++;
5071                         }
5072                 } else {
5073                         /*
5074                         ** Either no header split, or a
5075                         ** secondary piece of a fragmented
5076                         ** split packet.
5077                         */
5078                         mh = rxr->rx_buffers[i].m_pack;
5079                         mh->m_len = plen;
5080                         /* clear buf info for refresh */
5081                         rxbuf->m_pack = NULL;
5082                 }
5083                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5084
5085                 ++processed; /* So we know when to refresh */
5086
5087                 /* Initial frame - setup */
5088                 if (rxr->fmp == NULL) {
5089                         mh->m_pkthdr.len = mh->m_len;
5090                         /* Save the head of the chain */
5091                         rxr->fmp = mh;
5092                         rxr->lmp = mh;
5093                         if (mp != NULL) {
5094                                 /* Add payload if split */
5095                                 mh->m_pkthdr.len += mp->m_len;
5096                                 rxr->lmp = mh->m_next;
5097                         }
5098                 } else {
5099                         /* Chain mbuf's together */
5100                         rxr->lmp->m_next = mh;
5101                         rxr->lmp = rxr->lmp->m_next;
5102                         rxr->fmp->m_pkthdr.len += mh->m_len;
5103                 }
5104
5105                 if (eop) {
5106                         rxr->fmp->m_pkthdr.rcvif = ifp;
5107                         rxr->rx_packets++;
5108                         /* capture data for AIM */
5109                         rxr->packets++;
5110                         rxr->bytes += rxr->fmp->m_pkthdr.len;
5111                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5112
5113                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5114                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
5115
5116                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5117                             (staterr & E1000_RXD_STAT_VP) != 0) {
5118                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5119                                 rxr->fmp->m_flags |= M_VLANTAG;
5120                         }
5121
5122                         /*
5123                          * With multiqueue, the RXCSUM.PCSD bit is set
5124                          * and never cleared, so an RSS hash is always
5125                          * available for use.
5126                          */
5127                         if (adapter->num_queues > 1) {
5128                                 rxr->fmp->m_pkthdr.flowid = 
5129                                     le32toh(cur->wb.lower.hi_dword.rss);
5130                                 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5131                                         case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5132                                                 M_HASHTYPE_SET(rxr->fmp,
5133                                                     M_HASHTYPE_RSS_TCP_IPV4);
5134                                         break;
5135                                         case E1000_RXDADV_RSSTYPE_IPV4:
5136                                                 M_HASHTYPE_SET(rxr->fmp,
5137                                                     M_HASHTYPE_RSS_IPV4);
5138                                         break;
5139                                         case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5140                                                 M_HASHTYPE_SET(rxr->fmp,
5141                                                     M_HASHTYPE_RSS_TCP_IPV6);
5142                                         break;
5143                                         case E1000_RXDADV_RSSTYPE_IPV6_EX:
5144                                                 M_HASHTYPE_SET(rxr->fmp,
5145                                                     M_HASHTYPE_RSS_IPV6_EX);
5146                                         break;
5147                                         case E1000_RXDADV_RSSTYPE_IPV6:
5148                                                 M_HASHTYPE_SET(rxr->fmp,
5149                                                     M_HASHTYPE_RSS_IPV6);
5150                                         break;
5151                                         case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5152                                                 M_HASHTYPE_SET(rxr->fmp,
5153                                                     M_HASHTYPE_RSS_TCP_IPV6_EX);
5154                                         break;
5155                                         default:
5156                                                 /* XXX fallthrough */
5157                                                 M_HASHTYPE_SET(rxr->fmp,
5158                                                     M_HASHTYPE_OPAQUE_HASH);
5159                                 }
5160                         } else {
5161 #ifndef IGB_LEGACY_TX
5162                                 rxr->fmp->m_pkthdr.flowid = que->msix;
5163                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5164 #endif
5165                         }
5166                         sendmp = rxr->fmp;
5167                         /* Make sure to set M_PKTHDR. */
5168                         sendmp->m_flags |= M_PKTHDR;
5169                         rxr->fmp = NULL;
5170                         rxr->lmp = NULL;
5171                 }
5172
5173 next_desc:
5174                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5175                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5176
5177                 /* Advance our pointers to the next descriptor. */
5178                 if (++i == adapter->num_rx_desc)
5179                         i = 0;
5180                 /*
5181                 ** Send to the stack or LRO
5182                 */
5183                 if (sendmp != NULL) {
5184                         rxr->next_to_check = i;
5185                         igb_rx_input(rxr, ifp, sendmp, ptype);
5186                         i = rxr->next_to_check;
5187                         rxdone++;
5188                 }
5189
5190                 /* Every 8 descriptors we go to refresh mbufs */
5191                 if (processed == 8) {
5192                         igb_refresh_mbufs(rxr, i);
5193                         processed = 0;
5194                 }
5195         }
5196
5197         /* Catch any remainders */
5198         if (igb_rx_unrefreshed(rxr))
5199                 igb_refresh_mbufs(rxr, i);
5200
5201         rxr->next_to_check = i;
5202
5203         /*
5204          * Flush any outstanding LRO work
5205          */
5206         tcp_lro_flush_all(lro);
5207
5208         if (done != NULL)
5209                 *done += rxdone;
5210
5211         IGB_RX_UNLOCK(rxr);
5212         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5213 }
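
/*
 * Editor's note (worked example): for a frame spanning three
 * descriptors, the loop above builds the chain incrementally:
 *   desc 0:       fmp = lmp = mh0, pkthdr.len = len0
 *   desc 1:       lmp->m_next = mh1, lmp = mh1, pkthdr.len += len1
 *   desc 2 (EOP): rcvif, checksum, VLAN and RSS hash are set on fmp,
 *                 then sendmp = fmp is handed to igb_rx_input()
 */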
5214
5215 /*********************************************************************
5216  *
5217  *  Verify that the hardware indicated that the checksum is valid.
5218  *  Inform the stack about the status of checksum so that stack
5219  *  doesn't spend time verifying the checksum.
5220  *
5221  *********************************************************************/
5222 static void
5223 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5224 {
5225         u16 status = (u16)staterr;
5226         u8  errors = (u8) (staterr >> 24);
5227         int sctp;
5228
5229         /* Ignore Checksum bit is set */
5230         if (status & E1000_RXD_STAT_IXSM) {
5231                 mp->m_pkthdr.csum_flags = 0;
5232                 return;
5233         }
5234
5235         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5236             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5237                 sctp = 1;
5238         else
5239                 sctp = 0;
5240         if (status & E1000_RXD_STAT_IPCS) {
5241                 /* Did it pass? */
5242                 if (!(errors & E1000_RXD_ERR_IPE)) {
5243                         /* IP Checksum Good */
5244                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5245                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5246                 } else
5247                         mp->m_pkthdr.csum_flags = 0;
5248         }
5249
5250         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5251                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5252 #if __FreeBSD_version >= 800000
5253                 if (sctp) /* reassign */
5254                         type = CSUM_SCTP_VALID;
5255 #endif
5256                 /* Did it pass? */
5257                 if (!(errors & E1000_RXD_ERR_TCPE)) {
5258                         mp->m_pkthdr.csum_flags |= type;
5259                         if (sctp == 0)
5260                                 mp->m_pkthdr.csum_data = htons(0xffff);
5261                 }
5262         }
5263         return;
5264 }
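
/*
 * Editor's note (worked example): the advanced descriptor packs the
 * status bits in staterr[15:0] and the error byte in staterr[31:24],
 * hence the (u16) and (u8)(staterr >> 24) casts above.  E.g. a
 * staterr of 0x00000003 (DD | EOP, no errors) yields status 0x0003
 * and errors 0x00.
 */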
5265
5266 /*
5267  * This routine is run via a vlan
5268  * config EVENT
5269  */
5270 static void
5271 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5272 {
5273         struct adapter  *adapter = ifp->if_softc;
5274         u32             index, bit;
5275
5276         if (ifp->if_softc !=  arg)   /* Not our event */
5277                 return;
5278
5279         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5280                 return;
5281
5282         IGB_CORE_LOCK(adapter);
5283         index = (vtag >> 5) & 0x7F;
5284         bit = vtag & 0x1F;
5285         adapter->shadow_vfta[index] |= (1 << bit);
5286         ++adapter->num_vlans;
5287         /* Change hw filter setting */
5288         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5289                 igb_setup_vlan_hw_support(adapter);
5290         IGB_CORE_UNLOCK(adapter);
5291 }
5292
5293 /*
5294  * This routine is run via a vlan
5295  * unconfig EVENT
5296  */
5297 static void
5298 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5299 {
5300         struct adapter  *adapter = ifp->if_softc;
5301         u32             index, bit;
5302
5303         if (ifp->if_softc !=  arg)
5304                 return;
5305
5306         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5307                 return;
5308
5309         IGB_CORE_LOCK(adapter);
5310         index = (vtag >> 5) & 0x7F;
5311         bit = vtag & 0x1F;
5312         adapter->shadow_vfta[index] &= ~(1 << bit);
5313         --adapter->num_vlans;
5314         /* Change hw filter setting */
5315         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5316                 igb_setup_vlan_hw_support(adapter);
5317         IGB_CORE_UNLOCK(adapter);
5318 }
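
/*
 * Editor's note (worked example): the VFTA shadow is 128 32-bit words
 * covering 4096 VLAN ids, so for vtag = 100:
 *   index = (100 >> 5) & 0x7F = 3,  bit = 100 & 0x1F = 4
 * i.e. bit 4 of shadow_vfta[3], set in igb_register_vlan() above and
 * cleared here.
 */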
5319
5320 static void
5321 igb_setup_vlan_hw_support(struct adapter *adapter)
5322 {
5323         struct e1000_hw *hw = &adapter->hw;
5324         struct ifnet    *ifp = adapter->ifp;
5325         u32             reg;
5326
5327         if (adapter->vf_ifp) {
5328                 e1000_rlpml_set_vf(hw,
5329                     adapter->max_frame_size + VLAN_TAG_SIZE);
5330                 return;
5331         }
5332
5333         reg = E1000_READ_REG(hw, E1000_CTRL);
5334         reg |= E1000_CTRL_VME;
5335         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5336
5337         /* Enable the Filter Table */
5338         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5339                 reg = E1000_READ_REG(hw, E1000_RCTL);
5340                 reg &= ~E1000_RCTL_CFIEN;
5341                 reg |= E1000_RCTL_VFE;
5342                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5343         }
5344
5345         /* Update the frame size */
5346         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5347             adapter->max_frame_size + VLAN_TAG_SIZE);
5348
5349         /* Don't bother with table if no vlans */
5350         if ((adapter->num_vlans == 0) ||
5351             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5352                 return;
5353         /*
5354         ** A soft reset zeroes out the VFTA, so
5355         ** we need to repopulate it now.
5356         */
5357         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5358                 if (adapter->shadow_vfta[i] != 0) {
5359                         if (adapter->vf_ifp)
5360                                 e1000_vfta_set_vf(hw,
5361                                     adapter->shadow_vfta[i], TRUE);
5362                         else
5363                                 e1000_write_vfta(hw,
5364                                     i, adapter->shadow_vfta[i]);
5365                 }
5366 }
5367
5368 static void
5369 igb_enable_intr(struct adapter *adapter)
5370 {
5371         /* With RSS, set up what to auto-clear */
5372         if (adapter->msix_mem) {
5373                 u32 mask = (adapter->que_mask | adapter->link_mask);
5374                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5375                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5376                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5377                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5378                     E1000_IMS_LSC);
5379         } else {
5380                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5381                     IMS_ENABLE_MASK);
5382         }
5383         E1000_WRITE_FLUSH(&adapter->hw);
5384
5385         return;
5386 }
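
/*
 * Editor's note: in MSI-X mode the queue and link causes are armed
 * through the extended registers (EIAC/EIAM select the auto-clear and
 * auto-mask behavior, EIMS enables the causes) and only LSC is left
 * in the legacy IMS; the legacy/MSI path instead enables the classic
 * IMS mask directly.
 */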
5387
5388 static void
5389 igb_disable_intr(struct adapter *adapter)
5390 {
5391         if (adapter->msix_mem) {
5392                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5393                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5394         } 
5395         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5396         E1000_WRITE_FLUSH(&adapter->hw);
5397         return;
5398 }
5399
5400 /*
5401  * Bit of a misnomer: what this really means is
5402  * to enable OS management of the system, i.e.
5403  * to disable special hardware management features
5404  */
5405 static void
5406 igb_init_manageability(struct adapter *adapter)
5407 {
5408         if (adapter->has_manage) {
5409                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5410                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5411
5412                 /* disable hardware interception of ARP */
5413                 manc &= ~(E1000_MANC_ARP_EN);
5414
5415                 /* enable receiving management packets to the host */
5416                 manc |= E1000_MANC_EN_MNG2HOST;
5417                 manc2h |= 1 << 5;  /* Mng Port 623 */
5418                 manc2h |= 1 << 6;  /* Mng Port 664 */
5419                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5420                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5421         }
5422 }
5423
5424 /*
5425  * Give control back to hardware management
5426  * controller if there is one.
5427  */
5428 static void
5429 igb_release_manageability(struct adapter *adapter)
5430 {
5431         if (adapter->has_manage) {
5432                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5433
5434                 /* re-enable hardware interception of ARP */
5435                 manc |= E1000_MANC_ARP_EN;
5436                 manc &= ~E1000_MANC_EN_MNG2HOST;
5437
5438                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5439         }
5440 }
5441
5442 /*
5443  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5444  * For ASF and Pass Through versions of f/w this means that
5445  * the driver is loaded. 
5446  *
5447  */
5448 static void
5449 igb_get_hw_control(struct adapter *adapter)
5450 {
5451         u32 ctrl_ext;
5452
5453         if (adapter->vf_ifp)
5454                 return;
5455
5456         /* Let firmware know the driver has taken over */
5457         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5458         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5459             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5460 }
5461
5462 /*
5463  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5464  * For ASF and Pass Through versions of f/w this means that the
5465  * driver is no longer loaded.
5466  *
5467  */
5468 static void
5469 igb_release_hw_control(struct adapter *adapter)
5470 {
5471         u32 ctrl_ext;
5472
5473         if (adapter->vf_ifp)
5474                 return;
5475
5476         /* Let firmware take over control of h/w */
5477         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5478         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5479             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5480 }
5481
5482 static int
5483 igb_is_valid_ether_addr(uint8_t *addr)
5484 {
5485         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5486
5487         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5488                 return (FALSE);
5489         }
5490
5491         return (TRUE);
5492 }
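
/*
 * Editor's note: "addr[0] & 1" tests the Ethernet group (multicast)
 * bit, which must be clear in a valid unicast station address; the
 * bcmp() additionally rejects the all-zero address.
 */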
5493
5494
5495 /*
5496  * Enable PCI Wake On Lan capability
5497  */
5498 static void
5499 igb_enable_wakeup(device_t dev)
5500 {
5501         u16     cap, status;
5502         u8      id;
5503
5504         /* First find the capabilities pointer */
5505         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5506         /* Read the PM Capabilities */
5507         id = pci_read_config(dev, cap, 1);
5508         if (id != PCIY_PMG)     /* Something wrong */
5509                 return;
5510         /* OK, we have the power capabilities, so
5511            now get the status register */
5512         cap += PCIR_POWER_STATUS;
5513         status = pci_read_config(dev, cap, 2);
5514         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5515         pci_write_config(dev, cap, status, 2);
5516         return;
5517 }
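
/*
 * Editor's note: the lookup above reads only the head of the PCI
 * capability list and assumes power management is its first entry;
 * a more general implementation would follow each capability's next
 * pointer until PCIY_PMG is found.
 */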
5518
5519 static void
5520 igb_led_func(void *arg, int onoff)
5521 {
5522         struct adapter  *adapter = arg;
5523
5524         IGB_CORE_LOCK(adapter);
5525         if (onoff) {
5526                 e1000_setup_led(&adapter->hw);
5527                 e1000_led_on(&adapter->hw);
5528         } else {
5529                 e1000_led_off(&adapter->hw);
5530                 e1000_cleanup_led(&adapter->hw);
5531         }
5532         IGB_CORE_UNLOCK(adapter);
5533 }
5534
5535 static uint64_t
5536 igb_get_vf_counter(if_t ifp, ift_counter cnt)
5537 {
5538         struct adapter *adapter;
5539         struct e1000_vf_stats *stats;
5540 #ifndef IGB_LEGACY_TX
5541         struct tx_ring *txr;
5542         uint64_t rv;
5543 #endif
5544
5545         adapter = if_getsoftc(ifp);
5546         stats = (struct e1000_vf_stats *)adapter->stats;
5547
5548         switch (cnt) {
5549         case IFCOUNTER_IPACKETS:
5550                 return (stats->gprc);
5551         case IFCOUNTER_OPACKETS:
5552                 return (stats->gptc);
5553         case IFCOUNTER_IBYTES:
5554                 return (stats->gorc);
5555         case IFCOUNTER_OBYTES:
5556                 return (stats->gotc);
5557         case IFCOUNTER_IMCASTS:
5558                 return (stats->mprc);
5559         case IFCOUNTER_IERRORS:
5560                 return (adapter->dropped_pkts);
5561         case IFCOUNTER_OERRORS:
5562                 return (adapter->watchdog_events);
5563 #ifndef IGB_LEGACY_TX
5564         case IFCOUNTER_OQDROPS:
5565                 rv = 0;
5566                 txr = adapter->tx_rings;
5567                 for (int i = 0; i < adapter->num_queues; i++, txr++)
5568                         rv += txr->br->br_drops;
5569                 return (rv);
5570 #endif
5571         default:
5572                 return (if_get_counter_default(ifp, cnt));
5573         }
5574 }
5575
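/*
 * if_get_counter method: map the generic ifnet counters onto the
 * hardware statistics, deferring to the VF variant when appropriate.
 */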
5576 static uint64_t
5577 igb_get_counter(if_t ifp, ift_counter cnt)
5578 {
5579         struct adapter *adapter;
5580         struct e1000_hw_stats *stats;
5581 #ifndef IGB_LEGACY_TX
5582         struct tx_ring *txr;
5583         uint64_t rv;
5584 #endif
5585
5586         adapter = if_getsoftc(ifp);
5587         if (adapter->vf_ifp)
5588                 return (igb_get_vf_counter(ifp, cnt));
5589
5590         stats = (struct e1000_hw_stats *)adapter->stats;
5591
5592         switch (cnt) {
5593         case IFCOUNTER_IPACKETS:
5594                 return (stats->gprc);
5595         case IFCOUNTER_OPACKETS:
5596                 return (stats->gptc);
5597         case IFCOUNTER_IBYTES:
5598                 return (stats->gorc);
5599         case IFCOUNTER_OBYTES:
5600                 return (stats->gotc);
5601         case IFCOUNTER_IMCASTS:
5602                 return (stats->mprc);
5603         case IFCOUNTER_OMCASTS:
5604                 return (stats->mptc);
5605         case IFCOUNTER_IERRORS:
5606                 return (adapter->dropped_pkts + stats->rxerrc +
5607                     stats->crcerrs + stats->algnerrc +
5608                     stats->ruc + stats->roc + stats->cexterr);
5609         case IFCOUNTER_OERRORS:
5610                 return (stats->ecol + stats->latecol +
5611                     adapter->watchdog_events);
5612         case IFCOUNTER_COLLISIONS:
5613                 return (stats->colc);
5614         case IFCOUNTER_IQDROPS:
5615                 return (stats->mpc);
5616 #ifndef IGB_LEGACY_TX
5617         case IFCOUNTER_OQDROPS:
5618                 rv = 0;
5619                 txr = adapter->tx_rings;
5620                 for (int i = 0; i < adapter->num_queues; i++, txr++)
5621                         rv += txr->br->br_drops;
5622                 return (rv);
5623 #endif
5624         default:
5625                 return (if_get_counter_default(ifp, cnt));
5626         }
5627 }
5628
5629 /**********************************************************************
5630  *
5631  *  Update the board statistics counters.
5632  *
5633  **********************************************************************/
5634 static void
5635 igb_update_stats_counters(struct adapter *adapter)
5636 {
5637         struct e1000_hw         *hw = &adapter->hw;
5638         struct e1000_hw_stats   *stats;
5639
5640         /*
5641         ** The virtual function adapter has only a
5642         ** small, controlled set of stats, so update
5643         ** only those and return.
5644         */
5645         if (adapter->vf_ifp) {
5646                 igb_update_vf_stats_counters(adapter);
5647                 return;
5648         }
5649
5650         stats = (struct e1000_hw_stats  *)adapter->stats;
5651
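        /* Only read symbol/sequence errors on copper media or with link up. */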
5652         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5653            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5654                 stats->symerrs +=
5655                     E1000_READ_REG(hw,E1000_SYMERRS);
5656                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5657         }
5658
5659         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5660         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5661         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5662         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5663
5664         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5665         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5666         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5667         stats->dc += E1000_READ_REG(hw, E1000_DC);
5668         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5669         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5670         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5671         /*
5672         ** For watchdog management we need to know if we have been
5673         ** paused during the last interval, so capture that here.
5674         */ 
5675         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5676         stats->xoffrxc += adapter->pause_frames;
5677         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5678         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5679         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5680         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5681         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5682         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5683         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5684         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5685         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5686         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5687         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5688         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5689
5690         /* For the 64-bit byte counters the low dword must be read first. */
5691         /* Both registers clear on the read of the high dword */
5692
5693         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5694             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5695         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5696             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5697
5698         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5699         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5700         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5701         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5702         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5703
5704         stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5705         stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5706         stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5707
5708         stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5709             ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5710         stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5711             ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5712
5713         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5714         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5715         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5716         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5717         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5718         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5719         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5720         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5721         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5722         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5723
5724         /* Interrupt Counts */
5725
5726         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5727         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5728         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5729         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5730         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5731         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5732         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5733         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5734         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5735
5736         /* Host to Card Statistics */
5737
5738         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5739         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5740         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5741         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5742         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5743         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5744         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5745         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5746             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5747         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5748             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5749         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5750         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5751         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5752
5753         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5754         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5755         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5756         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5757         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5758         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5759
5760         /* Driver specific counters */
5761         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5762         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5763         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5764         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5765         adapter->packet_buf_alloc_tx =
5766             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5767         adapter->packet_buf_alloc_rx =
5768             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5769 }
5770
5771
5772 /**********************************************************************
5773  *
5774  *  Initialize the VF board statistics counters.
5775  *
5776  **********************************************************************/
5777 static void
5778 igb_vf_init_stats(struct adapter *adapter)
5779 {
5780         struct e1000_hw *hw = &adapter->hw;
5781         struct e1000_vf_stats   *stats;
5782
5783         stats = (struct e1000_vf_stats  *)adapter->stats;
5784         if (stats == NULL)
5785                 return;
5786         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5787         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5788         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5789         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5790         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5791 }
5792  
5793 /**********************************************************************
5794  *
5795  *  Update the VF board statistics counters.
5796  *
5797  **********************************************************************/
5798 static void
5799 igb_update_vf_stats_counters(struct adapter *adapter)
5800 {
5801         struct e1000_hw *hw = &adapter->hw;
5802         struct e1000_vf_stats   *stats;
5803
5804         if (adapter->link_speed == 0)
5805                 return;
5806
5807         stats = (struct e1000_vf_stats  *)adapter->stats;
5808
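        /*
         * UPDATE_VF_REG (from if_igb.h) is assumed to read the current
         * 32-bit register value, detect wrap against the saved 'last'
         * value, and accumulate the result into the 64-bit counter.
         */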
5809         UPDATE_VF_REG(E1000_VFGPRC,
5810             stats->last_gprc, stats->gprc);
5811         UPDATE_VF_REG(E1000_VFGORC,
5812             stats->last_gorc, stats->gorc);
5813         UPDATE_VF_REG(E1000_VFGPTC,
5814             stats->last_gptc, stats->gptc);
5815         UPDATE_VF_REG(E1000_VFGOTC,
5816             stats->last_gotc, stats->gotc);
5817         UPDATE_VF_REG(E1000_VFMPRC,
5818             stats->last_mprc, stats->mprc);
5819 }
5820
5821 /* Export a single 32-bit register via a read-only sysctl. */
5822 static int
5823 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5824 {
5825         struct adapter *adapter;
5826         u_int val;
5827
5828         adapter = oidp->oid_arg1;
5829         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5830         return (sysctl_handle_int(oidp, &val, 0, req));
5831 }
5832
5833 /*
5834 **  Tuneable interrupt rate handler
5835 */
5836 static int
5837 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5838 {
5839         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5840         int                     error;
5841         u32                     reg, usec, rate;
5842                         
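        /*
         * The interval field lives in bits [14:2] of EITR; treat it as
         * microseconds and convert to interrupts per second.  A value
         * written to this sysctl is accepted but not applied here.
         */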
5843         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5844         usec = ((reg & 0x7FFC) >> 2);
5845         if (usec > 0)
5846                 rate = 1000000 / usec;
5847         else
5848                 rate = 0;
5849         error = sysctl_handle_int(oidp, &rate, 0, req);
5850         if (error || !req->newptr)
5851                 return (error);
5852         return (0);
5853 }
5854
5855 /*
5856  * Add sysctl variables, one per statistic, to the system.
5857  */
5858 static void
5859 igb_add_hw_stats(struct adapter *adapter)
5860 {
5861         device_t dev = adapter->dev;
5862
5863         struct tx_ring *txr = adapter->tx_rings;
5864         struct rx_ring *rxr = adapter->rx_rings;
5865
5866         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5867         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5868         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5869         struct e1000_hw_stats *stats = adapter->stats;
5870
5871         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5872         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5873
5874 #define QUEUE_NAME_LEN 32
5875         char namebuf[QUEUE_NAME_LEN];
5876
5877         /* Driver Statistics */
5878         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5879                         CTLFLAG_RD, &adapter->dropped_pkts,
5880                         "Driver dropped packets");
5881         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 
5882                         CTLFLAG_RD, &adapter->link_irq,
5883                         "Link MSIX IRQ Handled");
5884         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5885                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5886                         "Defragmenting mbuf chain failed");
5887         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5888                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5889                         "Driver tx dma failure in xmit");
5890         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5891                         CTLFLAG_RD, &adapter->rx_overruns,
5892                         "RX overruns");
5893         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5894                         CTLFLAG_RD, &adapter->watchdog_events,
5895                         "Watchdog timeouts");
5896
5897         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5898                         CTLFLAG_RD, &adapter->device_control,
5899                         "Device Control Register");
5900         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5901                         CTLFLAG_RD, &adapter->rx_control,
5902                         "Receiver Control Register");
5903         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5904                         CTLFLAG_RD, &adapter->int_mask,
5905                         "Interrupt Mask");
5906         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5907                         CTLFLAG_RD, &adapter->eint_mask,
5908                         "Extended Interrupt Mask");
5909         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5910                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5911                         "Transmit Buffer Packet Allocation");
5912         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5913                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5914                         "Receive Buffer Packet Allocation");
5915         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5916                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5917                         "Flow Control High Watermark");
5918         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5919                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5920                         "Flow Control Low Watermark");
5921
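        /* Per-queue statistics; these appear as dev.igb.<unit>.queue<N>.* */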
5922         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5923                 struct lro_ctrl *lro = &rxr->lro;
5924
5925                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5926                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5927                                             CTLFLAG_RD, NULL, "Queue Name");
5928                 queue_list = SYSCTL_CHILDREN(queue_node);
5929
5930                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5931                                 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5932                                 sizeof(&adapter->queues[i]),
5933                                 igb_sysctl_interrupt_rate_handler,
5934                                 "IU", "Interrupt Rate");
5935
5936                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5937                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5938                                 igb_sysctl_reg_handler, "IU",
5939                                 "Transmit Descriptor Head");
5940                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5941                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5942                                 igb_sysctl_reg_handler, "IU",
5943                                 "Transmit Descriptor Tail");
5944                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5945                                 CTLFLAG_RD, &txr->no_desc_avail,
5946                                 "Queue Descriptors Unavailable");
5947                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5948                                 CTLFLAG_RD, &txr->total_packets,
5949                                 "Queue Packets Transmitted");
5950
5951                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5952                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5953                                 igb_sysctl_reg_handler, "IU",
5954                                 "Receive Descriptor Head");
5955                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5956                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5957                                 igb_sysctl_reg_handler, "IU",
5958                                 "Receive Descriptor Tail");
5959                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5960                                 CTLFLAG_RD, &rxr->rx_packets,
5961                                 "Queue Packets Received");
5962                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5963                                 CTLFLAG_RD, &rxr->rx_bytes,
5964                                 "Queue Bytes Received");
5965                 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
5966                                 CTLFLAG_RD, &lro->lro_queued, 0,
5967                                 "LRO Queued");
5968                 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
5969                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5970                                 "LRO Flushed");
5971         }
5972
5973         /* MAC stats get their own sub node */
5974
5975         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5976                                     CTLFLAG_RD, NULL, "MAC Statistics");
5977         stat_list = SYSCTL_CHILDREN(stat_node);
5978
5979         /*
5980         ** The VF adapter has a very limited set of stats
5981         ** since it's not managing the hardware itself.
5982         */
5983         if (adapter->vf_ifp) {
5984                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5985                                 CTLFLAG_RD, &stats->gprc,
5986                                 "Good Packets Received");
5987                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5988                                 CTLFLAG_RD, &stats->gptc,
5989                                 "Good Packets Transmitted");
5990                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5991                                 CTLFLAG_RD, &stats->gorc,
5992                                 "Good Octets Received");
5993                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5994                                 CTLFLAG_RD, &stats->gotc,
5995                                 "Good Octets Transmitted");
5996                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5997                                 CTLFLAG_RD, &stats->mprc,
5998                                 "Multicast Packets Received");
5999                 return;
6000         }
6001
6002         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
6003                         CTLFLAG_RD, &stats->ecol,
6004                         "Excessive collisions");
6005         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
6006                         CTLFLAG_RD, &stats->scc,
6007                         "Single collisions");
6008         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
6009                         CTLFLAG_RD, &stats->mcc,
6010                         "Multiple collisions");
6011         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
6012                         CTLFLAG_RD, &stats->latecol,
6013                         "Late collisions");
6014         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
6015                         CTLFLAG_RD, &stats->colc,
6016                         "Collision Count");
6017         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
6018                         CTLFLAG_RD, &stats->symerrs,
6019                         "Symbol Errors");
6020         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
6021                         CTLFLAG_RD, &stats->sec,
6022                         "Sequence Errors");
6023         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
6024                         CTLFLAG_RD, &stats->dc,
6025                         "Defer Count");
6026         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
6027                         CTLFLAG_RD, &stats->mpc,
6028                         "Missed Packets");
6029         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
6030                         CTLFLAG_RD, &stats->rlec,
6031                         "Receive Length Errors");
6032         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
6033                         CTLFLAG_RD, &stats->rnbc,
6034                         "Receive No Buffers");
6035         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
6036                         CTLFLAG_RD, &stats->ruc,
6037                         "Receive Undersize");
6038         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
6039                         CTLFLAG_RD, &stats->rfc,
6040                         "Fragmented Packets Received");
6041         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
6042                         CTLFLAG_RD, &stats->roc,
6043                         "Oversized Packets Received");
6044         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
6045                         CTLFLAG_RD, &stats->rjc,
6046                         "Received Jabber");
6047         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6048                         CTLFLAG_RD, &stats->rxerrc,
6049                         "Receive Errors");
6050         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6051                         CTLFLAG_RD, &stats->crcerrs,
6052                         "CRC errors");
6053         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6054                         CTLFLAG_RD, &stats->algnerrc,
6055                         "Alignment Errors");
6056         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6057                         CTLFLAG_RD, &stats->tncrs,
6058                         "Transmit with No CRS");
6059         /* On 82575 these are collision counts */
6060         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6061                         CTLFLAG_RD, &stats->cexterr,
6062                         "Collision/Carrier extension errors");
6063         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6064                         CTLFLAG_RD, &stats->xonrxc,
6065                         "XON Received");
6066         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6067                         CTLFLAG_RD, &stats->xontxc,
6068                         "XON Transmitted");
6069         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6070                         CTLFLAG_RD, &stats->xoffrxc,
6071                         "XOFF Received");
6072         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6073                         CTLFLAG_RD, &stats->xofftxc,
6074                         "XOFF Transmitted");
6075         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6076                         CTLFLAG_RD, &stats->fcruc,
6077                         "Unsupported Flow Control Received");
6078         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6079                         CTLFLAG_RD, &stats->mgprc,
6080                         "Management Packets Received");
6081         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6082                         CTLFLAG_RD, &stats->mgpdc,
6083                         "Management Packets Dropped");
6084         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6085                         CTLFLAG_RD, &stats->mgptc,
6086                         "Management Packets Transmitted");
6087         /* Packet Reception Stats */
6088         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6089                         CTLFLAG_RD, &stats->tpr,
6090                         "Total Packets Received");
6091         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6092                         CTLFLAG_RD, &stats->gprc,
6093                         "Good Packets Received");
6094         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6095                         CTLFLAG_RD, &stats->bprc,
6096                         "Broadcast Packets Received");
6097         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6098                         CTLFLAG_RD, &stats->mprc,
6099                         "Multicast Packets Received");
6100         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6101                         CTLFLAG_RD, &stats->prc64,
6102                         "64 byte frames received");
6103         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6104                         CTLFLAG_RD, &stats->prc127,
6105                         "65-127 byte frames received");
6106         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6107                         CTLFLAG_RD, &stats->prc255,
6108                         "128-255 byte frames received");
6109         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6110                         CTLFLAG_RD, &stats->prc511,
6111                         "256-511 byte frames received");
6112         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6113                         CTLFLAG_RD, &stats->prc1023,
6114                         "512-1023 byte frames received");
6115         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6116                         CTLFLAG_RD, &stats->prc1522,
6117                         "1024-1522 byte frames received");
6118         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
6119                         CTLFLAG_RD, &stats->gorc, 
6120                         "Good Octets Received");
6121         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", 
6122                         CTLFLAG_RD, &stats->tor, 
6123                         "Total Octets Received");
6124
6125         /* Packet Transmission Stats */
6126         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
6127                         CTLFLAG_RD, &stats->gotc, 
6128                         "Good Octets Transmitted"); 
6129         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", 
6130                         CTLFLAG_RD, &stats->tot, 
6131                         "Total Octets Transmitted");
6132         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6133                         CTLFLAG_RD, &stats->tpt,
6134                         "Total Packets Transmitted");
6135         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6136                         CTLFLAG_RD, &stats->gptc,
6137                         "Good Packets Transmitted");
6138         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6139                         CTLFLAG_RD, &stats->bptc,
6140                         "Broadcast Packets Transmitted");
6141         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6142                         CTLFLAG_RD, &stats->mptc,
6143                         "Multicast Packets Transmitted");
6144         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6145                         CTLFLAG_RD, &stats->ptc64,
6146                         "64 byte frames transmitted");
6147         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6148                         CTLFLAG_RD, &stats->ptc127,
6149                         "65-127 byte frames transmitted");
6150         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6151                         CTLFLAG_RD, &stats->ptc255,
6152                         "128-255 byte frames transmitted");
6153         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6154                         CTLFLAG_RD, &stats->ptc511,
6155                         "256-511 byte frames transmitted");
6156         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6157                         CTLFLAG_RD, &stats->ptc1023,
6158                         "512-1023 byte frames transmitted");
6159         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6160                         CTLFLAG_RD, &stats->ptc1522,
6161                         "1024-1522 byte frames transmitted");
6162         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6163                         CTLFLAG_RD, &stats->tsctc,
6164                         "TSO Contexts Transmitted");
6165         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6166                         CTLFLAG_RD, &stats->tsctfc,
6167                         "TSO Contexts Failed");
6168
6169
6170         /* Interrupt Stats */
6171
6172         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
6173                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
6174         int_list = SYSCTL_CHILDREN(int_node);
6175
6176         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6177                         CTLFLAG_RD, &stats->iac,
6178                         "Interrupt Assertion Count");
6179
6180         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6181                         CTLFLAG_RD, &stats->icrxptc,
6182                         "Interrupt Cause Rx Pkt Timer Expire Count");
6183
6184         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6185                         CTLFLAG_RD, &stats->icrxatc,
6186                         "Interrupt Cause Rx Abs Timer Expire Count");
6187
6188         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6189                         CTLFLAG_RD, &stats->ictxptc,
6190                         "Interrupt Cause Tx Pkt Timer Expire Count");
6191
6192         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6193                         CTLFLAG_RD, &stats->ictxatc,
6194                         "Interrupt Cause Tx Abs Timer Expire Count");
6195
6196         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6197                         CTLFLAG_RD, &stats->ictxqec,
6198                         "Interrupt Cause Tx Queue Empty Count");
6199
6200         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6201                         CTLFLAG_RD, &stats->ictxqmtc,
6202                         "Interrupt Cause Tx Queue Min Thresh Count");
6203
6204         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6205                         CTLFLAG_RD, &stats->icrxdmtc,
6206                         "Interrupt Cause Rx Desc Min Thresh Count");
6207
6208         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6209                         CTLFLAG_RD, &stats->icrxoc,
6210                         "Interrupt Cause Receiver Overrun Count");
6211
6212         /* Host to Card Stats */
6213
6214         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
6215                                     CTLFLAG_RD, NULL, 
6216                                     "Host to Card Statistics");
6217
6218         host_list = SYSCTL_CHILDREN(host_node);
6219
6220         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6221                         CTLFLAG_RD, &stats->cbtmpc,
6222                         "Circuit Breaker Tx Packet Count");
6223
6224         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6225                         CTLFLAG_RD, &stats->htdpmc,
6226                         "Host Transmit Discarded Packets");
6227
6228         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6229                         CTLFLAG_RD, &stats->rpthc,
6230                         "Rx Packets To Host");
6231
6232         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6233                         CTLFLAG_RD, &stats->cbrmpc,
6234                         "Circuit Breaker Rx Packet Count");
6235
6236         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6237                         CTLFLAG_RD, &stats->cbrdpc,
6238                         "Circuit Breaker Rx Dropped Count");
6239
6240         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6241                         CTLFLAG_RD, &stats->hgptc,
6242                         "Host Good Packets Tx Count");
6243
6244         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6245                         CTLFLAG_RD, &stats->htcbdpc,
6246                         "Host Tx Circuit Breaker Dropped Count");
6247
6248         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6249                         CTLFLAG_RD, &stats->hgorc,
6250                         "Host Good Octets Received Count");
6251
6252         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6253                         CTLFLAG_RD, &stats->hgotc,
6254                         "Host Good Octets Transmit Count");
6255
6256         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6257                         CTLFLAG_RD, &stats->lenerrs,
6258                         "Length Errors");
6259
6260         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6261                         CTLFLAG_RD, &stats->scvpc,
6262                         "SerDes/SGMII Code Violation Pkt Count");
6263
6264         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6265                         CTLFLAG_RD, &stats->hrmpc,
6266                         "Header Redirection Missed Packet Count");
6267 }
6268
6269
6270 /**********************************************************************
6271  *
6272  *  This routine provides a way to dump out the adapter eeprom,
6273  *  often a useful debug/service tool. This only dumps the first
6274  *  32 words, which is where the interesting content lives.
6275  *
6276  **********************************************************************/
6277 static int
6278 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6279 {
6280         struct adapter *adapter;
6281         int error;
6282         int result;
6283
6284         result = -1;
6285         error = sysctl_handle_int(oidp, &result, 0, req);
6286
6287         if (error || !req->newptr)
6288                 return (error);
6289
6290         /*
6291          * This value will cause a hex dump of the
6292          * first 32 16-bit words of the EEPROM to
6293          * the screen.
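         * Typically triggered from userland with something like
         * "sysctl dev.igb.0.nvm=1" (assuming the OID is named "nvm").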
6294          */
6295         if (result == 1) {
6296                 adapter = (struct adapter *)arg1;
6297                 igb_print_nvm_info(adapter);
6298         }
6299
6300         return (error);
6301 }
6302
6303 static void
6304 igb_print_nvm_info(struct adapter *adapter)
6305 {
6306         u16     eeprom_data;
6307         int     i, j, row = 0;
6308
6309         /* It's a bit crude, but it gets the job done */
6310         printf("\nInterface EEPROM Dump:\n");
6311         printf("Offset\n0x0000  ");
6312         for (i = 0, j = 0; i < 32; i++, j++) {
6313                 if (j == 8) { /* Start a new row of eight words */
6314                         j = 0; ++row;
6315                         printf("\n0x00%x0  ", row);
6316                 }
6317                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6318                 printf("%04x ", eeprom_data);
6319         }
6320         printf("\n");
6321 }
6322
6323 static void
6324 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6325         const char *description, int *limit, int value)
6326 {
6327         *limit = value;
6328         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6329             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6330             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6331 }
6332
6333 /*
6334 ** Set flow control using sysctl:
6335 ** Flow control values:
6336 **      0 - off
6337 **      1 - rx pause
6338 **      2 - tx pause
6339 **      3 - full
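**      e.g. (assuming the handler is attached as the "fc" OID):
**          sysctl dev.igb.0.fc=3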
6340 */
6341 static int
6342 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6343 {
6344         int             error;
6345         static int      input = 3; /* default is full; shared by all igb instances */
6346         struct adapter  *adapter = (struct adapter *) arg1;
6347
6348         error = sysctl_handle_int(oidp, &input, 0, req);
6349
6350         if ((error) || (req->newptr == NULL))
6351                 return (error);
6352
6353         switch (input) {
6354                 case e1000_fc_rx_pause:
6355                 case e1000_fc_tx_pause:
6356                 case e1000_fc_full:
6357                 case e1000_fc_none:
6358                         adapter->hw.fc.requested_mode = input;
6359                         adapter->fc = input;
6360                         break;
6361                 default:
6362                         /* Reject illegal values */
6363                         return (EINVAL);
6364         }
6365
6366         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6367         e1000_force_mac_fc(&adapter->hw);
6368         /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6369         return (error);
6370 }
6371
6372 /*
6373 ** Manage DMA Coalesce:
6374 ** Control values:
6375 **      0/1 - off/on
6376 **      Legal timer values are:
6377 **      250, 500, and 1000-10000 in steps of 1000
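**      e.g. (assuming the handler is attached as the "dmac" OID):
**          sysctl dev.igb.0.dmac=1000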
6378 */
6379 static int
6380 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6381 {
6382         struct adapter *adapter = (struct adapter *) arg1;
6383         int             error;
6384
6385         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6386
6387         if ((error) || (req->newptr == NULL))
6388                 return (error);
6389
6390         switch (adapter->dmac) {
6391                 case 0:
6392                         /* Disabling */
6393                         break;
6394                 case 1: /* Just enable and use default */
6395                         adapter->dmac = 1000;
6396                         break;
6397                 case 250:
6398                 case 500:
6399                 case 1000:
6400                 case 2000:
6401                 case 3000:
6402                 case 4000:
6403                 case 5000:
6404                 case 6000:
6405                 case 7000:
6406                 case 8000:
6407                 case 9000:
6408                 case 10000:
6409                         /* Legal values - allow */
6410                         break;
6411                 default:
6412                         /* Do nothing, illegal value */
6413                         adapter->dmac = 0;
6414                         return (EINVAL);
6415         }
6416         /* Reinit the interface */
6417         igb_init(adapter);
6418         return (error);
6419 }
6420
6421 /*
6422 ** Manage Energy Efficient Ethernet:
6423 ** Control values:
6424 **     0/1 - enabled/disabled
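**     (the value mirrors hw.dev_spec._82575.eee_disable, so writing 1
**     disables EEE; the interface is reinitialized to apply the change)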
6425 */
6426 static int
6427 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6428 {
6429         struct adapter  *adapter = (struct adapter *) arg1;
6430         int             error, value;
6431
6432         value = adapter->hw.dev_spec._82575.eee_disable;
6433         error = sysctl_handle_int(oidp, &value, 0, req);
6434         if (error || req->newptr == NULL)
6435                 return (error);
6436         IGB_CORE_LOCK(adapter);
6437         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6438         igb_init_locked(adapter);
6439         IGB_CORE_UNLOCK(adapter);
6440         return (0);
6441 }