/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include "if_igb.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "2.5.3-k";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by igb_probe to select which devices to attach to.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII,  0, 0, 0},
        /* required last entry */
        {0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static uint64_t igb_get_counter(if_t, ift_counter);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static int      igb_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      igb_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(igb, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");
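
/*
 * Usage sketch (illustrative, not part of the original source): rxd and
 * txd are CTLFLAG_RDTUN sysctls, so they are read-only at runtime and
 * are normally set as boot-time tunables, e.g. in /boot/loader.conf:
 *
 *   hw.igb.rxd=2048
 *   hw.igb.txd=2048
 *
 * The values shown are examples only; igb_attach() below validates them
 * against IGB_MIN_RXD/IGB_MAX_RXD and IGB_MIN_TXD/IGB_MAX_TXD and falls
 * back to the defaults when they are out of range.
 */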

/*
** AIM: Adaptive Interrupt Moderation.
** The interrupt rate is varied over time
** based on the traffic seen on each
** interrupt vector.
*/
static int igb_enable_aim = TRUE;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");
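
/*
 * Usage sketch (illustrative): unlike the RDTUN tunables above, enable_aim
 * is CTLFLAG_RWTUN, so it can be toggled on a running system as well as
 * set from loader.conf, e.g.:
 *
 *   sysctl hw.igb.enable_aim=0
 */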

/*
 * MSI-X should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate.
*/
static int igb_max_interrupt_rate = 8000;
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf-ring (drbr_xxx).
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to be
** DMA'd into a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf, avoiding a cluster allocation.  It is a
** very workload-dependent feature.
*/
static int igb_header_split = FALSE;
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable storing the last CPU used when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments
** each time a queue is bound to a CPU.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int igb_tx_process_limit = -1;
SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &igb_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter based on its PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[256];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == 0)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTLs */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctls for limiting the amount of work done in the taskqueues */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        igb_set_sysctl_value(adapter, "tx_processing_limit",
            "max number of tx packets to process",
            &adapter->tx_process_limit, igb_tx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  Each
         * must not exceed the hardware maximum, and the ring size in
         * bytes must be a multiple of IGB_DBA_ALIGN.
         */
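        /*
         * Worked example (assuming IGB_DBA_ALIGN is 128, as defined in
         * if_igb.h, and the 16-byte legacy descriptor): 128 / 16 = 8, so
         * descriptor counts must be multiples of 8.  A request of 4096
         * passes the modulo test below; 4100 would not (4100 * 16 = 65600,
         * and 65600 % 128 = 64).
         */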
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats = (struct e1000_vf_stats *)malloc(
                    sizeof(struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats = (struct e1000_hw_stats *)malloc(
                    sizeof(struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                        if (adapter->hw.mac.type == e1000_i354)
                                e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                        else
                                e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
                }
        }

        /*
        ** Start from a known state: this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack.  It always uses the first queue
 * (tx_rings[0]) and spins for its lock.
 * Should not be used with multiqueue TX enabled.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack.
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;
#ifdef  RSS
        uint32_t                bucket_id;
#endif

        /* Which queue to use */
        /*
         * When doing RSS, map it to the same outbound queue
         * as the incoming flow would be mapped to.
         *
         * If everything is set up correctly, it should be the
         * same bucket as the one the current CPU is in.
         */
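        /*
         * Illustrative example (not from the original source): with four
         * queues and a flow hash of 0x1234567, the non-RSS fallback below
         * selects queue 0x1234567 % 4 == 3, so all packets of that flow
         * share a single TX ring.
         */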
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef  RSS
                if (rss_hash2bucket(m->m_pkthdr.flowid,
                    M_HASHTYPE_GET(m), &bucket_id) == 0) {
                        /* XXX TODO: spit out something if bucket_id > num_queues? */
                        i = bucket_id % adapter->num_queues;
                } else {
#endif
                        i = m->m_pkthdr.flowid % adapter->num_queues;
#ifdef  RSS
                }
#endif
        } else {
                i = curcpu % adapter->num_queues;
        }
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IGB_TX_TRYLOCK(txr)) {
                igb_mq_start_locked(ifp, txr);
                IGB_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (0);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                if (next->m_flags & M_MCAST && adapter->vf_ifp)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
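                /*
                 * With a 9234-byte hardware frame limit, the largest MTU
                 * accepted below is 9234 - ETHER_HDR_LEN (14) -
                 * ETHER_CRC_LEN (4) = 9216 bytes.
                 */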
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
                /* FALLTHROUGH */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
#if __FreeBSD_version >= 1000000
                /* HW cannot turn these on/off separately */
                if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
                        ifp->if_capenable ^= IFCAP_RXCSUM;
                        ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
                        reinit = 1;
                }
                if (mask & IFCAP_TXCSUM) {
                        ifp->if_capenable ^= IFCAP_TXCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TXCSUM_IPV6) {
                        ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
                        reinit = 1;
                }
#else
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
#endif
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO6) {
                        ifp->if_capenable ^= IFCAP_TSO6;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  the hardware and software to a consistent state.
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
#if __FreeBSD_version >= 1000000
                ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_IP_SCTP;
#else
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
#endif
        }

#if __FreeBSD_version >= 1000000
        if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
                ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_IP6_SCTP;
        }
#endif
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;

        /* Clear bad data from Rx FIFOs */
        e1000_rx_fifo_flush_82575(&adapter->hw);

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
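        /*
         * Worked example: at the default 1500-byte MTU, max_frame_size is
         * 1500 + 14 + 4 = 1518 bytes, so 2K (MCLBYTES) clusters are chosen
         * below; a 9000-byte jumbo MTU would select 9K (MJUM9BYTES)
         * clusters instead.
         */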
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                if (adapter->hw.mac.type == e1000_i354)
                        e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                else
                        e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
        }
}
1380
1381 static void
1382 igb_init(void *arg)
1383 {
1384         struct adapter *adapter = arg;
1385
1386         IGB_CORE_LOCK(adapter);
1387         igb_init_locked(adapter);
1388         IGB_CORE_UNLOCK(adapter);
1389 }
1390
1391
1392 static void
1393 igb_handle_que(void *context, int pending)
1394 {
1395         struct igb_queue *que = context;
1396         struct adapter *adapter = que->adapter;
1397         struct tx_ring *txr = que->txr;
1398         struct ifnet    *ifp = adapter->ifp;
1399
1400         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1401                 bool    more;
1402
1403                 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1404
1405                 IGB_TX_LOCK(txr);
1406                 igb_txeof(txr);
1407 #ifndef IGB_LEGACY_TX
1408                 /* Process the stack queue only if not depleted */
1409                 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1410                     !drbr_empty(ifp, txr->br))
1411                         igb_mq_start_locked(ifp, txr);
1412 #else
1413                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1414                         igb_start_locked(txr, ifp);
1415 #endif
1416                 IGB_TX_UNLOCK(txr);
1417                 /* Do we need another pass? */
1418                 if (more) {
1419                         taskqueue_enqueue(que->tq, &que->que_task);
1420                         return;
1421                 }
1422         }
1423
1424 #ifdef DEVICE_POLLING
1425         if (ifp->if_capenable & IFCAP_POLLING)
1426                 return;
1427 #endif
1428         /* Reenable this interrupt */
1429         if (que->eims)
1430                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1431         else
1432                 igb_enable_intr(adapter);
1433 }
1434
1435 /* Deal with link in a sleepable context */
1436 static void
1437 igb_handle_link(void *context, int pending)
1438 {
1439         struct adapter *adapter = context;
1440
1441         IGB_CORE_LOCK(adapter);
1442         igb_handle_link_locked(adapter);
1443         IGB_CORE_UNLOCK(adapter);
1444 }
1445
1446 static void
1447 igb_handle_link_locked(struct adapter *adapter)
1448 {
1449         struct tx_ring  *txr = adapter->tx_rings;
1450         struct ifnet *ifp = adapter->ifp;
1451
1452         IGB_CORE_LOCK_ASSERT(adapter);
1453         adapter->hw.mac.get_link_status = 1;
1454         igb_update_link_status(adapter);
1455         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1456                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1457                         IGB_TX_LOCK(txr);
1458 #ifndef IGB_LEGACY_TX
1459                         /* Process the stack queue only if not depleted */
1460                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1461                             !drbr_empty(ifp, txr->br))
1462                                 igb_mq_start_locked(ifp, txr);
1463 #else
1464                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1465                                 igb_start_locked(txr, ifp);
1466 #endif
1467                         IGB_TX_UNLOCK(txr);
1468                 }
1469         }
1470 }
1471
1472 /*********************************************************************
1473  *
1474  *  MSI/Legacy Deferred
1475  *  Interrupt Service routine  
1476  *
1477  *********************************************************************/
1478 static int
1479 igb_irq_fast(void *arg)
1480 {
1481         struct adapter          *adapter = arg;
1482         struct igb_queue        *que = adapter->queues;
1483         u32                     reg_icr;
1484
1485
1486         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1487
1488         /* Hot eject?  */
1489         if (reg_icr == 0xffffffff)
1490                 return FILTER_STRAY;
1491
1492         /* Definitely not our interrupt.  */
1493         if (reg_icr == 0x0)
1494                 return FILTER_STRAY;
1495
1496         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1497                 return FILTER_STRAY;
1498
1499         /*
1500          * Mask interrupts until the taskqueue is finished running.  This is
1501          * cheap, so just assume that it is needed.  This also works around
1502          * the MSI message reordering erratum on certain systems.
1503          */
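             /*
              * Note that this filter runs in primary interrupt context
              * and must not sleep; the actual RX/TX cleanup and any
              * link handling are deferred to the taskqueue threads that
              * the enqueues below wake up.
              */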
1504         igb_disable_intr(adapter);
1505         taskqueue_enqueue(que->tq, &que->que_task);
1506
1507         /* Link status change */
1508         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1509                 taskqueue_enqueue(que->tq, &adapter->link_task);
1510
1511         if (reg_icr & E1000_ICR_RXO)
1512                 adapter->rx_overruns++;
1513         return FILTER_HANDLED;
1514 }
1515
1516 #ifdef DEVICE_POLLING
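     /*
      * On FreeBSD 8 and later a polling handler returns the number of
      * packets it processed, so POLL_RETURN_COUNT(a) expands to its
      * argument; on older kernels the handler is void and the macro
      * discards it.
      */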
1517 #if __FreeBSD_version >= 800000
1518 #define POLL_RETURN_COUNT(a) (a)
1519 static int
1520 #else
1521 #define POLL_RETURN_COUNT(a)
1522 static void
1523 #endif
1524 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1525 {
1526         struct adapter          *adapter = ifp->if_softc;
1527         struct igb_queue        *que;
1528         struct tx_ring          *txr;
1529         u32                     reg_icr, rx_done = 0;
1530         u32                     loop = IGB_MAX_LOOP;
1531         bool                    more;
1532
1533         IGB_CORE_LOCK(adapter);
1534         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1535                 IGB_CORE_UNLOCK(adapter);
1536                 return POLL_RETURN_COUNT(rx_done);
1537         }
1538
1539         if (cmd == POLL_AND_CHECK_STATUS) {
1540                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1541                 /* Link status change */
1542                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1543                         igb_handle_link_locked(adapter);
1544
1545                 if (reg_icr & E1000_ICR_RXO)
1546                         adapter->rx_overruns++;
1547         }
1548         IGB_CORE_UNLOCK(adapter);
1549
1550         for (int i = 0; i < adapter->num_queues; i++) {
1551                 que = &adapter->queues[i];
1552                 txr = que->txr;
1553
1554                 igb_rxeof(que, count, &rx_done);
1555
1556                 IGB_TX_LOCK(txr);
1557                 do {
1558                         more = igb_txeof(txr);
1559                 } while (loop-- && more);
1560 #ifndef IGB_LEGACY_TX
1561                 if (!drbr_empty(ifp, txr->br))
1562                         igb_mq_start_locked(ifp, txr);
1563 #else
1564                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1565                         igb_start_locked(txr, ifp);
1566 #endif
1567                 IGB_TX_UNLOCK(txr);
1568         }
1569
1570         return POLL_RETURN_COUNT(rx_done);
1571 }
1572 #endif /* DEVICE_POLLING */
1573
1574 /*********************************************************************
1575  *
1576  *  MSIX Que Interrupt Service routine
1577  *
1578  **********************************************************************/
1579 static void
1580 igb_msix_que(void *arg)
1581 {
1582         struct igb_queue *que = arg;
1583         struct adapter *adapter = que->adapter;
1584         struct ifnet   *ifp = adapter->ifp;
1585         struct tx_ring *txr = que->txr;
1586         struct rx_ring *rxr = que->rxr;
1587         u32             newitr = 0;
1588         bool            more_rx;
1589
1590         /* Ignore spurious interrupts */
1591         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1592                 return;
1593
1594         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1595         ++que->irqs;
1596
1597         IGB_TX_LOCK(txr);
1598         igb_txeof(txr);
1599 #ifndef IGB_LEGACY_TX
1600         /* Process the stack queue only if not depleted */
1601         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1602             !drbr_empty(ifp, txr->br))
1603                 igb_mq_start_locked(ifp, txr);
1604 #else
1605         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1606                 igb_start_locked(txr, ifp);
1607 #endif
1608         IGB_TX_UNLOCK(txr);
1609
1610         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1611
1612         if (adapter->enable_aim == FALSE)
1613                 goto no_calc;
1614         /*
1615         ** Do Adaptive Interrupt Moderation:
1616         **  - Write out last calculated setting
1617         **  - Calculate based on average size over
1618         **    the last interval.
1619         */
1620         if (que->eitr_setting)
1621                 E1000_WRITE_REG(&adapter->hw,
1622                     E1000_EITR(que->msix), que->eitr_setting);
1623  
1624         que->eitr_setting = 0;
1625
1626         /* Idle, do nothing */
1627         if ((txr->bytes == 0) && (rxr->bytes == 0))
1628                 goto no_calc;
1629
1630         /* Use half the default if sub-gig */
1631         if (adapter->link_speed != 1000)
1632                 newitr = IGB_DEFAULT_ITR / 2;
1633         else {
1634                 if ((txr->bytes) && (txr->packets))
1635                         newitr = txr->bytes/txr->packets;
1636                 if ((rxr->bytes) && (rxr->packets))
1637                         newitr = max(newitr,
1638                             (rxr->bytes / rxr->packets));
1639                 newitr += 24; /* account for hardware frame, crc */
1640                 /* set an upper boundary */
1641                 newitr = min(newitr, 3000);
1642                 /* Be nice to the mid range */
1643                 if ((newitr > 300) && (newitr < 1200))
1644                         newitr = (newitr / 3);
1645                 else
1646                         newitr = (newitr / 2);
1647         }
1648         newitr &= 0x7FFC;  /* Mask invalid bits */
1649         if (adapter->hw.mac.type == e1000_82575)
1650                 newitr |= newitr << 16;
1651         else
1652                 newitr |= E1000_EITR_CNT_IGNR;
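             /*
             ** A rough worked example with made-up numbers: 100 RX packets
             ** totalling 80000 bytes in the last interval average out to
             ** 800 bytes; adding 24 gives 824, which falls in the mid
             ** range (300, 1200) and is divided by 3 to 274, then masked
             ** with 0x7FFC down to 272 before the EITR flag bits above
             ** are merged in.
             */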
1653
1654         /* save for next interrupt */
1655         que->eitr_setting = newitr;
1656
1657         /* Reset state */
1658         txr->bytes = 0;
1659         txr->packets = 0;
1660         rxr->bytes = 0;
1661         rxr->packets = 0;
1662
1663 no_calc:
1664         /* Schedule a clean task if needed */
1665         if (more_rx)
1666                 taskqueue_enqueue(que->tq, &que->que_task);
1667         else
1668                 /* Reenable this interrupt */
1669                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1670         return;
1671 }
1672
1673
1674 /*********************************************************************
1675  *
1676  *  MSIX Link Interrupt Service routine
1677  *
1678  **********************************************************************/
1679
1680 static void
1681 igb_msix_link(void *arg)
1682 {
1683         struct adapter  *adapter = arg;
1684         u32             icr;
1685
1686         ++adapter->link_irq;
1687         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1688         if (!(icr & E1000_ICR_LSC))
1689                 goto spurious;
1690         igb_handle_link(adapter, 0);
1691
1692 spurious:
1693         /* Rearm */
1694         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1695         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1696         return;
1697 }
1698
1699
1700 /*********************************************************************
1701  *
1702  *  Media Ioctl callback
1703  *
1704  *  This routine is called whenever the user queries the status of
1705  *  the interface using ifconfig.
1706  *
1707  **********************************************************************/
1708 static void
1709 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1710 {
1711         struct adapter *adapter = ifp->if_softc;
1712
1713         INIT_DEBUGOUT("igb_media_status: begin");
1714
1715         IGB_CORE_LOCK(adapter);
1716         igb_update_link_status(adapter);
1717
1718         ifmr->ifm_status = IFM_AVALID;
1719         ifmr->ifm_active = IFM_ETHER;
1720
1721         if (!adapter->link_active) {
1722                 IGB_CORE_UNLOCK(adapter);
1723                 return;
1724         }
1725
1726         ifmr->ifm_status |= IFM_ACTIVE;
1727
1728         switch (adapter->link_speed) {
1729         case 10:
1730                 ifmr->ifm_active |= IFM_10_T;
1731                 break;
1732         case 100:
1733                 /*
1734                 ** Support for 100Mb SFP - these are fiber,
1735                 ** but the media type appears as serdes.
1736                 */
1737                 if (adapter->hw.phy.media_type ==
1738                     e1000_media_type_internal_serdes)
1739                         ifmr->ifm_active |= IFM_100_FX;
1740                 else
1741                         ifmr->ifm_active |= IFM_100_TX;
1742                 break;
1743         case 1000:
1744                 ifmr->ifm_active |= IFM_1000_T;
1745                 break;
1746         case 2500:
1747                 ifmr->ifm_active |= IFM_2500_SX;
1748                 break;
1749         }
1750
1751         if (adapter->link_duplex == FULL_DUPLEX)
1752                 ifmr->ifm_active |= IFM_FDX;
1753         else
1754                 ifmr->ifm_active |= IFM_HDX;
1755
1756         IGB_CORE_UNLOCK(adapter);
1757 }
1758
1759 /*********************************************************************
1760  *
1761  *  Media Ioctl callback
1762  *
1763  *  This routine is called when the user changes speed/duplex using
1764  *  the media/mediaopt options with ifconfig.
1765  *
1766  **********************************************************************/
1767 static int
1768 igb_media_change(struct ifnet *ifp)
1769 {
1770         struct adapter *adapter = ifp->if_softc;
1771         struct ifmedia  *ifm = &adapter->media;
1772
1773         INIT_DEBUGOUT("igb_media_change: begin");
1774
1775         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1776                 return (EINVAL);
1777
1778         IGB_CORE_LOCK(adapter);
1779         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1780         case IFM_AUTO:
1781                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1782                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1783                 break;
1784         case IFM_1000_LX:
1785         case IFM_1000_SX:
1786         case IFM_1000_T:
1787                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1788                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1789                 break;
1790         case IFM_100_TX:
1791                 adapter->hw.mac.autoneg = FALSE;
1792                 adapter->hw.phy.autoneg_advertised = 0;
1793                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1794                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1795                 else
1796                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1797                 break;
1798         case IFM_10_T:
1799                 adapter->hw.mac.autoneg = FALSE;
1800                 adapter->hw.phy.autoneg_advertised = 0;
1801                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1802                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1803                 else
1804                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1805                 break;
1806         default:
1807                 device_printf(adapter->dev, "Unsupported media type\n");
1808         }
1809
1810         igb_init_locked(adapter);
1811         IGB_CORE_UNLOCK(adapter);
1812
1813         return (0);
1814 }
1815
1816
1817 /*********************************************************************
1818  *
1819  *  This routine maps the mbufs to Advanced TX descriptors.
1820  *  
1821  **********************************************************************/
1822 static int
1823 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1824 {
1825         struct adapter  *adapter = txr->adapter;
1826         u32             olinfo_status = 0, cmd_type_len;
1827         int             i, j, error, nsegs;
1828         int             first;
1829         bool            remap = TRUE;
1830         struct mbuf     *m_head;
1831         bus_dma_segment_t segs[IGB_MAX_SCATTER];
1832         bus_dmamap_t    map;
1833         struct igb_tx_buf *txbuf;
1834         union e1000_adv_tx_desc *txd = NULL;
1835
1836         m_head = *m_headp;
1837
1838         /* Basic descriptor defines */
1839         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1840             E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1841
1842         if (m_head->m_flags & M_VLANTAG)
1843                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1844
1845         /*
1846          * It is important to capture the first descriptor
1847          * used, because its buffer will record the EOP
1848          * descriptor we tell the hardware to report back on.
1849          */
1850         first = txr->next_avail_desc;
1851         txbuf = &txr->tx_buffers[first];
1852         map = txbuf->map;
1853
1854         /*
1855          * Map the packet for DMA.
1856          */
1857 retry:
1858         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1859             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1860
1861         if (__predict_false(error)) {
1862                 struct mbuf *m;
1863
1864                 switch (error) {
1865                 case EFBIG:
1866                         /* Try it again? - one try */
1867                         if (remap == TRUE) {
1868                                 remap = FALSE;
1869                                 m = m_collapse(*m_headp, M_NOWAIT,
1870                                     IGB_MAX_SCATTER);
1871                                 if (m == NULL) {
1872                                         adapter->mbuf_defrag_failed++;
1873                                         m_freem(*m_headp);
1874                                         *m_headp = NULL;
1875                                         return (ENOBUFS);
1876                                 }
1877                                 *m_headp = m;
1878                                 goto retry;
1879                         } else
1880                                 return (error);
1881                 default:
1882                         txr->no_tx_dma_setup++;
1883                         m_freem(*m_headp);
1884                         *m_headp = NULL;
1885                         return (error);
1886                 }
1887         }
1888
1889         /* Make certain there are enough descriptors */
1890         if (txr->tx_avail < (nsegs + 2)) {
1891                 txr->no_desc_avail++;
1892                 bus_dmamap_unload(txr->txtag, map);
1893                 return (ENOBUFS);
1894         }
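             /*
             ** The + 2 above leaves room for the offload context
             ** descriptor consumed by igb_tx_ctx_setup() below, plus a
             ** slot of slack so the ring never fills completely.
             */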
1895         m_head = *m_headp;
1896
1897         /*
1898         ** Set up the appropriate offload context;
1899         ** this will consume the first descriptor.
1900         */
1901         error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1902         if (__predict_false(error)) {
1903                 m_freem(*m_headp);
1904                 *m_headp = NULL;
1905                 return (error);
1906         }
1907
1908         /* 82575 needs the queue index added */
1909         if (adapter->hw.mac.type == e1000_82575)
1910                 olinfo_status |= txr->me << 4;
1911
1912         i = txr->next_avail_desc;
1913         for (j = 0; j < nsegs; j++) {
1914                 bus_size_t seglen;
1915                 bus_addr_t segaddr;
1916
1917                 txbuf = &txr->tx_buffers[i];
1918                 txd = &txr->tx_base[i];
1919                 seglen = segs[j].ds_len;
1920                 segaddr = htole64(segs[j].ds_addr);
1921
1922                 txd->read.buffer_addr = segaddr;
1923                 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1924                     cmd_type_len | seglen);
1925                 txd->read.olinfo_status = htole32(olinfo_status);
1926
1927                 if (++i == txr->num_desc)
1928                         i = 0;
1929         }
1930
1931         txd->read.cmd_type_len |=
1932             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1933         txr->tx_avail -= nsegs;
1934         txr->next_avail_desc = i;
1935
1936         txbuf->m_head = m_head;
1937         /*
1938         ** Here we swap the map so the last descriptor,
1939         ** which gets the completion interrupt, has the
1940         ** real map, and the first descriptor gets the
1941         ** unused map from this descriptor.
1942         */
1943         txr->tx_buffers[first].map = txbuf->map;
1944         txbuf->map = map;
1945         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1946
1947         /* Set the EOP descriptor that will be marked done */
1948         txbuf = &txr->tx_buffers[first];
1949         txbuf->eop = txd;
1950
1951         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1952             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1953         /*
1954          * Advance the Transmit Descriptor Tail (TDT); this tells the
1955          * hardware that this frame is available to transmit.
1956          */
1957         ++txr->total_packets;
1958         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1959
1960         return (0);
1961 }
1962 static void
1963 igb_set_promisc(struct adapter *adapter)
1964 {
1965         struct ifnet    *ifp = adapter->ifp;
1966         struct e1000_hw *hw = &adapter->hw;
1967         u32             reg;
1968
1969         if (adapter->vf_ifp) {
1970                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1971                 return;
1972         }
1973
1974         reg = E1000_READ_REG(hw, E1000_RCTL);
1975         if (ifp->if_flags & IFF_PROMISC) {
1976                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1977                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1978         } else if (ifp->if_flags & IFF_ALLMULTI) {
1979                 reg |= E1000_RCTL_MPE;
1980                 reg &= ~E1000_RCTL_UPE;
1981                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1982         }
1983 }
1984
1985 static void
1986 igb_disable_promisc(struct adapter *adapter)
1987 {
1988         struct e1000_hw *hw = &adapter->hw;
1989         struct ifnet    *ifp = adapter->ifp;
1990         u32             reg;
1991         int             mcnt = 0;
1992
1993         if (adapter->vf_ifp) {
1994                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1995                 return;
1996         }
1997         reg = E1000_READ_REG(hw, E1000_RCTL);
1998         reg &=  (~E1000_RCTL_UPE);
1999         if (ifp->if_flags & IFF_ALLMULTI)
2000                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2001         else {
2002                 struct  ifmultiaddr *ifma;
2003 #if __FreeBSD_version < 800000
2004                 IF_ADDR_LOCK(ifp);
2005 #else   
2006                 if_maddr_rlock(ifp);
2007 #endif
2008                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2009                         if (ifma->ifma_addr->sa_family != AF_LINK)
2010                                 continue;
2011                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2012                                 break;
2013                         mcnt++;
2014                 }
2015 #if __FreeBSD_version < 800000
2016                 IF_ADDR_UNLOCK(ifp);
2017 #else
2018                 if_maddr_runlock(ifp);
2019 #endif
2020         }
2021         /* Don't disable MPE if we are at the MAX group count */
2022         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2023                 reg &=  (~E1000_RCTL_MPE);
2024         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2025 }
2026
2027
2028 /*********************************************************************
2029  *  Multicast Update
2030  *
2031  *  This routine is called whenever the multicast address list is updated.
2032  *
2033  **********************************************************************/
2034
2035 static void
2036 igb_set_multi(struct adapter *adapter)
2037 {
2038         struct ifnet    *ifp = adapter->ifp;
2039         struct ifmultiaddr *ifma;
2040         u32 reg_rctl = 0;
2041         u8  *mta;
2042
2043         int mcnt = 0;
2044
2045         IOCTL_DEBUGOUT("igb_set_multi: begin");
2046
2047         mta = adapter->mta;
2048         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2049             MAX_NUM_MULTICAST_ADDRESSES);
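             /*
             ** mta is a flat array sized for MAX_NUM_MULTICAST_ADDRESSES
             ** six-byte (ETH_ADDR_LEN) entries; entry k starts at byte
             ** k * ETH_ADDR_LEN, which is how the bcopy() below fills it.
             */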
2050
2051 #if __FreeBSD_version < 800000
2052         IF_ADDR_LOCK(ifp);
2053 #else
2054         if_maddr_rlock(ifp);
2055 #endif
2056         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2057                 if (ifma->ifma_addr->sa_family != AF_LINK)
2058                         continue;
2059
2060                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2061                         break;
2062
2063                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2064                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2065                 mcnt++;
2066         }
2067 #if __FreeBSD_version < 800000
2068         IF_ADDR_UNLOCK(ifp);
2069 #else
2070         if_maddr_runlock(ifp);
2071 #endif
2072
2073         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2074                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2075                 reg_rctl |= E1000_RCTL_MPE;
2076                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2077         } else
2078                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2079 }
2080
2081
2082 /*********************************************************************
2083  *  Timer routine:
2084  *      This routine checks for link status,
2085  *      updates statistics, and does the watchdog.
2086  *
2087  **********************************************************************/
2088
2089 static void
2090 igb_local_timer(void *arg)
2091 {
2092         struct adapter          *adapter = arg;
2093         device_t                dev = adapter->dev;
2094         struct ifnet            *ifp = adapter->ifp;
2095         struct tx_ring          *txr = adapter->tx_rings;
2096         struct igb_queue        *que = adapter->queues;
2097         int                     hung = 0, busy = 0;
2098
2099
2100         IGB_CORE_LOCK_ASSERT(adapter);
2101
2102         igb_update_link_status(adapter);
2103         igb_update_stats_counters(adapter);
2104
2105         /*
2106         ** Check the status of the TX queues:
2107         **      - central locked handling of OACTIVE
2108         **      - watchdog only if all queues show hung
2109         */
2110         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2111                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2112                     (adapter->pause_frames == 0))
2113                         ++hung;
2114                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2115                         ++busy;
2116                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2117                         taskqueue_enqueue(que->tq, &que->que_task);
2118         }
2119         if (hung == adapter->num_queues)
2120                 goto timeout;
2121         if (busy == adapter->num_queues)
2122                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2123         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2124             (busy < adapter->num_queues))
2125                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2126
2127         adapter->pause_frames = 0;
2128         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2129 #ifndef DEVICE_POLLING
2130         /* Schedule all queue interrupts - deadlock protection */
2131         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2132 #endif
2133         return;
2134
2135 timeout:
2136         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2137         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2138             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2139             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2140         device_printf(dev, "TX(%d) desc avail = %d, "
2141             "Next TX to Clean = %d\n",
2142             txr->me, txr->tx_avail, txr->next_to_clean);
2143         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2144         adapter->watchdog_events++;
2145         igb_init_locked(adapter);
2146 }
2147
2148 static void
2149 igb_update_link_status(struct adapter *adapter)
2150 {
2151         struct e1000_hw         *hw = &adapter->hw;
2152         struct e1000_fc_info    *fc = &hw->fc;
2153         struct ifnet            *ifp = adapter->ifp;
2154         device_t                dev = adapter->dev;
2155         struct tx_ring          *txr = adapter->tx_rings;
2156         u32                     link_check, thstat, ctrl;
2157         char                    *flowctl = NULL;
2158
2159         link_check = thstat = ctrl = 0;
2160
2161         /* Get the cached link value or read for real */
2162         switch (hw->phy.media_type) {
2163         case e1000_media_type_copper:
2164                 if (hw->mac.get_link_status) {
2165                         /* Do the work to read the PHY */
2166                         e1000_check_for_link(hw);
2167                         link_check = !hw->mac.get_link_status;
2168                 } else
2169                         link_check = TRUE;
2170                 break;
2171         case e1000_media_type_fiber:
2172                 e1000_check_for_link(hw);
2173                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2174                                  E1000_STATUS_LU);
2175                 break;
2176         case e1000_media_type_internal_serdes:
2177                 e1000_check_for_link(hw);
2178                 link_check = adapter->hw.mac.serdes_has_link;
2179                 break;
2180         /* VF device is type_unknown */
2181         case e1000_media_type_unknown:
2182                 e1000_check_for_link(hw);
2183                 link_check = !hw->mac.get_link_status;
2184                 /* Fall thru */
2185         default:
2186                 break;
2187         }
2188
2189         /* Check for thermal downshift or shutdown */
2190         if (hw->mac.type == e1000_i350) {
2191                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2192                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2193         }
2194
2195         /* Get the flow control for display */
2196         switch (fc->current_mode) {
2197         case e1000_fc_rx_pause:
2198                 flowctl = "RX";
2199                 break;  
2200         case e1000_fc_tx_pause:
2201                 flowctl = "TX";
2202                 break;  
2203         case e1000_fc_full:
2204                 flowctl = "Full";
2205                 break;  
2206         case e1000_fc_none:
2207         default:
2208                 flowctl = "None";
2209                 break;  
2210         }
2211
2212         /* Now we check if a transition has happened */
2213         if (link_check && (adapter->link_active == 0)) {
2214                 e1000_get_speed_and_duplex(&adapter->hw, 
2215                     &adapter->link_speed, &adapter->link_duplex);
2216                 if (bootverbose)
2217                         device_printf(dev, "Link is up %d Mbps %s,"
2218                             " Flow Control: %s\n",
2219                             adapter->link_speed,
2220                             ((adapter->link_duplex == FULL_DUPLEX) ?
2221                             "Full Duplex" : "Half Duplex"), flowctl);
2222                 adapter->link_active = 1;
2223                 ifp->if_baudrate = adapter->link_speed * 1000000;
2224                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2225                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2226                         device_printf(dev, "Link: thermal downshift\n");
2227                 /* Delay Link Up for PHY update */
2228                 if (((hw->mac.type == e1000_i210) ||
2229                     (hw->mac.type == e1000_i211)) &&
2230                     (hw->phy.id == I210_I_PHY_ID))
2231                         msec_delay(I210_LINK_DELAY);
2232                 /* Reset if the media type changed. */
2233                 if (hw->dev_spec._82575.media_changed) {
2234                         hw->dev_spec._82575.media_changed = false;
2235                         adapter->flags |= IGB_MEDIA_RESET;
2236                         igb_reset(adapter);
2237                 }
2238                 /* This can sleep */
2239                 if_link_state_change(ifp, LINK_STATE_UP);
2240         } else if (!link_check && (adapter->link_active == 1)) {
2241                 ifp->if_baudrate = adapter->link_speed = 0;
2242                 adapter->link_duplex = 0;
2243                 if (bootverbose)
2244                         device_printf(dev, "Link is Down\n");
2245                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2246                     (thstat & E1000_THSTAT_PWR_DOWN))
2247                         device_printf(dev, "Link: thermal shutdown\n");
2248                 adapter->link_active = 0;
2249                 /* This can sleep */
2250                 if_link_state_change(ifp, LINK_STATE_DOWN);
2251                 /* Reset queue state */
2252                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2253                         txr->queue_status = IGB_QUEUE_IDLE;
2254         }
2255 }
2256
2257 /*********************************************************************
2258  *
2259  *  This routine disables all traffic on the adapter by issuing a
2260  *  global reset on the MAC; TX/RX buffers are torn down separately.
2261  *
2262  **********************************************************************/
2263
2264 static void
2265 igb_stop(void *arg)
2266 {
2267         struct adapter  *adapter = arg;
2268         struct ifnet    *ifp = adapter->ifp;
2269         struct tx_ring *txr = adapter->tx_rings;
2270
2271         IGB_CORE_LOCK_ASSERT(adapter);
2272
2273         INIT_DEBUGOUT("igb_stop: begin");
2274
2275         igb_disable_intr(adapter);
2276
2277         callout_stop(&adapter->timer);
2278
2279         /* Tell the stack that the interface is no longer active */
2280         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2281         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2282
2283         /* Disarm watchdog timer. */
2284         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2285                 IGB_TX_LOCK(txr);
2286                 txr->queue_status = IGB_QUEUE_IDLE;
2287                 IGB_TX_UNLOCK(txr);
2288         }
2289
2290         e1000_reset_hw(&adapter->hw);
2291         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2292
2293         e1000_led_off(&adapter->hw);
2294         e1000_cleanup_led(&adapter->hw);
2295 }
2296
2297
2298 /*********************************************************************
2299  *
2300  *  Determine hardware revision.
2301  *
2302  **********************************************************************/
2303 static void
2304 igb_identify_hardware(struct adapter *adapter)
2305 {
2306         device_t dev = adapter->dev;
2307
2308         /* Make sure bus mastering is enabled in our PCI config space */
2309         pci_enable_busmaster(dev);
2310         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2311
2312         /* Save off the information about this board */
2313         adapter->hw.vendor_id = pci_get_vendor(dev);
2314         adapter->hw.device_id = pci_get_device(dev);
2315         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2316         adapter->hw.subsystem_vendor_id =
2317             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2318         adapter->hw.subsystem_device_id =
2319             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2320
2321         /* Set MAC type early for PCI setup */
2322         e1000_set_mac_type(&adapter->hw);
2323
2324         /* Are we a VF device? */
2325         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2326             (adapter->hw.mac.type == e1000_vfadapt_i350))
2327                 adapter->vf_ifp = 1;
2328         else
2329                 adapter->vf_ifp = 0;
2330 }
2331
2332 static int
2333 igb_allocate_pci_resources(struct adapter *adapter)
2334 {
2335         device_t        dev = adapter->dev;
2336         int             rid;
2337
2338         rid = PCIR_BAR(0);
2339         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2340             &rid, RF_ACTIVE);
2341         if (adapter->pci_mem == NULL) {
2342                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2343                 return (ENXIO);
2344         }
2345         adapter->osdep.mem_bus_space_tag =
2346             rman_get_bustag(adapter->pci_mem);
2347         adapter->osdep.mem_bus_space_handle =
2348             rman_get_bushandle(adapter->pci_mem);
2349         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2350
2351         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2352
2353         /* This will set up either MSI-X or MSI */
2354         adapter->msix = igb_setup_msix(adapter);
2355         adapter->hw.back = &adapter->osdep;
2356
2357         return (0);
2358 }
2359
2360 /*********************************************************************
2361  *
2362  *  Set up the Legacy or MSI interrupt handler
2363  *
2364  **********************************************************************/
2365 static int
2366 igb_allocate_legacy(struct adapter *adapter)
2367 {
2368         device_t                dev = adapter->dev;
2369         struct igb_queue        *que = adapter->queues;
2370 #ifndef IGB_LEGACY_TX
2371         struct tx_ring          *txr = adapter->tx_rings;
2372 #endif
2373         int                     error, rid = 0;
2374
2375         /* Turn off all interrupts */
2376         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2377
2378         /* MSI RID is 1 */
2379         if (adapter->msix == 1)
2380                 rid = 1;
2381
2382         /* We allocate a single interrupt resource */
2383         adapter->res = bus_alloc_resource_any(dev,
2384             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2385         if (adapter->res == NULL) {
2386                 device_printf(dev, "Unable to allocate bus resource: "
2387                     "interrupt\n");
2388                 return (ENXIO);
2389         }
2390
2391 #ifndef IGB_LEGACY_TX
2392         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2393 #endif
2394
2395         /*
2396          * Try allocating a fast interrupt and the associated deferred
2397          * processing contexts.
2398          */
2399         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2400         /* Make a task for deferred link handling */
2401         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2402         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2403             taskqueue_thread_enqueue, &que->tq);
2404         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2405             device_get_nameunit(adapter->dev));
2406         if ((error = bus_setup_intr(dev, adapter->res,
2407             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2408             adapter, &adapter->tag)) != 0) {
2409                 device_printf(dev, "Failed to register fast interrupt "
2410                             "handler: %d\n", error);
2411                 taskqueue_free(que->tq);
2412                 que->tq = NULL;
2413                 return (error);
2414         }
2415
2416         return (0);
2417 }
2418
2419
2420 /*********************************************************************
2421  *
2422  *  Set up the MSIX queue interrupt handlers
2423  *
2424  **********************************************************************/
2425 static int
2426 igb_allocate_msix(struct adapter *adapter)
2427 {
2428         device_t                dev = adapter->dev;
2429         struct igb_queue        *que = adapter->queues;
2430         int                     error, rid, vector = 0;
2431         int                     cpu_id = 0;
2432 #ifdef  RSS
2433         cpuset_t cpu_mask;
2434 #endif
2435
2436         /* Be sure to start with all interrupts disabled */
2437         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2438         E1000_WRITE_FLUSH(&adapter->hw);
2439
2440 #ifdef  RSS
2441         /*
2442          * If we're doing RSS, the number of queues needs to
2443          * match the number of RSS buckets that are configured.
2444          *
2445          * + If there are more queues than RSS buckets, we'll end
2446          *   up with queues that get no traffic.
2447          *
2448          * + If there are more RSS buckets than queues, we'll end
2449          *   up having multiple RSS buckets map to the same queue,
2450          *   so there'll be some contention.
2451          */
2452         if (adapter->num_queues != rss_getnumbuckets()) {
2453                 device_printf(dev,
2454                     "%s: number of queues (%d) != number of RSS buckets (%d)"
2455                     "; performance will be impacted.\n",
2456                     __func__,
2457                     adapter->num_queues,
2458                     rss_getnumbuckets());
2459         }
2460 #endif
2461
2462         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2463                 rid = vector + 1;
2464                 que->res = bus_alloc_resource_any(dev,
2465                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2466                 if (que->res == NULL) {
2467                         device_printf(dev,
2468                             "Unable to allocate bus resource: "
2469                             "MSIX Queue Interrupt\n");
2470                         return (ENXIO);
2471                 }
2472                 error = bus_setup_intr(dev, que->res,
2473                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2474                     igb_msix_que, que, &que->tag);
2475                 if (error) {
2476                         que->res = NULL;
2477                         device_printf(dev, "Failed to register Queue handler\n");
2478                         return (error);
2479                 }
2480 #if __FreeBSD_version >= 800504
2481                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2482 #endif
2483                 que->msix = vector;
2484                 if (adapter->hw.mac.type == e1000_82575)
2485                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2486                 else
2487                         que->eims = 1 << vector;
2488
2489 #ifdef  RSS
2490                 /*
2491                  * The queue ID is used as the RSS layer bucket ID.
2492                  * We look up the queue ID -> RSS CPU ID and select
2493                  * that.
2494                  */
2495                 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2496 #else
2497                 /*
2498                  * Bind the msix vector, and thus the
2499                  * rings to the corresponding cpu.
2500                  *
2501                  * This just happens to match the default RSS round-robin
2502                  * bucket -> queue -> CPU allocation.
2503                  */
2504                 if (adapter->num_queues > 1) {
2505                         if (igb_last_bind_cpu < 0)
2506                                 igb_last_bind_cpu = CPU_FIRST();
2507                         cpu_id = igb_last_bind_cpu;
2508                 }
2509 #endif
2510
2511                 if (adapter->num_queues > 1) {
2512                         bus_bind_intr(dev, que->res, cpu_id);
2513 #ifdef  RSS
2514                         device_printf(dev,
2515                                 "Bound queue %d to RSS bucket %d\n",
2516                                 i, cpu_id);
2517 #else
2518                         device_printf(dev,
2519                                 "Bound queue %d to cpu %d\n",
2520                                 i, cpu_id);
2521 #endif
2522                 }
2523
2524 #ifndef IGB_LEGACY_TX
2525                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2526                     que->txr);
2527 #endif
2528                 /* Make a task for deferred handling */
2529                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2530                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2531                     taskqueue_thread_enqueue, &que->tq);
2532                 if (adapter->num_queues > 1) {
2533                         /*
2534                          * Only pin the taskqueue thread to a CPU if
2535                          * RSS is in use.
2536                          *
2537                          * This again just happens to match the default RSS
2538                          * round-robin bucket -> queue -> CPU allocation.
2539                          */
2540 #ifdef  RSS
2541                         CPU_SETOF(cpu_id, &cpu_mask);
2542                         taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2543                             &cpu_mask,
2544                             "%s que (bucket %d)",
2545                             device_get_nameunit(adapter->dev),
2546                             cpu_id);
2547 #else
2548                         taskqueue_start_threads(&que->tq, 1, PI_NET,
2549                             "%s que (qid %d)",
2550                             device_get_nameunit(adapter->dev),
2551                             cpu_id);
2552 #endif
2553                 } else {
2554                         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2555                             device_get_nameunit(adapter->dev));
2556                 }
2557
2558                 /* Finally update the last bound CPU id */
2559                 if (adapter->num_queues > 1)
2560                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2561         }
2562
2563         /* And Link */
2564         rid = vector + 1;
2565         adapter->res = bus_alloc_resource_any(dev,
2566             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2567         if (adapter->res == NULL) {
2568                 device_printf(dev,
2569                     "Unable to allocate bus resource: "
2570                     "MSIX Link Interrupt\n");
2571                 return (ENXIO);
2572         }
2573         if ((error = bus_setup_intr(dev, adapter->res,
2574             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2575             igb_msix_link, adapter, &adapter->tag)) != 0) {
2576                 device_printf(dev, "Failed to register Link handler\n");
2577                 return (error);
2578         }
2579 #if __FreeBSD_version >= 800504
2580         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2581 #endif
2582         adapter->linkvec = vector;
2583
2584         return (0);
2585 }
2586
2587
2588 static void
2589 igb_configure_queues(struct adapter *adapter)
2590 {
2591         struct  e1000_hw        *hw = &adapter->hw;
2592         struct  igb_queue       *que;
2593         u32                     tmp, ivar = 0, newitr = 0;
2594
2595         /* First turn on MSI-X mode (the 82575 uses CTRL_EXT below) */
2596         if (adapter->hw.mac.type != e1000_82575)
2597                 E1000_WRITE_REG(hw, E1000_GPIE,
2598                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2599                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2600
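             /*
             ** Each IVAR register holds four 8-bit vector entries.  In the
             ** first case below, queue i's RX entry occupies byte 0 (even i)
             ** or byte 2 (odd i) of IVAR[i >> 1], and its TX entry byte 1 or
             ** byte 3, which is what the masks and shifts implement; queue 1,
             ** for example, lands in bits 23:16 (RX) and 31:24 (TX) of IVAR0.
             */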
2601         /* Turn on MSIX */
2602         switch (adapter->hw.mac.type) {
2603         case e1000_82580:
2604         case e1000_i350:
2605         case e1000_i354:
2606         case e1000_i210:
2607         case e1000_i211:
2608         case e1000_vfadapt:
2609         case e1000_vfadapt_i350:
2610                 /* RX entries */
2611                 for (int i = 0; i < adapter->num_queues; i++) {
2612                         u32 index = i >> 1;
2613                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2614                         que = &adapter->queues[i];
2615                         if (i & 1) {
2616                                 ivar &= 0xFF00FFFF;
2617                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2618                         } else {
2619                                 ivar &= 0xFFFFFF00;
2620                                 ivar |= que->msix | E1000_IVAR_VALID;
2621                         }
2622                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2623                 }
2624                 /* TX entries */
2625                 for (int i = 0; i < adapter->num_queues; i++) {
2626                         u32 index = i >> 1;
2627                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2628                         que = &adapter->queues[i];
2629                         if (i & 1) {
2630                                 ivar &= 0x00FFFFFF;
2631                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2632                         } else {
2633                                 ivar &= 0xFFFF00FF;
2634                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2635                         }
2636                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2637                         adapter->que_mask |= que->eims;
2638                 }
2639
2640                 /* And for the link interrupt */
2641                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2642                 adapter->link_mask = 1 << adapter->linkvec;
2643                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2644                 break;
2645         case e1000_82576:
2646                 /* RX entries */
2647                 for (int i = 0; i < adapter->num_queues; i++) {
2648                         u32 index = i & 0x7; /* Each IVAR has two entries */
2649                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2650                         que = &adapter->queues[i];
2651                         if (i < 8) {
2652                                 ivar &= 0xFFFFFF00;
2653                                 ivar |= que->msix | E1000_IVAR_VALID;
2654                         } else {
2655                                 ivar &= 0xFF00FFFF;
2656                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2657                         }
2658                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2659                         adapter->que_mask |= que->eims;
2660                 }
2661                 /* TX entries */
2662                 for (int i = 0; i < adapter->num_queues; i++) {
2663                         u32 index = i & 0x7; /* Each IVAR has two entries */
2664                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2665                         que = &adapter->queues[i];
2666                         if (i < 8) {
2667                                 ivar &= 0xFFFF00FF;
2668                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2669                         } else {
2670                                 ivar &= 0x00FFFFFF;
2671                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2672                         }
2673                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2674                         adapter->que_mask |= que->eims;
2675                 }
2676
2677                 /* And for the link interrupt */
2678                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2679                 adapter->link_mask = 1 << adapter->linkvec;
2680                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2681                 break;
2682
2683         case e1000_82575:
2684                 /* enable MSI-X support */
2685                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2686                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2687                 /* Auto-Mask interrupts upon ICR read. */
2688                 tmp |= E1000_CTRL_EXT_EIAME;
2689                 tmp |= E1000_CTRL_EXT_IRCA;
2690                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2691
2692                 /* Queues */
2693                 for (int i = 0; i < adapter->num_queues; i++) {
2694                         que = &adapter->queues[i];
2695                         tmp = E1000_EICR_RX_QUEUE0 << i;
2696                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2697                         que->eims = tmp;
2698                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2699                             i, que->eims);
2700                         adapter->que_mask |= que->eims;
2701                 }
2702
2703                 /* Link */
2704                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2705                     E1000_EIMS_OTHER);
2706                 adapter->link_mask |= E1000_EIMS_OTHER;
                     break;
2707         default:
2708                 break;
2709         }
2710
2711         /* Set the starting interrupt rate */
2712         if (igb_max_interrupt_rate > 0)
2713                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
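             /*
             ** E.g. a rate of 8000 interrupts/sec gives newitr =
             ** 4000000 / 8000 = 500; assuming the EITR interval field
             ** counts in 250ns units (which the 4000000 constant implies),
             ** that is a minimum gap of 125us between interrupts per vector.
             */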
2714
2715         if (hw->mac.type == e1000_82575)
2716                 newitr |= newitr << 16;
2717         else
2718                 newitr |= E1000_EITR_CNT_IGNR;
2719
2720         for (int i = 0; i < adapter->num_queues; i++) {
2721                 que = &adapter->queues[i];
2722                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2723         }
2724
2725         return;
2726 }
2727
2728
2729 static void
2730 igb_free_pci_resources(struct adapter *adapter)
2731 {
2732         struct          igb_queue *que = adapter->queues;
2733         device_t        dev = adapter->dev;
2734         int             rid;
2735
2736         /*
2737         ** There is a slight possibility of a failure mode
2738         ** in attach that will result in entering this function
2739         ** before interrupt resources have been initialized; in
2740         ** that case we do not want to execute the loops below.
2741         ** We can detect this reliably by the state of the adapter's
2742         ** res pointer.
2743         */
2744         if (adapter->res == NULL)
2745                 goto mem;
2746
2747         /*
2748          * First release all the interrupt resources:
2749          */
2750         for (int i = 0; i < adapter->num_queues; i++, que++) {
2751                 rid = que->msix + 1;
2752                 if (que->tag != NULL) {
2753                         bus_teardown_intr(dev, que->res, que->tag);
2754                         que->tag = NULL;
2755                 }
2756                 if (que->res != NULL)
2757                         bus_release_resource(dev,
2758                             SYS_RES_IRQ, rid, que->res);
2759         }
2760
2761         /* Clean the Legacy or Link interrupt last */
2762         if (adapter->linkvec) /* we are doing MSIX */
2763                 rid = adapter->linkvec + 1;
2764         else
2765                 rid = (adapter->msix != 0) ? 1 : 0;
2766
2767         que = adapter->queues;
2768         if (adapter->tag != NULL) {
2769                 taskqueue_drain(que->tq, &adapter->link_task);
2770                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2771                 adapter->tag = NULL;
2772         }
2773         if (adapter->res != NULL)
2774                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2775
2776         for (int i = 0; i < adapter->num_queues; i++, que++) {
2777                 if (que->tq != NULL) {
2778 #ifndef IGB_LEGACY_TX
2779                         taskqueue_drain(que->tq, &que->txr->txq_task);
2780 #endif
2781                         taskqueue_drain(que->tq, &que->que_task);
2782                         taskqueue_free(que->tq);
2783                 }
2784         }
2785 mem:
2786         if (adapter->msix)
2787                 pci_release_msi(dev);
2788
2789         if (adapter->msix_mem != NULL)
2790                 bus_release_resource(dev, SYS_RES_MEMORY,
2791                     adapter->memrid, adapter->msix_mem);
2792
2793         if (adapter->pci_mem != NULL)
2794                 bus_release_resource(dev, SYS_RES_MEMORY,
2795                     PCIR_BAR(0), adapter->pci_mem);
2796
2797 }
2798
2799 /*
2800  * Set up either MSI-X or MSI
2801  */
2802 static int
2803 igb_setup_msix(struct adapter *adapter)
2804 {
2805         device_t        dev = adapter->dev;
2806         int             bar, want, queues, msgs, maxqueues;
2807
2808         /* tunable override */
2809         if (igb_enable_msix == 0)
2810                 goto msi;
2811
2812         /* First try MSI/X */
2813         msgs = pci_msix_count(dev); 
2814         if (msgs == 0)
2815                 goto msi;
2816         /*
2817         ** Some newer devices, as with ixgbe, may use
2818         ** a different BAR, so we need to keep track
2819         ** of which one is used.
2820         */
2821         adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2822         bar = pci_read_config(dev, adapter->memrid, 4);
2823         if (bar == 0) /* use next bar */
2824                 adapter->memrid += 4;
2825         adapter->msix_mem = bus_alloc_resource_any(dev,
2826             SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2827         if (adapter->msix_mem == NULL) {
2828                 /* May not be enabled */
2829                 device_printf(adapter->dev,
                    "Unable to map MSIX table\n");
2831                 goto msi;
2832         }
2833
2834         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
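        /*
        ** One message is reserved for the link interrupt, so e.g.
        ** 8 CPUs with 10 available messages yields queues = 8,
        ** while 8 CPUs with only 4 messages yields queues = 3.
        */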
2835
2836         /* Override via tuneable */
2837         if (igb_num_queues != 0)
2838                 queues = igb_num_queues;
2839
2840 #ifdef  RSS
2841         /* If we're doing RSS, clamp at the number of RSS buckets */
2842         if (queues > rss_getnumbuckets())
2843                 queues = rss_getnumbuckets();
2844 #endif
2845
2846
2847         /* Sanity check based on HW */
2848         switch (adapter->hw.mac.type) {
2849                 case e1000_82575:
2850                         maxqueues = 4;
2851                         break;
2852                 case e1000_82576:
2853                 case e1000_82580:
2854                 case e1000_i350:
2855                 case e1000_i354:
2856                         maxqueues = 8;
2857                         break;
2858                 case e1000_i210:
2859                         maxqueues = 4;
2860                         break;
2861                 case e1000_i211:
2862                         maxqueues = 2;
2863                         break;
2864                 default:  /* VF interfaces */
2865                         maxqueues = 1;
2866                         break;
2867         }
2868
2869         /* Final clamp on the actual hardware capability */
2870         if (queues > maxqueues)
2871                 queues = maxqueues;
2872
2873         /*
        ** One vector (RX/TX pair) per queue,
        ** plus an additional one for the link interrupt
2876         */
2877         want = queues + 1;
2878         if (msgs >= want)
2879                 msgs = want;
2880         else {
2881                 device_printf(adapter->dev,
2882                     "MSIX Configuration Problem, "
                    "%d vectors configured, but %d vectors wanted!\n",
2884                     msgs, want);
2885                 goto msi;
2886         }
2887         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2888                 device_printf(adapter->dev,
2889                     "Using MSIX interrupts with %d vectors\n", msgs);
2890                 adapter->num_queues = queues;
2891                 return (msgs);
2892         }
2893         /*
2894         ** If MSIX alloc failed or provided us with
2895         ** less than needed, free and fall through to MSI
2896         */
2897         pci_release_msi(dev);
2898
2899 msi:
2900         if (adapter->msix_mem != NULL) {
                /* Release with the rid it was allocated with */
                bus_release_resource(dev, SYS_RES_MEMORY,
                    adapter->memrid, adapter->msix_mem);
2903                 adapter->msix_mem = NULL;
2904         }
2905         msgs = 1;
2906         if (pci_alloc_msi(dev, &msgs) == 0) {
                device_printf(adapter->dev, "Using an MSI interrupt\n");
2908                 return (msgs);
2909         }
        device_printf(adapter->dev, "Using a Legacy interrupt\n");
2911         return (0);
2912 }
2913
2914 /*********************************************************************
2915  *
2916  *  Initialize the DMA Coalescing feature
2917  *
2918  **********************************************************************/
2919 static void
2920 igb_init_dmac(struct adapter *adapter, u32 pba)
2921 {
2922         device_t        dev = adapter->dev;
2923         struct e1000_hw *hw = &adapter->hw;
2924         u32             dmac, reg = ~E1000_DMACR_DMAC_EN;
2925         u16             hwm;
2926
2927         if (hw->mac.type == e1000_i211)
2928                 return;
2929
2930         if (hw->mac.type > e1000_82580) {
2931
2932                 if (adapter->dmac == 0) { /* Disabling it */
2933                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2934                         return;
2935                 } else
2936                         device_printf(dev, "DMA Coalescing enabled\n");
2937
2938                 /* Set starting threshold */
2939                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2940
2941                 hwm = 64 * pba - adapter->max_frame_size / 16;
2942                 if (hwm < 64 * (pba - 6))
2943                         hwm = 64 * (pba - 6);
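                /*
                ** Illustrative arithmetic (assuming pba = 34 KB and a
                ** 1522 byte max frame): 64 * 34 - 1522 / 16 = 2081,
                ** which stays above the 64 * (34 - 6) = 1792 floor.
                */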
2944                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2945                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2946                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2947                     & E1000_FCRTC_RTH_COAL_MASK);
2948                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2949
2950
2951                 dmac = pba - adapter->max_frame_size / 512;
2952                 if (dmac < pba - 10)
2953                         dmac = pba - 10;
2954                 reg = E1000_READ_REG(hw, E1000_DMACR);
2955                 reg &= ~E1000_DMACR_DMACTHR_MASK;
                reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2957                     & E1000_DMACR_DMACTHR_MASK);
2958
                /* transition to L0s or L1 if available.. */
2960                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2961
                /*
                ** Check for a 2.5Gb backplane connection before
                ** configuring the watchdog timer: the timer value
                ** is expressed in 12.8usec intervals on a 2.5Gb
                ** link and in 32usec intervals otherwise.
                */
2968                 if (hw->mac.type == e1000_i354) {
2969                         int status = E1000_READ_REG(hw, E1000_STATUS);
2970                         if ((status & E1000_STATUS_2P5_SKU) &&
2971                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2972                                 reg |= ((adapter->dmac * 5) >> 6);
2973                         else
2974                                 reg |= (adapter->dmac >> 5);
2975                 } else {
2976                         reg |= (adapter->dmac >> 5);
2977                 }
2978
2979                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2980
2981                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2982
2983                 /* Set the interval before transition */
2984                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2985                 if (hw->mac.type == e1000_i350)
2986                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
2987                 /*
                ** On a 2.5Gb connection the TTLX unit is 0.4 usec,
                ** so the 4 usec delay becomes 0x4 * 2.5 = 0xA units;
                ** the delay itself is still 4 usec.
2990                 */
2991                 if (hw->mac.type == e1000_i354) {
2992                         int status = E1000_READ_REG(hw, E1000_STATUS);
2993                         if ((status & E1000_STATUS_2P5_SKU) &&
2994                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2995                                 reg |= 0xA;
2996                         else
2997                                 reg |= 0x4;
2998                 } else {
2999                         reg |= 0x4;
3000                 }
3001
3002                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3003
3004                 /* free space in tx packet buffer to wake from DMA coal */
3005                 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
3006                     (2 * adapter->max_frame_size)) >> 6);
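                /*
                ** The >> 6 above scales the byte count down to
                ** 64-byte units, leaving room for two full-sized
                ** frames in the Tx packet buffer before wake-up.
                */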
3007
                /* let DMA coalescing control the low power state decision */
3009                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3010                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3011                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3012
3013         } else if (hw->mac.type == e1000_82580) {
3014                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3015                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3016                     reg & ~E1000_PCIEMISC_LX_DECISION);
3017                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3018         }
3019 }
3020
3021
3022 /*********************************************************************
3023  *
 *  Set up a fresh starting state
3025  *
3026  **********************************************************************/
3027 static void
3028 igb_reset(struct adapter *adapter)
3029 {
3030         device_t        dev = adapter->dev;
3031         struct e1000_hw *hw = &adapter->hw;
3032         struct e1000_fc_info *fc = &hw->fc;
3033         struct ifnet    *ifp = adapter->ifp;
3034         u32             pba = 0;
3035         u16             hwm;
3036
3037         INIT_DEBUGOUT("igb_reset: begin");
3038
3039         /* Let the firmware know the OS is in control */
3040         igb_get_hw_control(adapter);
3041
3042         /*
3043          * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
3046          */
3047         switch (hw->mac.type) {
3048         case e1000_82575:
3049                 pba = E1000_PBA_32K;
3050                 break;
3051         case e1000_82576:
3052         case e1000_vfadapt:
3053                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3054                 pba &= E1000_RXPBS_SIZE_MASK_82576;
3055                 break;
3056         case e1000_82580:
3057         case e1000_i350:
3058         case e1000_i354:
3059         case e1000_vfadapt_i350:
3060                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3061                 pba = e1000_rxpbs_adjust_82580(pba);
3062                 break;
3063         case e1000_i210:
3064         case e1000_i211:
                pba = E1000_PBA_34K;
                break;
3066         default:
3067                 break;
3068         }
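        /* Note: pba is expressed in KB; the << 10 below converts it to bytes. */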
3069
3070         /* Special needs in case of Jumbo frames */
3071         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3072                 u32 tx_space, min_tx, min_rx;
3073                 pba = E1000_READ_REG(hw, E1000_PBA);
3074                 tx_space = pba >> 16;
3075                 pba &= 0xffff;
3076                 min_tx = (adapter->max_frame_size +
3077                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3078                 min_tx = roundup2(min_tx, 1024);
3079                 min_tx >>= 10;
3080                 min_rx = adapter->max_frame_size;
3081                 min_rx = roundup2(min_rx, 1024);
3082                 min_rx >>= 10;
3083                 if (tx_space < min_tx &&
3084                     ((min_tx - tx_space) < pba)) {
3085                         pba = pba - (min_tx - tx_space);
3086                         /*
3087                          * if short on rx space, rx wins
3088                          * and must trump tx adjustment
3089                          */
3090                         if (pba < min_rx)
3091                                 pba = min_rx;
3092                 }
3093                 E1000_WRITE_REG(hw, E1000_PBA, pba);
3094         }
3095
        INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3097
3098         /*
3099          * These parameters control the automatic generation (Tx) and
3100          * response (Rx) to Ethernet PAUSE frames.
3101          * - High water mark should allow for at least two frames to be
3102          *   received after sending an XOFF.
3103          * - Low water mark works best when it is very near the high water mark.
3104          *   This allows the receiver to restart by sending XON when it has
3105          *   drained a bit.
3106          */
3107         hwm = min(((pba << 10) * 9 / 10),
3108             ((pba << 10) - 2 * adapter->max_frame_size));
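        /*
        ** Illustrative arithmetic (assuming pba = 34 KB and a 1522
        ** byte max frame): min(34816 * 9 / 10, 34816 - 3044) =
        ** min(31334, 31772) = 31334 bytes for the high water mark.
        */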
3109
3110         if (hw->mac.type < e1000_82576) {
3111                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3112                 fc->low_water = fc->high_water - 8;
3113         } else {
3114                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3115                 fc->low_water = fc->high_water - 16;
3116         }
3117
3118         fc->pause_time = IGB_FC_PAUSE_TIME;
3119         fc->send_xon = TRUE;
3120         if (adapter->fc)
3121                 fc->requested_mode = adapter->fc;
3122         else
3123                 fc->requested_mode = e1000_fc_default;
3124
3125         /* Issue a global reset */
3126         e1000_reset_hw(hw);
3127         E1000_WRITE_REG(hw, E1000_WUC, 0);
3128
3129         /* Reset for AutoMediaDetect */
3130         if (adapter->flags & IGB_MEDIA_RESET) {
3131                 e1000_setup_init_funcs(hw, TRUE);
3132                 e1000_get_bus_info(hw);
3133                 adapter->flags &= ~IGB_MEDIA_RESET;
3134         }
3135
3136         if (e1000_init_hw(hw) < 0)
3137                 device_printf(dev, "Hardware Initialization Failed\n");
3138
3139         /* Setup DMA Coalescing */
3140         igb_init_dmac(adapter, pba);
3141
3142         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3143         e1000_get_phy_info(hw);
3144         e1000_check_for_link(hw);
3145         return;
3146 }
3147
3148 /*********************************************************************
3149  *
3150  *  Setup networking device structure and register an interface.
3151  *
3152  **********************************************************************/
3153 static int
3154 igb_setup_interface(device_t dev, struct adapter *adapter)
3155 {
3156         struct ifnet   *ifp;
3157
3158         INIT_DEBUGOUT("igb_setup_interface: begin");
3159
3160         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3161         if (ifp == NULL) {
3162                 device_printf(dev, "can not allocate ifnet structure\n");
3163                 return (-1);
3164         }
3165         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3166         ifp->if_init =  igb_init;
3167         ifp->if_softc = adapter;
3168         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3169         ifp->if_ioctl = igb_ioctl;
3170         ifp->if_get_counter = igb_get_counter;
3171
3172         /* TSO parameters */
3173         ifp->if_hw_tsomax = IP_MAXPACKET;
3174         ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
3175         ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
3176
3177 #ifndef IGB_LEGACY_TX
3178         ifp->if_transmit = igb_mq_start;
3179         ifp->if_qflush = igb_qflush;
3180 #else
3181         ifp->if_start = igb_start;
3182         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3183         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3184         IFQ_SET_READY(&ifp->if_snd);
3185 #endif
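        /*
        ** Note: without IGB_LEGACY_TX the stack hands packets to
        ** igb_mq_start() via if_transmit, which spreads them across
        ** the per-queue buf_rings; the legacy path funnels everything
        ** through the single if_snd queue.
        */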
3186
3187         ether_ifattach(ifp, adapter->hw.mac.addr);
3188
3189         ifp->if_capabilities = ifp->if_capenable = 0;
3190
3191         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3192 #if __FreeBSD_version >= 1000000
3193         ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
3194 #endif
3195         ifp->if_capabilities |= IFCAP_TSO;
3196         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3197         ifp->if_capenable = ifp->if_capabilities;
3198
        /* Advertise LRO capability, but don't enable it by default */
3200         ifp->if_capabilities |= IFCAP_LRO;
3201
3202 #ifdef DEVICE_POLLING
3203         ifp->if_capabilities |= IFCAP_POLLING;
3204 #endif
3205
3206         /*
3207          * Tell the upper layer(s) we
3208          * support full VLAN capability.
3209          */
3210         ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3211         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3212                              |  IFCAP_VLAN_HWTSO
3213                              |  IFCAP_VLAN_MTU;
3214         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3215                           |  IFCAP_VLAN_HWTSO
3216                           |  IFCAP_VLAN_MTU;
3217
        /*
        ** Don't turn this on by default: if vlans are
        ** created on another pseudo device (e.g. lagg)
        ** then vlan events are not passed through, breaking
        ** operation, but with HW FILTER off it works. If
        ** you use vlans directly on the igb driver you can
        ** enable this and get full hardware tag filtering.
        */
3226         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3227
3228         /*
3229          * Specify the media types supported by this adapter and register
3230          * callbacks to update media and link information
3231          */
3232         ifmedia_init(&adapter->media, IFM_IMASK,
3233             igb_media_change, igb_media_status);
3234         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3235             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3236                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3237                             0, NULL);
3238                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3239         } else {
3240                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3241                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3242                             0, NULL);
3243                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3244                             0, NULL);
3245                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3246                             0, NULL);
3247                 if (adapter->hw.phy.type != e1000_phy_ife) {
3248                         ifmedia_add(&adapter->media,
3249                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3250                         ifmedia_add(&adapter->media,
3251                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3252                 }
3253         }
3254         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3255         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3256         return (0);
3257 }
3258
3259
3260 /*
3261  * Manage DMA'able memory.
3262  */
3263 static void
3264 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3265 {
3266         if (error)
3267                 return;
3268         *(bus_addr_t *) arg = segs[0].ds_addr;
3269 }
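/*
 * Note: bus_dmamap_load() invokes the callback above with the resolved
 * segment list; the tags created below use nsegments = 1, so
 * segs[0].ds_addr is the bus address of the whole mapping.
 */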
3270
3271 static int
3272 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3273         struct igb_dma_alloc *dma, int mapflags)
3274 {
3275         int error;
3276
3277         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3278                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3279                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3280                                 BUS_SPACE_MAXADDR,      /* highaddr */
3281                                 NULL, NULL,             /* filter, filterarg */
3282                                 size,                   /* maxsize */
3283                                 1,                      /* nsegments */
3284                                 size,                   /* maxsegsize */
3285                                 0,                      /* flags */
3286                                 NULL,                   /* lockfunc */
3287                                 NULL,                   /* lockarg */
3288                                 &dma->dma_tag);
3289         if (error) {
3290                 device_printf(adapter->dev,
3291                     "%s: bus_dma_tag_create failed: %d\n",
3292                     __func__, error);
3293                 goto fail_0;
3294         }
3295
3296         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3297             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3298         if (error) {
3299                 device_printf(adapter->dev,
3300                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3301                     __func__, (uintmax_t)size, error);
3302                 goto fail_2;
3303         }
3304
3305         dma->dma_paddr = 0;
3306         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3307             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3308         if (error || dma->dma_paddr == 0) {
3309                 device_printf(adapter->dev,
3310                     "%s: bus_dmamap_load failed: %d\n",
3311                     __func__, error);
3312                 goto fail_3;
3313         }
3314
3315         return (0);
3316
3317 fail_3:
3318         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3319 fail_2:
3320         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3321         bus_dma_tag_destroy(dma->dma_tag);
3322 fail_0:
3323         dma->dma_tag = NULL;
3324
3325         return (error);
3326 }
3327
3328 static void
3329 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3330 {
3331         if (dma->dma_tag == NULL)
3332                 return;
3333         if (dma->dma_paddr != 0) {
3334                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3335                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3336                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3337                 dma->dma_paddr = 0;
3338         }
3339         if (dma->dma_vaddr != NULL) {
3340                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3341                 dma->dma_vaddr = NULL;
3342         }
3343         bus_dma_tag_destroy(dma->dma_tag);
3344         dma->dma_tag = NULL;
3345 }
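/*
 * Minimal usage sketch of the pair above (illustrative only; the
 * 'sc' and 'ring' locals are hypothetical, not part of the driver):
 *
 *	struct igb_dma_alloc ring;
 *
 *	if (igb_dma_malloc(sc, 4096, &ring, BUS_DMA_NOWAIT) == 0) {
 *		... use ring.dma_vaddr / ring.dma_paddr ...
 *		igb_dma_free(sc, &ring);
 *	}
 */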
3346
3347
3348 /*********************************************************************
3349  *
3350  *  Allocate memory for the transmit and receive rings, and then
3351  *  the descriptors associated with each, called only once at attach.
3352  *
3353  **********************************************************************/
3354 static int
3355 igb_allocate_queues(struct adapter *adapter)
3356 {
3357         device_t dev = adapter->dev;
3358         struct igb_queue        *que = NULL;
3359         struct tx_ring          *txr = NULL;
3360         struct rx_ring          *rxr = NULL;
3361         int rsize, tsize, error = E1000_SUCCESS;
3362         int txconf = 0, rxconf = 0;
3363
3364         /* First allocate the top level queue structs */
3365         if (!(adapter->queues =
3366             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3367             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3368                 device_printf(dev, "Unable to allocate queue memory\n");
3369                 error = ENOMEM;
3370                 goto fail;
3371         }
3372
3373         /* Next allocate the TX ring struct memory */
3374         if (!(adapter->tx_rings =
3375             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3376             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3377                 device_printf(dev, "Unable to allocate TX ring memory\n");
3378                 error = ENOMEM;
3379                 goto tx_fail;
3380         }
3381
3382         /* Now allocate the RX */
3383         if (!(adapter->rx_rings =
3384             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3385             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3386                 device_printf(dev, "Unable to allocate RX ring memory\n");
3387                 error = ENOMEM;
3388                 goto rx_fail;
3389         }
3390
3391         tsize = roundup2(adapter->num_tx_desc *
3392             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3393         /*
3394          * Now set up the TX queues, txconf is needed to handle the
3395          * possibility that things fail midcourse and we need to
3396          * undo memory gracefully
3397          */ 
3398         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3399                 /* Set up some basics */
3400                 txr = &adapter->tx_rings[i];
3401                 txr->adapter = adapter;
3402                 txr->me = i;
3403                 txr->num_desc = adapter->num_tx_desc;
3404
3405                 /* Initialize the TX lock */
3406                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3407                     device_get_nameunit(dev), txr->me);
3408                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3409
3410                 if (igb_dma_malloc(adapter, tsize,
3411                         &txr->txdma, BUS_DMA_NOWAIT)) {
3412                         device_printf(dev,
3413                             "Unable to allocate TX Descriptor memory\n");
3414                         error = ENOMEM;
3415                         goto err_tx_desc;
3416                 }
3417                 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3418                 bzero((void *)txr->tx_base, tsize);
3419
3420                 /* Now allocate transmit buffers for the ring */
3421                 if (igb_allocate_transmit_buffers(txr)) {
3422                         device_printf(dev,
3423                             "Critical Failure setting up transmit buffers\n");
3424                         error = ENOMEM;
3425                         goto err_tx_desc;
3426                 }
3427 #ifndef IGB_LEGACY_TX
3428                 /* Allocate a buf ring */
3429                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3430                     M_WAITOK, &txr->tx_mtx);
3431 #endif
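                /*
                ** Note: each queue gets its own buf_ring, sized by
                ** igb_buf_ring_size and associated with the per-ring
                ** TX mutex handed to buf_ring_alloc() above.
                */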
3432         }
3433
3434         /*
3435          * Next the RX queues...
3436          */ 
3437         rsize = roundup2(adapter->num_rx_desc *
3438             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3439         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3440                 rxr = &adapter->rx_rings[i];
3441                 rxr->adapter = adapter;
3442                 rxr->me = i;
3443
3444                 /* Initialize the RX lock */
3445                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
                    device_get_nameunit(dev), rxr->me);
3447                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3448
3449                 if (igb_dma_malloc(adapter, rsize,
3450                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3451                         device_printf(dev,
                            "Unable to allocate RX Descriptor memory\n");
3453                         error = ENOMEM;
3454                         goto err_rx_desc;
3455                 }
3456                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3457                 bzero((void *)rxr->rx_base, rsize);
3458
                /* Allocate receive buffers for the ring */
3460                 if (igb_allocate_receive_buffers(rxr)) {
3461                         device_printf(dev,
3462                             "Critical Failure setting up receive buffers\n");
3463                         error = ENOMEM;
3464                         goto err_rx_desc;
3465                 }
3466         }
3467
3468         /*
3469         ** Finally set up the queue holding structs
3470         */
3471         for (int i = 0; i < adapter->num_queues; i++) {
3472                 que = &adapter->queues[i];
3473                 que->adapter = adapter;
3474                 que->txr = &adapter->tx_rings[i];
3475                 que->rxr = &adapter->rx_rings[i];
3476         }
3477
3478         return (0);
3479
3480 err_rx_desc:
3481         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3482                 igb_dma_free(adapter, &rxr->rxdma);
3483 err_tx_desc:
3484         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3485                 igb_dma_free(adapter, &txr->txdma);
3486         free(adapter->rx_rings, M_DEVBUF);
3487 rx_fail:
3488 #ifndef IGB_LEGACY_TX
        /* txr may be NULL (or its br unallocated) on this path */
        if (txr != NULL && txr->br != NULL)
                buf_ring_free(txr->br, M_DEVBUF);
3490 #endif
3491         free(adapter->tx_rings, M_DEVBUF);
3492 tx_fail:
3493         free(adapter->queues, M_DEVBUF);
3494 fail:
3495         return (error);
3496 }
3497
3498 /*********************************************************************
3499  *
3500  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3501  *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach; setup is done on every reset.
3503  *
3504  **********************************************************************/
3505 static int
3506 igb_allocate_transmit_buffers(struct tx_ring *txr)
3507 {
3508         struct adapter *adapter = txr->adapter;
3509         device_t dev = adapter->dev;
3510         struct igb_tx_buf *txbuf;
3511         int error, i;
3512
3513         /*
3514          * Setup DMA descriptor areas.
3515          */
3516         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3517                                1, 0,                    /* alignment, bounds */
3518                                BUS_SPACE_MAXADDR,       /* lowaddr */
3519                                BUS_SPACE_MAXADDR,       /* highaddr */
3520                                NULL, NULL,              /* filter, filterarg */
3521                                IGB_TSO_SIZE,            /* maxsize */
3522                                IGB_MAX_SCATTER,         /* nsegments */
3523                                PAGE_SIZE,               /* maxsegsize */
3524                                0,                       /* flags */
3525                                NULL,                    /* lockfunc */
3526                                NULL,                    /* lockfuncarg */
3527                                &txr->txtag))) {
3528                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3529                 goto fail;
3530         }
3531
3532         if (!(txr->tx_buffers =
3533             (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3534             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3535                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3536                 error = ENOMEM;
3537                 goto fail;
3538         }
3539
3540         /* Create the descriptor buffer dma maps */
3541         txbuf = txr->tx_buffers;
3542         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3543                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3544                 if (error != 0) {
3545                         device_printf(dev, "Unable to create TX DMA map\n");
3546                         goto fail;
3547                 }
3548         }
3549
3550         return 0;
3551 fail:
        /* Free everything; this handles the case where we failed partway */
3553         igb_free_transmit_structures(adapter);
3554         return (error);
3555 }
3556
3557 /*********************************************************************
3558  *
3559  *  Initialize a transmit ring.
3560  *
3561  **********************************************************************/
3562 static void
3563 igb_setup_transmit_ring(struct tx_ring *txr)
3564 {
3565         struct adapter *adapter = txr->adapter;
3566         struct igb_tx_buf *txbuf;
3567         int i;
3568 #ifdef DEV_NETMAP
3569         struct netmap_adapter *na = NA(adapter->ifp);
3570         struct netmap_slot *slot;
3571 #endif /* DEV_NETMAP */
3572
3573         /* Clear the old descriptor contents */
3574         IGB_TX_LOCK(txr);
3575 #ifdef DEV_NETMAP
3576         slot = netmap_reset(na, NR_TX, txr->me, 0);
3577 #endif /* DEV_NETMAP */
3578         bzero((void *)txr->tx_base,
3579               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3580         /* Reset indices */
3581         txr->next_avail_desc = 0;
3582         txr->next_to_clean = 0;
3583
3584         /* Free any existing tx buffers. */
3585         txbuf = txr->tx_buffers;
3586         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3587                 if (txbuf->m_head != NULL) {
3588                         bus_dmamap_sync(txr->txtag, txbuf->map,
3589                             BUS_DMASYNC_POSTWRITE);
3590                         bus_dmamap_unload(txr->txtag, txbuf->map);
3591                         m_freem(txbuf->m_head);
3592                         txbuf->m_head = NULL;
3593                 }
3594 #ifdef DEV_NETMAP
3595                 if (slot) {
3596                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3597                         /* no need to set the address */
3598                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3599                 }
3600 #endif /* DEV_NETMAP */
3601                 /* clear the watch index */
3602                 txbuf->eop = NULL;
3603         }
3604
3605         /* Set number of descriptors available */
3606         txr->tx_avail = adapter->num_tx_desc;
3607
3608         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3609             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3610         IGB_TX_UNLOCK(txr);
3611 }
3612
3613 /*********************************************************************
3614  *
3615  *  Initialize all transmit rings.
3616  *
3617  **********************************************************************/
3618 static void
3619 igb_setup_transmit_structures(struct adapter *adapter)
3620 {
3621         struct tx_ring *txr = adapter->tx_rings;
3622
3623         for (int i = 0; i < adapter->num_queues; i++, txr++)
3624                 igb_setup_transmit_ring(txr);
3625
3626         return;
3627 }
3628
3629 /*********************************************************************
3630  *
3631  *  Enable transmit unit.
3632  *
3633  **********************************************************************/
3634 static void
3635 igb_initialize_transmit_units(struct adapter *adapter)
3636 {
3637         struct tx_ring  *txr = adapter->tx_rings;
3638         struct e1000_hw *hw = &adapter->hw;
3639         u32             tctl, txdctl;
3640
3641         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3642         tctl = txdctl = 0;
3643
3644         /* Setup the Tx Descriptor Rings */
3645         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3646                 u64 bus_addr = txr->txdma.dma_paddr;
3647
3648                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3649                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3650                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3651                     (uint32_t)(bus_addr >> 32));
3652                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3653                     (uint32_t)bus_addr);
3654
3655                 /* Setup the HW Tx Head and Tail descriptor pointers */
3656                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3657                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3658
3659                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3660                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3661                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3662
3663                 txr->queue_status = IGB_QUEUE_IDLE;
3664
3665                 txdctl |= IGB_TX_PTHRESH;
3666                 txdctl |= IGB_TX_HTHRESH << 8;
3667                 txdctl |= IGB_TX_WTHRESH << 16;
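                /*
                ** The prefetch, host and write-back thresholds sit in
                ** byte-aligned TXDCTL fields at bit offsets 0, 8 and
                ** 16, hence the shifts above.
                */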
3668                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3669                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3670         }
3671
3672         if (adapter->vf_ifp)
3673                 return;
3674
3675         e1000_config_collision_dist(hw);
3676
3677         /* Program the Transmit Control Register */
3678         tctl = E1000_READ_REG(hw, E1000_TCTL);
3679         tctl &= ~E1000_TCTL_CT;
3680         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3681                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3682
3683         /* This write will effectively turn on the transmit unit. */
3684         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3685 }
3686
3687 /*********************************************************************
3688  *
3689  *  Free all transmit rings.
3690  *
3691  **********************************************************************/
3692 static void
3693 igb_free_transmit_structures(struct adapter *adapter)
3694 {
3695         struct tx_ring *txr = adapter->tx_rings;
3696
3697         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3698                 IGB_TX_LOCK(txr);
3699                 igb_free_transmit_buffers(txr);
3700                 igb_dma_free(adapter, &txr->txdma);
3701                 IGB_TX_UNLOCK(txr);
3702                 IGB_TX_LOCK_DESTROY(txr);
3703         }
3704         free(adapter->tx_rings, M_DEVBUF);
3705 }
3706
3707 /*********************************************************************
3708  *
3709  *  Free transmit ring related data structures.
3710  *
3711  **********************************************************************/
3712 static void
3713 igb_free_transmit_buffers(struct tx_ring *txr)
3714 {
3715         struct adapter *adapter = txr->adapter;
3716         struct igb_tx_buf *tx_buffer;
3717         int             i;
3718
3719         INIT_DEBUGOUT("free_transmit_ring: begin");
3720
3721         if (txr->tx_buffers == NULL)
3722                 return;
3723
3724         tx_buffer = txr->tx_buffers;
3725         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3726                 if (tx_buffer->m_head != NULL) {
3727                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3728                             BUS_DMASYNC_POSTWRITE);
3729                         bus_dmamap_unload(txr->txtag,
3730                             tx_buffer->map);
3731                         m_freem(tx_buffer->m_head);
3732                         tx_buffer->m_head = NULL;
3733                         if (tx_buffer->map != NULL) {
3734                                 bus_dmamap_destroy(txr->txtag,
3735                                     tx_buffer->map);
3736                                 tx_buffer->map = NULL;
3737                         }
3738                 } else if (tx_buffer->map != NULL) {
3739                         bus_dmamap_unload(txr->txtag,
3740                             tx_buffer->map);
3741                         bus_dmamap_destroy(txr->txtag,
3742                             tx_buffer->map);
3743                         tx_buffer->map = NULL;
3744                 }
3745         }
3746 #ifndef IGB_LEGACY_TX
3747         if (txr->br != NULL)
3748                 buf_ring_free(txr->br, M_DEVBUF);
3749 #endif
3750         if (txr->tx_buffers != NULL) {
3751                 free(txr->tx_buffers, M_DEVBUF);
3752                 txr->tx_buffers = NULL;
3753         }
3754         if (txr->txtag != NULL) {
3755                 bus_dma_tag_destroy(txr->txtag);
3756                 txr->txtag = NULL;
3757         }
3758         return;
3759 }
3760
3761 /**********************************************************************
3762  *
3763  *  Setup work for hardware segmentation offload (TSO) on
3764  *  adapters using advanced tx descriptors
3765  *
3766  **********************************************************************/
3767 static int
3768 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3769     u32 *cmd_type_len, u32 *olinfo_status)
3770 {
3771         struct adapter *adapter = txr->adapter;
3772         struct e1000_adv_tx_context_desc *TXD;
3773         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3774         u32 mss_l4len_idx = 0, paylen;
3775         u16 vtag = 0, eh_type;
3776         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3777         struct ether_vlan_header *eh;
3778 #ifdef INET6
3779         struct ip6_hdr *ip6;
3780 #endif
3781 #ifdef INET
3782         struct ip *ip;
3783 #endif
3784         struct tcphdr *th;
3785
3786
3787         /*
3788          * Determine where frame payload starts.
3789          * Jump over vlan headers if already present
3790          */
3791         eh = mtod(mp, struct ether_vlan_header *);
3792         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3793                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3794                 eh_type = eh->evl_proto;
3795         } else {
3796                 ehdrlen = ETHER_HDR_LEN;
3797                 eh_type = eh->evl_encap_proto;
3798         }
3799
3800         switch (ntohs(eh_type)) {
3801 #ifdef INET6
3802         case ETHERTYPE_IPV6:
3803                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3804                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3805                 if (ip6->ip6_nxt != IPPROTO_TCP)
3806                         return (ENXIO);
3807                 ip_hlen = sizeof(struct ip6_hdr);
3809                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3810                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3811                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3812                 break;
3813 #endif
3814 #ifdef INET
3815         case ETHERTYPE_IP:
3816                 ip = (struct ip *)(mp->m_data + ehdrlen);
3817                 if (ip->ip_p != IPPROTO_TCP)
3818                         return (ENXIO);
3819                 ip->ip_sum = 0;
3820                 ip_hlen = ip->ip_hl << 2;
3821                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3822                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3823                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3824                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3825                 /* Tell transmit desc to also do IPv4 checksum. */
3826                 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3827                 break;
3828 #endif
3829         default:
3830                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3831                     __func__, ntohs(eh_type));
3832                 break;
3833         }
3834
3835         ctxd = txr->next_avail_desc;
3836         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3837
3838         tcp_hlen = th->th_off << 2;
3839
3840         /* This is used in the transmit desc in encap */
3841         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
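        /*
        ** For TSO the hardware replicates the headers onto every
        ** segment it carves out, so only the TCP payload length is
        ** reported here, not the full packet length.
        */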
3842
3843         /* VLAN MACLEN IPLEN */
3844         if (mp->m_flags & M_VLANTAG) {
3845                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3846                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3847         }
3848
3849         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3850         vlan_macip_lens |= ip_hlen;
3851         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3852
3853         /* ADV DTYPE TUCMD */
3854         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3855         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3856         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3857
3858         /* MSS L4LEN IDX */
3859         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3860         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3861         /* 82575 needs the queue index added */
3862         if (adapter->hw.mac.type == e1000_82575)
3863                 mss_l4len_idx |= txr->me << 4;
3864         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3865
3866         TXD->seqnum_seed = htole32(0);
3867
3868         if (++ctxd == txr->num_desc)
3869                 ctxd = 0;
3870
3871         txr->tx_avail--;
3872         txr->next_avail_desc = ctxd;
3873         *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3874         *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3875         *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3876         ++txr->tso_tx;
3877         return (0);
3878 }
3879
3880 /*********************************************************************
3881  *
3882  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3883  *
3884  **********************************************************************/
3885
3886 static int
3887 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3888     u32 *cmd_type_len, u32 *olinfo_status)
3889 {
3890         struct e1000_adv_tx_context_desc *TXD;
3891         struct adapter *adapter = txr->adapter;
3892         struct ether_vlan_header *eh;
3893         struct ip *ip;
3894         struct ip6_hdr *ip6;
3895         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3896         int     ehdrlen, ip_hlen = 0;
3897         u16     etype;
3898         u8      ipproto = 0;
3899         int     offload = TRUE;
3900         int     ctxd = txr->next_avail_desc;
3901         u16     vtag = 0;
3902
3903         /* First check if TSO is to be used */
3904         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3905                 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3906
3907         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3908                 offload = FALSE;
3909
3910         /* Indicate the whole packet as payload when not doing TSO */
3911         *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3912
3913         /* Now ready a context descriptor */
3914         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3915
3916         /*
3917         ** In advanced descriptors the vlan tag must 
3918         ** be placed into the context descriptor. Hence
3919         ** we need to make one even if not doing offloads.
3920         */
3921         if (mp->m_flags & M_VLANTAG) {
3922                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3923                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3924         } else if (offload == FALSE) /* ... no offload to do */
3925                 return (0);
3926
3927         /*
3928          * Determine where frame payload starts.
3929          * Jump over vlan headers if already present,
3930          * helpful for QinQ too.
3931          */
3932         eh = mtod(mp, struct ether_vlan_header *);
3933         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3934                 etype = ntohs(eh->evl_proto);
3935                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3936         } else {
3937                 etype = ntohs(eh->evl_encap_proto);
3938                 ehdrlen = ETHER_HDR_LEN;
3939         }
3940
3941         /* Set the ether header length */
3942         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3943
3944         switch (etype) {
3945                 case ETHERTYPE_IP:
3946                         ip = (struct ip *)(mp->m_data + ehdrlen);
3947                         ip_hlen = ip->ip_hl << 2;
3948                         ipproto = ip->ip_p;
3949                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3950                         break;
3951                 case ETHERTYPE_IPV6:
3952                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3953                         ip_hlen = sizeof(struct ip6_hdr);
3954                         /* XXX-BZ this will go badly in case of ext hdrs. */
3955                         ipproto = ip6->ip6_nxt;
3956                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3957                         break;
3958                 default:
3959                         offload = FALSE;
3960                         break;
3961         }
3962
3963         vlan_macip_lens |= ip_hlen;
3964         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3965
3966         switch (ipproto) {
3967                 case IPPROTO_TCP:
3968 #if __FreeBSD_version >= 1000000
3969                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
3970 #else
3971                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3972 #endif
3973                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3974                         break;
3975                 case IPPROTO_UDP:
3976 #if __FreeBSD_version >= 1000000
3977                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
3978 #else
3979                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3980 #endif
3981                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3982                         break;
3983
3984 #if __FreeBSD_version >= 800000
3985                 case IPPROTO_SCTP:
3986 #if __FreeBSD_version >= 1000000
3987                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
3988 #else
3989                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3990 #endif
3991                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3992                         break;
3993 #endif
3994                 default:
3995                         offload = FALSE;
3996                         break;
3997         }
3998
3999         if (offload) /* For the TX descriptor setup */
4000                 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4001
4002         /* 82575 needs the queue index added */
4003         if (adapter->hw.mac.type == e1000_82575)
4004                 mss_l4len_idx = txr->me << 4;
4005
4006         /* Now copy bits into descriptor */
4007         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
4008         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
4009         TXD->seqnum_seed = htole32(0);
4010         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
4011
4012         /* We've consumed the first desc, adjust counters */
4013         if (++ctxd == txr->num_desc)
4014                 ctxd = 0;
4015         txr->next_avail_desc = ctxd;
4016         --txr->tx_avail;
4017
4018         return (0);
4019 }
4020
4021 /**********************************************************************
4022  *
4023  *  Examine each tx_buffer in the used queue. If the hardware is done
4024  *  processing the packet then free associated resources. The
4025  *  tx_buffer is put back on the free queue.
4026  *
 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
4028  **********************************************************************/
4029 static bool
4030 igb_txeof(struct tx_ring *txr)
4031 {
4032         struct adapter          *adapter = txr->adapter;
4033 #ifdef DEV_NETMAP
4034         struct ifnet            *ifp = adapter->ifp;
4035 #endif /* DEV_NETMAP */
4036         u32                     work, processed = 0;
4037         int                     limit = adapter->tx_process_limit;
4038         struct igb_tx_buf       *buf;
4039         union e1000_adv_tx_desc *txd;
4040
4041         mtx_assert(&txr->tx_mtx, MA_OWNED);
4042
4043 #ifdef DEV_NETMAP
4044         if (netmap_tx_irq(ifp, txr->me))
4045                 return (FALSE);
4046 #endif /* DEV_NETMAP */
4047
4048         if (txr->tx_avail == txr->num_desc) {
4049                 txr->queue_status = IGB_QUEUE_IDLE;
                return (FALSE);
4051         }
4052
4053         /* Get work starting point */
4054         work = txr->next_to_clean;
4055         buf = &txr->tx_buffers[work];
4056         txd = &txr->tx_base[work];
4057         work -= txr->num_desc; /* The distance to ring end */
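        /*
        ** work is unsigned, so after the subtraction it counts up
        ** toward zero; reaching zero means the scan wrapped past the
        ** ring end, and adding num_desc back below recovers the index.
        */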
4058         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4059             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4060         do {
4061                 union e1000_adv_tx_desc *eop = buf->eop;
4062                 if (eop == NULL) /* No work */
4063                         break;
4064
4065                 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4066                         break;  /* I/O not complete */
4067
4068                 if (buf->m_head) {
4069                         txr->bytes +=
4070                             buf->m_head->m_pkthdr.len;
4071                         bus_dmamap_sync(txr->txtag,
4072                             buf->map,
4073                             BUS_DMASYNC_POSTWRITE);
4074                         bus_dmamap_unload(txr->txtag,
4075                             buf->map);
4076                         m_freem(buf->m_head);
4077                         buf->m_head = NULL;
4078                 }
4079                 buf->eop = NULL;
4080                 ++txr->tx_avail;
4081
4082                 /* We clean the range if multi segment */
4083                 while (txd != eop) {
4084                         ++txd;
4085                         ++buf;
4086                         ++work;
4087                         /* wrap the ring? */
4088                         if (__predict_false(!work)) {
4089                                 work -= txr->num_desc;
4090                                 buf = txr->tx_buffers;
4091                                 txd = txr->tx_base;
4092                         }
4093                         if (buf->m_head) {
4094                                 txr->bytes +=
4095                                     buf->m_head->m_pkthdr.len;
4096                                 bus_dmamap_sync(txr->txtag,
4097                                     buf->map,
4098                                     BUS_DMASYNC_POSTWRITE);
4099                                 bus_dmamap_unload(txr->txtag,
4100                                     buf->map);
4101                                 m_freem(buf->m_head);
4102                                 buf->m_head = NULL;
4103                         }
4104                         ++txr->tx_avail;
4105                         buf->eop = NULL;
4106
4107                 }
4108                 ++txr->packets;
4109                 ++processed;
4110                 txr->watchdog_time = ticks;
4111
4112                 /* Try the next packet */
4113                 ++txd;
4114                 ++buf;
4115                 ++work;
4116                 /* reset with a wrap */
4117                 if (__predict_false(!work)) {
4118                         work -= txr->num_desc;
4119                         buf = txr->tx_buffers;
4120                         txd = txr->tx_base;
4121                 }
4122                 prefetch(txd);
4123         } while (__predict_true(--limit));
4124
4125         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4126             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4127
4128         work += txr->num_desc;
4129         txr->next_to_clean = work;
4130
4131         /*
        ** Watchdog calculation: we know there's work
        ** outstanding or the first return would have been
        ** taken, so nothing processed for too long
        ** indicates a hang.
4136         */
4137         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4138                 txr->queue_status |= IGB_QUEUE_HUNG;
4139
4140         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4141                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;       
4142
4143         if (txr->tx_avail == txr->num_desc) {
4144                 txr->queue_status = IGB_QUEUE_IDLE;
4145                 return (FALSE);
4146         }
4147
4148         return (TRUE);
4149 }
4150
4151 /*********************************************************************
4152  *
4153  *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state, so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, so it can simply
 *     be called again later to retry.
4158  *
4159  **********************************************************************/
4160 static void
4161 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4162 {
4163         struct adapter          *adapter = rxr->adapter;
4164         bus_dma_segment_t       hseg[1];
4165         bus_dma_segment_t       pseg[1];
4166         struct igb_rx_buf       *rxbuf;
4167         struct mbuf             *mh, *mp;
4168         int                     i, j, nsegs, error;
4169         bool                    refreshed = FALSE;
4170
4171         i = j = rxr->next_to_refresh;
4172         /*
4173         ** Get one descriptor beyond
4174         ** our work mark to control
4175         ** the loop.
4176         */
4177         if (++j == adapter->num_rx_desc)
4178                 j = 0;
4179
4180         while (j != limit) {
4181                 rxbuf = &rxr->rx_buffers[i];
4182                 /* No hdr mbuf used with header split off */
4183                 if (rxr->hdr_split == FALSE)
4184                         goto no_split;
4185                 if (rxbuf->m_head == NULL) {
4186                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4187                         if (mh == NULL)
4188                                 goto update;
4189                 } else
4190                         mh = rxbuf->m_head;
4191
4192                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4194                 mh->m_flags |= M_PKTHDR;
4195                 /* Get the memory mapping */
4196                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4197                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4198                 if (error != 0) {
4199                         printf("Refresh mbufs: hdr dmamap load"
4200                             " failure - %d\n", error);
4201                         m_free(mh);
4202                         rxbuf->m_head = NULL;
4203                         goto update;
4204                 }
4205                 rxbuf->m_head = mh;
4206                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4207                     BUS_DMASYNC_PREREAD);
4208                 rxr->rx_base[i].read.hdr_addr =
4209                     htole64(hseg[0].ds_addr);
4210 no_split:
4211                 if (rxbuf->m_pack == NULL) {
4212                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4213                             M_PKTHDR, adapter->rx_mbuf_sz);
4214                         if (mp == NULL)
4215                                 goto update;
4216                 } else
4217                         mp = rxbuf->m_pack;
4218
4219                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4220                 /* Get the memory mapping */
4221                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4222                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4223                 if (error != 0) {
4224                         printf("Refresh mbufs: payload dmamap load"
4225                             " failure - %d\n", error);
4226                         m_free(mp);
4227                         rxbuf->m_pack = NULL;
4228                         goto update;
4229                 }
4230                 rxbuf->m_pack = mp;
4231                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4232                     BUS_DMASYNC_PREREAD);
4233                 rxr->rx_base[i].read.pkt_addr =
4234                     htole64(pseg[0].ds_addr);
4235                 refreshed = TRUE; /* I feel wefreshed :) */
4236
4237                 i = j; /* our next is precalculated */
4238                 rxr->next_to_refresh = i;
4239                 if (++j == adapter->num_rx_desc)
4240                         j = 0;
4241         }
4242 update:
4243         if (refreshed) /* update tail */
4244                 E1000_WRITE_REG(&adapter->hw,
4245                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4246         return;
4247 }
4248
4249
4250 /*********************************************************************
4251  *
4252  *  Allocate memory for rx_buffer structures. Since we use one
4253  *  rx_buffer per received packet, the maximum number of rx_buffer's
4254  *  that we'll need is equal to the number of receive descriptors
4255  *  that we've allocated.
4256  *
4257  **********************************************************************/
4258 static int
4259 igb_allocate_receive_buffers(struct rx_ring *rxr)
4260 {
4261         struct  adapter         *adapter = rxr->adapter;
4262         device_t                dev = adapter->dev;
4263         struct igb_rx_buf       *rxbuf;
4264         int                     i, bsize, error;
4265
4266         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4267         if (!(rxr->rx_buffers =
4268             (struct igb_rx_buf *) malloc(bsize,
4269             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4270                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4271                 error = ENOMEM;
4272                 goto fail;
4273         }
4274
4275         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4276                                    1, 0,                /* alignment, bounds */
4277                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4278                                    BUS_SPACE_MAXADDR,   /* highaddr */
4279                                    NULL, NULL,          /* filter, filterarg */
4280                                    MSIZE,               /* maxsize */
4281                                    1,                   /* nsegments */
4282                                    MSIZE,               /* maxsegsize */
4283                                    0,                   /* flags */
4284                                    NULL,                /* lockfunc */
4285                                    NULL,                /* lockfuncarg */
4286                                    &rxr->htag))) {
4287                 device_printf(dev, "Unable to create RX DMA tag\n");
4288                 goto fail;
4289         }
4290
4291         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4292                                    1, 0,                /* alignment, bounds */
4293                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4294                                    BUS_SPACE_MAXADDR,   /* highaddr */
4295                                    NULL, NULL,          /* filter, filterarg */
4296                                    MJUM9BYTES,          /* maxsize */
4297                                    1,                   /* nsegments */
4298                                    MJUM9BYTES,          /* maxsegsize */
4299                                    0,                   /* flags */
4300                                    NULL,                /* lockfunc */
4301                                    NULL,                /* lockfuncarg */
4302                                    &rxr->ptag))) {
4303                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4304                 goto fail;
4305         }
4306
4307         for (i = 0; i < adapter->num_rx_desc; i++) {
4308                 rxbuf = &rxr->rx_buffers[i];
4309                 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4310                 if (error) {
4311                         device_printf(dev,
4312                             "Unable to create RX head DMA maps\n");
4313                         goto fail;
4314                 }
4315                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4316                 if (error) {
4317                         device_printf(dev,
4318                             "Unable to create RX packet DMA maps\n");
4319                         goto fail;
4320                 }
4321         }
4322
4323         return (0);
4324
4325 fail:
4326         /* Frees all, but can handle partial completion */
4327         igb_free_receive_structures(adapter);
4328         return (error);
4329 }
4330
4331
4332 static void
4333 igb_free_receive_ring(struct rx_ring *rxr)
4334 {
4335         struct  adapter         *adapter = rxr->adapter;
4336         struct igb_rx_buf       *rxbuf;
4337
4338
4339         for (int i = 0; i < adapter->num_rx_desc; i++) {
4340                 rxbuf = &rxr->rx_buffers[i];
4341                 if (rxbuf->m_head != NULL) {
4342                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4343                             BUS_DMASYNC_POSTREAD);
4344                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4345                         rxbuf->m_head->m_flags |= M_PKTHDR;
4346                         m_freem(rxbuf->m_head);
4347                 }
4348                 if (rxbuf->m_pack != NULL) {
4349                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4350                             BUS_DMASYNC_POSTREAD);
4351                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4352                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4353                         m_freem(rxbuf->m_pack);
4354                 }
4355                 rxbuf->m_head = NULL;
4356                 rxbuf->m_pack = NULL;
4357         }
4358 }
4359
4360
4361 /*********************************************************************
4362  *
4363  *  Initialize a receive ring and its buffers.
4364  *
4365  **********************************************************************/
4366 static int
4367 igb_setup_receive_ring(struct rx_ring *rxr)
4368 {
4369         struct  adapter         *adapter;
4370         struct  ifnet           *ifp;
4371         device_t                dev;
4372         struct igb_rx_buf       *rxbuf;
4373         bus_dma_segment_t       pseg[1], hseg[1];
4374         struct lro_ctrl         *lro = &rxr->lro;
4375         int                     rsize, nsegs, error = 0;
4376 #ifdef DEV_NETMAP
4377         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4378         struct netmap_slot *slot;
4379 #endif /* DEV_NETMAP */
4380
4381         adapter = rxr->adapter;
4382         dev = adapter->dev;
4383         ifp = adapter->ifp;
4384
4385         /* Clear the ring contents */
4386         IGB_RX_LOCK(rxr);
4387 #ifdef DEV_NETMAP
4388         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4389 #endif /* DEV_NETMAP */
4390         rsize = roundup2(adapter->num_rx_desc *
4391             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4392         bzero((void *)rxr->rx_base, rsize);
4393
4394         /*
4395         ** Free current RX buffer structures and their mbufs
4396         */
4397         igb_free_receive_ring(rxr);
4398
4399         /* Configure for header split? */
4400         if (igb_header_split)
4401                 rxr->hdr_split = TRUE;
4402
4403         /* Now replenish the ring mbufs */
4404         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4405                 struct mbuf     *mh, *mp;
4406
4407                 rxbuf = &rxr->rx_buffers[j];
4408 #ifdef DEV_NETMAP
4409                 if (slot) {
4410                         /* slot sj is mapped to the j-th NIC-ring entry */
4411                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4412                         uint64_t paddr;
4413                         void *addr;
4414
4415                         addr = PNMB(na, slot + sj, &paddr);
4416                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4417                         /* Update descriptor */
4418                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4419                         continue;
4420                 }
4421 #endif /* DEV_NETMAP */
4422                 if (rxr->hdr_split == FALSE)
4423                         goto skip_head;
4424
4425                 /* First the header */
4426                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4427                 if (rxbuf->m_head == NULL) {
4428                         error = ENOBUFS;
4429                         goto fail;
4430                 }
4431                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4432                 mh = rxbuf->m_head;
4433                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4434                 mh->m_flags |= M_PKTHDR;
4435                 /* Get the memory mapping */
4436                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4437                     rxbuf->hmap, rxbuf->m_head, hseg,
4438                     &nsegs, BUS_DMA_NOWAIT);
4439                 if (error != 0) /* Nothing elegant to do here */
4440                         goto fail;
4441                 bus_dmamap_sync(rxr->htag,
4442                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4443                 /* Update descriptor */
4444                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4445
4446 skip_head:
4447                 /* Now the payload cluster */
4448                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4449                     M_PKTHDR, adapter->rx_mbuf_sz);
4450                 if (rxbuf->m_pack == NULL) {
4451                         error = ENOBUFS;
4452                         goto fail;
4453                 }
4454                 mp = rxbuf->m_pack;
4455                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4456                 /* Get the memory mapping */
4457                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4458                     rxbuf->pmap, mp, pseg,
4459                     &nsegs, BUS_DMA_NOWAIT);
4460                 if (error != 0)
4461                         goto fail;
4462                 bus_dmamap_sync(rxr->ptag,
4463                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4464                 /* Update descriptor */
4465                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4466         }
4467
4468         /* Setup our descriptor indices */
4469         rxr->next_to_check = 0;
4470         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4471         rxr->lro_enabled = FALSE;
4472         rxr->rx_split_packets = 0;
4473         rxr->rx_bytes = 0;
4474
4475         rxr->fmp = NULL;
4476         rxr->lmp = NULL;
4477
4478         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4479             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4480
4481         /*
4482         ** Now set up the LRO interface; we
4483         ** also only do header split when LRO
4484         ** is enabled, since header split is
4485         ** so often undesirable without it.
4486         */
4487         if (ifp->if_capenable & IFCAP_LRO) {
4488                 error = tcp_lro_init(lro);
4489                 if (error) {
4490                         device_printf(dev, "LRO Initialization failed!\n");
4491                         goto fail;
4492                 }
4493                 INIT_DEBUGOUT("RX LRO Initialized\n");
4494                 rxr->lro_enabled = TRUE;
4495                 lro->ifp = adapter->ifp;
4496         }
4497
4498         IGB_RX_UNLOCK(rxr);
4499         return (0);
4500
4501 fail:
4502         igb_free_receive_ring(rxr);
4503         IGB_RX_UNLOCK(rxr);
4504         return (error);
4505 }
4506
4507
4508 /*********************************************************************
4509  *
4510  *  Initialize all receive rings.
4511  *
4512  **********************************************************************/
4513 static int
4514 igb_setup_receive_structures(struct adapter *adapter)
4515 {
4516         struct rx_ring *rxr = adapter->rx_rings;
4517         int i;
4518
4519         for (i = 0; i < adapter->num_queues; i++, rxr++)
4520                 if (igb_setup_receive_ring(rxr))
4521                         goto fail;
4522
4523         return (0);
4524 fail:
4525         /*
4526          * Free RX buffers allocated so far, we will only handle
4527          * the rings that completed, the failing case will have
4528          * cleaned up for itself. 'i' is the endpoint.
4529          */
4530         for (int j = 0; j < i; ++j) {
4531                 rxr = &adapter->rx_rings[j];
4532                 IGB_RX_LOCK(rxr);
4533                 igb_free_receive_ring(rxr);
4534                 IGB_RX_UNLOCK(rxr);
4535         }
4536
4537         return (ENOBUFS);
4538 }
4539
4540 /*
4541  * Initialise the RSS mapping for NICs that support multiple transmit/
4542  * receive rings.
4543  */
4544 static void
4545 igb_initialise_rss_mapping(struct adapter *adapter)
4546 {
4547         struct e1000_hw *hw = &adapter->hw;
4548         int i;
4549         int queue_id;
4550         u32 reta;
4551         u32 rss_key[10], mrqc, shift = 0;
4552
4553         /* XXX? */
4554         if (adapter->hw.mac.type == e1000_82575)
4555                 shift = 6;
4556
4557         /*
4558          * The redirection table controls which destination
4559          * queue each bucket redirects traffic to.
4560          * Each DWORD represents four queues, with the LSB
4561          * being the first queue in the DWORD.
4562          *
4563          * This just allocates buckets to queues using round-robin
4564          * allocation.
4565          *
4566          * NOTE: It Just Happens to line up with the default
4567          * RSS allocation method.
4568          */
4569
4570         /* Warning FM follows */
4571         reta = 0;
4572         for (i = 0; i < 128; i++) {
4573 #ifdef  RSS
4574                 queue_id = rss_get_indirection_to_bucket(i);
4575                 /*
4576                  * If we have more queues than buckets, we'll
4577                  * end up mapping buckets to a subset of the
4578                  * queues.
4579                  *
4580                  * If we have more buckets than queues, we'll
4581                  * end up instead assigning multiple buckets
4582                  * to queues.
4583                  *
4584                  * Both are suboptimal, but we need to handle
4585                  * the case so we don't go out of bounds
4586                  * indexing arrays and such.
4587                  */
4588                 queue_id = queue_id % adapter->num_queues;
4589 #else
4590                 queue_id = (i % adapter->num_queues);
4591 #endif
4592                 /* Adjust if required */
4593                 queue_id = queue_id << shift;
4594
4595                 /*
4596                  * The low 8 bits are for hash value (n+0);
4597                  * The next 8 bits are for hash value (n+1), etc.
4598                  */
4599                 reta = reta >> 8;
4600                 reta = reta | (((uint32_t) queue_id) << 24);
4601                 if ((i & 3) == 3) {
4602                         E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4603                         reta = 0;
4604                 }
4605         }
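             /*
             ** Worked example of the packing above, with 4 queues
             ** and shift == 0: buckets 0..3 land in queues 0..3,
             ** each queue id is rotated in through the top byte,
             ** and after four iterations reta == 0x03020100, i.e.
             ** the LSB holds the first queue of the DWORD.  A
             ** standalone sketch of the same packing:
             */
     #if 0
             uint32_t reta_sketch = 0;

             for (int bucket = 0; bucket < 4; bucket++) {
                     reta_sketch >>= 8;
                     reta_sketch |= ((uint32_t)(bucket % 4)) << 24;
             }
             /* reta_sketch == 0x03020100 here */
     #endif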
4606
4607         /* Now fill in hash table */
4608
4609         /*
4610          * MRQC: Multiple Receive Queues Command
4611          * Set queuing to RSS control, number depends on the device.
4612          */
4613         mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4614
4615 #ifdef  RSS
4616         /* XXX ew typecasting */
4617         rss_getkey((uint8_t *) &rss_key);
4618 #else
4619         arc4rand(&rss_key, sizeof(rss_key), 0);
4620 #endif
4621         for (i = 0; i < 10; i++)
4622                 E1000_WRITE_REG_ARRAY(hw,
4623                     E1000_RSSRK(0), i, rss_key[i]);
4624
4625         /*
4626          * Configure the RSS fields to hash upon.
4627          */
4628         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4629             E1000_MRQC_RSS_FIELD_IPV4_TCP);
4630         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4631             E1000_MRQC_RSS_FIELD_IPV6_TCP);
4632         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4633             E1000_MRQC_RSS_FIELD_IPV6_UDP);
4634         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4635             E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4636
4637         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4638 }
4639
4640 /*********************************************************************
4641  *
4642  *  Enable receive unit.
4643  *
4644  **********************************************************************/
4645 static void
4646 igb_initialize_receive_units(struct adapter *adapter)
4647 {
4648         struct rx_ring  *rxr = adapter->rx_rings;
4649         struct ifnet    *ifp = adapter->ifp;
4650         struct e1000_hw *hw = &adapter->hw;
4651         u32             rctl, rxcsum, psize, srrctl = 0;
4652
4653         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4654
4655         /*
4656          * Make sure receives are disabled while setting
4657          * up the descriptor ring
4658          */
4659         rctl = E1000_READ_REG(hw, E1000_RCTL);
4660         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4661
4662         /*
4663         ** Set up for header split
4664         */
4665         if (igb_header_split) {
4666                 /* Use a standard mbuf for the header */
4667                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4668                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4669         } else
4670                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4671
4672         /*
4673         ** Set up for jumbo frames
4674         */
4675         if (ifp->if_mtu > ETHERMTU) {
4676                 rctl |= E1000_RCTL_LPE;
4677                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4678                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4679                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4680                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4681                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4682                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4683                 }
4684                 /* Set maximum packet len */
4685                 psize = adapter->max_frame_size;
4686                 /* are we on a vlan? */
4687                 if (adapter->ifp->if_vlantrunk != NULL)
4688                         psize += VLAN_TAG_SIZE;
4689                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4690         } else {
4691                 rctl &= ~E1000_RCTL_LPE;
4692                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4693                 rctl |= E1000_RCTL_SZ_2048;
4694         }
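             /*
             ** Buffer-size arithmetic above: the SRRCTL BSIZEPKT
             ** field is expressed in 1 KB units, so (assuming the
             ** usual E1000_SRRCTL_BSIZEPKT_SHIFT of 10) the values
             ** written are 4096 >> 10 == 4, 8192 >> 10 == 8 and
             ** 2048 >> 10 == 2 respectively.
             */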
4695
4696         /*
4697          * If TX flow control is disabled and there's >1 queue defined,
4698          * enable DROP.
4699          *
4700          * This drops frames rather than hanging the RX MAC for all queues.
4701          */
4702         if ((adapter->num_queues > 1) &&
4703             (adapter->fc == e1000_fc_none ||
4704              adapter->fc == e1000_fc_rx_pause)) {
4705                 srrctl |= E1000_SRRCTL_DROP_EN;
4706         }
4707
4708         /* Setup the Base and Length of the Rx Descriptor Rings */
4709         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4710                 u64 bus_addr = rxr->rxdma.dma_paddr;
4711                 u32 rxdctl;
4712
4713                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4714                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4715                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4716                     (uint32_t)(bus_addr >> 32));
4717                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4718                     (uint32_t)bus_addr);
4719                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4720                 /* Enable this Queue */
4721                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4722                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4723                 rxdctl &= 0xFFF00000;
4724                 rxdctl |= IGB_RX_PTHRESH;
4725                 rxdctl |= IGB_RX_HTHRESH << 8;
4726                 rxdctl |= IGB_RX_WTHRESH << 16;
4727                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4728         }
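             /*
             ** RXDCTL packing used above: the &= 0xFFF00000 keeps
             ** bits 20-31 and clears the three threshold fields,
             ** which then land in byte 0 (PTHRESH), byte 1
             ** (HTHRESH) and bits 16-19 (WTHRESH).  E.g. with
             ** thresholds of 8/8/4 the low bits read 0x00040808.
             */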
4729
4730         /*
4731         ** Setup for RX MultiQueue
4732         */
4733         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4734         if (adapter->num_queues > 1) {
4736                 /* rss setup */
4737                 igb_initialise_rss_mapping(adapter);
4738
4739                 /*
4740                 ** NOTE: Receive Full-Packet Checksum Offload 
4741                 ** is mutually exclusive with Multiqueue. However,
4742                 ** this is not the same as TCP/IP checksum offload,
4743                 ** which still works.
4744                 */
4745                 rxcsum |= E1000_RXCSUM_PCSD;
4746 #if __FreeBSD_version >= 800000
4747                 /* For SCTP Offload */
4748                 if ((hw->mac.type != e1000_82575) &&
4749                     (ifp->if_capenable & IFCAP_RXCSUM))
4750                         rxcsum |= E1000_RXCSUM_CRCOFL;
4751 #endif
4752         } else {
4753                 /* Non RSS setup */
4754                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4755                         rxcsum |= E1000_RXCSUM_IPPCSE;
4756 #if __FreeBSD_version >= 800000
4757                         if (adapter->hw.mac.type != e1000_82575)
4758                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4759 #endif
4760                 } else
4761                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4762         }
4763         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4764
4765         /* Setup the Receive Control Register */
4766         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4767         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4768                    E1000_RCTL_RDMTS_HALF |
4769                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4770         /* Strip CRC bytes. */
4771         rctl |= E1000_RCTL_SECRC;
4772         /* Make sure VLAN Filters are off */
4773         rctl &= ~E1000_RCTL_VFE;
4774         /* Don't store bad packets */
4775         rctl &= ~E1000_RCTL_SBP;
4776
4777         /* Enable Receives */
4778         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4779
4780         /*
4781          * Setup the HW Rx Head and Tail Descriptor Pointers
4782          *   - needs to be after enable
4783          */
4784         for (int i = 0; i < adapter->num_queues; i++) {
4785                 rxr = &adapter->rx_rings[i];
4786                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4787 #ifdef DEV_NETMAP
4788                 /*
4789                  * an init() while a netmap client is active must
4790                  * preserve the rx buffers passed to userspace.
4791                  * In this driver it means we adjust RDT to
4792                  * something different from next_to_refresh
4793                  * (which is not used in netmap mode).
4794                  */
4795                 if (ifp->if_capenable & IFCAP_NETMAP) {
4796                         struct netmap_adapter *na = NA(adapter->ifp);
4797                         struct netmap_kring *kring = &na->rx_rings[i];
4798                         int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4799
4800                         if (t >= adapter->num_rx_desc)
4801                                 t -= adapter->num_rx_desc;
4802                         else if (t < 0)
4803                                 t += adapter->num_rx_desc;
4804                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4805                 } else
4806 #endif /* DEV_NETMAP */
4807                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4808         }
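             /*
             ** E.g. with num_rx_desc == 1024, next_to_refresh == 10
             ** and nm_kr_rxspace(kring) == 50, t == -40 wraps to
             ** 984, keeping the 50 slots still owned by userspace
             ** out of the NIC's hands.
             */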
4809         return;
4810 }
4811
4812 /*********************************************************************
4813  *
4814  *  Free receive rings.
4815  *
4816  **********************************************************************/
4817 static void
4818 igb_free_receive_structures(struct adapter *adapter)
4819 {
4820         struct rx_ring *rxr = adapter->rx_rings;
4821
4822         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4823                 struct lro_ctrl *lro = &rxr->lro;
4824                 igb_free_receive_buffers(rxr);
4825                 tcp_lro_free(lro);
4826                 igb_dma_free(adapter, &rxr->rxdma);
4827         }
4828
4829         free(adapter->rx_rings, M_DEVBUF);
4830 }
4831
4832 /*********************************************************************
4833  *
4834  *  Free receive ring data structures.
4835  *
4836  **********************************************************************/
4837 static void
4838 igb_free_receive_buffers(struct rx_ring *rxr)
4839 {
4840         struct adapter          *adapter = rxr->adapter;
4841         struct igb_rx_buf       *rxbuf;
4842         int i;
4843
4844         INIT_DEBUGOUT("free_receive_structures: begin");
4845
4846         /* Cleanup any existing buffers */
4847         if (rxr->rx_buffers != NULL) {
4848                 for (i = 0; i < adapter->num_rx_desc; i++) {
4849                         rxbuf = &rxr->rx_buffers[i];
4850                         if (rxbuf->m_head != NULL) {
4851                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4852                                     BUS_DMASYNC_POSTREAD);
4853                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4854                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4855                                 m_freem(rxbuf->m_head);
4856                         }
4857                         if (rxbuf->m_pack != NULL) {
4858                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4859                                     BUS_DMASYNC_POSTREAD);
4860                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4861                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4862                                 m_freem(rxbuf->m_pack);
4863                         }
4864                         rxbuf->m_head = NULL;
4865                         rxbuf->m_pack = NULL;
4866                         if (rxbuf->hmap != NULL) {
4867                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4868                                 rxbuf->hmap = NULL;
4869                         }
4870                         if (rxbuf->pmap != NULL) {
4871                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4872                                 rxbuf->pmap = NULL;
4873                         }
4874                 }
4875                 if (rxr->rx_buffers != NULL) {
4876                         free(rxr->rx_buffers, M_DEVBUF);
4877                         rxr->rx_buffers = NULL;
4878                 }
4879         }
4880
4881         if (rxr->htag != NULL) {
4882                 bus_dma_tag_destroy(rxr->htag);
4883                 rxr->htag = NULL;
4884         }
4885         if (rxr->ptag != NULL) {
4886                 bus_dma_tag_destroy(rxr->ptag);
4887                 rxr->ptag = NULL;
4888         }
4889 }
4890
4891 static __inline void
4892 igb_rx_discard(struct rx_ring *rxr, int i)
4893 {
4894         struct igb_rx_buf       *rbuf;
4895
4896         rbuf = &rxr->rx_buffers[i];
4897
4898         /* Partially received? Free the chain */
4899         if (rxr->fmp != NULL) {
4900                 rxr->fmp->m_flags |= M_PKTHDR;
4901                 m_freem(rxr->fmp);
4902                 rxr->fmp = NULL;
4903                 rxr->lmp = NULL;
4904         }
4905
4906         /*
4907         ** With advanced descriptors the writeback
4908         ** clobbers the buffer addrs, so it's easier
4909         ** to just free the existing mbufs and take
4910         ** the normal refresh path to get new buffers
4911         ** and mapping.
4912         */
4913         if (rbuf->m_head) {
4914                 m_free(rbuf->m_head);
4915                 rbuf->m_head = NULL;
4916                 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4917         }
4918
4919         if (rbuf->m_pack) {
4920                 m_free(rbuf->m_pack);
4921                 rbuf->m_pack = NULL;
4922                 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4923         }
4924
4925         return;
4926 }
4927
4928 static __inline void
4929 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4930 {
4931
4932         /*
4933          * At the moment LRO is only done for IPv4/TCP packets whose TCP
4934          * checksum has been verified by hardware, and which carry no
4935          * VLAN tag in the Ethernet header.
4936          */
4937         if (rxr->lro_enabled &&
4938             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4939             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4940             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4941             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4942             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4943             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4944                 /*
4945                  * Send to the stack if:
4946                  *  - LRO not enabled, or
4947                  *  - no LRO resources, or
4948                  *  - LRO enqueue fails
4949                  */
4950                 if (rxr->lro.lro_cnt != 0)
4951                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4952                                 return;
4953         }
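             /*
             ** tcp_lro_rx() returns 0 when it has taken ownership
             ** of the mbuf (queued for aggregation), in which case
             ** we return above; any other result falls through to
             ** the plain if_input() path below.
             */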
4954         IGB_RX_UNLOCK(rxr);
4955         (*ifp->if_input)(ifp, m);
4956         IGB_RX_LOCK(rxr);
4957 }
4958
4959 /*********************************************************************
4960  *
4961  *  This routine executes in interrupt context. It replenishes
4962  *  the mbufs in the descriptor ring and sends data which has
4963  *  been DMA'd into host memory to the upper layer.
4964  *
4965  *  We loop at most count times if count is > 0, or until done if
4966  *  count < 0.
4967  *
4968  *  Return TRUE if more to clean, FALSE otherwise
4969  *********************************************************************/
4970 static bool
4971 igb_rxeof(struct igb_queue *que, int count, int *done)
4972 {
4973         struct adapter          *adapter = que->adapter;
4974         struct rx_ring          *rxr = que->rxr;
4975         struct ifnet            *ifp = adapter->ifp;
4976         struct lro_ctrl         *lro = &rxr->lro;
4977         int                     i, processed = 0, rxdone = 0;
4978         u32                     ptype, staterr = 0;
4979         union e1000_adv_rx_desc *cur;
4980
4981         IGB_RX_LOCK(rxr);
4982         /* Sync the ring. */
4983         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4984             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4985
4986 #ifdef DEV_NETMAP
4987         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4988                 IGB_RX_UNLOCK(rxr);
4989                 return (FALSE);
4990         }
4991 #endif /* DEV_NETMAP */
4992
4993         /* Main clean loop */
4994         for (i = rxr->next_to_check; count != 0;) {
4995                 struct mbuf             *sendmp, *mh, *mp;
4996                 struct igb_rx_buf       *rxbuf;
4997                 u16                     hlen, plen, hdr, vtag, pkt_info;
4998                 bool                    eop = FALSE;
4999  
5000                 cur = &rxr->rx_base[i];
5001                 staterr = le32toh(cur->wb.upper.status_error);
5002                 if ((staterr & E1000_RXD_STAT_DD) == 0)
5003                         break;
5004                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
5005                         break;
5006                 count--;
5007                 sendmp = mh = mp = NULL;
5008                 cur->wb.upper.status_error = 0;
5009                 rxbuf = &rxr->rx_buffers[i];
5010                 plen = le16toh(cur->wb.upper.length);
5011                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
5012                 if (((adapter->hw.mac.type == e1000_i350) ||
5013                     (adapter->hw.mac.type == e1000_i354)) &&
5014                     (staterr & E1000_RXDEXT_STATERR_LB))
5015                         vtag = be16toh(cur->wb.upper.vlan);
5016                 else
5017                         vtag = le16toh(cur->wb.upper.vlan);
5018                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
5019                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
5020                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
5021
5022                 /*
5023                  * Free the frame (all segments) if we're at EOP and
5024                  * it's an error.
5025                  *
5026                  * The datasheet states that EOP + status is only valid for
5027                  * the final segment in a multi-segment frame.
5028                  */
5029                 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
5030                         adapter->dropped_pkts++;
5031                         ++rxr->rx_discarded;
5032                         igb_rx_discard(rxr, i);
5033                         goto next_desc;
5034                 }
5035
5036                 /*
5037                 ** The way the hardware is configured to
5038                 ** split, it will ONLY use the header buffer
5039                 ** when header split is enabled, otherwise we
5040                 ** get normal behavior, i.e., both header and
5041                 ** payload are DMA'd into the payload buffer.
5042                 **
5043                 ** The fmp test is to catch the case where a
5044                 ** packet spans multiple descriptors, in that
5045                 ** case only the first header is valid.
5046                 */
5047                 if (rxr->hdr_split && rxr->fmp == NULL) {
5048                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5049                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5050                             E1000_RXDADV_HDRBUFLEN_SHIFT;
5051                         if (hlen > IGB_HDR_BUF)
5052                                 hlen = IGB_HDR_BUF;
5053                         mh = rxr->rx_buffers[i].m_head;
5054                         mh->m_len = hlen;
5055                         /* clear buf pointer for refresh */
5056                         rxbuf->m_head = NULL;
5057                         /*
5058                         ** Get the payload length, this
5059                         ** could be zero if it's a small
5060                         ** packet.
5061                         */
5062                         if (plen > 0) {
5063                                 mp = rxr->rx_buffers[i].m_pack;
5064                                 mp->m_len = plen;
5065                                 mh->m_next = mp;
5066                                 /* clear buf pointer */
5067                                 rxbuf->m_pack = NULL;
5068                                 rxr->rx_split_packets++;
5069                         }
5070                 } else {
5071                         /*
5072                         ** Either no header split, or a
5073                         ** secondary piece of a fragmented
5074                         ** split packet.
5075                         */
5076                         mh = rxr->rx_buffers[i].m_pack;
5077                         mh->m_len = plen;
5078                         /* clear buf info for refresh */
5079                         rxbuf->m_pack = NULL;
5080                 }
5081                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
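                     /*
                     ** Header-length extraction above in a nutshell:
                     ** hlen lives in a bit-field of hdr_info, so it
                     ** is masked and shifted out, then clamped to
                     ** IGB_HDR_BUF since the hardware may report
                     ** more header than the buffer we posted holds.
                     */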
5082
5083                 ++processed; /* So we know when to refresh */
5084
5085                 /* Initial frame - setup */
5086                 if (rxr->fmp == NULL) {
5087                         mh->m_pkthdr.len = mh->m_len;
5088                         /* Save the head of the chain */
5089                         rxr->fmp = mh;
5090                         rxr->lmp = mh;
5091                         if (mp != NULL) {
5092                                 /* Add payload if split */
5093                                 mh->m_pkthdr.len += mp->m_len;
5094                                 rxr->lmp = mh->m_next;
5095                         }
5096                 } else {
5097                         /* Chain mbuf's together */
5098                         rxr->lmp->m_next = mh;
5099                         rxr->lmp = rxr->lmp->m_next;
5100                         rxr->fmp->m_pkthdr.len += mh->m_len;
5101                 }
5102
5103                 if (eop) {
5104                         rxr->fmp->m_pkthdr.rcvif = ifp;
5105                         rxr->rx_packets++;
5106                         /* capture data for AIM */
5107                         rxr->packets++;
5108                         rxr->bytes += rxr->fmp->m_pkthdr.len;
5109                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5110
5111                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5112                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
5113
5114                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5115                             (staterr & E1000_RXD_STAT_VP) != 0) {
5116                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5117                                 rxr->fmp->m_flags |= M_VLANTAG;
5118                         }
5119
5120                         /*
5121                          * In the multiqueue case the RXCSUM.PCSD bit is
5122                          * set and never cleared, which means an RSS hash
5123                          * is available for use.
5124                          */
5125                         if (adapter->num_queues > 1) {
5126                                 rxr->fmp->m_pkthdr.flowid = 
5127                                     le32toh(cur->wb.lower.hi_dword.rss);
5128                                 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5129                                 case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5130                                         M_HASHTYPE_SET(rxr->fmp,
5131                                             M_HASHTYPE_RSS_TCP_IPV4);
5132                                         break;
5133                                 case E1000_RXDADV_RSSTYPE_IPV4:
5134                                         M_HASHTYPE_SET(rxr->fmp,
5135                                             M_HASHTYPE_RSS_IPV4);
5136                                         break;
5137                                 case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5138                                         M_HASHTYPE_SET(rxr->fmp,
5139                                             M_HASHTYPE_RSS_TCP_IPV6);
5140                                         break;
5141                                 case E1000_RXDADV_RSSTYPE_IPV6_EX:
5142                                         M_HASHTYPE_SET(rxr->fmp,
5143                                             M_HASHTYPE_RSS_IPV6_EX);
5144                                         break;
5145                                 case E1000_RXDADV_RSSTYPE_IPV6:
5146                                         M_HASHTYPE_SET(rxr->fmp,
5147                                             M_HASHTYPE_RSS_IPV6);
5148                                         break;
5149                                 case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5150                                         M_HASHTYPE_SET(rxr->fmp,
5151                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
5152                                         break;
5153                                 default:
5154                                         M_HASHTYPE_SET(rxr->fmp,
5155                                             M_HASHTYPE_OPAQUE);
5156                                         break;
5157                                 }
5158                         } else {
5159 #ifndef IGB_LEGACY_TX
5160                                 rxr->fmp->m_pkthdr.flowid = que->msix;
5161                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5162 #endif
5163                         }
5164                         sendmp = rxr->fmp;
5165                         /* Make sure to set M_PKTHDR. */
5166                         sendmp->m_flags |= M_PKTHDR;
5167                         rxr->fmp = NULL;
5168                         rxr->lmp = NULL;
5169                 }
5170
5171 next_desc:
5172                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5173                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5174
5175                 /* Advance our pointers to the next descriptor. */
5176                 if (++i == adapter->num_rx_desc)
5177                         i = 0;
5178                 /*
5179                 ** Send to the stack or LRO
5180                 */
5181                 if (sendmp != NULL) {
5182                         rxr->next_to_check = i;
5183                         igb_rx_input(rxr, ifp, sendmp, ptype);
5184                         i = rxr->next_to_check;
5185                         rxdone++;
5186                 }
5187
5188                 /* Every 8 descriptors we go to refresh mbufs */
5189                 if (processed == 8) {
5190                         igb_refresh_mbufs(rxr, i);
5191                         processed = 0;
5192                 }
5193         }
5194
5195         /* Catch any remainders */
5196         if (igb_rx_unrefreshed(rxr))
5197                 igb_refresh_mbufs(rxr, i);
5198
5199         rxr->next_to_check = i;
5200
5201         /*
5202          * Flush any outstanding LRO work
5203          */
5204         tcp_lro_flush_all(lro);
5205
5206         if (done != NULL)
5207                 *done += rxdone;
5208
5209         IGB_RX_UNLOCK(rxr);
5210         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5211 }
5212
5213 /*********************************************************************
5214  *
5215  *  Verify that the hardware indicated that the checksum is valid.
5216  *  Inform the stack about the status of the checksum so that the
5217  *  stack doesn't spend time verifying it.
5218  *
5219  *********************************************************************/
5220 static void
5221 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5222 {
5223         u16 status = (u16)staterr;
5224         u8  errors = (u8) (staterr >> 24);
5225         int sctp;
5226
5227         /* Ignore Checksum bit is set */
5228         if (status & E1000_RXD_STAT_IXSM) {
5229                 mp->m_pkthdr.csum_flags = 0;
5230                 return;
5231         }
5232
5233         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5234             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5235                 sctp = 1;
5236         else
5237                 sctp = 0;
5238         if (status & E1000_RXD_STAT_IPCS) {
5239                 /* Did it pass? */
5240                 if (!(errors & E1000_RXD_ERR_IPE)) {
5241                         /* IP Checksum Good */
5242                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5243                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5244                 } else
5245                         mp->m_pkthdr.csum_flags = 0;
5246         }
5247
5248         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5249                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5250 #if __FreeBSD_version >= 800000
5251                 if (sctp) /* reassign */
5252                         type = CSUM_SCTP_VALID;
5253 #endif
5254                 /* Did it pass? */
5255                 if (!(errors & E1000_RXD_ERR_TCPE)) {
5256                         mp->m_pkthdr.csum_flags |= type;
5257                         if (sctp == 0)
5258                                 mp->m_pkthdr.csum_data = htons(0xffff);
5259                 }
5260         }
5261         return;
5262 }
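     /*
      * Layout assumed by the decomposition above: status bits sit in the
      * low 16 bits of the descriptor's status_error word and error bits
      * in the top byte.  A minimal sketch:
      */
     #if 0
             u32 staterr_ex = 0x00000003;            /* hypothetical: DD | EOP */
             u16 status = (u16)staterr_ex;           /* 0x0003 */
             u8  errors = (u8)(staterr_ex >> 24);    /* 0x00: no errors */
     #endif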
5263
5264 /*
5265  * This routine is run via a vlan
5266  * config EVENT
5267  */
5268 static void
5269 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5270 {
5271         struct adapter  *adapter = ifp->if_softc;
5272         u32             index, bit;
5273
5274         if (ifp->if_softc !=  arg)   /* Not our event */
5275                 return;
5276
5277         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5278                 return;
5279
5280         IGB_CORE_LOCK(adapter);
5281         index = (vtag >> 5) & 0x7F;
5282         bit = vtag & 0x1F;
5283         adapter->shadow_vfta[index] |= (1 << bit);
5284         ++adapter->num_vlans;
5285         /* Change hw filter setting */
5286         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5287                 igb_setup_vlan_hw_support(adapter);
5288         IGB_CORE_UNLOCK(adapter);
5289 }
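     /*
      * VFTA indexing sketch: 4096 VLAN IDs spread over 128 32-bit
      * entries, so bits 5-11 of the tag pick the entry and bits 0-4
      * the bit within it.  E.g. vtag 100: index = (100 >> 5) & 0x7F
      * == 3, bit = 100 & 0x1F == 4, so shadow_vfta[3] |= (1 << 4).
      */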
5290
5291 /*
5292  * This routine is run via a vlan
5293  * unconfig EVENT
5294  */
5295 static void
5296 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5297 {
5298         struct adapter  *adapter = ifp->if_softc;
5299         u32             index, bit;
5300
5301         if (ifp->if_softc !=  arg)
5302                 return;
5303
5304         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5305                 return;
5306
5307         IGB_CORE_LOCK(adapter);
5308         index = (vtag >> 5) & 0x7F;
5309         bit = vtag & 0x1F;
5310         adapter->shadow_vfta[index] &= ~(1 << bit);
5311         --adapter->num_vlans;
5312         /* Change hw filter setting */
5313         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5314                 igb_setup_vlan_hw_support(adapter);
5315         IGB_CORE_UNLOCK(adapter);
5316 }
5317
5318 static void
5319 igb_setup_vlan_hw_support(struct adapter *adapter)
5320 {
5321         struct e1000_hw *hw = &adapter->hw;
5322         struct ifnet    *ifp = adapter->ifp;
5323         u32             reg;
5324
5325         if (adapter->vf_ifp) {
5326                 e1000_rlpml_set_vf(hw,
5327                     adapter->max_frame_size + VLAN_TAG_SIZE);
5328                 return;
5329         }
5330
5331         reg = E1000_READ_REG(hw, E1000_CTRL);
5332         reg |= E1000_CTRL_VME;
5333         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5334
5335         /* Enable the Filter Table */
5336         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5337                 reg = E1000_READ_REG(hw, E1000_RCTL);
5338                 reg &= ~E1000_RCTL_CFIEN;
5339                 reg |= E1000_RCTL_VFE;
5340                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5341         }
5342
5343         /* Update the frame size */
5344         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5345             adapter->max_frame_size + VLAN_TAG_SIZE);
5346
5347         /* Don't bother with table if no vlans */
5348         if ((adapter->num_vlans == 0) ||
5349             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5350                 return;
5351         /*
5352         ** A soft reset zeroes out the VFTA, so
5353         ** we need to repopulate it now.
5354         */
5355         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5356                 if (adapter->shadow_vfta[i] != 0) {
5357                         if (adapter->vf_ifp)
5358                                 e1000_vfta_set_vf(hw,
5359                                     adapter->shadow_vfta[i], TRUE);
5360                         else
5361                                 e1000_write_vfta(hw,
5362                                     i, adapter->shadow_vfta[i]);
5363                 }
5364 }
5365
5366 static void
5367 igb_enable_intr(struct adapter *adapter)
5368 {
5369         /* With RSS set up what to auto clear */
5370         if (adapter->msix_mem) {
5371                 u32 mask = (adapter->que_mask | adapter->link_mask);
5372                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5373                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5374                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5375                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5376                     E1000_IMS_LSC);
5377         } else {
5378                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5379                     IMS_ENABLE_MASK);
5380         }
5381         E1000_WRITE_FLUSH(&adapter->hw);
5382
5383         return;
5384 }
5385
5386 static void
5387 igb_disable_intr(struct adapter *adapter)
5388 {
5389         if (adapter->msix_mem) {
5390                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5391                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5392         } 
5393         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5394         E1000_WRITE_FLUSH(&adapter->hw);
5395         return;
5396 }
5397
5398 /*
5399  * Bit of a misnomer: what this really means is
5400  * to enable OS management of the system, i.e.
5401  * to disable special hardware management features.
5402  */
5403 static void
5404 igb_init_manageability(struct adapter *adapter)
5405 {
5406         if (adapter->has_manage) {
5407                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5408                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5409
5410                 /* disable hardware interception of ARP */
5411                 manc &= ~(E1000_MANC_ARP_EN);
5412
5413                 /* enable receiving management packets to the host */
5414                 manc |= E1000_MANC_EN_MNG2HOST;
5415                 manc2h |= 1 << 5;  /* Mng Port 623 */
5416                 manc2h |= 1 << 6;  /* Mng Port 664 */
5417                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5418                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5419         }
5420 }
5421
5422 /*
5423  * Give control back to hardware management
5424  * controller if there is one.
5425  */
5426 static void
5427 igb_release_manageability(struct adapter *adapter)
5428 {
5429         if (adapter->has_manage) {
5430                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5431
5432                 /* re-enable hardware interception of ARP */
5433                 manc |= E1000_MANC_ARP_EN;
5434                 manc &= ~E1000_MANC_EN_MNG2HOST;
5435
5436                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5437         }
5438 }
5439
5440 /*
5441  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5442  * For ASF and Pass Through versions of f/w this means that
5443  * the driver is loaded. 
5444  *
5445  */
5446 static void
5447 igb_get_hw_control(struct adapter *adapter)
5448 {
5449         u32 ctrl_ext;
5450
5451         if (adapter->vf_ifp)
5452                 return;
5453
5454         /* Let firmware know the driver has taken over */
5455         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5456         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5457             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5458 }
5459
5460 /*
5461  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5462  * For ASF and Pass Through versions of f/w this means that the
5463  * driver is no longer loaded.
5464  *
5465  */
5466 static void
5467 igb_release_hw_control(struct adapter *adapter)
5468 {
5469         u32 ctrl_ext;
5470
5471         if (adapter->vf_ifp)
5472                 return;
5473
5474         /* Let firmware take over control of h/w */
5475         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5476         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5477             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5478 }
5479
5480 static int
5481 igb_is_valid_ether_addr(uint8_t *addr)
5482 {
5483         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5484
5485         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5486                 return (FALSE);
5487         }
5488
5489         return (TRUE);
5490 }
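     /*
      * E.g. 01:00:5e:00:00:01 fails the (addr[0] & 1) test (the group/
      * multicast bit is set) and 00:00:00:00:00:00 fails the bcmp()
      * test; both are rejected as station addresses.
      */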
5491
5492
5493 /*
5494  * Enable PCI Wake On Lan capability
5495  */
5496 static void
5497 igb_enable_wakeup(device_t dev)
5498 {
5499         u16     cap, status;
5500         u8      id;
5501
5502         /* First find the capabilities pointer */
5503         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5504         /* Read the PM Capabilities */
5505         id = pci_read_config(dev, cap, 1);
5506         if (id != PCIY_PMG)     /* Something wrong */
5507                 return;
5508         /* OK, we have the power capabilities, so
5509            now get the status register */
5510         cap += PCIR_POWER_STATUS;
5511         status = pci_read_config(dev, cap, 2);
5512         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5513         pci_write_config(dev, cap, status, 2);
5514         return;
5515 }
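     /*
      * Note: the lookup above assumes the power-management capability is
      * the first entry in the device's PCI capability list.  A more
      * general sketch would walk the list with pci_find_cap(9):
      */
     #if 0
             int pmc;
             u16 status;

             if (pci_find_cap(dev, PCIY_PMG, &pmc) == 0) {
                     status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
                     status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
                     pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
             }
     #endif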
5516
5517 static void
5518 igb_led_func(void *arg, int onoff)
5519 {
5520         struct adapter  *adapter = arg;
5521
5522         IGB_CORE_LOCK(adapter);
5523         if (onoff) {
5524                 e1000_setup_led(&adapter->hw);
5525                 e1000_led_on(&adapter->hw);
5526         } else {
5527                 e1000_led_off(&adapter->hw);
5528                 e1000_cleanup_led(&adapter->hw);
5529         }
5530         IGB_CORE_UNLOCK(adapter);
5531 }
5532
5533 static uint64_t
5534 igb_get_vf_counter(if_t ifp, ift_counter cnt)
5535 {
5536         struct adapter *adapter;
5537         struct e1000_vf_stats *stats;
5538 #ifndef IGB_LEGACY_TX
5539         struct tx_ring *txr;
5540         uint64_t rv;
5541 #endif
5542
5543         adapter = if_getsoftc(ifp);
5544         stats = (struct e1000_vf_stats *)adapter->stats;
5545
5546         switch (cnt) {
5547         case IFCOUNTER_IPACKETS:
5548                 return (stats->gprc);
5549         case IFCOUNTER_OPACKETS:
5550                 return (stats->gptc);
5551         case IFCOUNTER_IBYTES:
5552                 return (stats->gorc);
5553         case IFCOUNTER_OBYTES:
5554                 return (stats->gotc);
5555         case IFCOUNTER_IMCASTS:
5556                 return (stats->mprc);
5557         case IFCOUNTER_IERRORS:
5558                 return (adapter->dropped_pkts);
5559         case IFCOUNTER_OERRORS:
5560                 return (adapter->watchdog_events);
5561 #ifndef IGB_LEGACY_TX
5562         case IFCOUNTER_OQDROPS:
5563                 rv = 0;
5564                 txr = adapter->tx_rings;
5565                 for (int i = 0; i < adapter->num_queues; i++, txr++)
5566                         rv += txr->br->br_drops;
5567                 return (rv);
5568 #endif
5569         default:
5570                 return (if_get_counter_default(ifp, cnt));
5571         }
5572 }
5573
5574 static uint64_t
5575 igb_get_counter(if_t ifp, ift_counter cnt)
5576 {
5577         struct adapter *adapter;
5578         struct e1000_hw_stats *stats;
5579 #ifndef IGB_LEGACY_TX
5580         struct tx_ring *txr;
5581         uint64_t rv;
5582 #endif
5583
5584         adapter = if_getsoftc(ifp);
5585         if (adapter->vf_ifp)
5586                 return (igb_get_vf_counter(ifp, cnt));
5587
5588         stats = (struct e1000_hw_stats *)adapter->stats;
5589
5590         switch (cnt) {
5591         case IFCOUNTER_IPACKETS:
5592                 return (stats->gprc);
5593         case IFCOUNTER_OPACKETS:
5594                 return (stats->gptc);
5595         case IFCOUNTER_IBYTES:
5596                 return (stats->gorc);
5597         case IFCOUNTER_OBYTES:
5598                 return (stats->gotc);
5599         case IFCOUNTER_IMCASTS:
5600                 return (stats->mprc);
5601         case IFCOUNTER_OMCASTS:
5602                 return (stats->mptc);
5603         case IFCOUNTER_IERRORS:
5604                 return (adapter->dropped_pkts + stats->rxerrc +
5605                     stats->crcerrs + stats->algnerrc +
5606                     stats->ruc + stats->roc + stats->cexterr);
5607         case IFCOUNTER_OERRORS:
5608                 return (stats->ecol + stats->latecol +
5609                     adapter->watchdog_events);
5610         case IFCOUNTER_COLLISIONS:
5611                 return (stats->colc);
5612         case IFCOUNTER_IQDROPS:
5613                 return (stats->mpc);
5614 #ifndef IGB_LEGACY_TX
5615         case IFCOUNTER_OQDROPS:
5616                 rv = 0;
5617                 txr = adapter->tx_rings;
5618                 for (int i = 0; i < adapter->num_queues; i++, txr++)
5619                         rv += txr->br->br_drops;
5620                 return (rv);
5621 #endif
5622         default:
5623                 return (if_get_counter_default(ifp, cnt));
5624         }
5625 }
5626
5627 /**********************************************************************
5628  *
5629  *  Update the board statistics counters.
5630  *
5631  **********************************************************************/
5632 static void
5633 igb_update_stats_counters(struct adapter *adapter)
5634 {
5635         struct e1000_hw         *hw = &adapter->hw;
5636         struct e1000_hw_stats   *stats;
5637
5638         /*
5639         ** The virtual function adapter has only a
5640         ** small, controlled set of stats, so update
5641         ** only those and return.
5642         */
5643         if (adapter->vf_ifp) {
5644                 igb_update_vf_stats_counters(adapter);
5645                 return;
5646         }
5647
5648         stats = (struct e1000_hw_stats  *)adapter->stats;
5649
5650         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5651            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5652                 stats->symerrs += E1000_READ_REG(hw, E1000_SYMERRS);
5654                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5655         }
5656
5657         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5658         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5659         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5660         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5661
5662         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5663         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5664         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5665         stats->dc += E1000_READ_REG(hw, E1000_DC);
5666         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5667         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5668         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5669         /*
5670         ** For watchdog management we need to know if we have been
5671         ** paused during the last interval, so capture that here.
5672         */ 
5673         adapter->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC);
5674         stats->xoffrxc += adapter->pause_frames;
5675         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5676         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5677         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5678         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5679         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5680         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5681         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5682         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5683         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5684         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5685         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5686         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5687
5688         /* For the 64-bit byte counters the low dword must be read first. */
5689         /* Both registers clear on the read of the high dword */
5690
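        /*
         * Example: the GORCL read returns bits 31:0 and latches the
         * pair; the subsequent GORCH read supplies bits 63:32 and
         * clears both registers, so reading GORCH first would lose
         * the low dword of the interval's byte count.
         */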
5691         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5692             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5693         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5694             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5695
5696         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5697         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5698         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5699         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5700         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5701
5702         stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5703         stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5704         stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5705
5706         stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5707             ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5708         stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5709             ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5710
5711         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5712         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5713         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5714         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5715         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5716         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5717         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5718         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5719         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5720         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5721
5722         /* Interrupt Counts */
5723
5724         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5725         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5726         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5727         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5728         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5729         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5730         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5731         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5732         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5733
5734         /* Host to Card Statistics */
5735
5736         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5737         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5738         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5739         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5740         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5741         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5742         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5743         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5744             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5745         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5746             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5747         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5748         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5749         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5750
5751         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5752         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5753         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5754         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5755         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5756         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5757
5758         /* Driver specific counters */
5759         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5760         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5761         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5762         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5763         adapter->packet_buf_alloc_tx =
5764             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5765         adapter->packet_buf_alloc_rx =
5766             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5767 }
5768
5769
5770 /**********************************************************************
5771  *
5772  *  Initialize the VF board statistics counters.
5773  *
5774  **********************************************************************/
5775 static void
5776 igb_vf_init_stats(struct adapter *adapter)
5777 {
5778         struct e1000_hw *hw = &adapter->hw;
5779         struct e1000_vf_stats   *stats;
5780
5781         stats = (struct e1000_vf_stats  *)adapter->stats;
5782         if (stats == NULL)
5783                 return;
5784         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5785         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5786         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5787         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5788         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5789 }
5790  
5791 /**********************************************************************
5792  *
5793  *  Update the VF board statistics counters.
5794  *
5795  **********************************************************************/
5796 static void
5797 igb_update_vf_stats_counters(struct adapter *adapter)
5798 {
5799         struct e1000_hw *hw = &adapter->hw;
5800         struct e1000_vf_stats   *stats;
5801
5802         if (adapter->link_speed == 0)
5803                 return;
5804
5805         stats = (struct e1000_vf_stats  *)adapter->stats;
5806
5807         UPDATE_VF_REG(E1000_VFGPRC,
5808             stats->last_gprc, stats->gprc);
5809         UPDATE_VF_REG(E1000_VFGORC,
5810             stats->last_gorc, stats->gorc);
5811         UPDATE_VF_REG(E1000_VFGPTC,
5812             stats->last_gptc, stats->gptc);
5813         UPDATE_VF_REG(E1000_VFGOTC,
5814             stats->last_gotc, stats->gotc);
5815         UPDATE_VF_REG(E1000_VFMPRC,
5816             stats->last_mprc, stats->mprc);
5817 }
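/*
 * UPDATE_VF_REG (defined in if_igb.h) is assumed to read the named
 * VF register, fold the delta since the saved "last" reading into the
 * 64-bit running counter, and save the new reading. A hand-expanded
 * sketch for one counter, assuming at most one 32-bit wrap per update
 * interval (illustrative, not the macro's literal text):
 *
 *      u32 cur = E1000_READ_REG(hw, E1000_VFGPRC);
 *      u32 delta = cur - stats->last_gprc;     // mod 2^32, wrap-safe
 *      stats->gprc += delta;
 *      stats->last_gprc = cur;
 */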
5818
5819 /* Export a single 32-bit register via a read-only sysctl. */
5820 static int
5821 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5822 {
5823         struct adapter *adapter;
5824         u_int val;
5825
5826         adapter = oidp->oid_arg1;
5827         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5828         return (sysctl_handle_int(oidp, &val, 0, req));
5829 }
5830
5831 /*
5832 **  Tunable interrupt rate handler
5833 */
5834 static int
5835 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5836 {
5837         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5838         int                     error;
5839         u32                     reg, usec, rate;
5840                         
5841         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5842         usec = ((reg & 0x7FFC) >> 2);
5843         if (usec > 0)
5844                 rate = 1000000 / usec;
5845         else
5846                 rate = 0;
5847         error = sysctl_handle_int(oidp, &rate, 0, req);
5848         if (error || !req->newptr)
5849                 return (error);
5850         return (0);
5851 }
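/*
 * Worked example (hypothetical register value): if EITR reads
 * 0x000007D0, the interval field ((reg & 0x7FFC) >> 2) is 0x1F4,
 * i.e. 500 usec, so the reported rate is 1000000 / 500 = 2000
 * interrupts per second.
 */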
5852
5853 /*
5854  * Add sysctl variables, one per statistic, to the system.
5855  */
5856 static void
5857 igb_add_hw_stats(struct adapter *adapter)
5858 {
5859         device_t dev = adapter->dev;
5860
5861         struct tx_ring *txr = adapter->tx_rings;
5862         struct rx_ring *rxr = adapter->rx_rings;
5863
5864         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5865         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5866         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5867         struct e1000_hw_stats *stats = adapter->stats;
5868
5869         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5870         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5871
5872 #define QUEUE_NAME_LEN 32
5873         char namebuf[QUEUE_NAME_LEN];
5874
5875         /* Driver Statistics */
5876         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5877                         CTLFLAG_RD, &adapter->dropped_pkts,
5878                         "Driver dropped packets");
5879         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 
5880                         CTLFLAG_RD, &adapter->link_irq,
5881                         "Link MSIX IRQ Handled");
5882         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5883                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5884                         "Defragmenting mbuf chain failed");
5885         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5886                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5887                         "Driver tx dma failure in xmit");
5888         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5889                         CTLFLAG_RD, &adapter->rx_overruns,
5890                         "RX overruns");
5891         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5892                         CTLFLAG_RD, &adapter->watchdog_events,
5893                         "Watchdog timeouts");
5894
5895         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5896                         CTLFLAG_RD, &adapter->device_control,
5897                         "Device Control Register");
5898         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5899                         CTLFLAG_RD, &adapter->rx_control,
5900                         "Receiver Control Register");
5901         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5902                         CTLFLAG_RD, &adapter->int_mask,
5903                         "Interrupt Mask");
5904         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5905                         CTLFLAG_RD, &adapter->eint_mask,
5906                         "Extended Interrupt Mask");
5907         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5908                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5909                         "Transmit Buffer Packet Allocation");
5910         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5911                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5912                         "Receive Buffer Packet Allocation");
5913         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5914                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5915                         "Flow Control High Watermark");
5916         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5917                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5918                         "Flow Control Low Watermark");
5919
5920         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5921                 struct lro_ctrl *lro = &rxr->lro;
5922
5923                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5924                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5925                                             CTLFLAG_RD, NULL, "Queue Name");
5926                 queue_list = SYSCTL_CHILDREN(queue_node);
5927
5928                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5929                                 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5930                                 sizeof(&adapter->queues[i]),
5931                                 igb_sysctl_interrupt_rate_handler,
5932                                 "IU", "Interrupt Rate");
5933
5934                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5935                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5936                                 igb_sysctl_reg_handler, "IU",
5937                                 "Transmit Descriptor Head");
5938                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5939                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5940                                 igb_sysctl_reg_handler, "IU",
5941                                 "Transmit Descriptor Tail");
5942                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5943                                 CTLFLAG_RD, &txr->no_desc_avail,
5944                                 "Queue Descriptors Unavailable");
5945                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5946                                 CTLFLAG_RD, &txr->total_packets,
5947                                 "Queue Packets Transmitted");
5948
5949                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5950                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5951                                 igb_sysctl_reg_handler, "IU",
5952                                 "Receive Descriptor Head");
5953                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5954                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5955                                 igb_sysctl_reg_handler, "IU",
5956                                 "Receive Descriptor Tail");
5957                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5958                                 CTLFLAG_RD, &rxr->rx_packets,
5959                                 "Queue Packets Received");
5960                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5961                                 CTLFLAG_RD, &rxr->rx_bytes,
5962                                 "Queue Bytes Received");
5963                 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
5964                                 CTLFLAG_RD, &lro->lro_queued, 0,
5965                                 "LRO Queued");
5966                 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
5967                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5968                                 "LRO Flushed");
5969         }
5970
5971         /* MAC stats get their own sub node */
5972
5973         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5974                                     CTLFLAG_RD, NULL, "MAC Statistics");
5975         stat_list = SYSCTL_CHILDREN(stat_node);
5976
5977         /*
5978         ** VF adapter has a very limited set of stats
5979         ** since it's not managing the metal, so to speak.
5980         */
5981         if (adapter->vf_ifp) {
5982                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5983                                 CTLFLAG_RD, &stats->gprc,
5984                                 "Good Packets Received");
5985                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5986                                 CTLFLAG_RD, &stats->gptc,
5987                                 "Good Packets Transmitted");
5988                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5989                                 CTLFLAG_RD, &stats->gorc,
5990                                 "Good Octets Received");
5991                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5992                                 CTLFLAG_RD, &stats->gotc,
5993                                 "Good Octets Transmitted");
5994                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5995                                 CTLFLAG_RD, &stats->mprc,
5996                                 "Multicast Packets Received");
5997                 return;
5998         }
5999
6000         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
6001                         CTLFLAG_RD, &stats->ecol,
6002                         "Excessive collisions");
6003         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
6004                         CTLFLAG_RD, &stats->scc,
6005                         "Single collisions");
6006         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
6007                         CTLFLAG_RD, &stats->mcc,
6008                         "Multiple collisions");
6009         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
6010                         CTLFLAG_RD, &stats->latecol,
6011                         "Late collisions");
6012         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
6013                         CTLFLAG_RD, &stats->colc,
6014                         "Collision Count");
6015         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
6016                         CTLFLAG_RD, &stats->symerrs,
6017                         "Symbol Errors");
6018         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
6019                         CTLFLAG_RD, &stats->sec,
6020                         "Sequence Errors");
6021         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
6022                         CTLFLAG_RD, &stats->dc,
6023                         "Defer Count");
6024         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
6025                         CTLFLAG_RD, &stats->mpc,
6026                         "Missed Packets");
6027         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
6028                         CTLFLAG_RD, &stats->rlec,
6029                         "Receive Length Errors");
6030         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
6031                         CTLFLAG_RD, &stats->rnbc,
6032                         "Receive No Buffers");
6033         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
6034                         CTLFLAG_RD, &stats->ruc,
6035                         "Receive Undersize");
6036         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
6037                         CTLFLAG_RD, &stats->rfc,
6038                         "Fragmented Packets Received");
6039         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
6040                         CTLFLAG_RD, &stats->roc,
6041                         "Oversized Packets Received");
6042         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
6043                         CTLFLAG_RD, &stats->rjc,
6044                         "Received Jabber");
6045         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6046                         CTLFLAG_RD, &stats->rxerrc,
6047                         "Receive Errors");
6048         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6049                         CTLFLAG_RD, &stats->crcerrs,
6050                         "CRC errors");
6051         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6052                         CTLFLAG_RD, &stats->algnerrc,
6053                         "Alignment Errors");
6054         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6055                         CTLFLAG_RD, &stats->tncrs,
6056                         "Transmit with No CRS");
6057         /* On 82575 these are collision counts */
6058         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6059                         CTLFLAG_RD, &stats->cexterr,
6060                         "Collision/Carrier extension errors");
6061         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6062                         CTLFLAG_RD, &stats->xonrxc,
6063                         "XON Received");
6064         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6065                         CTLFLAG_RD, &stats->xontxc,
6066                         "XON Transmitted");
6067         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6068                         CTLFLAG_RD, &stats->xoffrxc,
6069                         "XOFF Received");
6070         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6071                         CTLFLAG_RD, &stats->xofftxc,
6072                         "XOFF Transmitted");
6073         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6074                         CTLFLAG_RD, &stats->fcruc,
6075                         "Unsupported Flow Control Received");
6076         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6077                         CTLFLAG_RD, &stats->mgprc,
6078                         "Management Packets Received");
6079         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6080                         CTLFLAG_RD, &stats->mgpdc,
6081                         "Management Packets Dropped");
6082         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6083                         CTLFLAG_RD, &stats->mgptc,
6084                         "Management Packets Transmitted");
6085         /* Packet Reception Stats */
6086         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6087                         CTLFLAG_RD, &stats->tpr,
6088                         "Total Packets Received");
6089         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6090                         CTLFLAG_RD, &stats->gprc,
6091                         "Good Packets Received");
6092         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6093                         CTLFLAG_RD, &stats->bprc,
6094                         "Broadcast Packets Received");
6095         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6096                         CTLFLAG_RD, &stats->mprc,
6097                         "Multicast Packets Received");
6098         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6099                         CTLFLAG_RD, &stats->prc64,
6100                         "64 byte frames received");
6101         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6102                         CTLFLAG_RD, &stats->prc127,
6103                         "65-127 byte frames received");
6104         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6105                         CTLFLAG_RD, &stats->prc255,
6106                         "128-255 byte frames received");
6107         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6108                         CTLFLAG_RD, &stats->prc511,
6109                         "256-511 byte frames received");
6110         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6111                         CTLFLAG_RD, &stats->prc1023,
6112                         "512-1023 byte frames received");
6113         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6114                         CTLFLAG_RD, &stats->prc1522,
6115                         "1024-1522 byte frames received");
6116         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
6117                         CTLFLAG_RD, &stats->gorc, 
6118                         "Good Octets Received");
6119         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", 
6120                         CTLFLAG_RD, &stats->tor, 
6121                         "Total Octets Received");
6122
6123         /* Packet Transmission Stats */
6124         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
6125                         CTLFLAG_RD, &stats->gotc, 
6126                         "Good Octets Transmitted"); 
6127         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", 
6128                         CTLFLAG_RD, &stats->tot, 
6129                         "Total Octets Transmitted");
6130         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6131                         CTLFLAG_RD, &stats->tpt,
6132                         "Total Packets Transmitted");
6133         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6134                         CTLFLAG_RD, &stats->gptc,
6135                         "Good Packets Transmitted");
6136         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6137                         CTLFLAG_RD, &stats->bptc,
6138                         "Broadcast Packets Transmitted");
6139         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6140                         CTLFLAG_RD, &stats->mptc,
6141                         "Multicast Packets Transmitted");
6142         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6143                         CTLFLAG_RD, &stats->ptc64,
6144                         "64 byte frames transmitted");
6145         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6146                         CTLFLAG_RD, &stats->ptc127,
6147                         "65-127 byte frames transmitted");
6148         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6149                         CTLFLAG_RD, &stats->ptc255,
6150                         "128-255 byte frames transmitted");
6151         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6152                         CTLFLAG_RD, &stats->ptc511,
6153                         "256-511 byte frames transmitted");
6154         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6155                         CTLFLAG_RD, &stats->ptc1023,
6156                         "512-1023 byte frames transmitted");
6157         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6158                         CTLFLAG_RD, &stats->ptc1522,
6159                         "1024-1522 byte frames transmitted");
6160         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6161                         CTLFLAG_RD, &stats->tsctc,
6162                         "TSO Contexts Transmitted");
6163         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6164                         CTLFLAG_RD, &stats->tsctfc,
6165                         "TSO Contexts Failed");
6166
6167
6168         /* Interrupt Stats */
6169
6170         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
6171                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
6172         int_list = SYSCTL_CHILDREN(int_node);
6173
6174         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6175                         CTLFLAG_RD, &stats->iac,
6176                         "Interrupt Assertion Count");
6177
6178         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6179                         CTLFLAG_RD, &stats->icrxptc,
6180                         "Interrupt Cause Rx Pkt Timer Expire Count");
6181
6182         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6183                         CTLFLAG_RD, &stats->icrxatc,
6184                         "Interrupt Cause Rx Abs Timer Expire Count");
6185
6186         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6187                         CTLFLAG_RD, &stats->ictxptc,
6188                         "Interrupt Cause Tx Pkt Timer Expire Count");
6189
6190         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6191                         CTLFLAG_RD, &stats->ictxatc,
6192                         "Interrupt Cause Tx Abs Timer Expire Count");
6193
6194         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6195                         CTLFLAG_RD, &stats->ictxqec,
6196                         "Interrupt Cause Tx Queue Empty Count");
6197
6198         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6199                         CTLFLAG_RD, &stats->ictxqmtc,
6200                         "Interrupt Cause Tx Queue Min Thresh Count");
6201
6202         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6203                         CTLFLAG_RD, &stats->icrxdmtc,
6204                         "Interrupt Cause Rx Desc Min Thresh Count");
6205
6206         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6207                         CTLFLAG_RD, &stats->icrxoc,
6208                         "Interrupt Cause Receiver Overrun Count");
6209
6210         /* Host to Card Stats */
6211
6212         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
6213                                     CTLFLAG_RD, NULL, 
6214                                     "Host to Card Statistics");
6215
6216         host_list = SYSCTL_CHILDREN(host_node);
6217
6218         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6219                         CTLFLAG_RD, &stats->cbtmpc,
6220                         "Circuit Breaker Tx Packet Count");
6221
6222         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6223                         CTLFLAG_RD, &stats->htdpmc,
6224                         "Host Transmit Discarded Packets");
6225
6226         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6227                         CTLFLAG_RD, &stats->rpthc,
6228                         "Rx Packets To Host");
6229
6230         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6231                         CTLFLAG_RD, &stats->cbrmpc,
6232                         "Circuit Breaker Rx Packet Count");
6233
6234         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6235                         CTLFLAG_RD, &stats->cbrdpc,
6236                         "Circuit Breaker Rx Dropped Count");
6237
6238         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6239                         CTLFLAG_RD, &stats->hgptc,
6240                         "Host Good Packets Tx Count");
6241
6242         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6243                         CTLFLAG_RD, &stats->htcbdpc,
6244                         "Host Tx Circuit Breaker Dropped Count");
6245
6246         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6247                         CTLFLAG_RD, &stats->hgorc,
6248                         "Host Good Octets Received Count");
6249
6250         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6251                         CTLFLAG_RD, &stats->hgotc,
6252                         "Host Good Octets Transmit Count");
6253
6254         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6255                         CTLFLAG_RD, &stats->lenerrs,
6256                         "Length Errors");
6257
6258         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6259                         CTLFLAG_RD, &stats->scvpc,
6260                         "SerDes/SGMII Code Violation Pkt Count");
6261
6262         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6263                         CTLFLAG_RD, &stats->hrmpc,
6264                         "Header Redirection Missed Packet Count");
6265 }
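/*
 * The nodes added above hang off the device's sysctl tree, so (to
 * give a hypothetical example for unit 0) the per-queue, MAC,
 * interrupt and host counters can be read from userland with
 * commands such as:
 *
 *      sysctl dev.igb.0.queue0.rx_packets
 *      sysctl dev.igb.0.mac_stats.good_pkts_recvd
 *      sysctl dev.igb.0.interrupts.asserts
 */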
6266
6267
6268 /**********************************************************************
6269  *
6270  *  This routine provides a way to dump out the adapter eeprom,
6271  *  often a useful debug/service tool. It dumps only the first
6272  *  32 words, which cover the fields that matter.
6273  *
6274  **********************************************************************/
6275 static int
6276 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6277 {
6278         struct adapter *adapter;
6279         int error;
6280         int result;
6281
6282         result = -1;
6283         error = sysctl_handle_int(oidp, &result, 0, req);
6284
6285         if (error || !req->newptr)
6286                 return (error);
6287
6288         /*
6289          * This value will cause a hex dump of the
6290          * first 32 16-bit words of the EEPROM to
6291          * the screen.
6292          */
6293         if (result == 1) {
6294                 adapter = (struct adapter *)arg1;
6295                 igb_print_nvm_info(adapter);
6296         }
6297
6298         return (error);
6299 }
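/*
 * Usage sketch: assuming this handler is registered under the device
 * tree as "nvm", writing 1 from userland triggers the dump, e.g.
 * for unit 0:
 *
 *      sysctl dev.igb.0.nvm=1
 */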
6300
6301 static void
6302 igb_print_nvm_info(struct adapter *adapter)
6303 {
6304         u16     eeprom_data;
6305         int     i, j, row = 0;
6306
6307         /* It's a bit crude, but it gets the job done */
6308         printf("\nInterface EEPROM Dump:\n");
6309         printf("Offset\n0x0000  ");
6310         for (i = 0, j = 0; i < 32; i++, j++) {
6311                 if (j == 8) { /* Make the offset block */
6312                         j = 0; ++row;
6313                         printf("\n0x00%x0  ", row);
6314                 }
6315                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6316                 printf("%04x ", eeprom_data);
6317         }
6318         printf("\n");
6319 }
6320
6321 static void
6322 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6323         const char *description, int *limit, int value)
6324 {
6325         *limit = value;
6326         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6327             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6328             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6329 }
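/*
 * Typical use is one call per tunable at attach time, e.g. (a sketch;
 * the names here are illustrative):
 *
 *      igb_set_sysctl_value(adapter, "rx_processing_limit",
 *          "max number of rx packets to process",
 *          &adapter->rx_process_limit, 100);
 */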
6330
6331 /*
6332 ** Set flow control using sysctl:
6333 ** Flow control values:
6334 **      0 - off
6335 **      1 - rx pause
6336 **      2 - tx pause
6337 **      3 - full
6338 */
6339 static int
6340 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6341 {
6342         int             error;
6343         static int      input = 3; /* default is full */
6344         struct adapter  *adapter = (struct adapter *) arg1;
6345
6346         error = sysctl_handle_int(oidp, &input, 0, req);
6347
6348         if ((error) || (req->newptr == NULL))
6349                 return (error);
6350
6351         switch (input) {
6352                 case e1000_fc_rx_pause:
6353                 case e1000_fc_tx_pause:
6354                 case e1000_fc_full:
6355                 case e1000_fc_none:
6356                         adapter->hw.fc.requested_mode = input;
6357                         adapter->fc = input;
6358                         break;
6359                 default:
6360                         /* Do nothing */
6361                         return (error);
6362         }
6363
6364         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6365         e1000_force_mac_fc(&adapter->hw);
6366         /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6367         return (error);
6368 }
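/*
 * Example (assuming the handler is registered as "fc" under the
 * device tree): request full flow control on unit 0 with
 *
 *      sysctl dev.igb.0.fc=3
 *
 * and read the current setting back with "sysctl dev.igb.0.fc".
 */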
6369
6370 /*
6371 ** Manage DMA Coalesce:
6372 ** Control values:
6373 **      0/1 - off/on
6374 **      Legal timer values are:
6375 **      250, 500, and 1000-10000 (in steps of 1000)
6376 */
6377 static int
6378 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6379 {
6380         struct adapter *adapter = (struct adapter *) arg1;
6381         int             error;
6382
6383         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6384
6385         if ((error) || (req->newptr == NULL))
6386                 return (error);
6387
6388         switch (adapter->dmac) {
6389                 case 0:
6390                         /* Disabling */
6391                         break;
6392                 case 1: /* Just enable and use default */
6393                         adapter->dmac = 1000;
6394                         break;
6395                 case 250:
6396                 case 500:
6397                 case 1000:
6398                 case 2000:
6399                 case 3000:
6400                 case 4000:
6401                 case 5000:
6402                 case 6000:
6403                 case 7000:
6404                 case 8000:
6405                 case 9000:
6406                 case 10000:
6407                         /* Legal values - allow */
6408                         break;
6409                 default:
6410                         /* Illegal value, disable and reject */
6411                         adapter->dmac = 0;
6412                         return (EINVAL);
6413         }
6414         /* Reinit the interface */
6415         igb_init(adapter);
6416         return (error);
6417 }
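/*
 * Example (assuming the handler is registered as "dmac"): enable DMA
 * coalescing with the default 1000 usec timer on unit 0 via
 *
 *      sysctl dev.igb.0.dmac=1
 *
 * or pick an explicit timer, e.g. "sysctl dev.igb.0.dmac=250".
 */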
6418
6419 /*
6420 ** Manage Energy Efficient Ethernet:
6421 ** Control values:
6422 **     0/1 - enabled/disabled
6423 */
6424 static int
6425 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6426 {
6427         struct adapter  *adapter = (struct adapter *) arg1;
6428         int             error, value;
6429
6430         value = adapter->hw.dev_spec._82575.eee_disable;
6431         error = sysctl_handle_int(oidp, &value, 0, req);
6432         if (error || req->newptr == NULL)
6433                 return (error);
6434         IGB_CORE_LOCK(adapter);
6435         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6436         igb_init_locked(adapter);
6437         IGB_CORE_UNLOCK(adapter);
6438         return (0);
6439 }
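/*
 * Example (assuming the handler is registered as "eee_disabled"):
 * turn EEE off on unit 0 with
 *
 *      sysctl dev.igb.0.eee_disabled=1
 *
 * The interface is reinitialized under the core lock for the change
 * to take effect.
 */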