/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include "if_igb.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "2.5.3-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices to attach to.
 *  The last field stores an index into e1000_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII,  0, 0, 0},
        /* required last entry */
        {0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static uint64_t igb_get_counter(if_t, ift_counter);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static int      igb_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      igb_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(igb, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");
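
/*
 * Illustrative usage (values are examples only): because rxd and txd
 * are boot-time tunables (CTLFLAG_RDTUN), they would be set from
 * /boot/loader.conf before the driver loads, e.g.:
 *
 *      hw.igb.rxd=2048
 *      hw.igb.txd=2048
 */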

/*
** AIM: Adaptive Interrupt Moderation, which varies the
** interrupt rate over time based on the traffic seen
** on that interrupt vector.
*/
static int igb_enable_aim = TRUE;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

/*
 * MSI-X should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be dma'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf, using no cluster at all. It is a very
** workload-dependent feature.
*/
static int igb_header_split = FALSE;
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int igb_tx_process_limit = -1;
SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &igb_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, -1 means unlimited");
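
/*
 * Illustrative usage: enable_aim above is CTLFLAG_RWTUN and so may be
 * flipped at runtime (e.g. "sysctl hw.igb.enable_aim=0"), whereas the
 * CTLFLAG_RDTUN knobs here are read-only after boot and must be set
 * as loader tunables.
 */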

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on an
 *  adapter based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[256];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == 0)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTLs */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctls for limiting the amount of work done in the taskqueues */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        igb_set_sysctl_value(adapter, "tx_processing_limit",
            "max number of tx packets to process",
            &adapter->tx_process_limit, igb_tx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. The
         * count must not exceed the hardware maximum, and the total ring
         * size (count times descriptor size) must be a multiple of
         * IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;
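        /*
         * Worked example (assuming the usual 16-byte descriptor and a
         * 128-byte IGB_DBA_ALIGN): a ring of 1024 descriptors occupies
         * 16384 bytes, which is 128-byte aligned, so 1024 is accepted;
         * any count that is not a multiple of 8 would be rejected above.
         */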

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
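        /* For a standard frame this is 1500 + 14 + 4 = 1518 bytes */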

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats = (struct e1000_vf_stats *)malloc(
                    sizeof(struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats = (struct e1000_hw_stats *)malloc(
                    sizeof(struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                        if (adapter->hw.mac.type == e1000_i354)
                                e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                        else
                                e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
                }
        }

        /*
        ** Start from a known state; this is important
        ** for reading the NVM and MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}

#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX routine: called from the stack, it
 * always uses tx[0] and spins for the lock.
 * It should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;
#ifdef  RSS
        uint32_t                bucket_id;
#endif

        /*
         * Which queue to use:
         *
         * When doing RSS, map the flow to the same outbound queue as
         * the inbound one it would hash to; if everything is set up
         * correctly, that is also the bucket for the CPU we are
         * currently running on.
         */
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef  RSS
                if (rss_hash2bucket(m->m_pkthdr.flowid,
                    M_HASHTYPE_GET(m), &bucket_id) == 0) {
                        /* XXX TODO: spit out something if bucket_id > num_queues? */
                        i = bucket_id % adapter->num_queues;
                } else {
#endif
                        i = m->m_pkthdr.flowid % adapter->num_queues;
#ifdef  RSS
                }
#endif
        } else {
                i = curcpu % adapter->num_queues;
        }
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

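        /*
         * Enqueue on the selected ring's buf_ring; if the TX lock is
         * free, drain the ring now, otherwise leave the work to the
         * queue's taskqueue (igb_deferred_mq_start).
         */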
        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IGB_TX_TRYLOCK(txr)) {
                igb_mq_start_locked(ifp, txr);
                IGB_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (0);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                if (next->m_flags & M_MCAST && adapter->vf_ifp)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
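                /* 9234 less the 14-byte header and 4-byte CRC caps the MTU at 9216 */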
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
                /* FALLTHROUGH */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO6) {
                        ifp->if_capenable ^= IFCAP_TSO6;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: the stack calls it as the
 *  init entry point in the network interface structure, and the
 *  driver uses it as a hw/sw initialization routine to get the
 *  adapter back to a consistent state.
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, user can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if ((adapter->hw.mac.type == e1000_82576) ||
                    (adapter->hw.mac.type == e1000_82580))
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;

        /* Clear bad data from Rx FIFOs */
        e1000_rx_fifo_flush_82575(&adapter->hw);

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
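        /*
         * MCLBYTES is a 2K cluster, MJUMPAGESIZE a page-sized (typically
         * 4K) cluster, and MJUM9BYTES a 9K jumbo cluster.
         */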

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
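                /*
                 * Setting LSC in ICS raises a link-status-change
                 * interrupt, so the link state is evaluated right away.
                 */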
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                if (adapter->hw.mac.type == e1000_i354)
                        e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                else
                        e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
        }
}
1351
1352 static void
1353 igb_init(void *arg)
1354 {
1355         struct adapter *adapter = arg;
1356
1357         IGB_CORE_LOCK(adapter);
1358         igb_init_locked(adapter);
1359         IGB_CORE_UNLOCK(adapter);
1360 }
1361
1362
1363 static void
1364 igb_handle_que(void *context, int pending)
1365 {
1366         struct igb_queue *que = context;
1367         struct adapter *adapter = que->adapter;
1368         struct tx_ring *txr = que->txr;
1369         struct ifnet    *ifp = adapter->ifp;
1370
1371         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1372                 bool    more;
1373
1374                 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1375
1376                 IGB_TX_LOCK(txr);
1377                 igb_txeof(txr);
1378 #ifndef IGB_LEGACY_TX
1379                 /* Process the stack queue only if not depleted */
1380                 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1381                     !drbr_empty(ifp, txr->br))
1382                         igb_mq_start_locked(ifp, txr);
1383 #else
1384                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1385                         igb_start_locked(txr, ifp);
1386 #endif
1387                 IGB_TX_UNLOCK(txr);
1388                 /* Do we need another? */
1389                 if (more) {
1390                         taskqueue_enqueue(que->tq, &que->que_task);
1391                         return;
1392                 }
1393         }
1394
1395 #ifdef DEVICE_POLLING
1396         if (ifp->if_capenable & IFCAP_POLLING)
1397                 return;
1398 #endif
1399         /* Reenable this interrupt */
1400         if (que->eims)
1401                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1402         else
1403                 igb_enable_intr(adapter);
1404 }
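/*
 * Editor's note: rather than looping until the RX ring is empty,
 * igb_handle_que() re-enqueues its own task while igb_rxeof() reports
 * more work, yielding the taskqueue thread between passes; the queue
 * interrupt (the EIMS bit, or the full mask for MSI/legacy) is only
 * re-armed once the ring has been drained.
 */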
1405
1406 /* Deal with link in a sleepable context */
1407 static void
1408 igb_handle_link(void *context, int pending)
1409 {
1410         struct adapter *adapter = context;
1411
1412         IGB_CORE_LOCK(adapter);
1413         igb_handle_link_locked(adapter);
1414         IGB_CORE_UNLOCK(adapter);
1415 }
1416
1417 static void
1418 igb_handle_link_locked(struct adapter *adapter)
1419 {
1420         struct tx_ring  *txr = adapter->tx_rings;
1421         struct ifnet *ifp = adapter->ifp;
1422
1423         IGB_CORE_LOCK_ASSERT(adapter);
1424         adapter->hw.mac.get_link_status = 1;
1425         igb_update_link_status(adapter);
1426         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1427                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1428                         IGB_TX_LOCK(txr);
1429 #ifndef IGB_LEGACY_TX
1430                         /* Process the stack queue only if not depleted */
1431                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1432                             !drbr_empty(ifp, txr->br))
1433                                 igb_mq_start_locked(ifp, txr);
1434 #else
1435                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1436                                 igb_start_locked(txr, ifp);
1437 #endif
1438                         IGB_TX_UNLOCK(txr);
1439                 }
1440         }
1441 }
1442
1443 /*********************************************************************
1444  *
1445  *  MSI/Legacy Deferred
1446  *  Interrupt Service routine  
1447  *
1448  *********************************************************************/
1449 static int
1450 igb_irq_fast(void *arg)
1451 {
1452         struct adapter          *adapter = arg;
1453         struct igb_queue        *que = adapter->queues;
1454         u32                     reg_icr;
1455
1456
1457         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1458
1459         /* Hot eject?  */
1460         if (reg_icr == 0xffffffff)
1461                 return FILTER_STRAY;
1462
1463         /* Definitely not our interrupt.  */
1464         if (reg_icr == 0x0)
1465                 return FILTER_STRAY;
1466
1467         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1468                 return FILTER_STRAY;
1469
1470         /*
1471          * Mask interrupts until the taskqueue is finished running.  This is
1472          * cheap, just assume that it is needed.  This also works around the
1473          * MSI message reordering errata on certain systems.
1474          */
1475         igb_disable_intr(adapter);
1476         taskqueue_enqueue(que->tq, &que->que_task);
1477
1478         /* Link status change */
1479         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1480                 taskqueue_enqueue(que->tq, &adapter->link_task);
1481
1482         if (reg_icr & E1000_ICR_RXO)
1483                 adapter->rx_overruns++;
1484         return FILTER_HANDLED;
1485 }
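/*
 * Editor's note: igb_irq_fast() is installed as the interrupt filter
 * (see igb_allocate_legacy()), so it runs in filter context and may
 * not sleep; it only reads ICR, masks further interrupts, and defers
 * all real work to the que/link taskqueues, returning FILTER_HANDLED
 * or FILTER_STRAY as appropriate.
 */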
1486
1487 #ifdef DEVICE_POLLING
1488 #if __FreeBSD_version >= 800000
1489 #define POLL_RETURN_COUNT(a) (a)
1490 static int
1491 #else
1492 #define POLL_RETURN_COUNT(a)
1493 static void
1494 #endif
1495 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1496 {
1497         struct adapter          *adapter = ifp->if_softc;
1498         struct igb_queue        *que;
1499         struct tx_ring          *txr;
1500         u32                     reg_icr, rx_done = 0;
1501         u32                     loop = IGB_MAX_LOOP;
1502         bool                    more;
1503
1504         IGB_CORE_LOCK(adapter);
1505         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1506                 IGB_CORE_UNLOCK(adapter);
1507                 return POLL_RETURN_COUNT(rx_done);
1508         }
1509
1510         if (cmd == POLL_AND_CHECK_STATUS) {
1511                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1512                 /* Link status change */
1513                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1514                         igb_handle_link_locked(adapter);
1515
1516                 if (reg_icr & E1000_ICR_RXO)
1517                         adapter->rx_overruns++;
1518         }
1519         IGB_CORE_UNLOCK(adapter);
1520
1521         for (int i = 0; i < adapter->num_queues; i++) {
1522                 que = &adapter->queues[i];
1523                 txr = que->txr;
1524
1525                 igb_rxeof(que, count, &rx_done);
1526
1527                 IGB_TX_LOCK(txr);
1528                 do {
1529                         more = igb_txeof(txr);
1530                 } while (loop-- && more);
1531 #ifndef IGB_LEGACY_TX
1532                 if (!drbr_empty(ifp, txr->br))
1533                         igb_mq_start_locked(ifp, txr);
1534 #else
1535                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1536                         igb_start_locked(txr, ifp);
1537 #endif
1538                 IGB_TX_UNLOCK(txr);
1539         }
1540
1541         return POLL_RETURN_COUNT(rx_done);
1542 }
1543 #endif /* DEVICE_POLLING */
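/*
 * Hedged usage sketch (assumes a kernel built with
 * "options DEVICE_POLLING"):
 *
 *     # ifconfig igb0 polling      # set IFCAP_POLLING on the interface
 *     # ifconfig igb0 -polling     # return to interrupt-driven mode
 *
 * While polling is active, igb_init_locked() keeps interrupts masked
 * and the stack drives igb_poll() from its polling loop instead.
 */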
1544
1545 /*********************************************************************
1546  *
1547  *  MSIX Que Interrupt Service routine
1548  *
1549  **********************************************************************/
1550 static void
1551 igb_msix_que(void *arg)
1552 {
1553         struct igb_queue *que = arg;
1554         struct adapter *adapter = que->adapter;
1555         struct ifnet   *ifp = adapter->ifp;
1556         struct tx_ring *txr = que->txr;
1557         struct rx_ring *rxr = que->rxr;
1558         u32             newitr = 0;
1559         bool            more_rx;
1560
1561         /* Ignore spurious interrupts */
1562         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1563                 return;
1564
1565         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1566         ++que->irqs;
1567
1568         IGB_TX_LOCK(txr);
1569         igb_txeof(txr);
1570 #ifndef IGB_LEGACY_TX
1571         /* Process the stack queue only if not depleted */
1572         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1573             !drbr_empty(ifp, txr->br))
1574                 igb_mq_start_locked(ifp, txr);
1575 #else
1576         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1577                 igb_start_locked(txr, ifp);
1578 #endif
1579         IGB_TX_UNLOCK(txr);
1580
1581         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1582
1583         if (adapter->enable_aim == FALSE)
1584                 goto no_calc;
1585         /*
1586         ** Do Adaptive Interrupt Moderation:
1587         **  - Write out last calculated setting
1588         **  - Calculate based on average size over
1589         **    the last interval.
1590         */
1591         if (que->eitr_setting)
1592                 E1000_WRITE_REG(&adapter->hw,
1593                     E1000_EITR(que->msix), que->eitr_setting);
1594  
1595         que->eitr_setting = 0;
1596
1597         /* Idle, do nothing */
1598         if ((txr->bytes == 0) && (rxr->bytes == 0))
1599                 goto no_calc;
1600                                 
1601         /* Use half the default if sub-gig */
1602         if (adapter->link_speed != 1000)
1603                 newitr = IGB_DEFAULT_ITR / 2;
1604         else {
1605                 if ((txr->bytes) && (txr->packets))
1606                         newitr = txr->bytes/txr->packets;
1607                 if ((rxr->bytes) && (rxr->packets))
1608                         newitr = max(newitr,
1609                             (rxr->bytes / rxr->packets));
1610                 newitr += 24; /* account for hardware frame, crc */
1611                 /* set an upper boundary */
1612                 newitr = min(newitr, 3000);
1613                 /* Be nice to the mid range */
1614                 if ((newitr > 300) && (newitr < 1200))
1615                         newitr = (newitr / 3);
1616                 else
1617                         newitr = (newitr / 2);
1618         }
1619         newitr &= 0x7FFC;  /* Mask invalid bits */
1620         if (adapter->hw.mac.type == e1000_82575)
1621                 newitr |= newitr << 16;
1622         else
1623                 newitr |= E1000_EITR_CNT_IGNR;
1624                  
1625         /* save for next interrupt */
1626         que->eitr_setting = newitr;
1627
1628         /* Reset state */
1629         txr->bytes = 0;
1630         txr->packets = 0;
1631         rxr->bytes = 0;
1632         rxr->packets = 0;
1633
1634 no_calc:
1635         /* Schedule a clean task if needed */
1636         if (more_rx)
1637                 taskqueue_enqueue(que->tq, &que->que_task);
1638         else
1639                 /* Reenable this interrupt */
1640                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1641         return;
1642 }
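/*
 * Worked AIM example (illustrative numbers only): at gigabit speed
 * with an average frame of ~900 bytes, newitr = 900 + 24 = 924; that
 * falls in the (300, 1200) mid range, so it is divided by 3 to 308,
 * which survives the 0x7FFC mask unchanged and becomes the next EITR
 * interval. Sub-gigabit links skip the math and use IGB_DEFAULT_ITR/2.
 */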
1643
1644
1645 /*********************************************************************
1646  *
1647  *  MSIX Link Interrupt Service routine
1648  *
1649  **********************************************************************/
1650
1651 static void
1652 igb_msix_link(void *arg)
1653 {
1654         struct adapter  *adapter = arg;
1655         u32             icr;
1656
1657         ++adapter->link_irq;
1658         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1659         if (!(icr & E1000_ICR_LSC))
1660                 goto spurious;
1661         igb_handle_link(adapter, 0);
1662
1663 spurious:
1664         /* Rearm */
1665         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1666         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1667         return;
1668 }
1669
1670
1671 /*********************************************************************
1672  *
1673  *  Media Ioctl callback
1674  *
1675  *  This routine is called whenever the user queries the status of
1676  *  the interface using ifconfig.
1677  *
1678  **********************************************************************/
1679 static void
1680 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1681 {
1682         struct adapter *adapter = ifp->if_softc;
1683
1684         INIT_DEBUGOUT("igb_media_status: begin");
1685
1686         IGB_CORE_LOCK(adapter);
1687         igb_update_link_status(adapter);
1688
1689         ifmr->ifm_status = IFM_AVALID;
1690         ifmr->ifm_active = IFM_ETHER;
1691
1692         if (!adapter->link_active) {
1693                 IGB_CORE_UNLOCK(adapter);
1694                 return;
1695         }
1696
1697         ifmr->ifm_status |= IFM_ACTIVE;
1698
1699         switch (adapter->link_speed) {
1700         case 10:
1701                 ifmr->ifm_active |= IFM_10_T;
1702                 break;
1703         case 100:
1704                 /*
1705                 ** Support for 100Mb SFP - these are Fiber 
1706                 ** but the media type appears as serdes
1707                 */
1708                 if (adapter->hw.phy.media_type ==
1709                     e1000_media_type_internal_serdes)
1710                         ifmr->ifm_active |= IFM_100_FX;
1711                 else
1712                         ifmr->ifm_active |= IFM_100_TX;
1713                 break;
1714         case 1000:
1715                 ifmr->ifm_active |= IFM_1000_T;
1716                 break;
1717         case 2500:
1718                 ifmr->ifm_active |= IFM_2500_SX;
1719                 break;
1720         }
1721
1722         if (adapter->link_duplex == FULL_DUPLEX)
1723                 ifmr->ifm_active |= IFM_FDX;
1724         else
1725                 ifmr->ifm_active |= IFM_HDX;
1726
1727         IGB_CORE_UNLOCK(adapter);
1728 }
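/*
 * Hedged example: the flags set here are what "ifconfig igb0" renders,
 * e.g. a copper link that negotiated 1000/full typically shows up as
 * "media: Ethernet autoselect (1000baseT <full-duplex>)" (exact
 * wording is ifconfig's, shown only for orientation).
 */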
1729
1730 /*********************************************************************
1731  *
1732  *  Media Ioctl callback
1733  *
1734  *  This routine is called when the user changes speed/duplex using
1735  *  media/mediaopt option with ifconfig.
1736  *
1737  **********************************************************************/
1738 static int
1739 igb_media_change(struct ifnet *ifp)
1740 {
1741         struct adapter *adapter = ifp->if_softc;
1742         struct ifmedia  *ifm = &adapter->media;
1743
1744         INIT_DEBUGOUT("igb_media_change: begin");
1745
1746         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1747                 return (EINVAL);
1748
1749         IGB_CORE_LOCK(adapter);
1750         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1751         case IFM_AUTO:
1752                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1753                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1754                 break;
1755         case IFM_1000_LX:
1756         case IFM_1000_SX:
1757         case IFM_1000_T:
1758                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1759                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1760                 break;
1761         case IFM_100_TX:
1762                 adapter->hw.mac.autoneg = FALSE;
1763                 adapter->hw.phy.autoneg_advertised = 0;
1764                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1765                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1766                 else
1767                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1768                 break;
1769         case IFM_10_T:
1770                 adapter->hw.mac.autoneg = FALSE;
1771                 adapter->hw.phy.autoneg_advertised = 0;
1772                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1773                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1774                 else
1775                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1776                 break;
1777         default:
1778                 device_printf(adapter->dev, "Unsupported media type\n");
1779         }
1780
1781         igb_init_locked(adapter);
1782         IGB_CORE_UNLOCK(adapter);
1783
1784         return (0);
1785 }
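/*
 * Hedged usage sketch: forcing 100/full from userland exercises the
 * IFM_100_TX case above, e.g.
 *
 *     # ifconfig igb0 media 100baseTX mediaopt full-duplex
 *     # ifconfig igb0 media autoselect
 *
 * Note that the default case only logs "Unsupported media type" and
 * still reinitializes the adapter with its previous settings.
 */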
1786
1787
1788 /*********************************************************************
1789  *
1790  *  This routine maps the mbufs to Advanced TX descriptors.
1791  *  
1792  **********************************************************************/
1793 static int
1794 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1795 {
1796         struct adapter  *adapter = txr->adapter;
1797         u32             olinfo_status = 0, cmd_type_len;
1798         int             i, j, error, nsegs;
1799         int             first;
1800         bool            remap = TRUE;
1801         struct mbuf     *m_head;
1802         bus_dma_segment_t segs[IGB_MAX_SCATTER];
1803         bus_dmamap_t    map;
1804         struct igb_tx_buf *txbuf;
1805         union e1000_adv_tx_desc *txd = NULL;
1806
1807         m_head = *m_headp;
1808
1809         /* Basic descriptor defines */
1810         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1811             E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1812
1813         if (m_head->m_flags & M_VLANTAG)
1814                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1815
1816         /*
1817          * Important to capture the first descriptor
1818          * used: it is where the EOP pointer and real DMA
1819          * map are stored for the completion report-back.
1820          */
1821         first = txr->next_avail_desc;
1822         txbuf = &txr->tx_buffers[first];
1823         map = txbuf->map;
1824
1825         /*
1826          * Map the packet for DMA.
1827          */
1828 retry:
1829         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1830             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1831
1832         if (__predict_false(error)) {
1833                 struct mbuf *m;
1834
1835                 switch (error) {
1836                 case EFBIG:
1837                         /* Try it again? - one try */
1838                         if (remap == TRUE) {
1839                                 remap = FALSE;
1840                                 m = m_collapse(*m_headp, M_NOWAIT,
1841                                     IGB_MAX_SCATTER);
1842                                 if (m == NULL) {
1843                                         adapter->mbuf_defrag_failed++;
1844                                         m_freem(*m_headp);
1845                                         *m_headp = NULL;
1846                                         return (ENOBUFS);
1847                                 }
1848                                 *m_headp = m;
1849                                 goto retry;
1850                         } else
1851                                 return (error);
1852                 default:
1853                         txr->no_tx_dma_setup++;
1854                         m_freem(*m_headp);
1855                         *m_headp = NULL;
1856                         return (error);
1857                 }
1858         }
1859
1860         /* Make certain there are enough descriptors: nsegs data + one context + a spare */
1861         if (nsegs > txr->tx_avail - 2) {
1862                 txr->no_desc_avail++;
1863                 bus_dmamap_unload(txr->txtag, map);
1864                 return (ENOBUFS);
1865         }
1866         m_head = *m_headp;
1867
1868         /*
1869         ** Set up the appropriate offload context
1870         ** this will consume the first descriptor
1871         */
1872         error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1873         if (__predict_false(error)) {
1874                 m_freem(*m_headp);
1875                 *m_headp = NULL;
1876                 return (error);
1877         }
1878
1879         /* 82575 needs the queue index added */
1880         if (adapter->hw.mac.type == e1000_82575)
1881                 olinfo_status |= txr->me << 4;
1882
1883         i = txr->next_avail_desc;
1884         for (j = 0; j < nsegs; j++) {
1885                 bus_size_t seglen;
1886                 bus_addr_t segaddr;
1887
1888                 txbuf = &txr->tx_buffers[i];
1889                 txd = &txr->tx_base[i];
1890                 seglen = segs[j].ds_len;
1891                 segaddr = htole64(segs[j].ds_addr);
1892
1893                 txd->read.buffer_addr = segaddr;
1894                 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1895                     cmd_type_len | seglen);
1896                 txd->read.olinfo_status = htole32(olinfo_status);
1897
1898                 if (++i == txr->num_desc)
1899                         i = 0;
1900         }
1901
1902         txd->read.cmd_type_len |=
1903             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1904         txr->tx_avail -= nsegs;
1905         txr->next_avail_desc = i;
1906
1907         txbuf->m_head = m_head;
1908         /*
1909         ** Here we swap the map so the last descriptor,
1910         ** which gets the completion interrupt has the
1911         ** real map, and the first descriptor gets the
1912         ** unused map from this descriptor.
1913         */
1914         txr->tx_buffers[first].map = txbuf->map;
1915         txbuf->map = map;
1916         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1917
1918         /* Set the EOP descriptor that will be marked done */
1919         txbuf = &txr->tx_buffers[first];
1920         txbuf->eop = txd;
1921
1922         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1923             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1924         /*
1925          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1926          * hardware that this frame is available to transmit.
1927          */
1928         ++txr->total_packets;
1929         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1930
1931         return (0);
1932 }
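/*
 * Editor's recap of the bookkeeping above: each frame consumes one
 * context descriptor (igb_tx_ctx_setup()) plus nsegs data descriptors,
 * hence the "tx_avail - 2" headroom check; EOP/RS go on the last data
 * descriptor, the loaded DMA map is swapped onto the buffer of that
 * last descriptor, and the EOP pointer is recorded on the first so
 * igb_txeof() can find the completion.
 */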
1933 static void
1934 igb_set_promisc(struct adapter *adapter)
1935 {
1936         struct ifnet    *ifp = adapter->ifp;
1937         struct e1000_hw *hw = &adapter->hw;
1938         u32             reg;
1939
1940         if (adapter->vf_ifp) {
1941                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1942                 return;
1943         }
1944
1945         reg = E1000_READ_REG(hw, E1000_RCTL);
1946         if (ifp->if_flags & IFF_PROMISC) {
1947                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1948                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1949         } else if (ifp->if_flags & IFF_ALLMULTI) {
1950                 reg |= E1000_RCTL_MPE;
1951                 reg &= ~E1000_RCTL_UPE;
1952                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1953         }
1954 }
1955
1956 static void
1957 igb_disable_promisc(struct adapter *adapter)
1958 {
1959         struct e1000_hw *hw = &adapter->hw;
1960         struct ifnet    *ifp = adapter->ifp;
1961         u32             reg;
1962         int             mcnt = 0;
1963
1964         if (adapter->vf_ifp) {
1965                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1966                 return;
1967         }
1968         reg = E1000_READ_REG(hw, E1000_RCTL);
1969         reg &=  (~E1000_RCTL_UPE);
1970         if (ifp->if_flags & IFF_ALLMULTI)
1971                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1972         else {
1973                 struct  ifmultiaddr *ifma;
1974 #if __FreeBSD_version < 800000
1975                 IF_ADDR_LOCK(ifp);
1976 #else   
1977                 if_maddr_rlock(ifp);
1978 #endif
1979                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1980                         if (ifma->ifma_addr->sa_family != AF_LINK)
1981                                 continue;
1982                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1983                                 break;
1984                         mcnt++;
1985                 }
1986 #if __FreeBSD_version < 800000
1987                 IF_ADDR_UNLOCK(ifp);
1988 #else
1989                 if_maddr_runlock(ifp);
1990 #endif
1991         }
1992         /* Don't disable if in MAX groups */
1993         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1994                 reg &=  (~E1000_RCTL_MPE);
1995         E1000_WRITE_REG(hw, E1000_RCTL, reg);
1996 }
1997
1998
1999 /*********************************************************************
2000  *  Multicast Update
2001  *
2002  *  This routine is called whenever the multicast address list is updated.
2003  *
2004  **********************************************************************/
2005
2006 static void
2007 igb_set_multi(struct adapter *adapter)
2008 {
2009         struct ifnet    *ifp = adapter->ifp;
2010         struct ifmultiaddr *ifma;
2011         u32 reg_rctl = 0;
2012         u8  *mta;
2013
2014         int mcnt = 0;
2015
2016         IOCTL_DEBUGOUT("igb_set_multi: begin");
2017
2018         mta = adapter->mta;
2019         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2020             MAX_NUM_MULTICAST_ADDRESSES);
2021
2022 #if __FreeBSD_version < 800000
2023         IF_ADDR_LOCK(ifp);
2024 #else
2025         if_maddr_rlock(ifp);
2026 #endif
2027         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2028                 if (ifma->ifma_addr->sa_family != AF_LINK)
2029                         continue;
2030
2031                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2032                         break;
2033
2034                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2035                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2036                 mcnt++;
2037         }
2038 #if __FreeBSD_version < 800000
2039         IF_ADDR_UNLOCK(ifp);
2040 #else
2041         if_maddr_runlock(ifp);
2042 #endif
2043
2044         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2045                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2046                 reg_rctl |= E1000_RCTL_MPE;
2047                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2048         } else
2049                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2050 }
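/*
 * Editor's note: once more than MAX_NUM_MULTICAST_ADDRESSES groups are
 * joined, the hardware filter table cannot hold them all, so the code
 * above falls back to multicast-promiscuous mode (RCTL.MPE) instead of
 * dropping groups; igb_disable_promisc() performs the same count so it
 * will not clear MPE while the table is saturated.
 */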
2051
2052
2053 /*********************************************************************
2054  *  Timer routine:
2055  *      This routine checks for link status,
2056  *      updates statistics, and does the watchdog.
2057  *
2058  **********************************************************************/
2059
2060 static void
2061 igb_local_timer(void *arg)
2062 {
2063         struct adapter          *adapter = arg;
2064         device_t                dev = adapter->dev;
2065         struct ifnet            *ifp = adapter->ifp;
2066         struct tx_ring          *txr = adapter->tx_rings;
2067         struct igb_queue        *que = adapter->queues;
2068         int                     hung = 0, busy = 0;
2069
2070
2071         IGB_CORE_LOCK_ASSERT(adapter);
2072
2073         igb_update_link_status(adapter);
2074         igb_update_stats_counters(adapter);
2075
2076         /*
2077         ** Check the TX queues status
2078         **      - central locked handling of OACTIVE
2079         **      - watchdog only if all queues show hung
2080         */
2081         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2082                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2083                     (adapter->pause_frames == 0))
2084                         ++hung;
2085                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2086                         ++busy;
2087                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2088                         taskqueue_enqueue(que->tq, &que->que_task);
2089         }
2090         if (hung == adapter->num_queues)
2091                 goto timeout;
2092         if (busy == adapter->num_queues)
2093                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2094         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2095             (busy < adapter->num_queues))
2096                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2097
2098         adapter->pause_frames = 0;
2099         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2100 #ifndef DEVICE_POLLING
2101         /* Schedule all queue interrupts - deadlock protection */
2102         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2103 #endif
2104         return;
2105
2106 timeout:
2107         device_printf(dev, "Watchdog timeout -- resetting\n");
2108         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2109             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2110             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2111         device_printf(dev, "TX(%d) desc avail = %d, "
2112             "Next TX to Clean = %d\n",
2113             txr->me, txr->tx_avail, txr->next_to_clean);
2114         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2115         adapter->watchdog_events++;
2116         igb_init_locked(adapter);
2117 }
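/*
 * Editor's note on the watchdog policy above: a reset is taken only
 * when every queue reports IGB_QUEUE_HUNG and no pause frames arrived
 * in the interval, so a link partner asserting flow control cannot
 * masquerade as a hung ring; IFF_DRV_OACTIVE is likewise managed
 * centrally here from the per-queue DEPLETED counts.
 */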
2118
2119 static void
2120 igb_update_link_status(struct adapter *adapter)
2121 {
2122         struct e1000_hw         *hw = &adapter->hw;
2123         struct e1000_fc_info    *fc = &hw->fc;
2124         struct ifnet            *ifp = adapter->ifp;
2125         device_t                dev = adapter->dev;
2126         struct tx_ring          *txr = adapter->tx_rings;
2127         u32                     link_check, thstat, ctrl;
2128         char                    *flowctl = NULL;
2129
2130         link_check = thstat = ctrl = 0;
2131
2132         /* Get the cached link value or read for real */
2133         switch (hw->phy.media_type) {
2134         case e1000_media_type_copper:
2135                 if (hw->mac.get_link_status) {
2136                         /* Do the work to read phy */
2137                         e1000_check_for_link(hw);
2138                         link_check = !hw->mac.get_link_status;
2139                 } else
2140                         link_check = TRUE;
2141                 break;
2142         case e1000_media_type_fiber:
2143                 e1000_check_for_link(hw);
2144                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2145                                  E1000_STATUS_LU);
2146                 break;
2147         case e1000_media_type_internal_serdes:
2148                 e1000_check_for_link(hw);
2149                 link_check = adapter->hw.mac.serdes_has_link;
2150                 break;
2151         /* VF device is type_unknown */
2152         case e1000_media_type_unknown:
2153                 e1000_check_for_link(hw);
2154                 link_check = !hw->mac.get_link_status;
2155                 /* Fall thru */
2156         default:
2157                 break;
2158         }
2159
2160         /* Check for thermal downshift or shutdown */
2161         if (hw->mac.type == e1000_i350) {
2162                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2163                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2164         }
2165
2166         /* Get the flow control for display */
2167         switch (fc->current_mode) {
2168         case e1000_fc_rx_pause:
2169                 flowctl = "RX";
2170                 break;  
2171         case e1000_fc_tx_pause:
2172                 flowctl = "TX";
2173                 break;  
2174         case e1000_fc_full:
2175                 flowctl = "Full";
2176                 break;  
2177         case e1000_fc_none:
2178         default:
2179                 flowctl = "None";
2180                 break;  
2181         }
2182
2183         /* Now we check if a transition has happened */
2184         if (link_check && (adapter->link_active == 0)) {
2185                 e1000_get_speed_and_duplex(&adapter->hw, 
2186                     &adapter->link_speed, &adapter->link_duplex);
2187                 if (bootverbose)
2188                         device_printf(dev, "Link is up %d Mbps %s,"
2189                             " Flow Control: %s\n",
2190                             adapter->link_speed,
2191                             ((adapter->link_duplex == FULL_DUPLEX) ?
2192                             "Full Duplex" : "Half Duplex"), flowctl);
2193                 adapter->link_active = 1;
2194                 ifp->if_baudrate = adapter->link_speed * 1000000;
2195                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2196                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2197                         device_printf(dev, "Link: thermal downshift\n");
2198                 /* Delay Link Up for Phy update */
2199                 if (((hw->mac.type == e1000_i210) ||
2200                     (hw->mac.type == e1000_i211)) &&
2201                     (hw->phy.id == I210_I_PHY_ID))
2202                         msec_delay(I210_LINK_DELAY);
2203                 /* Reset if the media type changed. */
2204                 if (hw->dev_spec._82575.media_changed) {
2205                         hw->dev_spec._82575.media_changed = false;
2206                         adapter->flags |= IGB_MEDIA_RESET;
2207                         igb_reset(adapter);
2208                 }       
2209                 /* This can sleep */
2210                 if_link_state_change(ifp, LINK_STATE_UP);
2211         } else if (!link_check && (adapter->link_active == 1)) {
2212                 ifp->if_baudrate = adapter->link_speed = 0;
2213                 adapter->link_duplex = 0;
2214                 if (bootverbose)
2215                         device_printf(dev, "Link is Down\n");
2216                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2217                     (thstat & E1000_THSTAT_PWR_DOWN))
2218                         device_printf(dev, "Link: thermal shutdown\n");
2219                 adapter->link_active = 0;
2220                 /* This can sleep */
2221                 if_link_state_change(ifp, LINK_STATE_DOWN);
2222                 /* Reset queue state */
2223                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2224                         txr->queue_status = IGB_QUEUE_IDLE;
2225         }
2226 }
2227
2228 /*********************************************************************
2229  *
2230  *  This routine disables all traffic on the adapter by issuing a
2231  *  global reset on the MAC and deallocates TX/RX buffers.
2232  *
2233  **********************************************************************/
2234
2235 static void
2236 igb_stop(void *arg)
2237 {
2238         struct adapter  *adapter = arg;
2239         struct ifnet    *ifp = adapter->ifp;
2240         struct tx_ring *txr = adapter->tx_rings;
2241
2242         IGB_CORE_LOCK_ASSERT(adapter);
2243
2244         INIT_DEBUGOUT("igb_stop: begin");
2245
2246         igb_disable_intr(adapter);
2247
2248         callout_stop(&adapter->timer);
2249
2250         /* Tell the stack that the interface is no longer active */
2251         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2252         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2253
2254         /* Disarm watchdog timer. */
2255         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2256                 IGB_TX_LOCK(txr);
2257                 txr->queue_status = IGB_QUEUE_IDLE;
2258                 IGB_TX_UNLOCK(txr);
2259         }
2260
2261         e1000_reset_hw(&adapter->hw);
2262         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2263
2264         e1000_led_off(&adapter->hw);
2265         e1000_cleanup_led(&adapter->hw);
2266 }
2267
2268
2269 /*********************************************************************
2270  *
2271  *  Determine hardware revision.
2272  *
2273  **********************************************************************/
2274 static void
2275 igb_identify_hardware(struct adapter *adapter)
2276 {
2277         device_t dev = adapter->dev;
2278
2279         /* Make sure our PCI config space has the necessary stuff set */
2280         pci_enable_busmaster(dev);
2281         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2282
2283         /* Save off the information about this board */
2284         adapter->hw.vendor_id = pci_get_vendor(dev);
2285         adapter->hw.device_id = pci_get_device(dev);
2286         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2287         adapter->hw.subsystem_vendor_id =
2288             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2289         adapter->hw.subsystem_device_id =
2290             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2291
2292         /* Set MAC type early for PCI setup */
2293         e1000_set_mac_type(&adapter->hw);
2294
2295         /* Are we a VF device? */
2296         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2297             (adapter->hw.mac.type == e1000_vfadapt_i350))
2298                 adapter->vf_ifp = 1;
2299         else
2300                 adapter->vf_ifp = 0;
2301 }
2302
2303 static int
2304 igb_allocate_pci_resources(struct adapter *adapter)
2305 {
2306         device_t        dev = adapter->dev;
2307         int             rid;
2308
2309         rid = PCIR_BAR(0);
2310         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2311             &rid, RF_ACTIVE);
2312         if (adapter->pci_mem == NULL) {
2313                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2314                 return (ENXIO);
2315         }
2316         adapter->osdep.mem_bus_space_tag =
2317             rman_get_bustag(adapter->pci_mem);
2318         adapter->osdep.mem_bus_space_handle =
2319             rman_get_bushandle(adapter->pci_mem);
2320         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2321
2322         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2323
2324         /* This will setup either MSI/X or MSI */
2325         adapter->msix = igb_setup_msix(adapter);
2326         adapter->hw.back = &adapter->osdep;
2327
2328         return (0);
2329 }
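/*
 * Editor's note (an assumption about the shared-code contract):
 * hw_addr is pointed at the bus-space handle rather than a directly
 * mapped register window because the E1000_READ/WRITE_REG macros in
 * this driver's osdep layer go through the mem_bus_space_tag/handle
 * pair; the value mainly satisfies the shared code's non-NULL checks.
 */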
2330
2331 /*********************************************************************
2332  *
2333  *  Setup the Legacy or MSI Interrupt handler
2334  *
2335  **********************************************************************/
2336 static int
2337 igb_allocate_legacy(struct adapter *adapter)
2338 {
2339         device_t                dev = adapter->dev;
2340         struct igb_queue        *que = adapter->queues;
2341 #ifndef IGB_LEGACY_TX
2342         struct tx_ring          *txr = adapter->tx_rings;
2343 #endif
2344         int                     error, rid = 0;
2345
2346         /* Turn off all interrupts */
2347         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2348
2349         /* MSI RID is 1 */
2350         if (adapter->msix == 1)
2351                 rid = 1;
2352
2353         /* We allocate a single interrupt resource */
2354         adapter->res = bus_alloc_resource_any(dev,
2355             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2356         if (adapter->res == NULL) {
2357                 device_printf(dev, "Unable to allocate bus resource: "
2358                     "interrupt\n");
2359                 return (ENXIO);
2360         }
2361
2362 #ifndef IGB_LEGACY_TX
2363         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2364 #endif
2365
2366         /*
2367          * Try allocating a fast interrupt and the associated deferred
2368          * processing contexts.
2369          */
2370         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2371         /* Make tasklet for deferred link handling */
2372         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2373         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2374             taskqueue_thread_enqueue, &que->tq);
2375         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2376             device_get_nameunit(adapter->dev));
2377         if ((error = bus_setup_intr(dev, adapter->res,
2378             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2379             adapter, &adapter->tag)) != 0) {
2380                 device_printf(dev, "Failed to register fast interrupt "
2381                             "handler: %d\n", error);
2382                 taskqueue_free(que->tq);
2383                 que->tq = NULL;
2384                 return (error);
2385         }
2386
2387         return (0);
2388 }
2389
2390
2391 /*********************************************************************
2392  *
2393  *  Setup the MSIX Queue Interrupt handlers: 
2394  *
2395  **********************************************************************/
2396 static int
2397 igb_allocate_msix(struct adapter *adapter)
2398 {
2399         device_t                dev = adapter->dev;
2400         struct igb_queue        *que = adapter->queues;
2401         int                     error, rid, vector = 0;
2402         int                     cpu_id = 0;
2403 #ifdef  RSS
2404         cpuset_t cpu_mask;
2405 #endif
2406
2407         /* Be sure to start with all interrupts disabled */
2408         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2409         E1000_WRITE_FLUSH(&adapter->hw);
2410
2411 #ifdef  RSS
2412         /*
2413          * If we're doing RSS, the number of queues needs to
2414          * match the number of RSS buckets that are configured.
2415          *
2416          * + If there's more queues than RSS buckets, we'll end
2417          *   up with queues that get no traffic.
2418          *
2419          * + If there's more RSS buckets than queues, we'll end
2420          *   up having multiple RSS buckets map to the same queue,
2421          *   so there'll be some contention.
2422          */
2423         if (adapter->num_queues != rss_getnumbuckets()) {
2424                 device_printf(dev,
2425                     "%s: number of queues (%d) != number of RSS buckets (%d)"
2426                     "; performance will be impacted.\n",
2427                     __func__,
2428                     adapter->num_queues,
2429                     rss_getnumbuckets());
2430         }
2431 #endif
2432
2433         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2434                 rid = vector + 1;
2435                 que->res = bus_alloc_resource_any(dev,
2436                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2437                 if (que->res == NULL) {
2438                         device_printf(dev,
2439                             "Unable to allocate bus resource: "
2440                             "MSIX Queue Interrupt\n");
2441                         return (ENXIO);
2442                 }
2443                 error = bus_setup_intr(dev, que->res,
2444                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2445                     igb_msix_que, que, &que->tag);
2446                 if (error) {
2447                         que->res = NULL;
2448                         device_printf(dev, "Failed to register Queue handler\n");
2449                         return (error);
2450                 }
2451 #if __FreeBSD_version >= 800504
2452                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2453 #endif
2454                 que->msix = vector;
2455                 if (adapter->hw.mac.type == e1000_82575)
2456                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2457                 else
2458                         que->eims = 1 << vector;
2459
2460 #ifdef  RSS
2461                 /*
2462                  * The queue ID is used as the RSS layer bucket ID.
2463                  * We look up the queue ID -> RSS CPU ID and select
2464                  * that.
2465                  */
2466                 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2467 #else
2468                 /*
2469                  * Bind the msix vector, and thus the
2470                  * rings to the corresponding cpu.
2471                  *
2472                  * This just happens to match the default RSS round-robin
2473                  * bucket -> queue -> CPU allocation.
2474                  */
2475                 if (adapter->num_queues > 1) {
2476                         if (igb_last_bind_cpu < 0)
2477                                 igb_last_bind_cpu = CPU_FIRST();
2478                         cpu_id = igb_last_bind_cpu;
2479                 }
2480 #endif
2481
2482                 if (adapter->num_queues > 1) {
2483                         bus_bind_intr(dev, que->res, cpu_id);
2484 #ifdef  RSS
2485                         device_printf(dev,
2486                                 "Bound queue %d to RSS bucket %d\n",
2487                                 i, cpu_id);
2488 #else
2489                         device_printf(dev,
2490                                 "Bound queue %d to cpu %d\n",
2491                                 i, cpu_id);
2492 #endif
2493                 }
2494
2495 #ifndef IGB_LEGACY_TX
2496                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2497                     que->txr);
2498 #endif
2499                 /* Make tasklet for deferred handling */
2500                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2501                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2502                     taskqueue_thread_enqueue, &que->tq);
2503                 if (adapter->num_queues > 1) {
2504                         /*
2505                          * Only pin the taskqueue thread to a CPU if
2506                          * RSS is in use.
2507                          *
2508                          * This again just happens to match the default RSS
2509                          * round-robin bucket -> queue -> CPU allocation.
2510                          */
2511 #ifdef  RSS
2512                         CPU_SETOF(cpu_id, &cpu_mask);
2513                         taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2514                             &cpu_mask,
2515                             "%s que (bucket %d)",
2516                             device_get_nameunit(adapter->dev),
2517                             cpu_id);
2518 #else
2519                         taskqueue_start_threads(&que->tq, 1, PI_NET,
2520                             "%s que (qid %d)",
2521                             device_get_nameunit(adapter->dev),
2522                             cpu_id);
2523 #endif
2524                 } else {
2525                         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2526                             device_get_nameunit(adapter->dev));
2527                 }
2528
2529                 /* Finally update the last bound CPU id */
2530                 if (adapter->num_queues > 1)
2531                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2532         }
2533
2534         /* And Link */
2535         rid = vector + 1;
2536         adapter->res = bus_alloc_resource_any(dev,
2537             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2538         if (adapter->res == NULL) {
2539                 device_printf(dev,
2540                     "Unable to allocate bus resource: "
2541                     "MSIX Link Interrupt\n");
2542                 return (ENXIO);
2543         }
2544         if ((error = bus_setup_intr(dev, adapter->res,
2545             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2546             igb_msix_link, adapter, &adapter->tag)) != 0) {
2547                 device_printf(dev, "Failed to register Link handler\n");
2548                 return (error);
2549         }
2550 #if __FreeBSD_version >= 800504
2551         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2552 #endif
2553         adapter->linkvec = vector;
2554
2555         return (0);
2556 }
2557
2558
2559 static void
2560 igb_configure_queues(struct adapter *adapter)
2561 {
2562         struct  e1000_hw        *hw = &adapter->hw;
2563         struct  igb_queue       *que;
2564         u32                     tmp, ivar = 0, newitr = 0;
2565
2566         /* First turn on RSS capability */
2567         if (adapter->hw.mac.type != e1000_82575)
2568                 E1000_WRITE_REG(hw, E1000_GPIE,
2569                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2570                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2571
2572         /* Turn on MSIX */
2573         switch (adapter->hw.mac.type) {
2574         case e1000_82580:
2575         case e1000_i350:
2576         case e1000_i354:
2577         case e1000_i210:
2578         case e1000_i211:
2579         case e1000_vfadapt:
2580         case e1000_vfadapt_i350:
2581                 /* RX entries */
2582                 for (int i = 0; i < adapter->num_queues; i++) {
2583                         u32 index = i >> 1;
2584                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2585                         que = &adapter->queues[i];
2586                         if (i & 1) {
2587                                 ivar &= 0xFF00FFFF;
2588                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2589                         } else {
2590                                 ivar &= 0xFFFFFF00;
2591                                 ivar |= que->msix | E1000_IVAR_VALID;
2592                         }
2593                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2594                 }
2595                 /* TX entries */
2596                 for (int i = 0; i < adapter->num_queues; i++) {
2597                         u32 index = i >> 1;
2598                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2599                         que = &adapter->queues[i];
2600                         if (i & 1) {
2601                                 ivar &= 0x00FFFFFF;
2602                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2603                         } else {
2604                                 ivar &= 0xFFFF00FF;
2605                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2606                         }
2607                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2608                         adapter->que_mask |= que->eims;
2609                 }
2610
2611                 /* And for the link interrupt */
2612                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2613                 adapter->link_mask = 1 << adapter->linkvec;
2614                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2615                 break;
2616         case e1000_82576:
2617                 /* RX entries */
2618                 for (int i = 0; i < adapter->num_queues; i++) {
2619                         u32 index = i & 0x7; /* Each IVAR has two entries */
2620                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2621                         que = &adapter->queues[i];
2622                         if (i < 8) {
2623                                 ivar &= 0xFFFFFF00;
2624                                 ivar |= que->msix | E1000_IVAR_VALID;
2625                         } else {
2626                                 ivar &= 0xFF00FFFF;
2627                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2628                         }
2629                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2630                         adapter->que_mask |= que->eims;
2631                 }
2632                 /* TX entries */
2633                 for (int i = 0; i < adapter->num_queues; i++) {
2634                         u32 index = i & 0x7; /* Each IVAR has two entries */
2635                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2636                         que = &adapter->queues[i];
2637                         if (i < 8) {
2638                                 ivar &= 0xFFFF00FF;
2639                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2640                         } else {
2641                                 ivar &= 0x00FFFFFF;
2642                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2643                         }
2644                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2645                         adapter->que_mask |= que->eims;
2646                 }
2647
2648                 /* And for the link interrupt */
2649                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2650                 adapter->link_mask = 1 << adapter->linkvec;
2651                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2652                 break;
2653
2654         case e1000_82575:
2655                 /* enable MSI-X support*/
2656                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2657                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2658                 /* Auto-Mask interrupts upon ICR read. */
2659                 tmp |= E1000_CTRL_EXT_EIAME;
2660                 tmp |= E1000_CTRL_EXT_IRCA;
2661                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2662
2663                 /* Queues */
2664                 for (int i = 0; i < adapter->num_queues; i++) {
2665                         que = &adapter->queues[i];
2666                         tmp = E1000_EICR_RX_QUEUE0 << i;
2667                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2668                         que->eims = tmp;
2669                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2670                             i, que->eims);
2671                         adapter->que_mask |= que->eims;
2672                 }
2673
2674                 /* Link */
2675                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2676                     E1000_EIMS_OTHER);
2677                 adapter->link_mask |= E1000_EIMS_OTHER;
                break;
2678         default:
2679                 break;
2680         }
2681
2682         /* Set the starting interrupt rate */
2683         if (igb_max_interrupt_rate > 0)
2684                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2685
2686         if (hw->mac.type == e1000_82575)
2687                 newitr |= newitr << 16;
2688         else
2689                 newitr |= E1000_EITR_CNT_IGNR;
2690
2691         for (int i = 0; i < adapter->num_queues; i++) {
2692                 que = &adapter->queues[i];
2693                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2694         }
2695
2696         return;
2697 }
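/*
 * Worked IVAR example for the 82580/i350-class layout above
 * (illustrative): queues 0 and 1 share IVAR0 index 0; queue 0's RX
 * vector occupies bits 7:0 and queue 1's bits 23:16, while their TX
 * vectors land in bits 15:8 and 31:24 respectively, each OR'd with
 * E1000_IVAR_VALID.
 */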
2698
2699
2700 static void
2701 igb_free_pci_resources(struct adapter *adapter)
2702 {
2703         struct          igb_queue *que = adapter->queues;
2704         device_t        dev = adapter->dev;
2705         int             rid;
2706
2707         /*
2708         ** There is a slight possibility of a failure mode
2709         ** in attach that will result in entering this function
2710         ** before interrupt resources have been initialized, and
2711         ** in that case we do not want to execute the loops below.
2712         ** We can detect this reliably by the state of the adapter
2713         ** res pointer.
2714         */
2715         if (adapter->res == NULL)
2716                 goto mem;
2717
2718         /*
2719          * First release all the interrupt resources:
2720          */
2721         for (int i = 0; i < adapter->num_queues; i++, que++) {
2722                 rid = que->msix + 1;
2723                 if (que->tag != NULL) {
2724                         bus_teardown_intr(dev, que->res, que->tag);
2725                         que->tag = NULL;
2726                 }
2727                 if (que->res != NULL)
2728                         bus_release_resource(dev,
2729                             SYS_RES_IRQ, rid, que->res);
2730         }
2731
2732         /* Clean the Legacy or Link interrupt last */
2733         if (adapter->linkvec) /* we are doing MSIX */
2734                 rid = adapter->linkvec + 1;
2735         else
2736                 rid = (adapter->msix != 0) ? 1 : 0;
2737
2738         que = adapter->queues;
2739         if (adapter->tag != NULL) {
2740                 taskqueue_drain(que->tq, &adapter->link_task);
2741                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2742                 adapter->tag = NULL;
2743         }
2744         if (adapter->res != NULL)
2745                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2746
2747         for (int i = 0; i < adapter->num_queues; i++, que++) {
2748                 if (que->tq != NULL) {
2749 #ifndef IGB_LEGACY_TX
2750                         taskqueue_drain(que->tq, &que->txr->txq_task);
2751 #endif
2752                         taskqueue_drain(que->tq, &que->que_task);
2753                         taskqueue_free(que->tq);
2754                 }
2755         }
2756 mem:
2757         if (adapter->msix)
2758                 pci_release_msi(dev);
2759
2760         if (adapter->msix_mem != NULL)
2761                 bus_release_resource(dev, SYS_RES_MEMORY,
2762                     adapter->memrid, adapter->msix_mem);
2763
2764         if (adapter->pci_mem != NULL)
2765                 bus_release_resource(dev, SYS_RES_MEMORY,
2766                     PCIR_BAR(0), adapter->pci_mem);
2767
2768 }
2769
2770 /*
2771  * Setup Either MSI/X or MSI
2772  */
2773 static int
2774 igb_setup_msix(struct adapter *adapter)
2775 {
2776         device_t        dev = adapter->dev;
2777         int             bar, want, queues, msgs, maxqueues;
2778
2779         /* tuneable override */
2780         if (igb_enable_msix == 0)
2781                 goto msi;
2782
2783         /* First try MSI/X */
2784         msgs = pci_msix_count(dev); 
2785         if (msgs == 0)
2786                 goto msi;
2787         /*
2788         ** Some newer devices, as with ixgbe, may use
2789         ** a different BAR for the MSIX table, so we
2790         ** keep track of which one is used.
2791         */
2792         adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2793         bar = pci_read_config(dev, adapter->memrid, 4);
2794         if (bar == 0) /* use next bar */
2795                 adapter->memrid += 4;
2796         adapter->msix_mem = bus_alloc_resource_any(dev,
2797             SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2798         if (adapter->msix_mem == NULL) {
2799                 /* May not be enabled */
2800                 device_printf(adapter->dev,
2801                     "Unable to map MSIX table\n");
2802                 goto msi;
2803         }
2804
2805         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2806
2807         /* Override via tuneable */
2808         if (igb_num_queues != 0)
2809                 queues = igb_num_queues;
2810
2811 #ifdef  RSS
2812         /* If we're doing RSS, clamp at the number of RSS buckets */
2813         if (queues > rss_getnumbuckets())
2814                 queues = rss_getnumbuckets();
2815 #endif
2816
2817
2818         /* Sanity check based on HW */
2819         switch (adapter->hw.mac.type) {
2820                 case e1000_82575:
2821                         maxqueues = 4;
2822                         break;
2823                 case e1000_82576:
2824                 case e1000_82580:
2825                 case e1000_i350:
2826                 case e1000_i354:
2827                         maxqueues = 8;
2828                         break;
2829                 case e1000_i210:
2830                         maxqueues = 4;
2831                         break;
2832                 case e1000_i211:
2833                         maxqueues = 2;
2834                         break;
2835                 default:  /* VF interfaces */
2836                         maxqueues = 1;
2837                         break;
2838         }
2839
2840         /* Final clamp on the actual hardware capability */
2841         if (queues > maxqueues)
2842                 queues = maxqueues;
2843
2844         /*
2845         ** One vector (RX/TX pair) per queue
2846         ** plus one additional vector for the link interrupt
2847         */
2848         want = queues + 1;
2849         if (msgs >= want)
2850                 msgs = want;
2851         else {
2852                 device_printf(adapter->dev,
2853                     "MSIX Configuration Problem, "
2854                     "%d vectors available, but %d needed!\n",
2855                     msgs, want);
2856                 goto msi;
2857         }
2858         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2859                 device_printf(adapter->dev,
2860                     "Using MSIX interrupts with %d vectors\n", msgs);
2861                 adapter->num_queues = queues;
2862                 return (msgs);
2863         }
2864         /*
2865         ** If MSIX alloc failed or provided us with
2866         ** less than needed, free and fall through to MSI
2867         */
2868         pci_release_msi(dev);
2869
2870 msi:
2871         if (adapter->msix_mem != NULL) {
2872                 bus_release_resource(dev, SYS_RES_MEMORY,
2873                     adapter->memrid, adapter->msix_mem);
2874                 adapter->msix_mem = NULL;
2875         }
2876         msgs = 1;
2877         if (pci_alloc_msi(dev, &msgs) == 0) {
2878                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2879                 return (msgs);
2880         }
2881         device_printf(adapter->dev, "Using a Legacy interrupt\n");
2882         return (0);
2883 }
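
/*
 * Worked example of the vector accounting above (illustrative only,
 * the numbers are hypothetical): on a 4-core machine with an 82576
 * (maxqueues = 8) whose PCI config space reports 10 MSI-X messages,
 * queues = min(mp_ncpus, msgs - 1) = 4 and want = queues + 1 = 5,
 * so pci_alloc_msix() is asked for five vectors: one RX/TX pair per
 * queue plus one for the link interrupt.
 */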
2884
2885 /*********************************************************************
2886  *
2887  *  Initialize the DMA Coalescing feature
2888  *
2889  **********************************************************************/
2890 static void
2891 igb_init_dmac(struct adapter *adapter, u32 pba)
2892 {
2893         device_t        dev = adapter->dev;
2894         struct e1000_hw *hw = &adapter->hw;
2895         u32             dmac, reg = ~E1000_DMACR_DMAC_EN;
2896         u16             hwm;
2897
2898         if (hw->mac.type == e1000_i211)
2899                 return;
2900
2901         if (hw->mac.type > e1000_82580) {
2902
2903                 if (adapter->dmac == 0) { /* Disabling it */
2904                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2905                         return;
2906                 } else
2907                         device_printf(dev, "DMA Coalescing enabled\n");
2908
2909                 /* Set starting threshold */
2910                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2911
2912                 hwm = 64 * pba - adapter->max_frame_size / 16;
2913                 if (hwm < 64 * (pba - 6))
2914                         hwm = 64 * (pba - 6);
2915                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2916                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2917                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2918                     & E1000_FCRTC_RTH_COAL_MASK);
2919                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2920
2921
2922                 dmac = pba - adapter->max_frame_size / 512;
2923                 if (dmac < pba - 10)
2924                         dmac = pba - 10;
2925                 reg = E1000_READ_REG(hw, E1000_DMACR);
2926                 reg &= ~E1000_DMACR_DMACTHR_MASK;
2927                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2928                     & E1000_DMACR_DMACTHR_MASK);
2929
2930                 /* transition to L0s or L1 if available */
2931                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2932
2933                 /* Check for a 2.5Gb backplane connection before
2934                 * configuring the watchdog timer: the value is in
2935                 * msec, expressed in 12.8 usec units on a 2.5Gb
2936                 * link and in 32 usec units on all other
2937                 * connections.
2938                 */
2939                 if (hw->mac.type == e1000_i354) {
2940                         int status = E1000_READ_REG(hw, E1000_STATUS);
2941                         if ((status & E1000_STATUS_2P5_SKU) &&
2942                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2943                                 reg |= ((adapter->dmac * 5) >> 6);
2944                         else
2945                                 reg |= (adapter->dmac >> 5);
2946                 } else {
2947                         reg |= (adapter->dmac >> 5);
2948                 }
2949
2950                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2951
2952                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2953
2954                 /* Set the interval before transition */
2955                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2956                 if (hw->mac.type == e1000_i350)
2957                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
2958                 /*
2959                 ** On a 2.5Gb connection the TTLX unit is 0.4 usec,
2960                 ** so a 4 usec delay is 10 units (0xA); otherwise 0x4.
2961                 */
2962                 if (hw->mac.type == e1000_i354) {
2963                         int status = E1000_READ_REG(hw, E1000_STATUS);
2964                         if ((status & E1000_STATUS_2P5_SKU) &&
2965                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2966                                 reg |= 0xA;
2967                         else
2968                                 reg |= 0x4;
2969                 } else {
2970                         reg |= 0x4;
2971                 }
2972
2973                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2974
2975                 /* free space in tx packet buffer to wake from DMA coal */
2976                 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2977                     (2 * adapter->max_frame_size)) >> 6);
2978
2979                 /* make low power state decision controlled by DMA coal */
2980                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2981                 reg &= ~E1000_PCIEMISC_LX_DECISION;
2982                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2983
2984         } else if (hw->mac.type == e1000_82580) {
2985                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2986                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2987                     reg & ~E1000_PCIEMISC_LX_DECISION);
2988                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
2989         }
2990 }
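
/*
 * Threshold arithmetic example for igb_init_dmac() (a sketch,
 * assuming a 34KB PBA and a 1522-byte max frame):
 * - high water mark, in 16-byte units:
 *	hwm = 64 * 34 - 1522 / 16 = 2176 - 95 = 2081,
 *   above the 64 * (34 - 6) = 1792 floor, so it stands; and
 * - DMA coalescing Rx threshold, in KB:
 *	dmac = 34 - 1522 / 512 = 34 - 2 = 32,
 *   above the 34 - 10 = 24 floor.
 */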
2991
2992
2993 /*********************************************************************
2994  *
2995  *  Set up a fresh starting state
2996  *
2997  **********************************************************************/
2998 static void
2999 igb_reset(struct adapter *adapter)
3000 {
3001         device_t        dev = adapter->dev;
3002         struct e1000_hw *hw = &adapter->hw;
3003         struct e1000_fc_info *fc = &hw->fc;
3004         struct ifnet    *ifp = adapter->ifp;
3005         u32             pba = 0;
3006         u16             hwm;
3007
3008         INIT_DEBUGOUT("igb_reset: begin");
3009
3010         /* Let the firmware know the OS is in control */
3011         igb_get_hw_control(adapter);
3012
3013         /*
3014          * Packet Buffer Allocation (PBA)
3015          * Writing PBA sets the receive portion of the buffer;
3016          * the remainder is used for the transmit buffer.
3017          */
3018         switch (hw->mac.type) {
3019         case e1000_82575:
3020                 pba = E1000_PBA_32K;
3021                 break;
3022         case e1000_82576:
3023         case e1000_vfadapt:
3024                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3025                 pba &= E1000_RXPBS_SIZE_MASK_82576;
3026                 break;
3027         case e1000_82580:
3028         case e1000_i350:
3029         case e1000_i354:
3030         case e1000_vfadapt_i350:
3031                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3032                 pba = e1000_rxpbs_adjust_82580(pba);
3033                 break;
3034         case e1000_i210:
3035         case e1000_i211:
3036                 pba = E1000_PBA_34K;
                     break;
3037         default:
3038                 break;
3039         }
3040
3041         /* Special needs in case of Jumbo frames */
3042         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3043                 u32 tx_space, min_tx, min_rx;
3044                 pba = E1000_READ_REG(hw, E1000_PBA);
3045                 tx_space = pba >> 16;
3046                 pba &= 0xffff;
3047                 min_tx = (adapter->max_frame_size +
3048                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3049                 min_tx = roundup2(min_tx, 1024);
3050                 min_tx >>= 10;
3051                 min_rx = adapter->max_frame_size;
3052                 min_rx = roundup2(min_rx, 1024);
3053                 min_rx >>= 10;
3054                 if (tx_space < min_tx &&
3055                     ((min_tx - tx_space) < pba)) {
3056                         pba = pba - (min_tx - tx_space);
3057                         /*
3058                          * if short on rx space, rx wins
3059                          * and must trump tx adjustment
3060                          */
3061                         if (pba < min_rx)
3062                                 pba = min_rx;
3063                 }
3064                 E1000_WRITE_REG(hw, E1000_PBA, pba);
3065         }
3066
3067         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3068
3069         /*
3070          * These parameters control the automatic generation (Tx) and
3071          * response (Rx) to Ethernet PAUSE frames.
3072          * - High water mark should allow for at least two frames to be
3073          *   received after sending an XOFF.
3074          * - Low water mark works best when it is very near the high water mark.
3075          *   This allows the receiver to restart by sending XON when it has
3076          *   drained a bit.
3077          */
3078         hwm = min(((pba << 10) * 9 / 10),
3079             ((pba << 10) - 2 * adapter->max_frame_size));
3080
3081         if (hw->mac.type < e1000_82576) {
3082                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3083                 fc->low_water = fc->high_water - 8;
3084         } else {
3085                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3086                 fc->low_water = fc->high_water - 16;
3087         }
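
        /*
         * Worked example of the watermark math above (illustrative,
         * with pba = 34 (KB) and a 1522-byte max frame):
         *	hwm = min(34816 * 9 / 10, 34816 - 2 * 1522)
         *	    = min(31334, 31772) = 31334
         * so on an 82576 or later, high_water = 31334 & 0xFFF0 = 31328
         * and low_water = 31312.
         */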
3088
3089         fc->pause_time = IGB_FC_PAUSE_TIME;
3090         fc->send_xon = TRUE;
3091         if (adapter->fc)
3092                 fc->requested_mode = adapter->fc;
3093         else
3094                 fc->requested_mode = e1000_fc_default;
3095
3096         /* Issue a global reset */
3097         e1000_reset_hw(hw);
3098         E1000_WRITE_REG(hw, E1000_WUC, 0);
3099
3100         /* Reset for AutoMediaDetect */
3101         if (adapter->flags & IGB_MEDIA_RESET) {
3102                 e1000_setup_init_funcs(hw, TRUE);
3103                 e1000_get_bus_info(hw);
3104                 adapter->flags &= ~IGB_MEDIA_RESET;
3105         }
3106
3107         if (e1000_init_hw(hw) < 0)
3108                 device_printf(dev, "Hardware Initialization Failed\n");
3109
3110         /* Setup DMA Coalescing */
3111         igb_init_dmac(adapter, pba);
3112
3113         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3114         e1000_get_phy_info(hw);
3115         e1000_check_for_link(hw);
3116         return;
3117 }
3118
3119 /*********************************************************************
3120  *
3121  *  Setup networking device structure and register an interface.
3122  *
3123  **********************************************************************/
3124 static int
3125 igb_setup_interface(device_t dev, struct adapter *adapter)
3126 {
3127         struct ifnet   *ifp;
3128
3129         INIT_DEBUGOUT("igb_setup_interface: begin");
3130
3131         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3132         if (ifp == NULL) {
3133                 device_printf(dev, "cannot allocate ifnet structure\n");
3134                 return (-1);
3135         }
3136         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3137         ifp->if_init =  igb_init;
3138         ifp->if_softc = adapter;
3139         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3140         ifp->if_ioctl = igb_ioctl;
3141         ifp->if_get_counter = igb_get_counter;
3142
3143         /* TSO parameters */
3144         ifp->if_hw_tsomax = IP_MAXPACKET;
3145         ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
3146         ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
3147
3148 #ifndef IGB_LEGACY_TX
3149         ifp->if_transmit = igb_mq_start;
3150         ifp->if_qflush = igb_qflush;
3151 #else
3152         ifp->if_start = igb_start;
3153         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3154         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3155         IFQ_SET_READY(&ifp->if_snd);
3156 #endif
3157
3158         ether_ifattach(ifp, adapter->hw.mac.addr);
3159
3160         ifp->if_capabilities = ifp->if_capenable = 0;
3161
3162         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3163         ifp->if_capabilities |= IFCAP_TSO;
3164         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3165         ifp->if_capenable = ifp->if_capabilities;
3166
3167         /* Advertise LRO capability, but leave it disabled by default */
3168         ifp->if_capabilities |= IFCAP_LRO;
3169
3170 #ifdef DEVICE_POLLING
3171         ifp->if_capabilities |= IFCAP_POLLING;
3172 #endif
3173
3174         /*
3175          * Tell the upper layer(s) we
3176          * support full VLAN capability.
3177          */
3178         ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3179         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3180                              |  IFCAP_VLAN_HWTSO
3181                              |  IFCAP_VLAN_MTU;
3182         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3183                           |  IFCAP_VLAN_HWTSO
3184                           |  IFCAP_VLAN_MTU;
3185
3186         /*
3187         ** Don't enable this by default: if vlans are created
3188         ** on another pseudo device (e.g. lagg), vlan events
3189         ** are not passed through, breaking operation, while
3190         ** with HW FILTER off it works. If you use vlans
3191         ** directly on the igb interface you can enable this
3192         ** and get full hardware tag filtering.
3193         */
3194         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3195
3196         /*
3197          * Specify the media types supported by this adapter and register
3198          * callbacks to update media and link information
3199          */
3200         ifmedia_init(&adapter->media, IFM_IMASK,
3201             igb_media_change, igb_media_status);
3202         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3203             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3204                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3205                             0, NULL);
3206                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3207         } else {
3208                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3209                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3210                             0, NULL);
3211                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3212                             0, NULL);
3213                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3214                             0, NULL);
3215                 if (adapter->hw.phy.type != e1000_phy_ife) {
3216                         ifmedia_add(&adapter->media,
3217                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3218                         ifmedia_add(&adapter->media,
3219                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3220                 }
3221         }
3222         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3223         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3224         return (0);
3225 }
3226
3227
3228 /*
3229  * Manage DMA'able memory.
3230  */
3231 static void
3232 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3233 {
3234         if (error)
3235                 return;
3236         *(bus_addr_t *) arg = segs[0].ds_addr;
3237 }
3238
3239 static int
3240 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3241         struct igb_dma_alloc *dma, int mapflags)
3242 {
3243         int error;
3244
3245         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3246                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3247                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3248                                 BUS_SPACE_MAXADDR,      /* highaddr */
3249                                 NULL, NULL,             /* filter, filterarg */
3250                                 size,                   /* maxsize */
3251                                 1,                      /* nsegments */
3252                                 size,                   /* maxsegsize */
3253                                 0,                      /* flags */
3254                                 NULL,                   /* lockfunc */
3255                                 NULL,                   /* lockarg */
3256                                 &dma->dma_tag);
3257         if (error) {
3258                 device_printf(adapter->dev,
3259                     "%s: bus_dma_tag_create failed: %d\n",
3260                     __func__, error);
3261                 goto fail_0;
3262         }
3263
3264         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3265             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3266         if (error) {
3267                 device_printf(adapter->dev,
3268                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3269                     __func__, (uintmax_t)size, error);
3270                 goto fail_2;
3271         }
3272
3273         dma->dma_paddr = 0;
3274         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3275             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3276         if (error || dma->dma_paddr == 0) {
3277                 device_printf(adapter->dev,
3278                     "%s: bus_dmamap_load failed: %d\n",
3279                     __func__, error);
3280                 goto fail_3;
3281         }
3282
3283         return (0);
3284
3285 fail_3:
3286         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3287 fail_2:
3288         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3289         bus_dma_tag_destroy(dma->dma_tag);
3290 fail_0:
3291         dma->dma_tag = NULL;
3292
3293         return (error);
3294 }
3295
3296 static void
3297 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3298 {
3299         if (dma->dma_tag == NULL)
3300                 return;
3301         if (dma->dma_paddr != 0) {
3302                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3303                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3304                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3305                 dma->dma_paddr = 0;
3306         }
3307         if (dma->dma_vaddr != NULL) {
3308                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3309                 dma->dma_vaddr = NULL;
3310         }
3311         bus_dma_tag_destroy(dma->dma_tag);
3312         dma->dma_tag = NULL;
3313 }
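
/*
 * Typical usage of the two helpers above (a minimal sketch; the
 * "desc" name is hypothetical, not part of the driver):
 *
 *	struct igb_dma_alloc desc;
 *
 *	if (igb_dma_malloc(adapter, size, &desc, BUS_DMA_NOWAIT) == 0) {
 *		... use desc.dma_vaddr (KVA) / desc.dma_paddr (bus) ...
 *		igb_dma_free(adapter, &desc);
 *	}
 */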
3314
3315
3316 /*********************************************************************
3317  *
3318  *  Allocate memory for the transmit and receive rings, and then
3319  *  the descriptors associated with each, called only once at attach.
3320  *
3321  **********************************************************************/
3322 static int
3323 igb_allocate_queues(struct adapter *adapter)
3324 {
3325         device_t dev = adapter->dev;
3326         struct igb_queue        *que = NULL;
3327         struct tx_ring          *txr = NULL;
3328         struct rx_ring          *rxr = NULL;
3329         int rsize, tsize, error = E1000_SUCCESS;
3330         int txconf = 0, rxconf = 0;
3331
3332         /* First allocate the top level queue structs */
3333         if (!(adapter->queues =
3334             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3335             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3336                 device_printf(dev, "Unable to allocate queue memory\n");
3337                 error = ENOMEM;
3338                 goto fail;
3339         }
3340
3341         /* Next allocate the TX ring struct memory */
3342         if (!(adapter->tx_rings =
3343             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3344             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3345                 device_printf(dev, "Unable to allocate TX ring memory\n");
3346                 error = ENOMEM;
3347                 goto tx_fail;
3348         }
3349
3350         /* Now allocate the RX */
3351         if (!(adapter->rx_rings =
3352             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3353             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3354                 device_printf(dev, "Unable to allocate RX ring memory\n");
3355                 error = ENOMEM;
3356                 goto rx_fail;
3357         }
3358
3359         tsize = roundup2(adapter->num_tx_desc *
3360             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3361         /*
3362          * Now set up the TX queues, txconf is needed to handle the
3363          * possibility that things fail midcourse and we need to
3364          * undo memory gracefully
3365          */ 
3366         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3367                 /* Set up some basics */
3368                 txr = &adapter->tx_rings[i];
3369                 txr->adapter = adapter;
3370                 txr->me = i;
3371                 txr->num_desc = adapter->num_tx_desc;
3372
3373                 /* Initialize the TX lock */
3374                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3375                     device_get_nameunit(dev), txr->me);
3376                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3377
3378                 if (igb_dma_malloc(adapter, tsize,
3379                         &txr->txdma, BUS_DMA_NOWAIT)) {
3380                         device_printf(dev,
3381                             "Unable to allocate TX Descriptor memory\n");
3382                         error = ENOMEM;
3383                         goto err_tx_desc;
3384                 }
3385                 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3386                 bzero((void *)txr->tx_base, tsize);
3387
3388                 /* Now allocate transmit buffers for the ring */
3389                 if (igb_allocate_transmit_buffers(txr)) {
3390                         device_printf(dev,
3391                             "Critical Failure setting up transmit buffers\n");
3392                         error = ENOMEM;
3393                         goto err_tx_desc;
3394                 }
3395 #ifndef IGB_LEGACY_TX
3396                 /* Allocate a buf ring */
3397                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3398                     M_WAITOK, &txr->tx_mtx);
3399 #endif
3400         }
3401
3402         /*
3403          * Next the RX queues...
3404          */ 
3405         rsize = roundup2(adapter->num_rx_desc *
3406             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3407         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3408                 rxr = &adapter->rx_rings[i];
3409                 rxr->adapter = adapter;
3410                 rxr->me = i;
3411
3412                 /* Initialize the RX lock */
3413                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3414                     device_get_nameunit(dev), rxr->me);
3415                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3416
3417                 if (igb_dma_malloc(adapter, rsize,
3418                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3419                         device_printf(dev,
3420                             "Unable to allocate RX Descriptor memory\n");
3421                         error = ENOMEM;
3422                         goto err_rx_desc;
3423                 }
3424                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3425                 bzero((void *)rxr->rx_base, rsize);
3426
3427                 /* Allocate receive buffers for the ring*/
3428                 if (igb_allocate_receive_buffers(rxr)) {
3429                         device_printf(dev,
3430                             "Critical Failure setting up receive buffers\n");
3431                         error = ENOMEM;
3432                         goto err_rx_desc;
3433                 }
3434         }
3435
3436         /*
3437         ** Finally set up the queue holding structs
3438         */
3439         for (int i = 0; i < adapter->num_queues; i++) {
3440                 que = &adapter->queues[i];
3441                 que->adapter = adapter;
3442                 que->txr = &adapter->tx_rings[i];
3443                 que->rxr = &adapter->rx_rings[i];
3444         }
3445
3446         return (0);
3447
3448 err_rx_desc:
3449         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3450                 igb_dma_free(adapter, &rxr->rxdma);
3451 err_tx_desc:
3452         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3453                 igb_dma_free(adapter, &txr->txdma);
3454         free(adapter->rx_rings, M_DEVBUF);
3455 rx_fail:
3456 #ifndef IGB_LEGACY_TX
3457         /* txr is NULL if the RX ring allocation itself failed */
             if (txr != NULL && txr->br != NULL)
                     buf_ring_free(txr->br, M_DEVBUF);
3458 #endif
3459         free(adapter->tx_rings, M_DEVBUF);
3460 tx_fail:
3461         free(adapter->queues, M_DEVBUF);
3462 fail:
3463         return (error);
3464 }
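
/*
 * Note on the unwind above: txconf and rxconf count how many rings
 * were fully set up, so on failure the error labels free exactly the
 * descriptor DMA areas that were allocated, then fall through to
 * release the ring arrays and queue structs in reverse order of
 * allocation.
 */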
3465
3466 /*********************************************************************
3467  *
3468  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3469  *  the information needed to transmit a packet on the wire. This is
3470  *  called only once at attach, setup is done every reset.
3471  *
3472  **********************************************************************/
3473 static int
3474 igb_allocate_transmit_buffers(struct tx_ring *txr)
3475 {
3476         struct adapter *adapter = txr->adapter;
3477         device_t dev = adapter->dev;
3478         struct igb_tx_buf *txbuf;
3479         int error, i;
3480
3481         /*
3482          * Setup DMA descriptor areas.
3483          */
3484         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3485                                1, 0,                    /* alignment, bounds */
3486                                BUS_SPACE_MAXADDR,       /* lowaddr */
3487                                BUS_SPACE_MAXADDR,       /* highaddr */
3488                                NULL, NULL,              /* filter, filterarg */
3489                                IGB_TSO_SIZE,            /* maxsize */
3490                                IGB_MAX_SCATTER,         /* nsegments */
3491                                PAGE_SIZE,               /* maxsegsize */
3492                                0,                       /* flags */
3493                                NULL,                    /* lockfunc */
3494                                NULL,                    /* lockfuncarg */
3495                                &txr->txtag))) {
3496                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3497                 goto fail;
3498         }
3499
3500         if (!(txr->tx_buffers =
3501             (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3502             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3503                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3504                 error = ENOMEM;
3505                 goto fail;
3506         }
3507
3508         /* Create the descriptor buffer dma maps */
3509         txbuf = txr->tx_buffers;
3510         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3511                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3512                 if (error != 0) {
3513                         device_printf(dev, "Unable to create TX DMA map\n");
3514                         goto fail;
3515                 }
3516         }
3517
3518         return 0;
3519 fail:
3520         /* Free everything; this handles failure partway through */
3521         igb_free_transmit_structures(adapter);
3522         return (error);
3523 }
3524
3525 /*********************************************************************
3526  *
3527  *  Initialize a transmit ring.
3528  *
3529  **********************************************************************/
3530 static void
3531 igb_setup_transmit_ring(struct tx_ring *txr)
3532 {
3533         struct adapter *adapter = txr->adapter;
3534         struct igb_tx_buf *txbuf;
3535         int i;
3536 #ifdef DEV_NETMAP
3537         struct netmap_adapter *na = NA(adapter->ifp);
3538         struct netmap_slot *slot;
3539 #endif /* DEV_NETMAP */
3540
3541         /* Clear the old descriptor contents */
3542         IGB_TX_LOCK(txr);
3543 #ifdef DEV_NETMAP
3544         slot = netmap_reset(na, NR_TX, txr->me, 0);
3545 #endif /* DEV_NETMAP */
3546         bzero((void *)txr->tx_base,
3547               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3548         /* Reset indices */
3549         txr->next_avail_desc = 0;
3550         txr->next_to_clean = 0;
3551
3552         /* Free any existing tx buffers. */
3553         txbuf = txr->tx_buffers;
3554         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3555                 if (txbuf->m_head != NULL) {
3556                         bus_dmamap_sync(txr->txtag, txbuf->map,
3557                             BUS_DMASYNC_POSTWRITE);
3558                         bus_dmamap_unload(txr->txtag, txbuf->map);
3559                         m_freem(txbuf->m_head);
3560                         txbuf->m_head = NULL;
3561                 }
3562 #ifdef DEV_NETMAP
3563                 if (slot) {
3564                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3565                         /* no need to set the address */
3566                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3567                 }
3568 #endif /* DEV_NETMAP */
3569                 /* clear the watch index */
3570                 txbuf->eop = NULL;
3571         }
3572
3573         /* Set number of descriptors available */
3574         txr->tx_avail = adapter->num_tx_desc;
3575
3576         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3577             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3578         IGB_TX_UNLOCK(txr);
3579 }
3580
3581 /*********************************************************************
3582  *
3583  *  Initialize all transmit rings.
3584  *
3585  **********************************************************************/
3586 static void
3587 igb_setup_transmit_structures(struct adapter *adapter)
3588 {
3589         struct tx_ring *txr = adapter->tx_rings;
3590
3591         for (int i = 0; i < adapter->num_queues; i++, txr++)
3592                 igb_setup_transmit_ring(txr);
3593
3594         return;
3595 }
3596
3597 /*********************************************************************
3598  *
3599  *  Enable transmit unit.
3600  *
3601  **********************************************************************/
3602 static void
3603 igb_initialize_transmit_units(struct adapter *adapter)
3604 {
3605         struct tx_ring  *txr = adapter->tx_rings;
3606         struct e1000_hw *hw = &adapter->hw;
3607         u32             tctl, txdctl;
3608
3609         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3610         tctl = txdctl = 0;
3611
3612         /* Setup the Tx Descriptor Rings */
3613         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3614                 u64 bus_addr = txr->txdma.dma_paddr;
3615
3616                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3617                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3618                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3619                     (uint32_t)(bus_addr >> 32));
3620                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3621                     (uint32_t)bus_addr);
3622
3623                 /* Setup the HW Tx Head and Tail descriptor pointers */
3624                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3625                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3626
3627                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3628                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3629                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3630
3631                 txr->queue_status = IGB_QUEUE_IDLE;
3632
3633                 txdctl |= IGB_TX_PTHRESH;
3634                 txdctl |= IGB_TX_HTHRESH << 8;
3635                 txdctl |= IGB_TX_WTHRESH << 16;
3636                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3637                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3638         }
3639
3640         if (adapter->vf_ifp)
3641                 return;
3642
3643         e1000_config_collision_dist(hw);
3644
3645         /* Program the Transmit Control Register */
3646         tctl = E1000_READ_REG(hw, E1000_TCTL);
3647         tctl &= ~E1000_TCTL_CT;
3648         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3649                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3650
3651         /* This write will effectively turn on the transmit unit. */
3652         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3653 }
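
/*
 * The TXDCTL write above packs three prefetch/write-back thresholds
 * into one register: bits 5:0 PTHRESH, 13:8 HTHRESH, 21:16 WTHRESH,
 * plus the queue enable bit.  For example (values illustrative only),
 * PTHRESH = 8, HTHRESH = 1 and WTHRESH = 16 give
 * 8 | (1 << 8) | (16 << 16) = 0x100108 before
 * E1000_TXDCTL_QUEUE_ENABLE is OR'd in.
 */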
3654
3655 /*********************************************************************
3656  *
3657  *  Free all transmit rings.
3658  *
3659  **********************************************************************/
3660 static void
3661 igb_free_transmit_structures(struct adapter *adapter)
3662 {
3663         struct tx_ring *txr = adapter->tx_rings;
3664
3665         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3666                 IGB_TX_LOCK(txr);
3667                 igb_free_transmit_buffers(txr);
3668                 igb_dma_free(adapter, &txr->txdma);
3669                 IGB_TX_UNLOCK(txr);
3670                 IGB_TX_LOCK_DESTROY(txr);
3671         }
3672         free(adapter->tx_rings, M_DEVBUF);
3673 }
3674
3675 /*********************************************************************
3676  *
3677  *  Free transmit ring related data structures.
3678  *
3679  **********************************************************************/
3680 static void
3681 igb_free_transmit_buffers(struct tx_ring *txr)
3682 {
3683         struct adapter *adapter = txr->adapter;
3684         struct igb_tx_buf *tx_buffer;
3685         int             i;
3686
3687         INIT_DEBUGOUT("free_transmit_ring: begin");
3688
3689         if (txr->tx_buffers == NULL)
3690                 return;
3691
3692         tx_buffer = txr->tx_buffers;
3693         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3694                 if (tx_buffer->m_head != NULL) {
3695                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3696                             BUS_DMASYNC_POSTWRITE);
3697                         bus_dmamap_unload(txr->txtag,
3698                             tx_buffer->map);
3699                         m_freem(tx_buffer->m_head);
3700                         tx_buffer->m_head = NULL;
3701                         if (tx_buffer->map != NULL) {
3702                                 bus_dmamap_destroy(txr->txtag,
3703                                     tx_buffer->map);
3704                                 tx_buffer->map = NULL;
3705                         }
3706                 } else if (tx_buffer->map != NULL) {
3707                         bus_dmamap_unload(txr->txtag,
3708                             tx_buffer->map);
3709                         bus_dmamap_destroy(txr->txtag,
3710                             tx_buffer->map);
3711                         tx_buffer->map = NULL;
3712                 }
3713         }
3714 #ifndef IGB_LEGACY_TX
3715         if (txr->br != NULL)
3716                 buf_ring_free(txr->br, M_DEVBUF);
3717 #endif
3718         if (txr->tx_buffers != NULL) {
3719                 free(txr->tx_buffers, M_DEVBUF);
3720                 txr->tx_buffers = NULL;
3721         }
3722         if (txr->txtag != NULL) {
3723                 bus_dma_tag_destroy(txr->txtag);
3724                 txr->txtag = NULL;
3725         }
3726         return;
3727 }
3728
3729 /**********************************************************************
3730  *
3731  *  Setup work for hardware segmentation offload (TSO) on
3732  *  adapters using advanced tx descriptors
3733  *
3734  **********************************************************************/
3735 static int
3736 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3737     u32 *cmd_type_len, u32 *olinfo_status)
3738 {
3739         struct adapter *adapter = txr->adapter;
3740         struct e1000_adv_tx_context_desc *TXD;
3741         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3742         u32 mss_l4len_idx = 0, paylen;
3743         u16 vtag = 0, eh_type;
3744         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3745         struct ether_vlan_header *eh;
3746 #ifdef INET6
3747         struct ip6_hdr *ip6;
3748 #endif
3749 #ifdef INET
3750         struct ip *ip;
3751 #endif
3752         struct tcphdr *th;
3753
3754
3755         /*
3756          * Determine where frame payload starts.
3757          * Jump over vlan headers if already present
3758          */
3759         eh = mtod(mp, struct ether_vlan_header *);
3760         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3761                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3762                 eh_type = eh->evl_proto;
3763         } else {
3764                 ehdrlen = ETHER_HDR_LEN;
3765                 eh_type = eh->evl_encap_proto;
3766         }
3767
3768         switch (ntohs(eh_type)) {
3769 #ifdef INET6
3770         case ETHERTYPE_IPV6:
3771                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3772                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3773                 if (ip6->ip6_nxt != IPPROTO_TCP)
3774                         return (ENXIO);
3775                 ip_hlen = sizeof(struct ip6_hdr);
3777                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3778                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3779                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3780                 break;
3781 #endif
3782 #ifdef INET
3783         case ETHERTYPE_IP:
3784                 ip = (struct ip *)(mp->m_data + ehdrlen);
3785                 if (ip->ip_p != IPPROTO_TCP)
3786                         return (ENXIO);
3787                 ip->ip_sum = 0;
3788                 ip_hlen = ip->ip_hl << 2;
3789                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3790                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3791                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3792                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3793                 /* Tell transmit desc to also do IPv4 checksum. */
3794                 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3795                 break;
3796 #endif
3797         default:
3798                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3799                     __func__, ntohs(eh_type));
3800                 break;
3801         }
3802
3803         ctxd = txr->next_avail_desc;
3804         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3805
3806         tcp_hlen = th->th_off << 2;
3807
3808         /* This is used in the transmit desc in encap */
3809         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3810
3811         /* VLAN MACLEN IPLEN */
3812         if (mp->m_flags & M_VLANTAG) {
3813                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3814                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3815         }
3816
3817         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3818         vlan_macip_lens |= ip_hlen;
3819         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3820
3821         /* ADV DTYPE TUCMD */
3822         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3823         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3824         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3825
3826         /* MSS L4LEN IDX */
3827         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3828         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3829         /* 82575 needs the queue index added */
3830         if (adapter->hw.mac.type == e1000_82575)
3831                 mss_l4len_idx |= txr->me << 4;
3832         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3833
3834         TXD->seqnum_seed = htole32(0);
3835
3836         if (++ctxd == txr->num_desc)
3837                 ctxd = 0;
3838
3839         txr->tx_avail--;
3840         txr->next_avail_desc = ctxd;
3841         *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3842         *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3843         *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3844         ++txr->tso_tx;
3845         return (0);
3846 }
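
/*
 * Header-length example for the TSO setup above (illustrative): an
 * untagged IPv4/TCP packet with no IP or TCP options has
 * ehdrlen = 14, ip_hlen = 20 and tcp_hlen = 20, so for
 * m_pkthdr.len = 65535 the descriptor payload length is
 * paylen = 65535 - 54 = 65481, and vlan_macip_lens packs
 * (14 << E1000_ADVTXD_MACLEN_SHIFT) | 20.
 */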
3847
3848 /*********************************************************************
3849  *
3850  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3851  *
3852  **********************************************************************/
3853
3854 static int
3855 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3856     u32 *cmd_type_len, u32 *olinfo_status)
3857 {
3858         struct e1000_adv_tx_context_desc *TXD;
3859         struct adapter *adapter = txr->adapter;
3860         struct ether_vlan_header *eh;
3861         struct ip *ip;
3862         struct ip6_hdr *ip6;
3863         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3864         int     ehdrlen, ip_hlen = 0;
3865         u16     etype;
3866         u8      ipproto = 0;
3867         int     offload = TRUE;
3868         int     ctxd = txr->next_avail_desc;
3869         u16     vtag = 0;
3870
3871         /* First check if TSO is to be used */
3872         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3873                 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3874
3875         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3876                 offload = FALSE;
3877
3878         /* Indicate the whole packet as payload when not doing TSO */
3879         *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3880
3881         /* Now ready a context descriptor */
3882         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3883
3884         /*
3885         ** In advanced descriptors the vlan tag must 
3886         ** be placed into the context descriptor. Hence
3887         ** we need to make one even if not doing offloads.
3888         */
3889         if (mp->m_flags & M_VLANTAG) {
3890                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3891                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3892         } else if (offload == FALSE) /* ... no offload to do */
3893                 return (0);
3894
3895         /*
3896          * Determine where frame payload starts.
3897          * Jump over vlan headers if already present,
3898          * helpful for QinQ too.
3899          */
3900         eh = mtod(mp, struct ether_vlan_header *);
3901         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3902                 etype = ntohs(eh->evl_proto);
3903                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3904         } else {
3905                 etype = ntohs(eh->evl_encap_proto);
3906                 ehdrlen = ETHER_HDR_LEN;
3907         }
3908
3909         /* Set the ether header length */
3910         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3911
3912         switch (etype) {
3913                 case ETHERTYPE_IP:
3914                         ip = (struct ip *)(mp->m_data + ehdrlen);
3915                         ip_hlen = ip->ip_hl << 2;
3916                         ipproto = ip->ip_p;
3917                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3918                         break;
3919                 case ETHERTYPE_IPV6:
3920                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3921                         ip_hlen = sizeof(struct ip6_hdr);
3922                         /* XXX-BZ this will go badly in case of ext hdrs. */
3923                         ipproto = ip6->ip6_nxt;
3924                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3925                         break;
3926                 default:
3927                         offload = FALSE;
3928                         break;
3929         }
3930
3931         vlan_macip_lens |= ip_hlen;
3932         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3933
3934         switch (ipproto) {
3935                 case IPPROTO_TCP:
3936                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3937                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3938                         break;
3939                 case IPPROTO_UDP:
3940                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3941                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3942                         break;
3943
3944 #if __FreeBSD_version >= 800000
3945                 case IPPROTO_SCTP:
3946                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3947                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3948                         break;
3949 #endif
3950                 default:
3951                         offload = FALSE;
3952                         break;
3953         }
3954
3955         if (offload) /* For the TX descriptor setup */
3956                 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3957
3958         /* 82575 needs the queue index added */
3959         if (adapter->hw.mac.type == e1000_82575)
3960                 mss_l4len_idx = txr->me << 4;
3961
3962         /* Now copy bits into descriptor */
3963         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3964         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3965         TXD->seqnum_seed = htole32(0);
3966         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3967
3968         /* We've consumed the first desc, adjust counters */
3969         if (++ctxd == txr->num_desc)
3970                 ctxd = 0;
3971         txr->next_avail_desc = ctxd;
3972         --txr->tx_avail;
3973
3974         return (0);
3975 }
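
/*
 * Offload example (illustrative): a plain IPv4/UDP packet queued with
 * CSUM_UDP set gets a context descriptor carrying
 * E1000_ADVTXD_TUCMD_IPV4 | E1000_ADVTXD_TUCMD_L4T_UDP, and the
 * E1000_TXD_POPTS_TXSM bit in olinfo_status tells the following data
 * descriptors to insert the L4 checksum.
 */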
3976
3977 /**********************************************************************
3978  *
3979  *  Examine each tx_buffer in the used queue. If the hardware is done
3980  *  processing the packet then free associated resources. The
3981  *  tx_buffer is put back on the free queue.
3982  *
3983  *  A TRUE return means there is work in the ring to clean; FALSE means it is empty.
3984  **********************************************************************/
3985 static bool
3986 igb_txeof(struct tx_ring *txr)
3987 {
3988         struct adapter          *adapter = txr->adapter;
3989 #ifdef DEV_NETMAP
3990         struct ifnet            *ifp = adapter->ifp;
3991 #endif /* DEV_NETMAP */
3992         u32                     work, processed = 0;
3993         int                     limit = adapter->tx_process_limit;
3994         struct igb_tx_buf       *buf;
3995         union e1000_adv_tx_desc *txd;
3996
3997         mtx_assert(&txr->tx_mtx, MA_OWNED);
3998
3999 #ifdef DEV_NETMAP
4000         if (netmap_tx_irq(ifp, txr->me))
4001                 return (FALSE);
4002 #endif /* DEV_NETMAP */
4003
4004         if (txr->tx_avail == txr->num_desc) {
4005                 txr->queue_status = IGB_QUEUE_IDLE;
4006                 return FALSE;
4007         }
4008
4009         /* Get work starting point */
4010         work = txr->next_to_clean;
4011         buf = &txr->tx_buffers[work];
4012         txd = &txr->tx_base[work];
4013         work -= txr->num_desc; /* bias negative: index hits 0 at ring wrap */
4014         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4015             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4016         do {
4017                 union e1000_adv_tx_desc *eop = buf->eop;
4018                 if (eop == NULL) /* No work */
4019                         break;
4020
4021                 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4022                         break;  /* I/O not complete */
4023
4024                 if (buf->m_head) {
4025                         txr->bytes +=
4026                             buf->m_head->m_pkthdr.len;
4027                         bus_dmamap_sync(txr->txtag,
4028                             buf->map,
4029                             BUS_DMASYNC_POSTWRITE);
4030                         bus_dmamap_unload(txr->txtag,
4031                             buf->map);
4032                         m_freem(buf->m_head);
4033                         buf->m_head = NULL;
4034                 }
4035                 buf->eop = NULL;
4036                 ++txr->tx_avail;
4037
4038                 /* We clean the range if multi segment */
4039                 while (txd != eop) {
4040                         ++txd;
4041                         ++buf;
4042                         ++work;
4043                         /* wrap the ring? */
4044                         if (__predict_false(!work)) {
4045                                 work -= txr->num_desc;
4046                                 buf = txr->tx_buffers;
4047                                 txd = txr->tx_base;
4048                         }
4049                         if (buf->m_head) {
4050                                 txr->bytes +=
4051                                     buf->m_head->m_pkthdr.len;
4052                                 bus_dmamap_sync(txr->txtag,
4053                                     buf->map,
4054                                     BUS_DMASYNC_POSTWRITE);
4055                                 bus_dmamap_unload(txr->txtag,
4056                                     buf->map);
4057                                 m_freem(buf->m_head);
4058                                 buf->m_head = NULL;
4059                         }
4060                         ++txr->tx_avail;
4061                         buf->eop = NULL;
4062
4063                 }
4064                 ++txr->packets;
4065                 ++processed;
4066                 txr->watchdog_time = ticks;
4067
4068                 /* Try the next packet */
4069                 ++txd;
4070                 ++buf;
4071                 ++work;
4072                 /* reset with a wrap */
4073                 if (__predict_false(!work)) {
4074                         work -= txr->num_desc;
4075                         buf = txr->tx_buffers;
4076                         txd = txr->tx_base;
4077                 }
4078                 prefetch(txd);
4079         } while (__predict_true(--limit));
4080
4081         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4082             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4083
4084         work += txr->num_desc;
4085         txr->next_to_clean = work;
4086
4087         /*
4088         ** Watchdog calculation, we know there's
4089         ** work outstanding or the first return
4090         ** would have been taken, so none processed
4091         ** for too long indicates a hang.
4092         */
4093         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4094                 txr->queue_status |= IGB_QUEUE_HUNG;
4095
4096         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4097                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;       
4098
4099         if (txr->tx_avail == txr->num_desc) {
4100                 txr->queue_status = IGB_QUEUE_IDLE;
4101                 return (FALSE);
4102         }
4103
4104         return (TRUE);
4105 }
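
/*
 * Note on the index arithmetic in igb_txeof(): 'work' is biased
 * negative by num_desc so the wrap test is a simple compare against
 * zero.  For example (illustrative), with num_desc = 1024 and
 * next_to_clean = 1020, work starts at -4; after four increments it
 * reaches 0, num_desc is subtracted again, and buf/txd are reset to
 * the start of the ring.
 */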
4106
4107 /*********************************************************************
4108  *
4109  *  Refresh mbuf buffers for RX descriptor rings
4110  *   - keeps its own state, so discards due to resource
4111  *     exhaustion are unnecessary; if an mbuf cannot be
4112  *     obtained it simply returns, keeping its placeholder,
4113  *     and can be called again later to retry.
4114  *
4115  **********************************************************************/
4116 static void
4117 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4118 {
4119         struct adapter          *adapter = rxr->adapter;
4120         bus_dma_segment_t       hseg[1];
4121         bus_dma_segment_t       pseg[1];
4122         struct igb_rx_buf       *rxbuf;
4123         struct mbuf             *mh, *mp;
4124         int                     i, j, nsegs, error;
4125         bool                    refreshed = FALSE;
4126
4127         i = j = rxr->next_to_refresh;
4128         /*
4129         ** Get one descriptor beyond
4130         ** our work mark to control
4131         ** the loop.
4132         */
4133         if (++j == adapter->num_rx_desc)
4134                 j = 0;
4135
4136         while (j != limit) {
4137                 rxbuf = &rxr->rx_buffers[i];
4138                 /* No hdr mbuf used with header split off */
4139                 if (rxr->hdr_split == FALSE)
4140                         goto no_split;
4141                 if (rxbuf->m_head == NULL) {
4142                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4143                         if (mh == NULL)
4144                                 goto update;
4145                 } else
4146                         mh = rxbuf->m_head;
4147
4148                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4150                 mh->m_flags |= M_PKTHDR;
4151                 /* Get the memory mapping */
4152                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4153                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4154                 if (error != 0) {
4155                         printf("Refresh mbufs: hdr dmamap load"
4156                             " failure - %d\n", error);
4157                         m_free(mh);
4158                         rxbuf->m_head = NULL;
4159                         goto update;
4160                 }
4161                 rxbuf->m_head = mh;
4162                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4163                     BUS_DMASYNC_PREREAD);
4164                 rxr->rx_base[i].read.hdr_addr =
4165                     htole64(hseg[0].ds_addr);
4166 no_split:
4167                 if (rxbuf->m_pack == NULL) {
4168                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4169                             M_PKTHDR, adapter->rx_mbuf_sz);
4170                         if (mp == NULL)
4171                                 goto update;
4172                 } else
4173                         mp = rxbuf->m_pack;
4174
4175                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4176                 /* Get the memory mapping */
4177                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4178                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4179                 if (error != 0) {
4180                         printf("Refresh mbufs: payload dmamap load"
4181                             " failure - %d\n", error);
4182                         m_free(mp);
4183                         rxbuf->m_pack = NULL;
4184                         goto update;
4185                 }
4186                 rxbuf->m_pack = mp;
4187                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4188                     BUS_DMASYNC_PREREAD);
4189                 rxr->rx_base[i].read.pkt_addr =
4190                     htole64(pseg[0].ds_addr);
4191                 refreshed = TRUE; /* at least one slot was refreshed */
4192
4193                 i = j; /* our next is precalculated */
4194                 rxr->next_to_refresh = i;
4195                 if (++j == adapter->num_rx_desc)
4196                         j = 0;
4197         }
4198 update:
4199         if (refreshed) /* update tail */
4200                 E1000_WRITE_REG(&adapter->hw,
4201                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4202         return;
4203 }
4204
4205
4206 /*********************************************************************
4207  *
4208  *  Allocate memory for rx_buffer structures. Since we use one
4209  *  rx_buffer per received packet, the maximum number of rx_buffer's
4210  *  that we'll need is equal to the number of receive descriptors
4211  *  that we've allocated.
4212  *
4213  **********************************************************************/
4214 static int
4215 igb_allocate_receive_buffers(struct rx_ring *rxr)
4216 {
4217         struct  adapter         *adapter = rxr->adapter;
4218         device_t                dev = adapter->dev;
4219         struct igb_rx_buf       *rxbuf;
4220         int                     i, bsize, error;
4221
4222         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4223         if (!(rxr->rx_buffers =
4224             (struct igb_rx_buf *) malloc(bsize,
4225             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4226                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4227                 error = ENOMEM;
4228                 goto fail;
4229         }
4230
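        /*
        ** Two DMA tags: a small one (MSIZE) for the optional
        ** header-split buffers, and a larger one (up to
        ** MJUM9BYTES) for the payload clusters.
        */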
4231         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4232                                    1, 0,                /* alignment, bounds */
4233                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4234                                    BUS_SPACE_MAXADDR,   /* highaddr */
4235                                    NULL, NULL,          /* filter, filterarg */
4236                                    MSIZE,               /* maxsize */
4237                                    1,                   /* nsegments */
4238                                    MSIZE,               /* maxsegsize */
4239                                    0,                   /* flags */
4240                                    NULL,                /* lockfunc */
4241                                    NULL,                /* lockfuncarg */
4242                                    &rxr->htag))) {
4243                 device_printf(dev, "Unable to create RX DMA tag\n");
4244                 goto fail;
4245         }
4246
4247         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4248                                    1, 0,                /* alignment, bounds */
4249                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4250                                    BUS_SPACE_MAXADDR,   /* highaddr */
4251                                    NULL, NULL,          /* filter, filterarg */
4252                                    MJUM9BYTES,          /* maxsize */
4253                                    1,                   /* nsegments */
4254                                    MJUM9BYTES,          /* maxsegsize */
4255                                    0,                   /* flags */
4256                                    NULL,                /* lockfunc */
4257                                    NULL,                /* lockfuncarg */
4258                                    &rxr->ptag))) {
4259                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4260                 goto fail;
4261         }
4262
4263         for (i = 0; i < adapter->num_rx_desc; i++) {
4264                 rxbuf = &rxr->rx_buffers[i];
4265                 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4266                 if (error) {
4267                         device_printf(dev,
4268                             "Unable to create RX head DMA maps\n");
4269                         goto fail;
4270                 }
4271                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4272                 if (error) {
4273                         device_printf(dev,
4274                             "Unable to create RX packet DMA maps\n");
4275                         goto fail;
4276                 }
4277         }
4278
4279         return (0);
4280
4281 fail:
4282         /* Frees all, but can handle partial completion */
4283         igb_free_receive_structures(adapter);
4284         return (error);
4285 }
4286
4287
4288 static void
4289 igb_free_receive_ring(struct rx_ring *rxr)
4290 {
4291         struct  adapter         *adapter = rxr->adapter;
4292         struct igb_rx_buf       *rxbuf;
4293
4295         for (int i = 0; i < adapter->num_rx_desc; i++) {
4296                 rxbuf = &rxr->rx_buffers[i];
4297                 if (rxbuf->m_head != NULL) {
4298                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4299                             BUS_DMASYNC_POSTREAD);
4300                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4301                         rxbuf->m_head->m_flags |= M_PKTHDR;
4302                         m_freem(rxbuf->m_head);
4303                 }
4304                 if (rxbuf->m_pack != NULL) {
4305                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4306                             BUS_DMASYNC_POSTREAD);
4307                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4308                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4309                         m_freem(rxbuf->m_pack);
4310                 }
4311                 rxbuf->m_head = NULL;
4312                 rxbuf->m_pack = NULL;
4313         }
4314 }
4315
4316
4317 /*********************************************************************
4318  *
4319  *  Initialize a receive ring and its buffers.
4320  *
4321  **********************************************************************/
4322 static int
4323 igb_setup_receive_ring(struct rx_ring *rxr)
4324 {
4325         struct  adapter         *adapter;
4326         struct  ifnet           *ifp;
4327         device_t                dev;
4328         struct igb_rx_buf       *rxbuf;
4329         bus_dma_segment_t       pseg[1], hseg[1];
4330         struct lro_ctrl         *lro = &rxr->lro;
4331         int                     rsize, nsegs, error = 0;
4332 #ifdef DEV_NETMAP
4333         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4334         struct netmap_slot *slot;
4335 #endif /* DEV_NETMAP */
4336
4337         adapter = rxr->adapter;
4338         dev = adapter->dev;
4339         ifp = adapter->ifp;
4340
4341         /* Clear the ring contents */
4342         IGB_RX_LOCK(rxr);
4343 #ifdef DEV_NETMAP
4344         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4345 #endif /* DEV_NETMAP */
4346         rsize = roundup2(adapter->num_rx_desc *
4347             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4348         bzero((void *)rxr->rx_base, rsize);
4349
4350         /*
4351         ** Free current RX buffer structures and their mbufs
4352         */
4353         igb_free_receive_ring(rxr);
4354
4355         /* Configure for header split? */
4356         if (igb_header_split)
4357                 rxr->hdr_split = TRUE;
4358
4359         /* Now replenish the ring mbufs */
4360         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4361                 struct mbuf     *mh, *mp;
4362
4363                 rxbuf = &rxr->rx_buffers[j];
4364 #ifdef DEV_NETMAP
4365                 if (slot) {
4366                         /* slot sj is mapped to the j-th NIC-ring entry */
4367                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4368                         uint64_t paddr;
4369                         void *addr;
4370
4371                         addr = PNMB(na, slot + sj, &paddr);
4372                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4373                         /* Update descriptor */
4374                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4375                         continue;
4376                 }
4377 #endif /* DEV_NETMAP */
4378                 if (rxr->hdr_split == FALSE)
4379                         goto skip_head;
4380
4381                 /* First the header */
4382                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4383                 if (rxbuf->m_head == NULL) {
4384                         error = ENOBUFS;
4385                         goto fail;
4386                 }
4387                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4388                 mh = rxbuf->m_head;
4389                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4390                 mh->m_flags |= M_PKTHDR;
4391                 /* Get the memory mapping */
4392                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4393                     rxbuf->hmap, rxbuf->m_head, hseg,
4394                     &nsegs, BUS_DMA_NOWAIT);
4395                 if (error != 0) /* Nothing elegant to do here */
4396                         goto fail;
4397                 bus_dmamap_sync(rxr->htag,
4398                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4399                 /* Update descriptor */
4400                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4401
4402 skip_head:
4403                 /* Now the payload cluster */
4404                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4405                     M_PKTHDR, adapter->rx_mbuf_sz);
4406                 if (rxbuf->m_pack == NULL) {
4407                         error = ENOBUFS;
4408                         goto fail;
4409                 }
4410                 mp = rxbuf->m_pack;
4411                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4412                 /* Get the memory mapping */
4413                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4414                     rxbuf->pmap, mp, pseg,
4415                     &nsegs, BUS_DMA_NOWAIT);
4416                 if (error != 0)
4417                         goto fail;
4418                 bus_dmamap_sync(rxr->ptag,
4419                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4420                 /* Update descriptor */
4421                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4422         }
4423
4424         /* Setup our descriptor indices */
4425         rxr->next_to_check = 0;
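        /*
        ** The ring is now fully populated, so next_to_refresh
        ** points at the last valid descriptor, circularly one
        ** behind next_to_check.
        */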
4426         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4427         rxr->lro_enabled = FALSE;
4428         rxr->rx_split_packets = 0;
4429         rxr->rx_bytes = 0;
4430
4431         rxr->fmp = NULL;
4432         rxr->lmp = NULL;
4433
4434         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4435             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4436
4437         /*
4438         ** Now set up the LRO interface; we
4439         ** only do header split when LRO is
4440         ** enabled, since it is so often
4441         ** undesirable in similar setups.
4442         */
4443         if (ifp->if_capenable & IFCAP_LRO) {
4444                 error = tcp_lro_init(lro);
4445                 if (error) {
4446                         device_printf(dev, "LRO Initialization failed!\n");
4447                         goto fail;
4448                 }
4449                 INIT_DEBUGOUT("RX LRO Initialized\n");
4450                 rxr->lro_enabled = TRUE;
4451                 lro->ifp = adapter->ifp;
4452         }
4453
4454         IGB_RX_UNLOCK(rxr);
4455         return (0);
4456
4457 fail:
4458         igb_free_receive_ring(rxr);
4459         IGB_RX_UNLOCK(rxr);
4460         return (error);
4461 }
4462
4463
4464 /*********************************************************************
4465  *
4466  *  Initialize all receive rings.
4467  *
4468  **********************************************************************/
4469 static int
4470 igb_setup_receive_structures(struct adapter *adapter)
4471 {
4472         struct rx_ring *rxr = adapter->rx_rings;
4473         int i;
4474
4475         for (i = 0; i < adapter->num_queues; i++, rxr++)
4476                 if (igb_setup_receive_ring(rxr))
4477                         goto fail;
4478
4479         return (0);
4480 fail:
4481         /*
4482          * Free RX buffers allocated so far, we will only handle
4483          * the rings that completed, the failing case will have
4484          * cleaned up for itself. 'i' is the endpoint.
4485          */
4486         for (int j = 0; j < i; ++j) {
4487                 rxr = &adapter->rx_rings[j];
4488                 IGB_RX_LOCK(rxr);
4489                 igb_free_receive_ring(rxr);
4490                 IGB_RX_UNLOCK(rxr);
4491         }
4492
4493         return (ENOBUFS);
4494 }
4495
4496 /*
4497  * Initialise the RSS mapping for NICs that support multiple transmit/
4498  * receive rings.
4499  */
4500 static void
4501 igb_initialise_rss_mapping(struct adapter *adapter)
4502 {
4503         struct e1000_hw *hw = &adapter->hw;
4504         int i;
4505         int queue_id;
4506         u32 reta;
4507         u32 rss_key[10], mrqc, shift = 0;
4508
4509         /* The 82575 keeps the queue index in the upper bits of each RETA entry */
4510         if (adapter->hw.mac.type == e1000_82575)
4511                 shift = 6;
4512
4513         /*
4514          * The redirection table controls which destination
4515          * queue each bucket redirects traffic to.
4516          * Each DWORD represents four queues, with the LSB
4517          * being the first queue in the DWORD.
4518          *
4519          * This just allocates buckets to queues using round-robin
4520          * allocation.
4521          *
4522          * NOTE: It Just Happens to line up with the default
4523          * RSS allocation method.
4524          */
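        /*
         * Example (hypothetical 4-queue setup): buckets 0..3 map
         * to queues 0..3; each 8-bit entry is shifted in from the
         * top, so after i == 3, reta == 0x03020100 and is written
         * to RETA(0). The pattern then repeats for buckets 4..7.
         */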
4525
4526         /* Populate the redirection table, four entries per register */
4527         reta = 0;
4528         for (i = 0; i < 128; i++) {
4529 #ifdef  RSS
4530                 queue_id = rss_get_indirection_to_bucket(i);
4531                 /*
4532                  * If we have more queues than buckets, we'll
4533                  * end up mapping buckets to a subset of the
4534                  * queues.
4535                  *
4536                  * If we have more buckets than queues, we'll
4537                  * end up instead assigning multiple buckets
4538                  * to queues.
4539                  *
4540                  * Both are suboptimal, but we need to handle
4541                  * the case so we don't go out of bounds
4542                  * indexing arrays and such.
4543                  */
4544                 queue_id = queue_id % adapter->num_queues;
4545 #else
4546                 queue_id = (i % adapter->num_queues);
4547 #endif
4548                 /* Adjust if required */
4549                 queue_id = queue_id << shift;
4550
4551                 /*
4552                  * The low 8 bits are for hash value (n+0);
4553                  * The next 8 bits are for hash value (n+1), etc.
4554                  */
4555                 reta = reta >> 8;
4556                 reta = reta | (((uint32_t)queue_id) << 24);
4557                 if ((i & 3) == 3) {
4558                         E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4559                         reta = 0;
4560                 }
4561         }
4562
4563         /* Now fill in hash table */
4564
4565         /*
4566          * MRQC: Multiple Receive Queues Command
4567          * Set queuing to RSS control, number depends on the device.
4568          */
4569         mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4570
4571 #ifdef  RSS
4572         /* XXX ew typecasting */
4573         rss_getkey((uint8_t *) &rss_key);
4574 #else
4575         arc4rand(&rss_key, sizeof(rss_key), 0);
4576 #endif
4577         for (i = 0; i < 10; i++)
4578                 E1000_WRITE_REG_ARRAY(hw,
4579                     E1000_RSSRK(0), i, rss_key[i]);
4580
4581         /*
4582          * Configure the RSS fields to hash upon.
4583          */
4584         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4585             E1000_MRQC_RSS_FIELD_IPV4_TCP);
4586         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4587             E1000_MRQC_RSS_FIELD_IPV6_TCP);
4588         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4589             E1000_MRQC_RSS_FIELD_IPV6_UDP);
4590         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4591             E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4592
4593         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4594 }
4595
4596 /*********************************************************************
4597  *
4598  *  Enable receive unit.
4599  *
4600  **********************************************************************/
4601 static void
4602 igb_initialize_receive_units(struct adapter *adapter)
4603 {
4604         struct rx_ring  *rxr = adapter->rx_rings;
4605         struct ifnet    *ifp = adapter->ifp;
4606         struct e1000_hw *hw = &adapter->hw;
4607         u32             rctl, rxcsum, psize, srrctl = 0;
4608
4609         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4610
4611         /*
4612          * Make sure receives are disabled while setting
4613          * up the descriptor ring
4614          */
4615         rctl = E1000_READ_REG(hw, E1000_RCTL);
4616         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4617
4618         /*
4619         ** Set up for header split
4620         */
4621         if (igb_header_split) {
4622                 /* Use a standard mbuf for the header */
4623                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4624                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4625         } else
4626                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4627
4628         /*
4629         ** Set up for jumbo frames
4630         */
4631         if (ifp->if_mtu > ETHERMTU) {
4632                 rctl |= E1000_RCTL_LPE;
4633                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4634                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4635                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4636                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4637                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4638                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4639                 }
4640                 /* Set maximum packet len */
4641                 psize = adapter->max_frame_size;
4642                 /* are we on a vlan? */
4643                 if (adapter->ifp->if_vlantrunk != NULL)
4644                         psize += VLAN_TAG_SIZE;
4645                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4646         } else {
4647                 rctl &= ~E1000_RCTL_LPE;
4648                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4649                 rctl |= E1000_RCTL_SZ_2048;
4650         }
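        /*
        ** Note: SRRCTL.BSIZEPKT is in 1KB units, hence the buffer
        ** sizes above are shifted right by
        ** E1000_SRRCTL_BSIZEPKT_SHIFT (10).
        */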
4651
4652         /*
4653          * If TX flow control is disabled and there's >1 queue defined,
4654          * enable DROP.
4655          *
4656          * This drops frames rather than hanging the RX MAC for all queues.
4657          */
4658         if ((adapter->num_queues > 1) &&
4659             (adapter->fc == e1000_fc_none ||
4660              adapter->fc == e1000_fc_rx_pause)) {
4661                 srrctl |= E1000_SRRCTL_DROP_EN;
4662         }
4663
4664         /* Setup the Base and Length of the Rx Descriptor Rings */
4665         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4666                 u64 bus_addr = rxr->rxdma.dma_paddr;
4667                 u32 rxdctl;
4668
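                /*
                ** RDLEN is in bytes; legacy and advanced RX
                ** descriptors are both 16 bytes, so either
                ** sizeof works here.
                */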
4669                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4670                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4671                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4672                     (uint32_t)(bus_addr >> 32));
4673                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4674                     (uint32_t)bus_addr);
4675                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4676                 /* Enable this Queue */
4677                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4678                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
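                /*
                ** Clear, then set the prefetch, host and writeback
                ** thresholds (bits 4:0, 12:8 and 20:16).
                */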
4679                 rxdctl &= 0xFFF00000;
4680                 rxdctl |= IGB_RX_PTHRESH;
4681                 rxdctl |= IGB_RX_HTHRESH << 8;
4682                 rxdctl |= IGB_RX_WTHRESH << 16;
4683                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4684         }
4685
4686         /*
4687         ** Setup for RX MultiQueue
4688         */
4689         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4690         if (adapter->num_queues > 1) {
4692                 /* rss setup */
4693                 igb_initialise_rss_mapping(adapter);
4694
4695                 /*
4696                 ** NOTE: Receive Full-Packet Checksum Offload 
4697                 ** is mutually exclusive with Multiqueue. However
4698                 ** this is not the same as TCP/IP checksums which
4699                 ** still work.
4700                 */
4701                 rxcsum |= E1000_RXCSUM_PCSD;
4702 #if __FreeBSD_version >= 800000
4703                 /* For SCTP Offload */
4704                 if (((hw->mac.type == e1000_82576) ||
4705                      (hw->mac.type == e1000_82580)) &&
4706                     (ifp->if_capenable & IFCAP_RXCSUM))
4707                         rxcsum |= E1000_RXCSUM_CRCOFL;
4708 #endif
4709         } else {
4710                 /* Non RSS setup */
4711                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4712                         rxcsum |= E1000_RXCSUM_IPPCSE;
4713 #if __FreeBSD_version >= 800000
4714                         if ((adapter->hw.mac.type == e1000_82576) ||
4715                             (adapter->hw.mac.type == e1000_82580))
4716                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4717 #endif
4718                 } else
4719                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4720         }
4721         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4722
4723         /* Setup the Receive Control Register */
4724         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4725         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4726                    E1000_RCTL_RDMTS_HALF |
4727                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4728         /* Strip CRC bytes. */
4729         rctl |= E1000_RCTL_SECRC;
4730         /* Make sure VLAN Filters are off */
4731         rctl &= ~E1000_RCTL_VFE;
4732         /* Don't store bad packets */
4733         rctl &= ~E1000_RCTL_SBP;
4734
4735         /* Enable Receives */
4736         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4737
4738         /*
4739          * Setup the HW Rx Head and Tail Descriptor Pointers
4740          *   - needs to be after enable
4741          */
4742         for (int i = 0; i < adapter->num_queues; i++) {
4743                 rxr = &adapter->rx_rings[i];
4744                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4745 #ifdef DEV_NETMAP
4746                 /*
4747                  * an init() while a netmap client is active must
4748                  * preserve the rx buffers passed to userspace.
4749                  * In this driver it means we adjust RDT to
4750                  * something different from next_to_refresh
4751                  * (which is not used in netmap mode).
4752                  */
4753                 if (ifp->if_capenable & IFCAP_NETMAP) {
4754                         struct netmap_adapter *na = NA(adapter->ifp);
4755                         struct netmap_kring *kring = &na->rx_rings[i];
4756                         int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4757
4758                         if (t >= adapter->num_rx_desc)
4759                                 t -= adapter->num_rx_desc;
4760                         else if (t < 0)
4761                                 t += adapter->num_rx_desc;
4762                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4763                 } else
4764 #endif /* DEV_NETMAP */
4765                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4766         }
4767         return;
4768 }
4769
4770 /*********************************************************************
4771  *
4772  *  Free receive rings.
4773  *
4774  **********************************************************************/
4775 static void
4776 igb_free_receive_structures(struct adapter *adapter)
4777 {
4778         struct rx_ring *rxr = adapter->rx_rings;
4779
4780         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4781                 struct lro_ctrl *lro = &rxr->lro;
4782                 igb_free_receive_buffers(rxr);
4783                 tcp_lro_free(lro);
4784                 igb_dma_free(adapter, &rxr->rxdma);
4785         }
4786
4787         free(adapter->rx_rings, M_DEVBUF);
4788 }
4789
4790 /*********************************************************************
4791  *
4792  *  Free receive ring data structures.
4793  *
4794  **********************************************************************/
4795 static void
4796 igb_free_receive_buffers(struct rx_ring *rxr)
4797 {
4798         struct adapter          *adapter = rxr->adapter;
4799         struct igb_rx_buf       *rxbuf;
4800         int i;
4801
4802         INIT_DEBUGOUT("free_receive_structures: begin");
4803
4804         /* Cleanup any existing buffers */
4805         if (rxr->rx_buffers != NULL) {
4806                 for (i = 0; i < adapter->num_rx_desc; i++) {
4807                         rxbuf = &rxr->rx_buffers[i];
4808                         if (rxbuf->m_head != NULL) {
4809                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4810                                     BUS_DMASYNC_POSTREAD);
4811                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4812                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4813                                 m_freem(rxbuf->m_head);
4814                         }
4815                         if (rxbuf->m_pack != NULL) {
4816                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4817                                     BUS_DMASYNC_POSTREAD);
4818                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4819                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4820                                 m_freem(rxbuf->m_pack);
4821                         }
4822                         rxbuf->m_head = NULL;
4823                         rxbuf->m_pack = NULL;
4824                         if (rxbuf->hmap != NULL) {
4825                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4826                                 rxbuf->hmap = NULL;
4827                         }
4828                         if (rxbuf->pmap != NULL) {
4829                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4830                                 rxbuf->pmap = NULL;
4831                         }
4832                 }
4833                 if (rxr->rx_buffers != NULL) {
4834                         free(rxr->rx_buffers, M_DEVBUF);
4835                         rxr->rx_buffers = NULL;
4836                 }
4837         }
4838
4839         if (rxr->htag != NULL) {
4840                 bus_dma_tag_destroy(rxr->htag);
4841                 rxr->htag = NULL;
4842         }
4843         if (rxr->ptag != NULL) {
4844                 bus_dma_tag_destroy(rxr->ptag);
4845                 rxr->ptag = NULL;
4846         }
4847 }
4848
4849 static __inline void
4850 igb_rx_discard(struct rx_ring *rxr, int i)
4851 {
4852         struct igb_rx_buf       *rbuf;
4853
4854         rbuf = &rxr->rx_buffers[i];
4855
4856         /* Partially received? Free the chain */
4857         if (rxr->fmp != NULL) {
4858                 rxr->fmp->m_flags |= M_PKTHDR;
4859                 m_freem(rxr->fmp);
4860                 rxr->fmp = NULL;
4861                 rxr->lmp = NULL;
4862         }
4863
4864         /*
4865         ** With advanced descriptors the writeback
4866         ** clobbers the buffer addrs, so it's easier
4867         ** to just free the existing mbufs and take
4868         ** the normal refresh path to get new buffers
4869         ** and mapping.
4870         */
4871         if (rbuf->m_head) {
4872                 m_free(rbuf->m_head);
4873                 rbuf->m_head = NULL;
4874                 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4875         }
4876
4877         if (rbuf->m_pack) {
4878                 m_free(rbuf->m_pack);
4879                 rbuf->m_pack = NULL;
4880                 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4881         }
4882
4883         return;
4884 }
4885
4886 static __inline void
4887 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4888 {
4889
4890         /*
4891          * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4892          * has been computed by hardware, and which carry no VLAN tag in the
4893          * Ethernet header.
4894          */
4895         if (rxr->lro_enabled &&
4896             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4897             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4898             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4899             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4900             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4901             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4902                 /*
4903                  * Send to the stack if:
4904                  *  - LRO not enabled, or
4905                  *  - no LRO resources, or
4906                  *  - LRO enqueue fails
4907                  */
4908                 if (rxr->lro.lro_cnt != 0)
4909                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4910                                 return;
4911         }
4912         IGB_RX_UNLOCK(rxr);
4913         (*ifp->if_input)(ifp, m);
4914         IGB_RX_LOCK(rxr);
4915 }
4916
4917 /*********************************************************************
4918  *
4919  *  This routine executes in interrupt context. It replenishes
4920  *  the mbufs in the descriptor ring and sends data which has been
4921  *  DMA'd into host memory to the upper layer.
4922  *
4923  *  We loop at most count times if count is > 0, or until done if
4924  *  count < 0.
4925  *
4926  *  Return TRUE if more to clean, FALSE otherwise
4927  *********************************************************************/
4928 static bool
4929 igb_rxeof(struct igb_queue *que, int count, int *done)
4930 {
4931         struct adapter          *adapter = que->adapter;
4932         struct rx_ring          *rxr = que->rxr;
4933         struct ifnet            *ifp = adapter->ifp;
4934         struct lro_ctrl         *lro = &rxr->lro;
4935         struct lro_entry        *queued;
4936         int                     i, processed = 0, rxdone = 0;
4937         u32                     ptype, staterr = 0;
4938         union e1000_adv_rx_desc *cur;
4939
4940         IGB_RX_LOCK(rxr);
4941         /* Sync the ring. */
4942         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4943             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4944
4945 #ifdef DEV_NETMAP
4946         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4947                 IGB_RX_UNLOCK(rxr);
4948                 return (FALSE);
4949         }
4950 #endif /* DEV_NETMAP */
4951
4952         /* Main clean loop */
4953         for (i = rxr->next_to_check; count != 0;) {
4954                 struct mbuf             *sendmp, *mh, *mp;
4955                 struct igb_rx_buf       *rxbuf;
4956                 u16                     hlen, plen, hdr, vtag, pkt_info;
4957                 bool                    eop = FALSE;
4958  
4959                 cur = &rxr->rx_base[i];
4960                 staterr = le32toh(cur->wb.upper.status_error);
4961                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4962                         break;
4963                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4964                         break;
4965                 count--;
4966                 sendmp = mh = mp = NULL;
4967                 cur->wb.upper.status_error = 0;
4968                 rxbuf = &rxr->rx_buffers[i];
4969                 plen = le16toh(cur->wb.upper.length);
4970                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4971                 if (((adapter->hw.mac.type == e1000_i350) ||
4972                     (adapter->hw.mac.type == e1000_i354)) &&
4973                     (staterr & E1000_RXDEXT_STATERR_LB))
4974                         vtag = be16toh(cur->wb.upper.vlan);
4975                 else
4976                         vtag = le16toh(cur->wb.upper.vlan);
4977                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4978                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
4979                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4980
4981                 /*
4982                  * Free the frame (all segments) if we're at EOP and
4983                  * it's an error.
4984                  *
4985                  * The datasheet states that EOP + status is only valid for
4986                  * the final segment in a multi-segment frame.
4987                  */
4988                 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4989                         adapter->dropped_pkts++;
4990                         ++rxr->rx_discarded;
4991                         igb_rx_discard(rxr, i);
4992                         goto next_desc;
4993                 }
4994
4995                 /*
4996                 ** The way the hardware is configured to
4997                 ** split, it will ONLY use the header buffer
4998                 ** when header split is enabled, otherwise we
4999                 ** get normal behavior, i.e., both header and
5000                 ** payload are DMA'd into the payload buffer.
5001                 **
5002                 ** The fmp test is to catch the case where a
5003                 ** packet spans multiple descriptors, in that
5004                 ** case only the first header is valid.
5005                 */
5006                 if (rxr->hdr_split && rxr->fmp == NULL) {
5007                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5008                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5009                             E1000_RXDADV_HDRBUFLEN_SHIFT;
5010                         if (hlen > IGB_HDR_BUF)
5011                                 hlen = IGB_HDR_BUF;
5012                         mh = rxr->rx_buffers[i].m_head;
5013                         mh->m_len = hlen;
5014                         /* clear buf pointer for refresh */
5015                         rxbuf->m_head = NULL;
5016                         /*
5017                         ** Get the payload length, this
5018                         ** could be zero if it's a small
5019                         ** packet.
5020                         */
5021                         if (plen > 0) {
5022                                 mp = rxr->rx_buffers[i].m_pack;
5023                                 mp->m_len = plen;
5024                                 mh->m_next = mp;
5025                                 /* clear buf pointer */
5026                                 rxbuf->m_pack = NULL;
5027                                 rxr->rx_split_packets++;
5028                         }
5029                 } else {
5030                         /*
5031                         ** Either no header split, or a
5032                         ** secondary piece of a fragmented
5033                         ** split packet.
5034                         */
5035                         mh = rxr->rx_buffers[i].m_pack;
5036                         mh->m_len = plen;
5037                         /* clear buf info for refresh */
5038                         rxbuf->m_pack = NULL;
5039                 }
5040                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5041
5042                 ++processed; /* So we know when to refresh */
5043
5044                 /* Initial frame - setup */
5045                 if (rxr->fmp == NULL) {
5046                         mh->m_pkthdr.len = mh->m_len;
5047                         /* Save the head of the chain */
5048                         rxr->fmp = mh;
5049                         rxr->lmp = mh;
5050                         if (mp != NULL) {
5051                                 /* Add payload if split */
5052                                 mh->m_pkthdr.len += mp->m_len;
5053                                 rxr->lmp = mh->m_next;
5054                         }
5055                 } else {
5056                         /* Chain mbuf's together */
5057                         rxr->lmp->m_next = mh;
5058                         rxr->lmp = rxr->lmp->m_next;
5059                         rxr->fmp->m_pkthdr.len += mh->m_len;
5060                 }
5061
5062                 if (eop) {
5063                         rxr->fmp->m_pkthdr.rcvif = ifp;
5064                         rxr->rx_packets++;
5065                         /* capture data for AIM */
5066                         rxr->packets++;
5067                         rxr->bytes += rxr->fmp->m_pkthdr.len;
5068                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5069
5070                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5071                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
5072
5073                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5074                             (staterr & E1000_RXD_STAT_VP) != 0) {
5075                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5076                                 rxr->fmp->m_flags |= M_VLANTAG;
5077                         }
5078
5079                         /*
5080                          * In case of multiqueue, we have RXCSUM.PCSD bit set
5081                          * and never cleared. This means we have RSS hash
5082                          * available to be used.
5083                          */
5084                         if (adapter->num_queues > 1) {
5085                                 rxr->fmp->m_pkthdr.flowid = 
5086                                     le32toh(cur->wb.lower.hi_dword.rss);
5087                                 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5088                                 case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5089                                         M_HASHTYPE_SET(rxr->fmp,
5090                                             M_HASHTYPE_RSS_TCP_IPV4);
5091                                         break;
5092                                 case E1000_RXDADV_RSSTYPE_IPV4:
5093                                         M_HASHTYPE_SET(rxr->fmp,
5094                                             M_HASHTYPE_RSS_IPV4);
5095                                         break;
5096                                 case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5097                                         M_HASHTYPE_SET(rxr->fmp,
5098                                             M_HASHTYPE_RSS_TCP_IPV6);
5099                                         break;
5100                                 case E1000_RXDADV_RSSTYPE_IPV6_EX:
5101                                         M_HASHTYPE_SET(rxr->fmp,
5102                                             M_HASHTYPE_RSS_IPV6_EX);
5103                                         break;
5104                                 case E1000_RXDADV_RSSTYPE_IPV6:
5105                                         M_HASHTYPE_SET(rxr->fmp,
5106                                             M_HASHTYPE_RSS_IPV6);
5107                                         break;
5108                                 case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5109                                         M_HASHTYPE_SET(rxr->fmp,
5110                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
5111                                         break;
5112                                 default:
5113                                         /* unknown hash type */
5114                                         M_HASHTYPE_SET(rxr->fmp,
5115                                             M_HASHTYPE_OPAQUE);
5116                                 }
5117                         } else {
5118 #ifndef IGB_LEGACY_TX
5119                                 rxr->fmp->m_pkthdr.flowid = que->msix;
5120                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5121 #endif
5122                         }
5123                         sendmp = rxr->fmp;
5124                         /* Make sure to set M_PKTHDR. */
5125                         sendmp->m_flags |= M_PKTHDR;
5126                         rxr->fmp = NULL;
5127                         rxr->lmp = NULL;
5128                 }
5129
5130 next_desc:
5131                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5132                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5133
5134                 /* Advance our pointers to the next descriptor. */
5135                 if (++i == adapter->num_rx_desc)
5136                         i = 0;
5137                 /*
5138                 ** Send to the stack or LRO
5139                 */
5140                 if (sendmp != NULL) {
5141                         rxr->next_to_check = i;
5142                         igb_rx_input(rxr, ifp, sendmp, ptype);
5143                         i = rxr->next_to_check;
5144                         rxdone++;
5145                 }
5146
5147                 /* Refresh mbufs every 8 descriptors to amortize the tail write */
5148                 if (processed == 8) {
5149                         igb_refresh_mbufs(rxr, i);
5150                         processed = 0;
5151                 }
5152         }
5153
5154         /* Catch any remainders */
5155         if (igb_rx_unrefreshed(rxr))
5156                 igb_refresh_mbufs(rxr, i);
5157
5158         rxr->next_to_check = i;
5159
5160         /*
5161          * Flush any outstanding LRO work
5162          */
5163         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5164                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5165                 tcp_lro_flush(lro, queued);
5166         }
5167
5168         if (done != NULL)
5169                 *done += rxdone;
5170
5171         IGB_RX_UNLOCK(rxr);
5172         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5173 }
5174
5175 /*********************************************************************
5176  *
5177  *  Verify that the hardware indicated that the checksum is valid.
5178  *  Inform the stack about the checksum status so that it
5179  *  doesn't spend time re-verifying it.
5180  *
5181  *********************************************************************/
5182 static void
5183 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5184 {
5185         u16 status = (u16)staterr;
5186         u8  errors = (u8) (staterr >> 24);
5187         int sctp;
5188
5189         /* Ignore Checksum bit is set */
5190         if (status & E1000_RXD_STAT_IXSM) {
5191                 mp->m_pkthdr.csum_flags = 0;
5192                 return;
5193         }
5194
5195         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5196             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5197                 sctp = 1;
5198         else
5199                 sctp = 0;
5200         if (status & E1000_RXD_STAT_IPCS) {
5201                 /* Did it pass? */
5202                 if (!(errors & E1000_RXD_ERR_IPE)) {
5203                         /* IP Checksum Good */
5204                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5205                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5206                 } else
5207                         mp->m_pkthdr.csum_flags = 0;
5208         }
5209
5210         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5211                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5212 #if __FreeBSD_version >= 800000
5213                 if (sctp) /* reassign */
5214                         type = CSUM_SCTP_VALID;
5215 #endif
5216                 /* Did it pass? */
5217                 if (!(errors & E1000_RXD_ERR_TCPE)) {
5218                         mp->m_pkthdr.csum_flags |= type;
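                        /*
                        ** A csum_data of 0xffff marks the
                        ** pseudo-header checksum as verified.
                        */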
5219                         if (sctp == 0)
5220                                 mp->m_pkthdr.csum_data = htons(0xffff);
5221                 }
5222         }
5223         return;
5224 }
5225
5226 /*
5227  * This routine is run via a vlan
5228  * config EVENT
5229  */
5230 static void
5231 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5232 {
5233         struct adapter  *adapter = ifp->if_softc;
5234         u32             index, bit;
5235
5236         if (ifp->if_softc !=  arg)   /* Not our event */
5237                 return;
5238
5239         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5240                 return;
5241
5242         IGB_CORE_LOCK(adapter);
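        /*
        ** The shadow VFTA is an array of 32-bit words;
        ** e.g. vtag 100 maps to word 3, bit 4.
        */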
5243         index = (vtag >> 5) & 0x7F;
5244         bit = vtag & 0x1F;
5245         adapter->shadow_vfta[index] |= (1 << bit);
5246         ++adapter->num_vlans;
5247         /* Change hw filter setting */
5248         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5249                 igb_setup_vlan_hw_support(adapter);
5250         IGB_CORE_UNLOCK(adapter);
5251 }
5252
5253 /*
5254  * This routine is run via a vlan
5255  * unconfig EVENT
5256  */
5257 static void
5258 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5259 {
5260         struct adapter  *adapter = ifp->if_softc;
5261         u32             index, bit;
5262
5263         if (ifp->if_softc !=  arg)
5264                 return;
5265
5266         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5267                 return;
5268
5269         IGB_CORE_LOCK(adapter);
5270         index = (vtag >> 5) & 0x7F;
5271         bit = vtag & 0x1F;
5272         adapter->shadow_vfta[index] &= ~(1 << bit);
5273         --adapter->num_vlans;
5274         /* Change hw filter setting */
5275         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5276                 igb_setup_vlan_hw_support(adapter);
5277         IGB_CORE_UNLOCK(adapter);
5278 }
5279
5280 static void
5281 igb_setup_vlan_hw_support(struct adapter *adapter)
5282 {
5283         struct e1000_hw *hw = &adapter->hw;
5284         struct ifnet    *ifp = adapter->ifp;
5285         u32             reg;
5286
5287         if (adapter->vf_ifp) {
5288                 e1000_rlpml_set_vf(hw,
5289                     adapter->max_frame_size + VLAN_TAG_SIZE);
5290                 return;
5291         }
5292
5293         reg = E1000_READ_REG(hw, E1000_CTRL);
5294         reg |= E1000_CTRL_VME;
5295         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5296
5297         /* Enable the Filter Table */
5298         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5299                 reg = E1000_READ_REG(hw, E1000_RCTL);
5300                 reg &= ~E1000_RCTL_CFIEN;
5301                 reg |= E1000_RCTL_VFE;
5302                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5303         }
5304
5305         /* Update the frame size */
5306         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5307             adapter->max_frame_size + VLAN_TAG_SIZE);
5308
5309         /* Don't bother with table if no vlans */
5310         if ((adapter->num_vlans == 0) ||
5311             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5312                 return;
5313         /*
5314         ** A soft reset zeroes out the VFTA, so
5315         ** we need to repopulate it now.
5316         */
5317         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5318                 if (adapter->shadow_vfta[i] != 0) {
5319                         if (adapter->vf_ifp)
5320                                 e1000_vfta_set_vf(hw,
5321                                     adapter->shadow_vfta[i], TRUE);
5322                         else
5323                                 e1000_write_vfta(hw,
5324                                     i, adapter->shadow_vfta[i]);
5325                 }
5326 }
5327
5328 static void
5329 igb_enable_intr(struct adapter *adapter)
5330 {
5331         /* With RSS, set up what to auto-clear */
5332         if (adapter->msix_mem) {
5333                 u32 mask = (adapter->que_mask | adapter->link_mask);
5334                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5335                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5336                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5337                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5338                     E1000_IMS_LSC);
5339         } else {
5340                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5341                     IMS_ENABLE_MASK);
5342         }
5343         E1000_WRITE_FLUSH(&adapter->hw);
5344
5345         return;
5346 }
5347
5348 static void
5349 igb_disable_intr(struct adapter *adapter)
5350 {
5351         if (adapter->msix_mem) {
5352                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5353                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5354         } 
5355         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5356         E1000_WRITE_FLUSH(&adapter->hw);
5357         return;
5358 }
5359
5360 /*
5361  * Bit of a misnomer: what this really means is
5362  * to enable OS management of the system, i.e.
5363  * to disable special hardware management features.
5364  */
5365 static void
5366 igb_init_manageability(struct adapter *adapter)
5367 {
5368         if (adapter->has_manage) {
5369                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5370                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5371
5372                 /* disable hardware interception of ARP */
5373                 manc &= ~(E1000_MANC_ARP_EN);
5374
5375                 /* enable receiving management packets to the host */
5376                 manc |= E1000_MANC_EN_MNG2HOST;
5377                 manc2h |= 1 << 5;  /* Mng Port 623 */
5378                 manc2h |= 1 << 6;  /* Mng Port 664 */
5379                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5380                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5381         }
5382 }
5383
5384 /*
5385  * Give control back to hardware management
5386  * controller if there is one.
5387  */
5388 static void
5389 igb_release_manageability(struct adapter *adapter)
5390 {
5391         if (adapter->has_manage) {
5392                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5393
5394                 /* re-enable hardware interception of ARP */
5395                 manc |= E1000_MANC_ARP_EN;
5396                 manc &= ~E1000_MANC_EN_MNG2HOST;
5397
5398                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5399         }
5400 }
5401
5402 /*
5403  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5404  * For ASF and Pass Through versions of f/w this means that
5405  * the driver is loaded. 
5406  *
5407  */
5408 static void
5409 igb_get_hw_control(struct adapter *adapter)
5410 {
5411         u32 ctrl_ext;
5412
5413         if (adapter->vf_ifp)
5414                 return;
5415
5416         /* Let firmware know the driver has taken over */
5417         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5418         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5419             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5420 }
5421
5422 /*
5423  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5424  * For ASF and Pass Through versions of f/w this means that the
5425  * driver is no longer loaded.
5426  *
5427  */
5428 static void
5429 igb_release_hw_control(struct adapter *adapter)
5430 {
5431         u32 ctrl_ext;
5432
5433         if (adapter->vf_ifp)
5434                 return;
5435
5436         /* Let firmware take over control of h/w */
5437         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5438         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5439             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5440 }
5441
5442 static int
5443 igb_is_valid_ether_addr(uint8_t *addr)
5444 {
5445         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5446
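        /* Reject multicast (low bit of first octet set) and all-zero addresses */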
5447         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5448                 return (FALSE);
5449         }
5450
5451         return (TRUE);
5452 }
5453
5454
5455 /*
5456  * Enable PCI Wake On Lan capability
5457  */
5458 static void
5459 igb_enable_wakeup(device_t dev)
5460 {
5461         u16     cap, status;
5462         u8      id;
5463
5464         /* First find the capabilities pointer (assumes PM is the first capability) */
5465         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5466         /* Read the PM Capabilities */
5467         id = pci_read_config(dev, cap, 1);
5468         if (id != PCIY_PMG)     /* Something wrong */
5469                 return;
5470         /* OK, we have the power capabilities, so
5471            now get the status register */
5472         cap += PCIR_POWER_STATUS;
5473         status = pci_read_config(dev, cap, 2);
5474         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5475         pci_write_config(dev, cap, status, 2);
5476         return;
5477 }
5478
5479 static void
5480 igb_led_func(void *arg, int onoff)
5481 {
5482         struct adapter  *adapter = arg;
5483
5484         IGB_CORE_LOCK(adapter);
5485         if (onoff) {
5486                 e1000_setup_led(&adapter->hw);
5487                 e1000_led_on(&adapter->hw);
5488         } else {
5489                 e1000_led_off(&adapter->hw);
5490                 e1000_cleanup_led(&adapter->hw);
5491         }
5492         IGB_CORE_UNLOCK(adapter);
5493 }
5494
5495 static uint64_t
5496 igb_get_vf_counter(if_t ifp, ift_counter cnt)
5497 {
5498         struct adapter *adapter;
5499         struct e1000_vf_stats *stats;
5500 #ifndef IGB_LEGACY_TX
5501         struct tx_ring *txr;
5502         uint64_t rv;
5503 #endif
5504
5505         adapter = if_getsoftc(ifp);
5506         stats = (struct e1000_vf_stats *)adapter->stats;
5507
5508         switch (cnt) {
5509         case IFCOUNTER_IPACKETS:
5510                 return (stats->gprc);
5511         case IFCOUNTER_OPACKETS:
5512                 return (stats->gptc);
5513         case IFCOUNTER_IBYTES:
5514                 return (stats->gorc);
5515         case IFCOUNTER_OBYTES:
5516                 return (stats->gotc);
5517         case IFCOUNTER_IMCASTS:
5518                 return (stats->mprc);
5519         case IFCOUNTER_IERRORS:
5520                 return (adapter->dropped_pkts);
5521         case IFCOUNTER_OERRORS:
5522                 return (adapter->watchdog_events);
5523 #ifndef IGB_LEGACY_TX
5524         case IFCOUNTER_OQDROPS:
5525                 rv = 0;
5526                 txr = adapter->tx_rings;
5527                 for (int i = 0; i < adapter->num_queues; i++, txr++)
5528                         rv += txr->br->br_drops;
5529                 return (rv);
5530 #endif
5531         default:
5532                 return (if_get_counter_default(ifp, cnt));
5533         }
5534 }
5535
5536 static uint64_t
5537 igb_get_counter(if_t ifp, ift_counter cnt)
5538 {
5539         struct adapter *adapter;
5540         struct e1000_hw_stats *stats;
5541 #ifndef IGB_LEGACY_TX
5542         struct tx_ring *txr;
5543         uint64_t rv;
5544 #endif
5545
5546         adapter = if_getsoftc(ifp);
5547         if (adapter->vf_ifp)
5548                 return (igb_get_vf_counter(ifp, cnt));
5549
5550         stats = (struct e1000_hw_stats *)adapter->stats;
5551
5552         switch (cnt) {
5553         case IFCOUNTER_IPACKETS:
5554                 return (stats->gprc);
5555         case IFCOUNTER_OPACKETS:
5556                 return (stats->gptc);
5557         case IFCOUNTER_IBYTES:
5558                 return (stats->gorc);
5559         case IFCOUNTER_OBYTES:
5560                 return (stats->gotc);
5561         case IFCOUNTER_IMCASTS:
5562                 return (stats->mprc);
5563         case IFCOUNTER_OMCASTS:
5564                 return (stats->mptc);
5565         case IFCOUNTER_IERRORS:
5566                 return (adapter->dropped_pkts + stats->rxerrc +
5567                     stats->crcerrs + stats->algnerrc +
5568                     stats->ruc + stats->roc + stats->cexterr);
5569         case IFCOUNTER_OERRORS:
5570                 return (stats->ecol + stats->latecol +
5571                     adapter->watchdog_events);
5572         case IFCOUNTER_COLLISIONS:
5573                 return (stats->colc);
5574         case IFCOUNTER_IQDROPS:
5575                 return (stats->mpc);
5576 #ifndef IGB_LEGACY_TX
5577         case IFCOUNTER_OQDROPS:
5578                 rv = 0;
5579                 txr = adapter->tx_rings;
5580                 for (int i = 0; i < adapter->num_queues; i++, txr++)
5581                         rv += txr->br->br_drops;
5582                 return (rv);
5583 #endif
5584         default:
5585                 return (if_get_counter_default(ifp, cnt));
5586         }
5587 }
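
/*
 * Both counter methods are attached to the ifnet with
 * if_setgetcounterfn(ifp, igb_get_counter) when the interface is set
 * up, so consumers such as netstat(1) read the soft-copied statistics
 * rather than touching hardware registers directly.
 */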
5588
5589 /**********************************************************************
5590  *
5591  *  Update the board statistics counters.
5592  *
5593  **********************************************************************/
5594 static void
5595 igb_update_stats_counters(struct adapter *adapter)
5596 {
5597         struct e1000_hw         *hw = &adapter->hw;
5598         struct e1000_hw_stats   *stats;
5599
5600         /*
5601         ** The virtual function adapter has only a
5602         ** small, controlled set of stats, so update
5603         ** only those and return.
5604         */
5605         if (adapter->vf_ifp) {
5606                 igb_update_vf_stats_counters(adapter);
5607                 return;
5608         }
5609
5610         stats = (struct e1000_hw_stats  *)adapter->stats;
5611
5612         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5613            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5614                 stats->symerrs +=
5615                     E1000_READ_REG(hw, E1000_SYMERRS);
5616                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5617         }
5618
5619         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5620         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5621         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5622         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5623
5624         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5625         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5626         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5627         stats->dc += E1000_READ_REG(hw, E1000_DC);
5628         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5629         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5630         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5631         /*
5632         ** For watchdog management we need to know if we have been
5633         ** paused during the last interval, so capture that here.
5634         */ 
5635         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5636         stats->xoffrxc += adapter->pause_frames;
5637         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5638         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5639         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5640         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5641         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5642         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5643         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5644         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5645         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5646         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5647         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5648         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5649
5650         /* For the 64-bit byte counters the low dword must be read first. */
5651         /* Both registers clear on the read of the high dword */
5652
5653         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5654             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5655         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5656             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5657
5658         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5659         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5660         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5661         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5662         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5663
5664         stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5665         stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5666         stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5667
5668         stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5669             ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5670         stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5671             ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5672
5673         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5674         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5675         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5676         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5677         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5678         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5679         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5680         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5681         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5682         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5683
5684         /* Interrupt Counts */
5685
5686         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5687         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5688         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5689         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5690         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5691         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5692         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5693         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5694         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5695
5696         /* Host to Card Statistics */
5697
5698         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5699         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5700         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5701         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5702         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5703         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5704         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5705         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5706             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5707         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5708             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5709         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5710         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5711         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5712
5713         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5714         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5715         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5716         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5717         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5718         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5719
5720         /* Driver specific counters */
5721         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5722         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5723         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5724         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5725         adapter->packet_buf_alloc_tx =
5726             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5727         adapter->packet_buf_alloc_rx =
5728             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5729 }
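
/*
 * Note that the statistics registers read above are clear-on-read,
 * which is why every read is accumulated (+=) into the driver's soft
 * copy instead of being stored outright.
 */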
5730
5731
5732 /**********************************************************************
5733  *
5734  *  Initialize the VF board statistics counters.
5735  *
5736  **********************************************************************/
5737 static void
5738 igb_vf_init_stats(struct adapter *adapter)
5739 {
5740         struct e1000_hw *hw = &adapter->hw;
5741         struct e1000_vf_stats   *stats;
5742
5743         stats = (struct e1000_vf_stats  *)adapter->stats;
5744         if (stats == NULL)
5745                 return;
5746         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5747         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5748         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5749         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5750         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5751 }
5752  
5753 /**********************************************************************
5754  *
5755  *  Update the VF board statistics counters.
5756  *
5757  **********************************************************************/
5758 static void
5759 igb_update_vf_stats_counters(struct adapter *adapter)
5760 {
5761         struct e1000_hw *hw = &adapter->hw;
5762         struct e1000_vf_stats   *stats;
5763
5764         if (adapter->link_speed == 0)
5765                 return;
5766
5767         stats = (struct e1000_vf_stats  *)adapter->stats;
5768
5769         UPDATE_VF_REG(E1000_VFGPRC,
5770             stats->last_gprc, stats->gprc);
5771         UPDATE_VF_REG(E1000_VFGORC,
5772             stats->last_gorc, stats->gorc);
5773         UPDATE_VF_REG(E1000_VFGPTC,
5774             stats->last_gptc, stats->gptc);
5775         UPDATE_VF_REG(E1000_VFGOTC,
5776             stats->last_gotc, stats->gotc);
5777         UPDATE_VF_REG(E1000_VFMPRC,
5778             stats->last_mprc, stats->mprc);
5779 }
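
/*
 * UPDATE_VF_REG (defined in if_igb.h) accumulates the 32-bit VF
 * registers into 64-bit soft counters in a rollover-safe way.
 * A minimal sketch of the pattern, not the verbatim macro:
 *
 *     u32 new = E1000_READ_REG(hw, reg);
 *     if (new < last)                  (the 32-bit counter wrapped)
 *             cur += 0x100000000LL;
 *     last = new;
 *     cur &= 0xFFFFFFFF00000000LL;     (splice in the fresh low dword)
 *     cur |= new;
 */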
5780
5781 /* Export a single 32-bit register via a read-only sysctl. */
5782 static int
5783 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5784 {
5785         struct adapter *adapter;
5786         u_int val;
5787
5788         adapter = oidp->oid_arg1;
5789         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5790         return (sysctl_handle_int(oidp, &val, 0, req));
5791 }
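
/*
 * For igb_sysctl_reg_handler, oid_arg1 carries the adapter and
 * oid_arg2 the register offset to read.  igb_add_hw_stats() below
 * registers it for the descriptor head/tail registers, e.g.:
 *
 *     SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
 *         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
 *         igb_sysctl_reg_handler, "IU", "Transmit Descriptor Head");
 */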
5792
5793 /*
5794 **  Tuneable interrupt rate handler
5795 */
5796 static int
5797 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5798 {
5799         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5800         int                     error;
5801         u32                     reg, usec, rate;
5802                         
5803         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5804         usec = ((reg & 0x7FFC) >> 2);
5805         if (usec > 0)
5806                 rate = 1000000 / usec;
5807         else
5808                 rate = 0;
5809         error = sysctl_handle_int(oidp, &rate, 0, req);
5810         if (error || !req->newptr)
5811                 return (error);
5812         return (0);
5813 }
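
/*
 * Worked example: an EITR value of 0x1F4 yields
 * usec = (0x1F4 & 0x7FFC) >> 2 = 125, so the reported rate is
 * 1000000 / 125 = 8000 interrupts/sec.  Writes are accepted by
 * sysctl_handle_int() but never applied to the hardware, so the OID
 * is effectively read-only (it is registered with CTLFLAG_RD below).
 */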
5814
5815 /*
5816  * Add sysctl variables, one per statistic, to the system.
5817  */
5818 static void
5819 igb_add_hw_stats(struct adapter *adapter)
5820 {
5821         device_t dev = adapter->dev;
5822
5823         struct tx_ring *txr = adapter->tx_rings;
5824         struct rx_ring *rxr = adapter->rx_rings;
5825
5826         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5827         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5828         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5829         struct e1000_hw_stats *stats = adapter->stats;
5830
5831         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5832         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5833
5834 #define QUEUE_NAME_LEN 32
5835         char namebuf[QUEUE_NAME_LEN];
5836
5837         /* Driver Statistics */
5838         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5839                         CTLFLAG_RD, &adapter->dropped_pkts,
5840                         "Driver dropped packets");
5841         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 
5842                         CTLFLAG_RD, &adapter->link_irq,
5843                         "Link MSIX IRQ Handled");
5844         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5845                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5846                         "Defragmenting mbuf chain failed");
5847         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5848                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5849                         "Driver tx dma failure in xmit");
5850         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5851                         CTLFLAG_RD, &adapter->rx_overruns,
5852                         "RX overruns");
5853         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5854                         CTLFLAG_RD, &adapter->watchdog_events,
5855                         "Watchdog timeouts");
5856
5857         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5858                         CTLFLAG_RD, &adapter->device_control,
5859                         "Device Control Register");
5860         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5861                         CTLFLAG_RD, &adapter->rx_control,
5862                         "Receiver Control Register");
5863         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5864                         CTLFLAG_RD, &adapter->int_mask,
5865                         "Interrupt Mask");
5866         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5867                         CTLFLAG_RD, &adapter->eint_mask,
5868                         "Extended Interrupt Mask");
5869         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5870                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5871                         "Transmit Buffer Packet Allocation");
5872         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5873                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5874                         "Receive Buffer Packet Allocation");
5875         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5876                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5877                         "Flow Control High Watermark");
5878         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5879                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5880                         "Flow Control Low Watermark");
5881
5882         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5883                 struct lro_ctrl *lro = &rxr->lro;
5884
5885                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5886                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5887                                             CTLFLAG_RD, NULL, "Queue Name");
5888                 queue_list = SYSCTL_CHILDREN(queue_node);
5889
5890                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5891                                 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5892                                 sizeof(&adapter->queues[i]),
5893                                 igb_sysctl_interrupt_rate_handler,
5894                                 "IU", "Interrupt Rate");
5895
5896                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5897                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5898                                 igb_sysctl_reg_handler, "IU",
5899                                 "Transmit Descriptor Head");
5900                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5901                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5902                                 igb_sysctl_reg_handler, "IU",
5903                                 "Transmit Descriptor Tail");
5904                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5905                                 CTLFLAG_RD, &txr->no_desc_avail,
5906                                 "Queue Descriptors Unavailable");
5907                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5908                                 CTLFLAG_RD, &txr->total_packets,
5909                                 "Queue Packets Transmitted");
5910
5911                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5912                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5913                                 igb_sysctl_reg_handler, "IU",
5914                                 "Receive Descriptor Head");
5915                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5916                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5917                                 igb_sysctl_reg_handler, "IU",
5918                                 "Receive Descriptor Tail");
5919                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5920                                 CTLFLAG_RD, &rxr->rx_packets,
5921                                 "Queue Packets Received");
5922                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5923                                 CTLFLAG_RD, &rxr->rx_bytes,
5924                                 "Queue Bytes Received");
5925                 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
5926                                 CTLFLAG_RD, &lro->lro_queued, 0,
5927                                 "LRO Queued");
5928                 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
5929                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5930                                 "LRO Flushed");
5931         }
5932
5933         /* MAC stats get their own sub node */
5934
5935         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5936                                     CTLFLAG_RD, NULL, "MAC Statistics");
5937         stat_list = SYSCTL_CHILDREN(stat_node);
5938
5939         /*
5940         ** The VF adapter has a very limited set of stats
5941         ** since it's not managing the metal, so to speak.
5942         */
5943         if (adapter->vf_ifp) {
5944                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5945                                 CTLFLAG_RD, &stats->gprc,
5946                                 "Good Packets Received");
5947                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5948                                 CTLFLAG_RD, &stats->gptc,
5949                                 "Good Packets Transmitted");
5950                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5951                                 CTLFLAG_RD, &stats->gorc,
5952                                 "Good Octets Received");
5953                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5954                                 CTLFLAG_RD, &stats->gotc,
5955                                 "Good Octets Transmitted");
5956                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5957                                 CTLFLAG_RD, &stats->mprc,
5958                                 "Multicast Packets Received");
5959                 return;
5960         }
5961
5962         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5963                         CTLFLAG_RD, &stats->ecol,
5964                         "Excessive collisions");
5965         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5966                         CTLFLAG_RD, &stats->scc,
5967                         "Single collisions");
5968         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5969                         CTLFLAG_RD, &stats->mcc,
5970                         "Multiple collisions");
5971         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5972                         CTLFLAG_RD, &stats->latecol,
5973                         "Late collisions");
5974         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5975                         CTLFLAG_RD, &stats->colc,
5976                         "Collision Count");
5977         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5978                         CTLFLAG_RD, &stats->symerrs,
5979                         "Symbol Errors");
5980         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5981                         CTLFLAG_RD, &stats->sec,
5982                         "Sequence Errors");
5983         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5984                         CTLFLAG_RD, &stats->dc,
5985                         "Defer Count");
5986         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5987                         CTLFLAG_RD, &stats->mpc,
5988                         "Missed Packets");
5989         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
5990                         CTLFLAG_RD, &stats->rlec,
5991                         "Receive Length Errors");
5992         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5993                         CTLFLAG_RD, &stats->rnbc,
5994                         "Receive No Buffers");
5995         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5996                         CTLFLAG_RD, &stats->ruc,
5997                         "Receive Undersize");
5998         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5999                         CTLFLAG_RD, &stats->rfc,
6000                         "Fragmented Packets Received");
6001         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
6002                         CTLFLAG_RD, &stats->roc,
6003                         "Oversized Packets Received");
6004         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
6005                         CTLFLAG_RD, &stats->rjc,
6006                         "Received Jabber");
6007         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6008                         CTLFLAG_RD, &stats->rxerrc,
6009                         "Receive Errors");
6010         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6011                         CTLFLAG_RD, &stats->crcerrs,
6012                         "CRC errors");
6013         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6014                         CTLFLAG_RD, &stats->algnerrc,
6015                         "Alignment Errors");
6016         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6017                         CTLFLAG_RD, &stats->tncrs,
6018                         "Transmit with No CRS");
6019         /* On 82575 these are collision counts */
6020         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6021                         CTLFLAG_RD, &stats->cexterr,
6022                         "Collision/Carrier extension errors");
6023         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6024                         CTLFLAG_RD, &stats->xonrxc,
6025                         "XON Received");
6026         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6027                         CTLFLAG_RD, &stats->xontxc,
6028                         "XON Transmitted");
6029         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6030                         CTLFLAG_RD, &stats->xoffrxc,
6031                         "XOFF Received");
6032         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6033                         CTLFLAG_RD, &stats->xofftxc,
6034                         "XOFF Transmitted");
6035         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6036                         CTLFLAG_RD, &stats->fcruc,
6037                         "Unsupported Flow Control Received");
6038         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6039                         CTLFLAG_RD, &stats->mgprc,
6040                         "Management Packets Received");
6041         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6042                         CTLFLAG_RD, &stats->mgpdc,
6043                         "Management Packets Dropped");
6044         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6045                         CTLFLAG_RD, &stats->mgptc,
6046                         "Management Packets Transmitted");
6047         /* Packet Reception Stats */
6048         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6049                         CTLFLAG_RD, &stats->tpr,
6050                         "Total Packets Received");
6051         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6052                         CTLFLAG_RD, &stats->gprc,
6053                         "Good Packets Received");
6054         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6055                         CTLFLAG_RD, &stats->bprc,
6056                         "Broadcast Packets Received");
6057         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6058                         CTLFLAG_RD, &stats->mprc,
6059                         "Multicast Packets Received");
6060         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6061                         CTLFLAG_RD, &stats->prc64,
6062                         "64 byte frames received");
6063         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6064                         CTLFLAG_RD, &stats->prc127,
6065                         "65-127 byte frames received");
6066         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6067                         CTLFLAG_RD, &stats->prc255,
6068                         "128-255 byte frames received");
6069         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6070                         CTLFLAG_RD, &stats->prc511,
6071                         "256-511 byte frames received");
6072         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6073                         CTLFLAG_RD, &stats->prc1023,
6074                         "512-1023 byte frames received");
6075         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6076                         CTLFLAG_RD, &stats->prc1522,
6077                         "1024-1522 byte frames received");
6078         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
6079                         CTLFLAG_RD, &stats->gorc, 
6080                         "Good Octets Received");
6081         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", 
6082                         CTLFLAG_RD, &stats->tor, 
6083                         "Total Octets Received");
6084
6085         /* Packet Transmission Stats */
6086         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
6087                         CTLFLAG_RD, &stats->gotc, 
6088                         "Good Octets Transmitted"); 
6089         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", 
6090                         CTLFLAG_RD, &stats->tot, 
6091                         "Total Octets Transmitted");
6092         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6093                         CTLFLAG_RD, &stats->tpt,
6094                         "Total Packets Transmitted");
6095         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6096                         CTLFLAG_RD, &stats->gptc,
6097                         "Good Packets Transmitted");
6098         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6099                         CTLFLAG_RD, &stats->bptc,
6100                         "Broadcast Packets Transmitted");
6101         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6102                         CTLFLAG_RD, &stats->mptc,
6103                         "Multicast Packets Transmitted");
6104         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6105                         CTLFLAG_RD, &stats->ptc64,
6106                         "64 byte frames transmitted");
6107         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6108                         CTLFLAG_RD, &stats->ptc127,
6109                         "65-127 byte frames transmitted");
6110         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6111                         CTLFLAG_RD, &stats->ptc255,
6112                         "128-255 byte frames transmitted");
6113         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6114                         CTLFLAG_RD, &stats->ptc511,
6115                         "256-511 byte frames transmitted");
6116         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6117                         CTLFLAG_RD, &stats->ptc1023,
6118                         "512-1023 byte frames transmitted");
6119         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6120                         CTLFLAG_RD, &stats->ptc1522,
6121                         "1024-1522 byte frames transmitted");
6122         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6123                         CTLFLAG_RD, &stats->tsctc,
6124                         "TSO Contexts Transmitted");
6125         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6126                         CTLFLAG_RD, &stats->tsctfc,
6127                         "TSO Contexts Failed");
6128
6129
6130         /* Interrupt Stats */
6131
6132         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
6133                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
6134         int_list = SYSCTL_CHILDREN(int_node);
6135
6136         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6137                         CTLFLAG_RD, &stats->iac,
6138                         "Interrupt Assertion Count");
6139
6140         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6141                         CTLFLAG_RD, &stats->icrxptc,
6142                         "Interrupt Cause Rx Pkt Timer Expire Count");
6143
6144         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6145                         CTLFLAG_RD, &stats->icrxatc,
6146                         "Interrupt Cause Rx Abs Timer Expire Count");
6147
6148         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6149                         CTLFLAG_RD, &stats->ictxptc,
6150                         "Interrupt Cause Tx Pkt Timer Expire Count");
6151
6152         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6153                         CTLFLAG_RD, &stats->ictxatc,
6154                         "Interrupt Cause Tx Abs Timer Expire Count");
6155
6156         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6157                         CTLFLAG_RD, &stats->ictxqec,
6158                         "Interrupt Cause Tx Queue Empty Count");
6159
6160         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6161                         CTLFLAG_RD, &stats->ictxqmtc,
6162                         "Interrupt Cause Tx Queue Min Thresh Count");
6163
6164         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6165                         CTLFLAG_RD, &stats->icrxdmtc,
6166                         "Interrupt Cause Rx Desc Min Thresh Count");
6167
6168         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6169                         CTLFLAG_RD, &stats->icrxoc,
6170                         "Interrupt Cause Receiver Overrun Count");
6171
6172         /* Host to Card Stats */
6173
6174         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
6175                                     CTLFLAG_RD, NULL, 
6176                                     "Host to Card Statistics");
6177
6178         host_list = SYSCTL_CHILDREN(host_node);
6179
6180         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6181                         CTLFLAG_RD, &stats->cbtmpc,
6182                         "Circuit Breaker Tx Packet Count");
6183
6184         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6185                         CTLFLAG_RD, &stats->htdpmc,
6186                         "Host Transmit Discarded Packets");
6187
6188         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6189                         CTLFLAG_RD, &stats->rpthc,
6190                         "Rx Packets To Host");
6191
6192         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6193                         CTLFLAG_RD, &stats->cbrmpc,
6194                         "Circuit Breaker Rx Packet Count");
6195
6196         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6197                         CTLFLAG_RD, &stats->cbrdpc,
6198                         "Circuit Breaker Rx Dropped Count");
6199
6200         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6201                         CTLFLAG_RD, &stats->hgptc,
6202                         "Host Good Packets Tx Count");
6203
6204         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6205                         CTLFLAG_RD, &stats->htcbdpc,
6206                         "Host Tx Circuit Breaker Dropped Count");
6207
6208         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6209                         CTLFLAG_RD, &stats->hgorc,
6210                         "Host Good Octets Received Count");
6211
6212         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6213                         CTLFLAG_RD, &stats->hgotc,
6214                         "Host Good Octets Transmit Count");
6215
6216         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6217                         CTLFLAG_RD, &stats->lenerrs,
6218                         "Length Errors");
6219
6220         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6221                         CTLFLAG_RD, &stats->scvpc,
6222                         "SerDes/SGMII Code Violation Pkt Count");
6223
6224         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6225                         CTLFLAG_RD, &stats->hrmpc,
6226                         "Header Redirection Missed Packet Count");
6227 }
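
/*
 * All of the OIDs above hang off the device's sysctl tree, so from
 * userland they can be inspected with sysctl(8); for example, assuming
 * unit 0:
 *
 *     # sysctl dev.igb.0.mac_stats.good_pkts_recvd
 *     # sysctl dev.igb.0.queue0.tx_packets
 *     # sysctl dev.igb.0.interrupts
 */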
6228
6229
6230 /**********************************************************************
6231  *
6232  *  This routine provides a way to dump out the adapter eeprom,
6233  *  often a useful debug/service tool. It dumps only the first
6234  *  32 words, since the data that matters lives in that range.
6235  *
6236  **********************************************************************/
6237 static int
6238 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6239 {
6240         struct adapter *adapter;
6241         int error;
6242         int result;
6243
6244         result = -1;
6245         error = sysctl_handle_int(oidp, &result, 0, req);
6246
6247         if (error || !req->newptr)
6248                 return (error);
6249
6250         /*
6251          * This value will cause a hex dump of the
6252          * first 32 16-bit words of the EEPROM to
6253          * the screen.
6254          */
6255         if (result == 1) {
6256                 adapter = (struct adapter *)arg1;
6257                 igb_print_nvm_info(adapter);
6258         }
6259
6260         return (error);
6261 }
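
/*
 * This handler is attached as a read/write OID at device attach time
 * (named "nvm" in this driver family); writing 1 triggers the dump,
 * e.g.:
 *
 *     # sysctl dev.igb.0.nvm=1
 */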
6262
6263 static void
6264 igb_print_nvm_info(struct adapter *adapter)
6265 {
6266         u16     eeprom_data;
6267         int     i, j, row = 0;
6268
6269         /* It's a bit crude, but it gets the job done */
6270         printf("\nInterface EEPROM Dump:\n");
6271         printf("Offset\n0x0000  ");
6272         for (i = 0, j = 0; i < 32; i++, j++) {
6273                 if (j == 8) { /* Make the offset block */
6274                         j = 0; ++row;
6275                         printf("\n0x00%x0  ", row);
6276                 }
6277                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6278                 printf("%04x ", eeprom_data);
6279         }
6280         printf("\n");
6281 }
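
/*
 * The resulting console output looks like the following ("xxxx" stands
 * in for the actual word values):
 *
 *     Interface EEPROM Dump:
 *     Offset
 *     0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *     0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *     ...
 */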
6282
6283 static void
6284 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6285         const char *description, int *limit, int value)
6286 {
6287         *limit = value;
6288         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6289             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6290             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6291 }
6292
6293 /*
6294 ** Set flow control using sysctl:
6295 ** Flow control values:
6296 **      0 - off
6297 **      1 - rx pause
6298 **      2 - tx pause
6299 **      3 - full
6300 */
6301 static int
6302 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6303 {
6304         int             error;
6305         static int      input = 3; /* default is full */
6306         struct adapter  *adapter = (struct adapter *) arg1;
6307
6308         error = sysctl_handle_int(oidp, &input, 0, req);
6309
6310         if ((error) || (req->newptr == NULL))
6311                 return (error);
6312
6313         switch (input) {
6314                 case e1000_fc_rx_pause:
6315                 case e1000_fc_tx_pause:
6316                 case e1000_fc_full:
6317                 case e1000_fc_none:
6318                         adapter->hw.fc.requested_mode = input;
6319                         adapter->fc = input;
6320                         break;
6321                 default:
6322                         /* Reject values outside the legal set */
6323                         return (EINVAL);
6324         }
6325
6326         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6327         e1000_force_mac_fc(&adapter->hw);
6328         /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6329         return (error);
6330 }
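
/*
 * The e1000_fc_* values line up with the comment above
 * (none = 0, rx_pause = 1, tx_pause = 2, full = 3), so assuming the
 * handler is registered under the "fc" OID as elsewhere in this
 * driver family, full flow control is requested with:
 *
 *     # sysctl dev.igb.0.fc=3
 */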
6331
6332 /*
6333 ** Manage DMA Coalesce:
6334 ** Control values:
6335 **      0/1 - off/on
6336 **      Legal timer values are:
6337 **      250, 500, and 1000-10000 in steps of 1000
6338 */
6339 static int
6340 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6341 {
6342         struct adapter *adapter = (struct adapter *) arg1;
6343         int             error;
6344
6345         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6346
6347         if ((error) || (req->newptr == NULL))
6348                 return (error);
6349
6350         switch (adapter->dmac) {
6351                 case 0:
6352                         /* Disabling */
6353                         break;
6354                 case 1: /* Just enable and use default */
6355                         adapter->dmac = 1000;
6356                         break;
6357                 case 250:
6358                 case 500:
6359                 case 1000:
6360                 case 2000:
6361                 case 3000:
6362                 case 4000:
6363                 case 5000:
6364                 case 6000:
6365                 case 7000:
6366                 case 8000:
6367                 case 9000:
6368                 case 10000:
6369                         /* Legal values - allow */
6370                         break;
6371                 default:
6372                         /* Do nothing, illegal value */
6373                         adapter->dmac = 0;
6374                         return (EINVAL);
6375         }
6376         /* Reinit the interface */
6377         igb_init(adapter);
6378         return (error);
6379 }
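
/*
 * A usage sketch, assuming the handler is registered under a "dmac"
 * OID:
 *
 *     # sysctl dev.igb.0.dmac=1
 *
 * Writing 1 enables coalescing with the default timer value of 1000,
 * which the switch above substitutes before re-initializing.
 */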
6380
6381 /*
6382 ** Manage Energy Efficient Ethernet:
6383 ** Control values:
6384 **     0/1 - enabled/disabled
6385 */
6386 static int
6387 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6388 {
6389         struct adapter  *adapter = (struct adapter *) arg1;
6390         int             error, value;
6391
6392         value = adapter->hw.dev_spec._82575.eee_disable;
6393         error = sysctl_handle_int(oidp, &value, 0, req);
6394         if (error || req->newptr == NULL)
6395                 return (error);
6396         IGB_CORE_LOCK(adapter);
6397         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6398         igb_init_locked(adapter);
6399         IGB_CORE_UNLOCK(adapter);
6400         return (0);
6401 }
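
/*
 * The value handled here is the *disable* flag, so (assuming the OID
 * is registered as "eee_disabled") EEE is turned off with:
 *
 *     # sysctl dev.igb.0.eee_disabled=1
 *
 * and re-enabled with a write of 0; either write re-runs
 * igb_init_locked() to apply the change.
 */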