/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include "if_igb.h"
/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "2.5.3-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII,  0, 0, 0},
        /* required last entry */
        {0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static uint64_t igb_get_counter(if_t, ift_counter);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static int      igb_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      igb_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(igb, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
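
/*
 * Note: DRIVER_MODULE(9) registers the driver with the pci bus, and each
 * MODULE_DEPEND(9) records a (minimum, preferred, maximum) version
 * dependency that the kernel linker checks when the module is loaded.
 */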

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");

/*
** AIM: Adaptive Interrupt Moderation.
** The interrupt rate is varied over time based on
** the traffic seen on each interrupt vector.
*/
static int igb_enable_aim = TRUE;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");
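
/*
 * Since this knob is CTLFLAG_RWTUN, it can be set as a boot-time tunable
 * or changed at runtime, e.g.:
 *
 *   sysctl hw.igb.enable_aim=0
 */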

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
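
/*
 * CTLFLAG_RDTUN knobs are read-only once the driver is loaded; set them
 * from loader.conf, e.g.:
 *
 *   hw.igb.max_interrupt_rate="16000"
 */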

#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be dma'd to a separate mbuf from the payload.
** This can have memory alignment benefits.  Another
** plus is that small packets often fit entirely
** into the header mbuf and thus use no cluster.  It's
** a very workload-dependent feature.
*/
static int igb_header_split = FALSE;
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");
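
/*
 * Example: pin the driver to two queue pairs from loader.conf
 * (boot-time only, since the knob is RDTUN):
 *
 *   hw.igb.num_queues="2"
 */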

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a CPU.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int igb_tx_process_limit = -1;
SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &igb_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[256];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

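        /*
         * Walk the table until the all-zero sentinel entry.  A subvendor
         * or subdevice of 0 in a table entry acts as a wildcard, matching
         * whatever subsystem IDs the adapter reports.
         */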
        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == 0)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTLs */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctls for limiting the amount of work done in the taskqueues */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        igb_set_sysctl_value(adapter, "tx_processing_limit",
            "max number of tx packets to process",
            &adapter->tx_process_limit, igb_tx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  They
         * must not exceed the hardware maximum, and the ring byte size
         * must be a multiple of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats = (struct e1000_vf_stats *)malloc(
                    sizeof(struct e1000_vf_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats = (struct e1000_hw_stats *)malloc(
                    sizeof(struct e1000_hw_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                        if (adapter->hw.mac.type == e1000_i354)
                                e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                        else
                                e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
                }
        }

        /*
        ** Start from a known state: this is
        ** important for reading the nvm and
        ** mac address correctly.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}

#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack; always uses tx[0] and spins for the lock.
 * Should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;
#ifdef  RSS
        uint32_t                bucket_id;
#endif

        /*
         * Which queue to use:
         *
         * When doing RSS, map it to the same outbound queue
         * as the incoming flow would be mapped to.
         *
         * If everything is set up correctly, it should be the
         * same bucket as the one the current CPU is in.
         */
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef  RSS
                if (rss_hash2bucket(m->m_pkthdr.flowid,
                    M_HASHTYPE_GET(m), &bucket_id) == 0) {
                        /* XXX TODO: spit out something if bucket_id > num_queues? */
                        i = bucket_id % adapter->num_queues;
                } else {
#endif
                        i = m->m_pkthdr.flowid % adapter->num_queues;
#ifdef  RSS
                }
#endif
        } else {
                i = curcpu % adapter->num_queues;
        }
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

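        /*
         * Enqueue first, then try to take the ring lock; if another
         * thread already holds it, hand the drain off to the queue's
         * taskqueue instead of spinning here.
         */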
        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IGB_TX_TRYLOCK(txr)) {
                igb_mq_start_locked(ifp, txr);
                IGB_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (0);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
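        /*
         * drbr_peek() leaves the mbuf on the ring until drbr_advance()
         * commits it; on a failed transmit, drbr_putback() stores the
         * (possibly reallocated) mbuf back at the head instead.
         */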
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                if (next->m_flags & M_MCAST && adapter->vf_ifp)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
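                /*
                 * 9234 bytes is the largest on-wire frame this driver
                 * accepts, so the largest settable MTU is 9234 less the
                 * Ethernet header and CRC, as checked below.
                 */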
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
                /* FALLTHROUGH */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
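                /*
                 * 'mask' holds the capability bits the caller asked to
                 * toggle; each block below flips the corresponding
                 * if_capenable bit and schedules a reinit when the
                 * hardware must be reprogrammed.
                 */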
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
#if __FreeBSD_version >= 1000000
                /* HW cannot turn these on/off separately */
                if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
                        ifp->if_capenable ^= IFCAP_RXCSUM;
                        ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
                        reinit = 1;
                }
                if (mask & IFCAP_TXCSUM) {
                        ifp->if_capenable ^= IFCAP_TXCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TXCSUM_IPV6) {
                        ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
                        reinit = 1;
                }
#else
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
#endif
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO6) {
                        ifp->if_capenable ^= IFCAP_TSO6;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
#if __FreeBSD_version >= 1000000
                ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_IP_SCTP;
#else
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
#endif
        }

#if __FreeBSD_version >= 1000000
        if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
                ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_IP6_SCTP;
        }
#endif
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;

        /* Clear bad data from Rx FIFOs */
        e1000_rx_fifo_flush_82575(&adapter->hw);

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
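        /*
         * MCLBYTES is the standard 2KB mbuf cluster, MJUMPAGESIZE a
         * page-sized cluster, and MJUM9BYTES a 9KB jumbo cluster; the
         * smallest cluster that holds a full frame is selected.
         */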
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                if (adapter->hw.mac.type == e1000_i354)
                        e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                else
                        e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
        }
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_que(void *context, int pending)
{
        struct igb_queue *que = context;
        struct adapter *adapter = que->adapter;
        struct tx_ring *txr = que->txr;
        struct ifnet    *ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                bool    more;

                more = igb_rxeof(que, adapter->rx_process_limit, NULL);

                IGB_TX_LOCK(txr);
                igb_txeof(txr);
#ifndef IGB_LEGACY_TX
                /* Process the stack queue only if not depleted */
                if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                    !drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
                /* Do we need another? */
                if (more) {
                        taskqueue_enqueue(que->tq, &que->que_task);
                        return;
                }
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                return;
#endif
        /* Re-enable this interrupt */
        if (que->eims)
                E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
        else
                igb_enable_intr(adapter);
}

1435 /* Deal with link in a sleepable context */
1436 static void
1437 igb_handle_link(void *context, int pending)
1438 {
1439         struct adapter *adapter = context;
1440
1441         IGB_CORE_LOCK(adapter);
1442         igb_handle_link_locked(adapter);
1443         IGB_CORE_UNLOCK(adapter);
1444 }
1445
1446 static void
1447 igb_handle_link_locked(struct adapter *adapter)
1448 {
1449         struct tx_ring  *txr = adapter->tx_rings;
1450         struct ifnet *ifp = adapter->ifp;
1451
1452         IGB_CORE_LOCK_ASSERT(adapter);
1453         adapter->hw.mac.get_link_status = 1;
1454         igb_update_link_status(adapter);
1455         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1456                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1457                         IGB_TX_LOCK(txr);
1458 #ifndef IGB_LEGACY_TX
1459                         /* Process the stack queue only if not depleted */
1460                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1461                             !drbr_empty(ifp, txr->br))
1462                                 igb_mq_start_locked(ifp, txr);
1463 #else
1464                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1465                                 igb_start_locked(txr, ifp);
1466 #endif
1467                         IGB_TX_UNLOCK(txr);
1468                 }
1469         }
1470 }
1471
1472 /*********************************************************************
1473  *
1474  *  MSI/Legacy Deferred
1475  *  Interrupt Service routine  
1476  *
1477  *********************************************************************/
1478 static int
1479 igb_irq_fast(void *arg)
1480 {
1481         struct adapter          *adapter = arg;
1482         struct igb_queue        *que = adapter->queues;
1483         u32                     reg_icr;
1484
1485
1486         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1487
1488         /* Hot eject?  */
1489         if (reg_icr == 0xffffffff)
1490                 return FILTER_STRAY;
1491
1492         /* Definitely not our interrupt.  */
1493         if (reg_icr == 0x0)
1494                 return FILTER_STRAY;
1495
1496         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1497                 return FILTER_STRAY;
1498
1499         /*
1500          * Mask interrupts until the taskqueue is finished running.  This is
1501          * cheap, just assume that it is needed.  This also works around the
1502          * MSI message reordering errata on certain systems.
1503          */
1504         igb_disable_intr(adapter);
1505         taskqueue_enqueue(que->tq, &que->que_task);
1506
1507         /* Link status change */
1508         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1509                 taskqueue_enqueue(que->tq, &adapter->link_task);
1510
1511         if (reg_icr & E1000_ICR_RXO)
1512                 adapter->rx_overruns++;
1513         return FILTER_HANDLED;
1514 }
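
/*
 * Editor's note: igb_irq_fast() above is registered as an interrupt
 * filter, so it runs in primary interrupt context and must only read
 * registers and schedule deferred work; igb_handle_que() then does the
 * actual RX/TX cleanup from a taskqueue thread.  A minimal sketch of
 * that split, with purely illustrative names (example_*), follows:
 */
#if 0
static int
example_intr_filter(void *arg)
{
        struct example_softc *sc = arg;

        if (!EXAMPLE_INTR_IS_OURS(sc))          /* shared line, not ours */
                return (FILTER_STRAY);
        EXAMPLE_MASK_INTR(sc);                  /* quiet device until task runs */
        taskqueue_enqueue(sc->tq, &sc->task);   /* defer the heavy lifting */
        return (FILTER_HANDLED);
}
#endif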
1515
1516 #ifdef DEVICE_POLLING
1517 #if __FreeBSD_version >= 800000
1518 #define POLL_RETURN_COUNT(a) (a)
1519 static int
1520 #else
1521 #define POLL_RETURN_COUNT(a)
1522 static void
1523 #endif
1524 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1525 {
1526         struct adapter          *adapter = ifp->if_softc;
1527         struct igb_queue        *que;
1528         struct tx_ring          *txr;
1529         u32                     reg_icr, rx_done = 0;
1530         u32                     loop = IGB_MAX_LOOP;
1531         bool                    more;
1532
1533         IGB_CORE_LOCK(adapter);
1534         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1535                 IGB_CORE_UNLOCK(adapter);
1536                 return POLL_RETURN_COUNT(rx_done);
1537         }
1538
1539         if (cmd == POLL_AND_CHECK_STATUS) {
1540                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1541                 /* Link status change */
1542                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1543                         igb_handle_link_locked(adapter);
1544
1545                 if (reg_icr & E1000_ICR_RXO)
1546                         adapter->rx_overruns++;
1547         }
1548         IGB_CORE_UNLOCK(adapter);
1549
1550         for (int i = 0; i < adapter->num_queues; i++) {
1551                 que = &adapter->queues[i];
1552                 txr = que->txr;
1553
1554                 igb_rxeof(que, count, &rx_done);
1555
1556                 IGB_TX_LOCK(txr);
1557                 do {
1558                         more = igb_txeof(txr);
1559                 } while (loop-- && more);
1560 #ifndef IGB_LEGACY_TX
1561                 if (!drbr_empty(ifp, txr->br))
1562                         igb_mq_start_locked(ifp, txr);
1563 #else
1564                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1565                         igb_start_locked(txr, ifp);
1566 #endif
1567                 IGB_TX_UNLOCK(txr);
1568         }
1569
1570         return POLL_RETURN_COUNT(rx_done);
1571 }
1572 #endif /* DEVICE_POLLING */
1573
1574 /*********************************************************************
1575  *
1576  *  MSIX Que Interrupt Service routine
1577  *
1578  **********************************************************************/
1579 static void
1580 igb_msix_que(void *arg)
1581 {
1582         struct igb_queue *que = arg;
1583         struct adapter *adapter = que->adapter;
1584         struct ifnet   *ifp = adapter->ifp;
1585         struct tx_ring *txr = que->txr;
1586         struct rx_ring *rxr = que->rxr;
1587         u32             newitr = 0;
1588         bool            more_rx;
1589
1590         /* Ignore spurious interrupts */
1591         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1592                 return;
1593
1594         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1595         ++que->irqs;
1596
1597         IGB_TX_LOCK(txr);
1598         igb_txeof(txr);
1599 #ifndef IGB_LEGACY_TX
1600         /* Process the stack queue only if not depleted */
1601         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1602             !drbr_empty(ifp, txr->br))
1603                 igb_mq_start_locked(ifp, txr);
1604 #else
1605         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1606                 igb_start_locked(txr, ifp);
1607 #endif
1608         IGB_TX_UNLOCK(txr);
1609
1610         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1611
1612         if (adapter->enable_aim == FALSE)
1613                 goto no_calc;
1614         /*
1615         ** Do Adaptive Interrupt Moderation:
1616         **  - Write out last calculated setting
1617         **  - Calculate based on average size over
1618         **    the last interval.
1619         */
1620         if (que->eitr_setting)
1621                 E1000_WRITE_REG(&adapter->hw,
1622                     E1000_EITR(que->msix), que->eitr_setting);
1623
1624         que->eitr_setting = 0;
1625
1626         /* Idle, do nothing */
1627         if ((txr->bytes == 0) && (rxr->bytes == 0))
1628                 goto no_calc;
1629
1630         /* Use half the default if the link is below gigabit */
1631         if (adapter->link_speed != 1000)
1632                 newitr = IGB_DEFAULT_ITR / 2;
1633         else {
1634                 if ((txr->bytes) && (txr->packets))
1635                         newitr = txr->bytes/txr->packets;
1636                 if ((rxr->bytes) && (rxr->packets))
1637                         newitr = max(newitr,
1638                             (rxr->bytes / rxr->packets));
1639                 newitr += 24; /* account for hardware frame, crc */
1640                 /* set an upper boundary */
1641                 newitr = min(newitr, 3000);
1642                 /* Be nice to the mid range */
1643                 if ((newitr > 300) && (newitr < 1200))
1644                         newitr = (newitr / 3);
1645                 else
1646                         newitr = (newitr / 2);
1647         }
1648         newitr &= 0x7FFC;  /* Mask invalid bits */
1649         if (adapter->hw.mac.type == e1000_82575)
1650                 newitr |= newitr << 16;
1651         else
1652                 newitr |= E1000_EITR_CNT_IGNR;
1653
1654         /* save for next interrupt */
1655         que->eitr_setting = newitr;
1656
1657         /* Reset state */
1658         txr->bytes = 0;
1659         txr->packets = 0;
1660         rxr->bytes = 0;
1661         rxr->packets = 0;
1662
1663 no_calc:
1664         /* Schedule a clean task if needed */
1665         if (more_rx)
1666                 taskqueue_enqueue(que->tq, &que->que_task);
1667         else
1668                 /* Reenable this interrupt */
1669                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1670         return;
1671 }
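
/*
 * Editor's note: a worked example of the adaptive interrupt moderation
 * arithmetic in igb_msix_que() above.  Suppose a gigabit link where the
 * last interval saw rxr->bytes = 64000, rxr->packets = 50 and an idle
 * TX ring:
 *
 *   newitr = 64000 / 50          = 1280   (average frame size)
 *   newitr += 24                 = 1304   (frame overhead, CRC)
 *   newitr = min(1304, 3000)     = 1304   (upper clamp)
 *   1304 >= 1200, so newitr /= 2 = 652
 *   652 & 0x7FFC                 = 652    (mask the reserved low bits)
 *
 * Larger average frames therefore produce a larger EITR interval, i.e.
 * fewer interrupts under bulk transfers, while small-frame
 * (latency-sensitive) traffic keeps the interval short.
 */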
1672
1673
1674 /*********************************************************************
1675  *
1676  *  MSIX Link Interrupt Service routine
1677  *
1678  **********************************************************************/
1679
1680 static void
1681 igb_msix_link(void *arg)
1682 {
1683         struct adapter  *adapter = arg;
1684         u32             icr;
1685
1686         ++adapter->link_irq;
1687         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1688         if (!(icr & E1000_ICR_LSC))
1689                 goto spurious;
1690         igb_handle_link(adapter, 0);
1691
1692 spurious:
1693         /* Rearm */
1694         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1695         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1696         return;
1697 }
1698
1699
1700 /*********************************************************************
1701  *
1702  *  Media Ioctl callback
1703  *
1704  *  This routine is called whenever the user queries the status of
1705  *  the interface using ifconfig.
1706  *
1707  **********************************************************************/
1708 static void
1709 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1710 {
1711         struct adapter *adapter = ifp->if_softc;
1712
1713         INIT_DEBUGOUT("igb_media_status: begin");
1714
1715         IGB_CORE_LOCK(adapter);
1716         igb_update_link_status(adapter);
1717
1718         ifmr->ifm_status = IFM_AVALID;
1719         ifmr->ifm_active = IFM_ETHER;
1720
1721         if (!adapter->link_active) {
1722                 IGB_CORE_UNLOCK(adapter);
1723                 return;
1724         }
1725
1726         ifmr->ifm_status |= IFM_ACTIVE;
1727
1728         switch (adapter->link_speed) {
1729         case 10:
1730                 ifmr->ifm_active |= IFM_10_T;
1731                 break;
1732         case 100:
1733                 /*
1734                 ** Support for 100Mb SFP - these are Fiber 
1735                 ** but the media type appears as serdes
1736                 */
1737                 if (adapter->hw.phy.media_type ==
1738                     e1000_media_type_internal_serdes)
1739                         ifmr->ifm_active |= IFM_100_FX;
1740                 else
1741                         ifmr->ifm_active |= IFM_100_TX;
1742                 break;
1743         case 1000:
1744                 ifmr->ifm_active |= IFM_1000_T;
1745                 break;
1746         case 2500:
1747                 ifmr->ifm_active |= IFM_2500_SX;
1748                 break;
1749         }
1750
1751         if (adapter->link_duplex == FULL_DUPLEX)
1752                 ifmr->ifm_active |= IFM_FDX;
1753         else
1754                 ifmr->ifm_active |= IFM_HDX;
1755
1756         IGB_CORE_UNLOCK(adapter);
1757 }
1758
1759 /*********************************************************************
1760  *
1761  *  Media Ioctl callback
1762  *
1763  *  This routine is called when the user changes speed/duplex using
1764  *  media/mediaopt options with ifconfig.
1765  *
1766  **********************************************************************/
1767 static int
1768 igb_media_change(struct ifnet *ifp)
1769 {
1770         struct adapter *adapter = ifp->if_softc;
1771         struct ifmedia  *ifm = &adapter->media;
1772
1773         INIT_DEBUGOUT("igb_media_change: begin");
1774
1775         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1776                 return (EINVAL);
1777
1778         IGB_CORE_LOCK(adapter);
1779         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1780         case IFM_AUTO:
1781                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1782                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1783                 break;
1784         case IFM_1000_LX:
1785         case IFM_1000_SX:
1786         case IFM_1000_T:
1787                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1788                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1789                 break;
1790         case IFM_100_TX:
1791                 adapter->hw.mac.autoneg = FALSE;
1792                 adapter->hw.phy.autoneg_advertised = 0;
1793                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1794                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1795                 else
1796                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1797                 break;
1798         case IFM_10_T:
1799                 adapter->hw.mac.autoneg = FALSE;
1800                 adapter->hw.phy.autoneg_advertised = 0;
1801                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1802                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1803                 else
1804                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1805                 break;
1806         default:
1807                 device_printf(adapter->dev, "Unsupported media type\n");
1808         }
1809
1810         igb_init_locked(adapter);
1811         IGB_CORE_UNLOCK(adapter);
1812
1813         return (0);
1814 }
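
/*
 * Editor's note: igb_media_change() above is reached via the
 * SIOCSIFMEDIA ioctl that ifconfig(8) issues; illustrative invocations:
 *
 *   ifconfig igb0 media autoselect
 *   ifconfig igb0 media 100baseTX mediaopt full-duplex
 *   ifconfig igb0 media 1000baseT
 *
 * Note the asymmetry in the switch: forced 10/100 modes turn
 * autonegotiation off, while the gigabit subtypes still advertise
 * 1000_FULL through autoneg, as 802.3 requires for 1000baseT.
 */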
1815
1816
1817 /*********************************************************************
1818  *
1819  *  This routine maps the mbufs to Advanced TX descriptors.
1820  *  
1821  **********************************************************************/
1822 static int
1823 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1824 {
1825         struct adapter  *adapter = txr->adapter;
1826         u32             olinfo_status = 0, cmd_type_len;
1827         int             i, j, error, nsegs;
1828         int             first;
1829         bool            remap = TRUE;
1830         struct mbuf     *m_head;
1831         bus_dma_segment_t segs[IGB_MAX_SCATTER];
1832         bus_dmamap_t    map;
1833         struct igb_tx_buf *txbuf;
1834         union e1000_adv_tx_desc *txd = NULL;
1835
1836         m_head = *m_headp;
1837
1838         /* Basic descriptor defines */
1839         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1840             E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1841
1842         if (m_head->m_flags & M_VLANTAG)
1843                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1844
1845         /*
1846          * Important to capture the first descriptor
1847          * used, because it will contain the index of
1848          * the descriptor we tell the hardware to report back on
1849          */
1850         first = txr->next_avail_desc;
1851         txbuf = &txr->tx_buffers[first];
1852         map = txbuf->map;
1853
1854         /*
1855          * Map the packet for DMA.
1856          */
1857 retry:
1858         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1859             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1860
1861         if (__predict_false(error)) {
1862                 struct mbuf *m;
1863
1864                 switch (error) {
1865                 case EFBIG:
1866                         /* Try it again? - one try */
1867                         if (remap == TRUE) {
1868                                 remap = FALSE;
1869                                 m = m_collapse(*m_headp, M_NOWAIT,
1870                                     IGB_MAX_SCATTER);
1871                                 if (m == NULL) {
1872                                         adapter->mbuf_defrag_failed++;
1873                                         m_freem(*m_headp);
1874                                         *m_headp = NULL;
1875                                         return (ENOBUFS);
1876                                 }
1877                                 *m_headp = m;
1878                                 goto retry;
1879                         } else
1880                                 return (error);
1881                 default:
1882                         txr->no_tx_dma_setup++;
1883                         m_freem(*m_headp);
1884                         *m_headp = NULL;
1885                         return (error);
1886                 }
1887         }
1888
1889         /* Make certain there are enough descriptors */
1890         if (nsegs > txr->tx_avail - 2) {
1891                 txr->no_desc_avail++;
1892                 bus_dmamap_unload(txr->txtag, map);
1893                 return (ENOBUFS);
1894         }
1895         m_head = *m_headp;
1896
1897         /*
1898         ** Set up the appropriate offload context
1899         ** this will consume the first descriptor
1900         */
1901         error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1902         if (__predict_false(error)) {
1903                 m_freem(*m_headp);
1904                 *m_headp = NULL;
1905                 return (error);
1906         }
1907
1908         /* 82575 needs the queue index added */
1909         if (adapter->hw.mac.type == e1000_82575)
1910                 olinfo_status |= txr->me << 4;
1911
1912         i = txr->next_avail_desc;
1913         for (j = 0; j < nsegs; j++) {
1914                 bus_size_t seglen;
1915                 bus_addr_t segaddr;
1916
1917                 txbuf = &txr->tx_buffers[i];
1918                 txd = &txr->tx_base[i];
1919                 seglen = segs[j].ds_len;
1920                 segaddr = htole64(segs[j].ds_addr);
1921
1922                 txd->read.buffer_addr = segaddr;
1923                 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1924                     cmd_type_len | seglen);
1925                 txd->read.olinfo_status = htole32(olinfo_status);
1926
1927                 if (++i == txr->num_desc)
1928                         i = 0;
1929         }
1930
1931         txd->read.cmd_type_len |=
1932             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1933         txr->tx_avail -= nsegs;
1934         txr->next_avail_desc = i;
1935
1936         txbuf->m_head = m_head;
1937         /*
1938         ** Here we swap the map so the last descriptor,
1939         ** which gets the completion interrupt, has the
1940         ** real map, and the first descriptor gets the
1941         ** unused map from this descriptor.
1942         */
1943         txr->tx_buffers[first].map = txbuf->map;
1944         txbuf->map = map;
1945         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1946
1947         /* Set the EOP descriptor that will be marked done */
1948         txbuf = &txr->tx_buffers[first];
1949         txbuf->eop = txd;
1950
1951         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1952             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1953         /*
1954          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1955          * hardware that this frame is available to transmit.
1956          */
1957         ++txr->total_packets;
1958         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1959
1960         return (0);
1961 }
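
/*
 * Editor's note: two details of igb_xmit() above worth calling out.
 * The "nsegs > tx_avail - 2" check appears to reserve one slot for the
 * offload context descriptor that igb_tx_ctx_setup() may consume, plus
 * one slot of slack so the ring never becomes completely full.  The map
 * swap near the end puts the loaded DMA map on the last (EOP) buffer,
 * whose completion triggers the unload in igb_txeof(), while the first
 * buffer keeps the spare map and the eop back-pointer for cleanup.
 */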
1962 static void
1963 igb_set_promisc(struct adapter *adapter)
1964 {
1965         struct ifnet    *ifp = adapter->ifp;
1966         struct e1000_hw *hw = &adapter->hw;
1967         u32             reg;
1968
1969         if (adapter->vf_ifp) {
1970                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1971                 return;
1972         }
1973
1974         reg = E1000_READ_REG(hw, E1000_RCTL);
1975         if (ifp->if_flags & IFF_PROMISC) {
1976                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1977                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1978         } else if (ifp->if_flags & IFF_ALLMULTI) {
1979                 reg |= E1000_RCTL_MPE;
1980                 reg &= ~E1000_RCTL_UPE;
1981                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1982         }
1983 }
1984
1985 static void
1986 igb_disable_promisc(struct adapter *adapter)
1987 {
1988         struct e1000_hw *hw = &adapter->hw;
1989         struct ifnet    *ifp = adapter->ifp;
1990         u32             reg;
1991         int             mcnt = 0;
1992
1993         if (adapter->vf_ifp) {
1994                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1995                 return;
1996         }
1997         reg = E1000_READ_REG(hw, E1000_RCTL);
1998         reg &= ~E1000_RCTL_UPE;
1999         if (ifp->if_flags & IFF_ALLMULTI)
2000                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2001         else {
2002                 struct  ifmultiaddr *ifma;
2003 #if __FreeBSD_version < 800000
2004                 IF_ADDR_LOCK(ifp);
2005 #else   
2006                 if_maddr_rlock(ifp);
2007 #endif
2008                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2009                         if (ifma->ifma_addr->sa_family != AF_LINK)
2010                                 continue;
2011                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2012                                 break;
2013                         mcnt++;
2014                 }
2015 #if __FreeBSD_version < 800000
2016                 IF_ADDR_UNLOCK(ifp);
2017 #else
2018                 if_maddr_runlock(ifp);
2019 #endif
2020         }
2021         /* Don't clear MPE if we are at the multicast address limit */
2022         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2023                 reg &= ~E1000_RCTL_MPE;
2024         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2025 }
2026
2027
2028 /*********************************************************************
2029  *  Multicast Update
2030  *
2031  *  This routine is called whenever multicast address list is updated.
2032  *
2033  **********************************************************************/
2034
2035 static void
2036 igb_set_multi(struct adapter *adapter)
2037 {
2038         struct ifnet    *ifp = adapter->ifp;
2039         struct ifmultiaddr *ifma;
2040         u32 reg_rctl = 0;
2041         u8  *mta;
2042
2043         int mcnt = 0;
2044
2045         IOCTL_DEBUGOUT("igb_set_multi: begin");
2046
2047         mta = adapter->mta;
2048         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2049             MAX_NUM_MULTICAST_ADDRESSES);
2050
2051 #if __FreeBSD_version < 800000
2052         IF_ADDR_LOCK(ifp);
2053 #else
2054         if_maddr_rlock(ifp);
2055 #endif
2056         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2057                 if (ifma->ifma_addr->sa_family != AF_LINK)
2058                         continue;
2059
2060                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2061                         break;
2062
2063                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2064                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2065                 mcnt++;
2066         }
2067 #if __FreeBSD_version < 800000
2068         IF_ADDR_UNLOCK(ifp);
2069 #else
2070         if_maddr_runlock(ifp);
2071 #endif
2072
2073         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2074                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2075                 reg_rctl |= E1000_RCTL_MPE;
2076                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2077         } else
2078                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2079 }
2080
2081
2082 /*********************************************************************
2083  *  Timer routine:
2084  *      This routine checks for link status,
2085  *      updates statistics, and does the watchdog.
2086  *
2087  **********************************************************************/
2088
2089 static void
2090 igb_local_timer(void *arg)
2091 {
2092         struct adapter          *adapter = arg;
2093         device_t                dev = adapter->dev;
2094         struct ifnet            *ifp = adapter->ifp;
2095         struct tx_ring          *txr = adapter->tx_rings;
2096         struct igb_queue        *que = adapter->queues;
2097         int                     hung = 0, busy = 0;
2098
2099
2100         IGB_CORE_LOCK_ASSERT(adapter);
2101
2102         igb_update_link_status(adapter);
2103         igb_update_stats_counters(adapter);
2104
2105         /*
2106         ** Check the status of the TX queues:
2107         **      - central locked handling of OACTIVE
2108         **      - watchdog only if all queues show hung
2109         */
2110         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2111                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2112                     (adapter->pause_frames == 0))
2113                         ++hung;
2114                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2115                         ++busy;
2116                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2117                         taskqueue_enqueue(que->tq, &que->que_task);
2118         }
2119         if (hung == adapter->num_queues)
2120                 goto timeout;
2121         if (busy == adapter->num_queues)
2122                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2123         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2124             (busy < adapter->num_queues))
2125                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2126
2127         adapter->pause_frames = 0;
2128         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2129 #ifndef DEVICE_POLLING
2130         /* Schedule all queue interrupts - deadlock protection */
2131         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2132 #endif
2133         return;
2134
2135 timeout:
2136         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2137         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2138             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2139             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2140         device_printf(dev, "TX(%d) desc avail = %d, "
2141             "Next TX to Clean = %d\n",
2142             txr->me, txr->tx_avail, txr->next_to_clean);
2143         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2144         adapter->watchdog_events++;
2145         igb_init_locked(adapter);
2146 }
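
/*
 * Editor's note: a concrete reading of the per-tick accounting in
 * igb_local_timer() above, assuming num_queues = 4: the watchdog resets
 * the adapter only when all four rings report IGB_QUEUE_HUNG in the
 * same tick and no pause frames were received, so one stalled ring by
 * itself never triggers a reset; likewise OACTIVE is asserted only when
 * every ring is DEPLETED, and is cleared as soon as any ring drains.
 */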
2147
2148 static void
2149 igb_update_link_status(struct adapter *adapter)
2150 {
2151         struct e1000_hw         *hw = &adapter->hw;
2152         struct e1000_fc_info    *fc = &hw->fc;
2153         struct ifnet            *ifp = adapter->ifp;
2154         device_t                dev = adapter->dev;
2155         struct tx_ring          *txr = adapter->tx_rings;
2156         u32                     link_check, thstat, ctrl;
2157         char                    *flowctl = NULL;
2158
2159         link_check = thstat = ctrl = 0;
2160
2161         /* Get the cached link value or read for real */
2162         switch (hw->phy.media_type) {
2163         case e1000_media_type_copper:
2164                 if (hw->mac.get_link_status) {
2165                         /* Do the work to read phy */
2166                         e1000_check_for_link(hw);
2167                         link_check = !hw->mac.get_link_status;
2168                 } else
2169                         link_check = TRUE;
2170                 break;
2171         case e1000_media_type_fiber:
2172                 e1000_check_for_link(hw);
2173                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2174                                  E1000_STATUS_LU);
2175                 break;
2176         case e1000_media_type_internal_serdes:
2177                 e1000_check_for_link(hw);
2178                 link_check = adapter->hw.mac.serdes_has_link;
2179                 break;
2180         /* VF device is type_unknown */
2181         case e1000_media_type_unknown:
2182                 e1000_check_for_link(hw);
2183                 link_check = !hw->mac.get_link_status;
2184                 /* Fall thru */
2185         default:
2186                 break;
2187         }
2188
2189         /* Check for thermal downshift or shutdown */
2190         if (hw->mac.type == e1000_i350) {
2191                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2192                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2193         }
2194
2195         /* Get the flow control for display */
2196         switch (fc->current_mode) {
2197         case e1000_fc_rx_pause:
2198                 flowctl = "RX";
2199                 break;  
2200         case e1000_fc_tx_pause:
2201                 flowctl = "TX";
2202                 break;  
2203         case e1000_fc_full:
2204                 flowctl = "Full";
2205                 break;  
2206         case e1000_fc_none:
2207         default:
2208                 flowctl = "None";
2209                 break;  
2210         }
2211
2212         /* Now we check if a transition has happened */
2213         if (link_check && (adapter->link_active == 0)) {
2214                 e1000_get_speed_and_duplex(&adapter->hw, 
2215                     &adapter->link_speed, &adapter->link_duplex);
2216                 if (bootverbose)
2217                         device_printf(dev, "Link is up %d Mbps %s,"
2218                             " Flow Control: %s\n",
2219                             adapter->link_speed,
2220                             ((adapter->link_duplex == FULL_DUPLEX) ?
2221                             "Full Duplex" : "Half Duplex"), flowctl);
2222                 adapter->link_active = 1;
2223                 ifp->if_baudrate = adapter->link_speed * 1000000;
2224                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2225                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2226                         device_printf(dev, "Link: thermal downshift\n");
2227                 /* Delay Link Up for Phy update */
2228                 if (((hw->mac.type == e1000_i210) ||
2229                     (hw->mac.type == e1000_i211)) &&
2230                     (hw->phy.id == I210_I_PHY_ID))
2231                         msec_delay(I210_LINK_DELAY);
2232                 /* Reset if the media type changed. */
2233                 if (hw->dev_spec._82575.media_changed) {
2234                         hw->dev_spec._82575.media_changed = false;
2235                         adapter->flags |= IGB_MEDIA_RESET;
2236                         igb_reset(adapter);
2237                 }       
2238                 /* This can sleep */
2239                 if_link_state_change(ifp, LINK_STATE_UP);
2240         } else if (!link_check && (adapter->link_active == 1)) {
2241                 ifp->if_baudrate = adapter->link_speed = 0;
2242                 adapter->link_duplex = 0;
2243                 if (bootverbose)
2244                         device_printf(dev, "Link is Down\n");
2245                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2246                     (thstat & E1000_THSTAT_PWR_DOWN))
2247                         device_printf(dev, "Link: thermal shutdown\n");
2248                 adapter->link_active = 0;
2249                 /* This can sleep */
2250                 if_link_state_change(ifp, LINK_STATE_DOWN);
2251                 /* Reset queue state */
2252                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2253                         txr->queue_status = IGB_QUEUE_IDLE;
2254         }
2255 }
2256
2257 /*********************************************************************
2258  *
2259  *  This routine disables all traffic on the adapter by issuing a
2260  *  global reset on the MAC and deallocates TX/RX buffers.
2261  *
2262  **********************************************************************/
2263
2264 static void
2265 igb_stop(void *arg)
2266 {
2267         struct adapter  *adapter = arg;
2268         struct ifnet    *ifp = adapter->ifp;
2269         struct tx_ring *txr = adapter->tx_rings;
2270
2271         IGB_CORE_LOCK_ASSERT(adapter);
2272
2273         INIT_DEBUGOUT("igb_stop: begin");
2274
2275         igb_disable_intr(adapter);
2276
2277         callout_stop(&adapter->timer);
2278
2279         /* Tell the stack that the interface is no longer active */
2280         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2281         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2282
2283         /* Disarm watchdog timer. */
2284         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2285                 IGB_TX_LOCK(txr);
2286                 txr->queue_status = IGB_QUEUE_IDLE;
2287                 IGB_TX_UNLOCK(txr);
2288         }
2289
2290         e1000_reset_hw(&adapter->hw);
2291         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2292
2293         e1000_led_off(&adapter->hw);
2294         e1000_cleanup_led(&adapter->hw);
2295 }
2296
2297
2298 /*********************************************************************
2299  *
2300  *  Determine hardware revision.
2301  *
2302  **********************************************************************/
2303 static void
2304 igb_identify_hardware(struct adapter *adapter)
2305 {
2306         device_t dev = adapter->dev;
2307
2308         /* Make sure our PCI config space has the necessary stuff set */
2309         pci_enable_busmaster(dev);
2310         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2311
2312         /* Save off the information about this board */
2313         adapter->hw.vendor_id = pci_get_vendor(dev);
2314         adapter->hw.device_id = pci_get_device(dev);
2315         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2316         adapter->hw.subsystem_vendor_id =
2317             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2318         adapter->hw.subsystem_device_id =
2319             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2320
2321         /* Set MAC type early for PCI setup */
2322         e1000_set_mac_type(&adapter->hw);
2323
2324         /* Are we a VF device? */
2325         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2326             (adapter->hw.mac.type == e1000_vfadapt_i350))
2327                 adapter->vf_ifp = 1;
2328         else
2329                 adapter->vf_ifp = 0;
2330 }
2331
2332 static int
2333 igb_allocate_pci_resources(struct adapter *adapter)
2334 {
2335         device_t        dev = adapter->dev;
2336         int             rid;
2337
2338         rid = PCIR_BAR(0);
2339         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2340             &rid, RF_ACTIVE);
2341         if (adapter->pci_mem == NULL) {
2342                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2343                 return (ENXIO);
2344         }
2345         adapter->osdep.mem_bus_space_tag =
2346             rman_get_bustag(adapter->pci_mem);
2347         adapter->osdep.mem_bus_space_handle =
2348             rman_get_bushandle(adapter->pci_mem);
2349         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2350
2351         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2352
2353         /* This will setup either MSI/X or MSI */
2354         adapter->msix = igb_setup_msix(adapter);
2355         adapter->hw.back = &adapter->osdep;
2356
2357         return (0);
2358 }
2359
2360 /*********************************************************************
2361  *
2362  *  Setup the Legacy or MSI Interrupt handler
2363  *
2364  **********************************************************************/
2365 static int
2366 igb_allocate_legacy(struct adapter *adapter)
2367 {
2368         device_t                dev = adapter->dev;
2369         struct igb_queue        *que = adapter->queues;
2370 #ifndef IGB_LEGACY_TX
2371         struct tx_ring          *txr = adapter->tx_rings;
2372 #endif
2373         int                     error, rid = 0;
2374
2375         /* Turn off all interrupts */
2376         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2377
2378         /* MSI RID is 1 */
2379         if (adapter->msix == 1)
2380                 rid = 1;
2381
2382         /* We allocate a single interrupt resource */
2383         adapter->res = bus_alloc_resource_any(dev,
2384             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2385         if (adapter->res == NULL) {
2386                 device_printf(dev, "Unable to allocate bus resource: "
2387                     "interrupt\n");
2388                 return (ENXIO);
2389         }
2390
2391 #ifndef IGB_LEGACY_TX
2392         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2393 #endif
2394
2395         /*
2396          * Try allocating a fast interrupt and the associated deferred
2397          * processing contexts.
2398          */
2399         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2400         /* Make tasklet for deferred link handling */
2401         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2402         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2403             taskqueue_thread_enqueue, &que->tq);
2404         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2405             device_get_nameunit(adapter->dev));
2406         if ((error = bus_setup_intr(dev, adapter->res,
2407             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2408             adapter, &adapter->tag)) != 0) {
2409                 device_printf(dev, "Failed to register fast interrupt "
2410                             "handler: %d\n", error);
2411                 taskqueue_free(que->tq);
2412                 que->tq = NULL;
2413                 return (error);
2414         }
2415
2416         return (0);
2417 }
2418
2419
2420 /*********************************************************************
2421  *
2422  *  Setup the MSIX Queue Interrupt handlers: 
2423  *
2424  **********************************************************************/
2425 static int
2426 igb_allocate_msix(struct adapter *adapter)
2427 {
2428         device_t                dev = adapter->dev;
2429         struct igb_queue        *que = adapter->queues;
2430         int                     error, rid, vector = 0;
2431         int                     cpu_id = 0;
2432 #ifdef  RSS
2433         cpuset_t cpu_mask;
2434 #endif
2435
2436         /* Be sure to start with all interrupts disabled */
2437         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2438         E1000_WRITE_FLUSH(&adapter->hw);
2439
2440 #ifdef  RSS
2441         /*
2442          * If we're doing RSS, the number of queues needs to
2443          * match the number of RSS buckets that are configured.
2444          *
2445          * + If there's more queues than RSS buckets, we'll end
2446          *   up with queues that get no traffic.
2447          *
2448          * + If there's more RSS buckets than queues, we'll end
2449          *   up having multiple RSS buckets map to the same queue,
2450          *   so there'll be some contention.
2451          */
2452         if (adapter->num_queues != rss_getnumbuckets()) {
2453                 device_printf(dev,
2454                     "%s: number of queues (%d) != number of RSS buckets (%d)"
2455                     "; performance will be impacted.\n",
2456                     __func__,
2457                     adapter->num_queues,
2458                     rss_getnumbuckets());
2459         }
2460 #endif
2461
2462         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2463                 rid = vector + 1;
2464                 que->res = bus_alloc_resource_any(dev,
2465                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2466                 if (que->res == NULL) {
2467                         device_printf(dev,
2468                             "Unable to allocate bus resource: "
2469                             "MSIX Queue Interrupt\n");
2470                         return (ENXIO);
2471                 }
2472                 error = bus_setup_intr(dev, que->res,
2473                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2474                     igb_msix_que, que, &que->tag);
2475                 if (error) {
2476                         que->res = NULL;
2477                         device_printf(dev, "Failed to register Queue handler\n");
2478                         return (error);
2479                 }
2480 #if __FreeBSD_version >= 800504
2481                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2482 #endif
2483                 que->msix = vector;
2484                 if (adapter->hw.mac.type == e1000_82575)
2485                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2486                 else
2487                         que->eims = 1 << vector;
2488
2489 #ifdef  RSS
2490                 /*
2491                  * The queue ID is used as the RSS layer bucket ID.
2492                  * We look up the queue ID -> RSS CPU ID and select
2493                  * that.
2494                  */
2495                 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2496 #else
2497                 /*
2498                  * Bind the msix vector, and thus the
2499                  * rings to the corresponding cpu.
2500                  *
2501                  * This just happens to match the default RSS round-robin
2502                  * bucket -> queue -> CPU allocation.
2503                  */
2504                 if (adapter->num_queues > 1) {
2505                         if (igb_last_bind_cpu < 0)
2506                                 igb_last_bind_cpu = CPU_FIRST();
2507                         cpu_id = igb_last_bind_cpu;
2508                 }
2509 #endif
2510
2511                 if (adapter->num_queues > 1) {
2512                         bus_bind_intr(dev, que->res, cpu_id);
2513 #ifdef  RSS
2514                         device_printf(dev,
2515                                 "Bound queue %d to RSS bucket %d\n",
2516                                 i, cpu_id);
2517 #else
2518                         device_printf(dev,
2519                                 "Bound queue %d to cpu %d\n",
2520                                 i, cpu_id);
2521 #endif
2522                 }
2523
2524 #ifndef IGB_LEGACY_TX
2525                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2526                     que->txr);
2527 #endif
2528                 /* Make tasklet for deferred handling */
2529                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2530                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2531                     taskqueue_thread_enqueue, &que->tq);
2532                 if (adapter->num_queues > 1) {
2533                         /*
2534                          * Only pin the taskqueue thread to a CPU if
2535                          * RSS is in use.
2536                          *
2537                          * This again just happens to match the default RSS
2538                          * round-robin bucket -> queue -> CPU allocation.
2539                          */
2540 #ifdef  RSS
2541                         CPU_SETOF(cpu_id, &cpu_mask);
2542                         taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2543                             &cpu_mask,
2544                             "%s que (bucket %d)",
2545                             device_get_nameunit(adapter->dev),
2546                             cpu_id);
2547 #else
2548                         taskqueue_start_threads(&que->tq, 1, PI_NET,
2549                             "%s que (qid %d)",
2550                             device_get_nameunit(adapter->dev),
2551                             cpu_id);
2552 #endif
2553                 } else {
2554                         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2555                             device_get_nameunit(adapter->dev));
2556                 }
2557
2558                 /* Finally update the last bound CPU id */
2559                 if (adapter->num_queues > 1)
2560                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2561         }
2562
2563         /* And Link */
2564         rid = vector + 1;
2565         adapter->res = bus_alloc_resource_any(dev,
2566             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2567         if (adapter->res == NULL) {
2568                 device_printf(dev,
2569                     "Unable to allocate bus resource: "
2570                     "MSIX Link Interrupt\n");
2571                 return (ENXIO);
2572         }
2573         if ((error = bus_setup_intr(dev, adapter->res,
2574             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2575             igb_msix_link, adapter, &adapter->tag)) != 0) {
2576                 device_printf(dev, "Failed to register Link handler\n");
2577                 return (error);
2578         }
2579 #if __FreeBSD_version >= 800504
2580         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2581 #endif
2582         adapter->linkvec = vector;
2583
2584         return (0);
2585 }
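
/*
 * Editor's note: with num_queues = 4, the vector layout established by
 * igb_allocate_msix() above is (IRQ rids are vector + 1, since rid 0 is
 * the legacy INTx line):
 *
 *   vectors 0-3, rids 1-4  -> igb_msix_que()  for queues 0-3
 *   vector  4,   rid  5    -> igb_msix_link() for link/other causes
 *
 * This is why igb_setup_msix() later requests queues + 1 messages.
 */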
2586
2587
2588 static void
2589 igb_configure_queues(struct adapter *adapter)
2590 {
2591         struct  e1000_hw        *hw = &adapter->hw;
2592         struct  igb_queue       *que;
2593         u32                     tmp, ivar = 0, newitr = 0;
2594
2595         /* First turn on RSS capability */
2596         if (adapter->hw.mac.type != e1000_82575)
2597                 E1000_WRITE_REG(hw, E1000_GPIE,
2598                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2599                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2600
2601         /* Turn on MSIX */
2602         switch (adapter->hw.mac.type) {
2603         case e1000_82580:
2604         case e1000_i350:
2605         case e1000_i354:
2606         case e1000_i210:
2607         case e1000_i211:
2608         case e1000_vfadapt:
2609         case e1000_vfadapt_i350:
2610                 /* RX entries */
2611                 for (int i = 0; i < adapter->num_queues; i++) {
2612                         u32 index = i >> 1;
2613                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2614                         que = &adapter->queues[i];
2615                         if (i & 1) {
2616                                 ivar &= 0xFF00FFFF;
2617                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2618                         } else {
2619                                 ivar &= 0xFFFFFF00;
2620                                 ivar |= que->msix | E1000_IVAR_VALID;
2621                         }
2622                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2623                 }
2624                 /* TX entries */
2625                 for (int i = 0; i < adapter->num_queues; i++) {
2626                         u32 index = i >> 1;
2627                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2628                         que = &adapter->queues[i];
2629                         if (i & 1) {
2630                                 ivar &= 0x00FFFFFF;
2631                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2632                         } else {
2633                                 ivar &= 0xFFFF00FF;
2634                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2635                         }
2636                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2637                         adapter->que_mask |= que->eims;
2638                 }
2639
2640                 /* And for the link interrupt */
2641                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2642                 adapter->link_mask = 1 << adapter->linkvec;
2643                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2644                 break;
2645         case e1000_82576:
2646                 /* RX entries */
2647                 for (int i = 0; i < adapter->num_queues; i++) {
2648                         u32 index = i & 0x7; /* Each IVAR has two entries */
2649                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2650                         que = &adapter->queues[i];
2651                         if (i < 8) {
2652                                 ivar &= 0xFFFFFF00;
2653                                 ivar |= que->msix | E1000_IVAR_VALID;
2654                         } else {
2655                                 ivar &= 0xFF00FFFF;
2656                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2657                         }
2658                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2659                         adapter->que_mask |= que->eims;
2660                 }
2661                 /* TX entries */
2662                 for (int i = 0; i < adapter->num_queues; i++) {
2663                         u32 index = i & 0x7; /* Each IVAR has two entries */
2664                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2665                         que = &adapter->queues[i];
2666                         if (i < 8) {
2667                                 ivar &= 0xFFFF00FF;
2668                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2669                         } else {
2670                                 ivar &= 0x00FFFFFF;
2671                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2672                         }
2673                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2674                         adapter->que_mask |= que->eims;
2675                 }
2676
2677                 /* And for the link interrupt */
2678                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2679                 adapter->link_mask = 1 << adapter->linkvec;
2680                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2681                 break;
2682
2683         case e1000_82575:
2684                 /* enable MSI-X support*/
2685                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2686                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2687                 /* Auto-Mask interrupts upon ICR read. */
2688                 tmp |= E1000_CTRL_EXT_EIAME;
2689                 tmp |= E1000_CTRL_EXT_IRCA;
2690                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2691
2692                 /* Queues */
2693                 for (int i = 0; i < adapter->num_queues; i++) {
2694                         que = &adapter->queues[i];
2695                         tmp = E1000_EICR_RX_QUEUE0 << i;
2696                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2697                         que->eims = tmp;
2698                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2699                             i, que->eims);
2700                         adapter->que_mask |= que->eims;
2701                 }
2702
2703                 /* Link */
2704                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2705                     E1000_EIMS_OTHER);
2706                 adapter->link_mask |= E1000_EIMS_OTHER;
2707         default:
2708                 break;
2709         }
2710
2711         /* Set the starting interrupt rate */
2712         if (igb_max_interrupt_rate > 0)
2713                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2714
2715         if (hw->mac.type == e1000_82575)
2716                 newitr |= newitr << 16;
2717         else
2718                 newitr |= E1000_EITR_CNT_IGNR;
2719
2720         for (int i = 0; i < adapter->num_queues; i++) {
2721                 que = &adapter->queues[i];
2722                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2723         }
2724
2725         return;
2726 }
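
/*
 * Editor's note: a worked example of the 82576 IVAR packing in
 * igb_configure_queues() above, where each 32-bit IVAR holds four 8-bit
 * entries laid out as [TX(q+8) | RX(q+8) | TX(q) | RX(q)].  For queue 2
 * bound to MSI-X vector 2:
 *
 *   index = 2 & 0x7 = 2
 *   RX: ivar = (ivar & 0xFFFFFF00) | (2 | E1000_IVAR_VALID)
 *   TX: ivar = (ivar & 0xFFFF00FF) | ((2 | E1000_IVAR_VALID) << 8)
 *
 * The EITR seeding at the end uses the units implied by the 4000000
 * constant (four counts per microsecond): with, e.g.,
 * igb_max_interrupt_rate = 8000, newitr = (4000000 / 8000) & 0x7FFC
 * = 500 counts, roughly a 125us minimum interval between interrupts.
 */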
2727
2728
2729 static void
2730 igb_free_pci_resources(struct adapter *adapter)
2731 {
2732         struct          igb_queue *que = adapter->queues;
2733         device_t        dev = adapter->dev;
2734         int             rid;
2735
2736         /*
2737         ** There is a slight possibility of a failure mode
2738         ** in attach that will result in entering this function
2739         ** before interrupt resources have been initialized, and
2740         ** in that case we do not want to execute the loops below.
2741         ** We can detect this reliably by the state of the adapter's
2742         ** res pointer.
2743         */
2744         if (adapter->res == NULL)
2745                 goto mem;
2746
2747         /*
2748          * First release all the interrupt resources:
2749          */
2750         for (int i = 0; i < adapter->num_queues; i++, que++) {
2751                 rid = que->msix + 1;
2752                 if (que->tag != NULL) {
2753                         bus_teardown_intr(dev, que->res, que->tag);
2754                         que->tag = NULL;
2755                 }
2756                 if (que->res != NULL)
2757                         bus_release_resource(dev,
2758                             SYS_RES_IRQ, rid, que->res);
2759         }
2760
2761         /* Clean the Legacy or Link interrupt last */
2762         if (adapter->linkvec) /* we are doing MSIX */
2763                 rid = adapter->linkvec + 1;
2764         else
2765                 rid = (adapter->msix != 0) ? 1 : 0;
2766
2767         que = adapter->queues;
2768         if (adapter->tag != NULL) {
2769                 taskqueue_drain(que->tq, &adapter->link_task);
2770                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2771                 adapter->tag = NULL;
2772         }
2773         if (adapter->res != NULL)
2774                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2775
2776         for (int i = 0; i < adapter->num_queues; i++, que++) {
2777                 if (que->tq != NULL) {
2778 #ifndef IGB_LEGACY_TX
2779                         taskqueue_drain(que->tq, &que->txr->txq_task);
2780 #endif
2781                         taskqueue_drain(que->tq, &que->que_task);
2782                         taskqueue_free(que->tq);
2783                 }
2784         }
2785 mem:
2786         if (adapter->msix)
2787                 pci_release_msi(dev);
2788
2789         if (adapter->msix_mem != NULL)
2790                 bus_release_resource(dev, SYS_RES_MEMORY,
2791                     adapter->memrid, adapter->msix_mem);
2792
2793         if (adapter->pci_mem != NULL)
2794                 bus_release_resource(dev, SYS_RES_MEMORY,
2795                     PCIR_BAR(0), adapter->pci_mem);
2796
2797 }
2798
2799 /*
2800  * Setup Either MSI/X or MSI
2801  */
2802 static int
2803 igb_setup_msix(struct adapter *adapter)
2804 {
2805         device_t        dev = adapter->dev;
2806         int             bar, want, queues, msgs, maxqueues;
2807
2808         /* tuneable override */
2809         if (igb_enable_msix == 0)
2810                 goto msi;
2811
2812         /* First try MSI/X */
2813         msgs = pci_msix_count(dev); 
2814         if (msgs == 0)
2815                 goto msi;
2816         /*
2817         ** Some new devices, as with ixgbe, now may
2818         ** use a different BAR, so we need to keep
2819         ** track of which is used.
2820         */
2821         adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2822         bar = pci_read_config(dev, adapter->memrid, 4);
2823         if (bar == 0) /* use next bar */
2824                 adapter->memrid += 4;
2825         adapter->msix_mem = bus_alloc_resource_any(dev,
2826             SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2827         if (adapter->msix_mem == NULL) {
2828                 /* May not be enabled */
2829                 device_printf(adapter->dev,
2830                     "Unable to map MSIX table\n");
2831                 goto msi;
2832         }
2833
2834         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
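        /* e.g. 8 CPUs with 10 MSI-X messages yields 8 queues,
        ** while 4 CPUs with only 3 messages yields 2 */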
2835
2836         /* Override via tuneable */
2837         if (igb_num_queues != 0)
2838                 queues = igb_num_queues;
2839
2840 #ifdef  RSS
2841         /* If we're doing RSS, clamp at the number of RSS buckets */
2842         if (queues > rss_getnumbuckets())
2843                 queues = rss_getnumbuckets();
2844 #endif
2845
2846
2847         /* Sanity check based on HW */
2848         switch (adapter->hw.mac.type) {
2849                 case e1000_82575:
2850                         maxqueues = 4;
2851                         break;
2852                 case e1000_82576:
2853                 case e1000_82580:
2854                 case e1000_i350:
2855                 case e1000_i354:
2856                         maxqueues = 8;
2857                         break;
2858                 case e1000_i210:
2859                         maxqueues = 4;
2860                         break;
2861                 case e1000_i211:
2862                         maxqueues = 2;
2863                         break;
2864                 default:  /* VF interfaces */
2865                         maxqueues = 1;
2866                         break;
2867         }
2868
2869         /* Final clamp on the actual hardware capability */
2870         if (queues > maxqueues)
2871                 queues = maxqueues;
2872
2873         /*
2874         ** One vector (RX/TX pair) per queue
2875         ** plus an additional one for the link interrupt
2876         */
2877         want = queues + 1;
2878         if (msgs >= want)
2879                 msgs = want;
2880         else {
2881                 device_printf(adapter->dev,
2882                     "MSIX Configuration Problem, "
2883                     "%d vectors available, but %d vectors needed!\n",
2884                     msgs, want);
2885                 goto msi;
2886         }
2887         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2888                 device_printf(adapter->dev,
2889                     "Using MSIX interrupts with %d vectors\n", msgs);
2890                 adapter->num_queues = queues;
2891                 return (msgs);
2892         }
2893         /*
2894         ** If MSIX alloc failed or provided us with
2895         ** less than needed, free and fall through to MSI
2896         */
2897         pci_release_msi(dev);
2898
2899 msi:
2900         if (adapter->msix_mem != NULL) {
2901                 bus_release_resource(dev, SYS_RES_MEMORY,
2902                     adapter->memrid, adapter->msix_mem);
2903                 adapter->msix_mem = NULL;
2904         }
2905         msgs = 1;
2906         if (pci_alloc_msi(dev, &msgs) == 0) {
2907                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2908                 return (msgs);
2909         }
2910         device_printf(adapter->dev, "Using a Legacy interrupt\n");
2911         return (0);
2912 }
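
/*
** For reference, the selection above can be steered from loader.conf(5)
** via the driver tunables (a sketch assuming the stock tunable names
** this driver registers; num_queues of 0 means "auto"):
**
**      hw.igb.enable_msix="0"  # skip MSI-X, fall back to MSI/legacy
**      hw.igb.num_queues="2"   # override the computed queue count
*/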
2913
2914 /*********************************************************************
2915  *
2916  *  Initialize the DMA Coalescing feature
2917  *
2918  **********************************************************************/
2919 static void
2920 igb_init_dmac(struct adapter *adapter, u32 pba)
2921 {
2922         device_t        dev = adapter->dev;
2923         struct e1000_hw *hw = &adapter->hw;
2924         u32             dmac, reg = ~E1000_DMACR_DMAC_EN;
2925         u16             hwm;
2926
2927         if (hw->mac.type == e1000_i211)
2928                 return;
2929
2930         if (hw->mac.type > e1000_82580) {
2931
2932                 if (adapter->dmac == 0) { /* Disabling it */
2933                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2934                         return;
2935                 } else
2936                         device_printf(dev, "DMA Coalescing enabled\n");
2937
2938                 /* Set starting threshold */
2939                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2940
2941                 hwm = 64 * pba - adapter->max_frame_size / 16;
2942                 if (hwm < 64 * (pba - 6))
2943                         hwm = 64 * (pba - 6);
2944                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2945                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2946                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2947                     & E1000_FCRTC_RTH_COAL_MASK);
2948                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2949
2950
2951                 dmac = pba - adapter->max_frame_size / 512;
2952                 if (dmac < pba - 10)
2953                         dmac = pba - 10;
2954                 reg = E1000_READ_REG(hw, E1000_DMACR);
2955                 reg &= ~E1000_DMACR_DMACTHR_MASK;
2956                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2957                     & E1000_DMACR_DMACTHR_MASK);
2958
2959                 /* transition to L0s or L1 if available */
2960                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2961
2962                 /* Check for a 2.5Gb backplane connection before
2963                 * configuring the watchdog timer: the timer value
2964                 * is in msec, counted in 12.8usec intervals on a
2965                 * 2.5Gb connection and in 32usec intervals
2966                 * otherwise.
2967                 */
2968                 if (hw->mac.type == e1000_i354) {
2969                         int status = E1000_READ_REG(hw, E1000_STATUS);
2970                         if ((status & E1000_STATUS_2P5_SKU) &&
2971                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2972                                 reg |= ((adapter->dmac * 5) >> 6);
2973                         else
2974                                 reg |= (adapter->dmac >> 5);
2975                 } else {
2976                         reg |= (adapter->dmac >> 5);
2977                 }
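                /*
                ** Note on the scaling above: (dmac * 5) >> 6 divides
                ** adapter->dmac by 12.8 while dmac >> 5 divides it by
                ** 32, converting it into the 12.8usec or 32usec timer
                ** units described earlier.
                */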
2978
2979                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2980
2981                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2982
2983                 /* Set the interval before transition */
2984                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2985                 if (hw->mac.type == e1000_i350)
2986                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
2987                 /*
2988                 ** On a 2.5Gb connection the TTLX unit is 0.4 usec;
2989                 ** a 4 usec delay is thus 10 units (0xA).
2990                 */
2991                 if (hw->mac.type == e1000_i354) {
2992                         int status = E1000_READ_REG(hw, E1000_STATUS);
2993                         if ((status & E1000_STATUS_2P5_SKU) &&
2994                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2995                                 reg |= 0xA;
2996                         else
2997                                 reg |= 0x4;
2998                 } else {
2999                         reg |= 0x4;
3000                 }
3001
3002                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3003
3004                 /* free space in tx packet buffer to wake from DMA coal */
3005                 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
3006                     (2 * adapter->max_frame_size)) >> 6);
3007
3008                 /* make low power state decision controlled by DMA coal */
3009                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3010                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3011                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3012
3013         } else if (hw->mac.type == e1000_82580) {
3014                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3015                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3016                     reg & ~E1000_PCIEMISC_LX_DECISION);
3017                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3018         }
3019 }
3020
3021
3022 /*********************************************************************
3023  *
3024  *  Set up a fresh starting state
3025  *
3026  **********************************************************************/
3027 static void
3028 igb_reset(struct adapter *adapter)
3029 {
3030         device_t        dev = adapter->dev;
3031         struct e1000_hw *hw = &adapter->hw;
3032         struct e1000_fc_info *fc = &hw->fc;
3033         struct ifnet    *ifp = adapter->ifp;
3034         u32             pba = 0;
3035         u16             hwm;
3036
3037         INIT_DEBUGOUT("igb_reset: begin");
3038
3039         /* Let the firmware know the OS is in control */
3040         igb_get_hw_control(adapter);
3041
3042         /*
3043          * Packet Buffer Allocation (PBA)
3044          * Writing PBA sets the receive portion of the buffer;
3045          * the remainder is used for the transmit buffer.
3046          */
3047         switch (hw->mac.type) {
3048         case e1000_82575:
3049                 pba = E1000_PBA_32K;
3050                 break;
3051         case e1000_82576:
3052         case e1000_vfadapt:
3053                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3054                 pba &= E1000_RXPBS_SIZE_MASK_82576;
3055                 break;
3056         case e1000_82580:
3057         case e1000_i350:
3058         case e1000_i354:
3059         case e1000_vfadapt_i350:
3060                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3061                 pba = e1000_rxpbs_adjust_82580(pba);
3062                 break;
3063         case e1000_i210:
3064         case e1000_i211:
3065                 pba = E1000_PBA_34K;
                     break;
3066         default:
3067                 break;
3068         }
3069
3070         /* Special needs in case of Jumbo frames */
3071         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3072                 u32 tx_space, min_tx, min_rx;
3073                 pba = E1000_READ_REG(hw, E1000_PBA);
3074                 tx_space = pba >> 16;
3075                 pba &= 0xffff;
3076                 min_tx = (adapter->max_frame_size +
3077                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3078                 min_tx = roundup2(min_tx, 1024);
3079                 min_tx >>= 10;
3080                 min_rx = adapter->max_frame_size;
3081                 min_rx = roundup2(min_rx, 1024);
3082                 min_rx >>= 10;
3083                 if (tx_space < min_tx &&
3084                     ((min_tx - tx_space) < pba)) {
3085                         pba = pba - (min_tx - tx_space);
3086                         /*
3087                          * if short on rx space, rx wins
3088                          * and must trump tx adjustment
3089                          */
3090                         if (pba < min_rx)
3091                                 pba = min_rx;
3092                 }
3093                 E1000_WRITE_REG(hw, E1000_PBA, pba);
3094         }
3095
3096         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3097
3098         /*
3099          * These parameters control the automatic generation (Tx) and
3100          * response (Rx) to Ethernet PAUSE frames.
3101          * - High water mark should allow for at least two frames to be
3102          *   received after sending an XOFF.
3103          * - Low water mark works best when it is very near the high water mark.
3104          *   This allows the receiver to restart by sending XON when it has
3105          *   drained a bit.
3106          */
3107         hwm = min(((pba << 10) * 9 / 10),
3108             ((pba << 10) - 2 * adapter->max_frame_size));
3109
3110         if (hw->mac.type < e1000_82576) {
3111                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3112                 fc->low_water = fc->high_water - 8;
3113         } else {
3114                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3115                 fc->low_water = fc->high_water - 16;
3116         }
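        /*
        ** Worked example: with pba = 34 (34KB) and a 1518-byte max
        ** frame, hwm = min(34816 * 9 / 10, 34816 - 3036) = 31334;
        ** on 82576 and later this masks to a high water mark of
        ** 31328 bytes and a low water mark of 31312 bytes.
        */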
3117
3118         fc->pause_time = IGB_FC_PAUSE_TIME;
3119         fc->send_xon = TRUE;
3120         if (adapter->fc)
3121                 fc->requested_mode = adapter->fc;
3122         else
3123                 fc->requested_mode = e1000_fc_default;
3124
3125         /* Issue a global reset */
3126         e1000_reset_hw(hw);
3127         E1000_WRITE_REG(hw, E1000_WUC, 0);
3128
3129         /* Reset for AutoMediaDetect */
3130         if (adapter->flags & IGB_MEDIA_RESET) {
3131                 e1000_setup_init_funcs(hw, TRUE);
3132                 e1000_get_bus_info(hw);
3133                 adapter->flags &= ~IGB_MEDIA_RESET;
3134         }
3135
3136         if (e1000_init_hw(hw) < 0)
3137                 device_printf(dev, "Hardware Initialization Failed\n");
3138
3139         /* Setup DMA Coalescing */
3140         igb_init_dmac(adapter, pba);
3141
3142         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3143         e1000_get_phy_info(hw);
3144         e1000_check_for_link(hw);
3145         return;
3146 }
3147
3148 /*********************************************************************
3149  *
3150  *  Setup networking device structure and register an interface.
3151  *
3152  **********************************************************************/
3153 static int
3154 igb_setup_interface(device_t dev, struct adapter *adapter)
3155 {
3156         struct ifnet   *ifp;
3157
3158         INIT_DEBUGOUT("igb_setup_interface: begin");
3159
3160         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3161         if (ifp == NULL) {
3162                 device_printf(dev, "cannot allocate ifnet structure\n");
3163                 return (-1);
3164         }
3165         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3166         ifp->if_init =  igb_init;
3167         ifp->if_softc = adapter;
3168         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3169         ifp->if_ioctl = igb_ioctl;
3170         ifp->if_get_counter = igb_get_counter;
3171
3172         /* TSO parameters */
3173         ifp->if_hw_tsomax = IP_MAXPACKET;
3174         ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
3175         ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
3176
3177 #ifndef IGB_LEGACY_TX
3178         ifp->if_transmit = igb_mq_start;
3179         ifp->if_qflush = igb_qflush;
3180 #else
3181         ifp->if_start = igb_start;
3182         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3183         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3184         IFQ_SET_READY(&ifp->if_snd);
3185 #endif
3186
3187         ether_ifattach(ifp, adapter->hw.mac.addr);
3188
3189         ifp->if_capabilities = ifp->if_capenable = 0;
3190
3191         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3192 #if __FreeBSD_version >= 1000000
3193         ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
3194 #endif
3195         ifp->if_capabilities |= IFCAP_TSO;
3196         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3197         ifp->if_capenable = ifp->if_capabilities;
3198
3199         /* Advertise LRO capability, but don't enable it by default */
3200         ifp->if_capabilities |= IFCAP_LRO;
3201
3202 #ifdef DEVICE_POLLING
3203         ifp->if_capabilities |= IFCAP_POLLING;
3204 #endif
3205
3206         /*
3207          * Tell the upper layer(s) we
3208          * support full VLAN capability.
3209          */
3210         ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3211         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3212                              |  IFCAP_VLAN_HWTSO
3213                              |  IFCAP_VLAN_MTU;
3214         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3215                           |  IFCAP_VLAN_HWTSO
3216                           |  IFCAP_VLAN_MTU;
3217
3218         /*
3219         ** Don't turn this on by default: if vlans are
3220         ** created on another pseudo device (e.g. lagg),
3221         ** vlan events are not passed through and operation
3222         ** breaks, whereas with HW FILTER off it works. If
3223         ** you use vlans directly on the igb driver you can
3224         ** enable this and get full hardware tag filtering.
3225         */
3226         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
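        /*
        ** The capability can then be toggled administratively at run
        ** time, e.g. "ifconfig igb0 vlanhwfilter"; see ifconfig(8)
        ** for the capability flags.
        */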
3227
3228         /*
3229          * Specify the media types supported by this adapter and register
3230          * callbacks to update media and link information
3231          */
3232         ifmedia_init(&adapter->media, IFM_IMASK,
3233             igb_media_change, igb_media_status);
3234         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3235             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3236                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3237                             0, NULL);
3238                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3239         } else {
3240                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3241                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3242                             0, NULL);
3243                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3244                             0, NULL);
3245                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3246                             0, NULL);
3247                 if (adapter->hw.phy.type != e1000_phy_ife) {
3248                         ifmedia_add(&adapter->media,
3249                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3250                         ifmedia_add(&adapter->media,
3251                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3252                 }
3253         }
3254         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3255         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3256         return (0);
3257 }
3258
3259
3260 /*
3261  * Manage DMA'able memory.
3262  */
3263 static void
3264 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3265 {
3266         if (error)
3267                 return;
3268         *(bus_addr_t *) arg = segs[0].ds_addr;
3269 }
3270
3271 static int
3272 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3273         struct igb_dma_alloc *dma, int mapflags)
3274 {
3275         int error;
3276
3277         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3278                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3279                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3280                                 BUS_SPACE_MAXADDR,      /* highaddr */
3281                                 NULL, NULL,             /* filter, filterarg */
3282                                 size,                   /* maxsize */
3283                                 1,                      /* nsegments */
3284                                 size,                   /* maxsegsize */
3285                                 0,                      /* flags */
3286                                 NULL,                   /* lockfunc */
3287                                 NULL,                   /* lockarg */
3288                                 &dma->dma_tag);
3289         if (error) {
3290                 device_printf(adapter->dev,
3291                     "%s: bus_dma_tag_create failed: %d\n",
3292                     __func__, error);
3293                 goto fail_0;
3294         }
3295
3296         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3297             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3298         if (error) {
3299                 device_printf(adapter->dev,
3300                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3301                     __func__, (uintmax_t)size, error);
3302                 goto fail_2;
3303         }
3304
3305         dma->dma_paddr = 0;
3306         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3307             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3308         if (error || dma->dma_paddr == 0) {
3309                 device_printf(adapter->dev,
3310                     "%s: bus_dmamap_load failed: %d\n",
3311                     __func__, error);
3312                 goto fail_3;
3313         }
3314
3315         return (0);
3316
3317 fail_3:
3318         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3319 fail_2:
3320         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3321         bus_dma_tag_destroy(dma->dma_tag);
3322 fail_0:
3323         dma->dma_tag = NULL;
3324
3325         return (error);
3326 }
3327
3328 static void
3329 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3330 {
3331         if (dma->dma_tag == NULL)
3332                 return;
3333         if (dma->dma_paddr != 0) {
3334                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3335                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3336                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3337                 dma->dma_paddr = 0;
3338         }
3339         if (dma->dma_vaddr != NULL) {
3340                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3341                 dma->dma_vaddr = NULL;
3342         }
3343         bus_dma_tag_destroy(dma->dma_tag);
3344         dma->dma_tag = NULL;
3345 }
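
/*
** Typical usage of the pair above (a minimal sketch; the "ring"
** variable and the 4096-byte size are illustrative only):
**
**      struct igb_dma_alloc ring;
**
**      if (igb_dma_malloc(adapter, 4096, &ring, BUS_DMA_NOWAIT) == 0) {
**              ... use ring.dma_vaddr and ring.dma_paddr ...
**              igb_dma_free(adapter, &ring);
**      }
*/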
3346
3347
3348 /*********************************************************************
3349  *
3350  *  Allocate memory for the transmit and receive rings, and then
3351  *  the descriptors associated with each, called only once at attach.
3352  *
3353  **********************************************************************/
3354 static int
3355 igb_allocate_queues(struct adapter *adapter)
3356 {
3357         device_t dev = adapter->dev;
3358         struct igb_queue        *que = NULL;
3359         struct tx_ring          *txr = NULL;
3360         struct rx_ring          *rxr = NULL;
3361         int rsize, tsize, error = E1000_SUCCESS;
3362         int txconf = 0, rxconf = 0;
3363
3364         /* First allocate the top level queue structs */
3365         if (!(adapter->queues =
3366             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3367             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3368                 device_printf(dev, "Unable to allocate queue memory\n");
3369                 error = ENOMEM;
3370                 goto fail;
3371         }
3372
3373         /* Next allocate the TX ring struct memory */
3374         if (!(adapter->tx_rings =
3375             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3376             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3377                 device_printf(dev, "Unable to allocate TX ring memory\n");
3378                 error = ENOMEM;
3379                 goto tx_fail;
3380         }
3381
3382         /* Now allocate the RX */
3383         if (!(adapter->rx_rings =
3384             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3385             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3386                 device_printf(dev, "Unable to allocate RX ring memory\n");
3387                 error = ENOMEM;
3388                 goto rx_fail;
3389         }
3390
3391         tsize = roundup2(adapter->num_tx_desc *
3392             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3393         /*
3394          * Now set up the TX queues, txconf is needed to handle the
3395          * possibility that things fail midcourse and we need to
3396          * undo memory gracefully
3397          */ 
3398         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3399                 /* Set up some basics */
3400                 txr = &adapter->tx_rings[i];
3401                 txr->adapter = adapter;
3402                 txr->me = i;
3403                 txr->num_desc = adapter->num_tx_desc;
3404
3405                 /* Initialize the TX lock */
3406                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3407                     device_get_nameunit(dev), txr->me);
3408                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3409
3410                 if (igb_dma_malloc(adapter, tsize,
3411                         &txr->txdma, BUS_DMA_NOWAIT)) {
3412                         device_printf(dev,
3413                             "Unable to allocate TX Descriptor memory\n");
3414                         error = ENOMEM;
3415                         goto err_tx_desc;
3416                 }
3417                 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3418                 bzero((void *)txr->tx_base, tsize);
3419
3420                 /* Now allocate transmit buffers for the ring */
3421                 if (igb_allocate_transmit_buffers(txr)) {
3422                         device_printf(dev,
3423                             "Critical Failure setting up transmit buffers\n");
3424                         error = ENOMEM;
3425                         goto err_tx_desc;
3426                 }
3427 #ifndef IGB_LEGACY_TX
3428                 /* Allocate a buf ring */
3429                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3430                     M_WAITOK, &txr->tx_mtx);
3431 #endif
3432         }
3433
3434         /*
3435          * Next the RX queues...
3436          */ 
3437         rsize = roundup2(adapter->num_rx_desc *
3438             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3439         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3440                 rxr = &adapter->rx_rings[i];
3441                 rxr->adapter = adapter;
3442                 rxr->me = i;
3443
3444                 /* Initialize the RX lock */
3445                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3446                     device_get_nameunit(dev), rxr->me);
3447                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3448
3449                 if (igb_dma_malloc(adapter, rsize,
3450                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3451                         device_printf(dev,
3452                     "Unable to allocate RX Descriptor memory\n");
3453                         error = ENOMEM;
3454                         goto err_rx_desc;
3455                 }
3456                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3457                 bzero((void *)rxr->rx_base, rsize);
3458
3459                 /* Allocate receive buffers for the ring*/
3460                 if (igb_allocate_receive_buffers(rxr)) {
3461                         device_printf(dev,
3462                             "Critical Failure setting up receive buffers\n");
3463                         error = ENOMEM;
3464                         goto err_rx_desc;
3465                 }
3466         }
3467
3468         /*
3469         ** Finally set up the queue holding structs
3470         */
3471         for (int i = 0; i < adapter->num_queues; i++) {
3472                 que = &adapter->queues[i];
3473                 que->adapter = adapter;
3474                 que->txr = &adapter->tx_rings[i];
3475                 que->rxr = &adapter->rx_rings[i];
3476         }
3477
3478         return (0);
3479
3480 err_rx_desc:
3481         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3482                 igb_dma_free(adapter, &rxr->rxdma);
3483 err_tx_desc:
3484         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3485                 igb_dma_free(adapter, &txr->txdma);
3486         free(adapter->rx_rings, M_DEVBUF);
3487 rx_fail:
3488 #ifndef IGB_LEGACY_TX
3489         if (txr != NULL && txr->br != NULL)
                     buf_ring_free(txr->br, M_DEVBUF);
3490 #endif
3491         free(adapter->tx_rings, M_DEVBUF);
3492 tx_fail:
3493         free(adapter->queues, M_DEVBUF);
3494 fail:
3495         return (error);
3496 }
3497
3498 /*********************************************************************
3499  *
3500  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3501  *  the information needed to transmit a packet on the wire. This is
3502  *  called only once at attach; setup is done on every reset.
3503  *
3504  **********************************************************************/
3505 static int
3506 igb_allocate_transmit_buffers(struct tx_ring *txr)
3507 {
3508         struct adapter *adapter = txr->adapter;
3509         device_t dev = adapter->dev;
3510         struct igb_tx_buf *txbuf;
3511         int error, i;
3512
3513         /*
3514          * Setup DMA descriptor areas.
3515          */
3516         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3517                                1, 0,                    /* alignment, bounds */
3518                                BUS_SPACE_MAXADDR,       /* lowaddr */
3519                                BUS_SPACE_MAXADDR,       /* highaddr */
3520                                NULL, NULL,              /* filter, filterarg */
3521                                IGB_TSO_SIZE,            /* maxsize */
3522                                IGB_MAX_SCATTER,         /* nsegments */
3523                                PAGE_SIZE,               /* maxsegsize */
3524                                0,                       /* flags */
3525                                NULL,                    /* lockfunc */
3526                                NULL,                    /* lockfuncarg */
3527                                &txr->txtag))) {
3528                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3529                 goto fail;
3530         }
3531
3532         if (!(txr->tx_buffers =
3533             (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3534             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3535                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3536                 error = ENOMEM;
3537                 goto fail;
3538         }
3539
3540         /* Create the descriptor buffer dma maps */
3541         txbuf = txr->tx_buffers;
3542         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3543                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3544                 if (error != 0) {
3545                         device_printf(dev, "Unable to create TX DMA map\n");
3546                         goto fail;
3547                 }
3548         }
3549
3550         return (0);
3551 fail:
3552         /* We free everything; this handles the case where we failed partway */
3553         igb_free_transmit_structures(adapter);
3554         return (error);
3555 }
3556
3557 /*********************************************************************
3558  *
3559  *  Initialize a transmit ring.
3560  *
3561  **********************************************************************/
3562 static void
3563 igb_setup_transmit_ring(struct tx_ring *txr)
3564 {
3565         struct adapter *adapter = txr->adapter;
3566         struct igb_tx_buf *txbuf;
3567         int i;
3568 #ifdef DEV_NETMAP
3569         struct netmap_adapter *na = NA(adapter->ifp);
3570         struct netmap_slot *slot;
3571 #endif /* DEV_NETMAP */
3572
3573         /* Clear the old descriptor contents */
3574         IGB_TX_LOCK(txr);
3575 #ifdef DEV_NETMAP
3576         slot = netmap_reset(na, NR_TX, txr->me, 0);
3577 #endif /* DEV_NETMAP */
3578         bzero((void *)txr->tx_base,
3579               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3580         /* Reset indices */
3581         txr->next_avail_desc = 0;
3582         txr->next_to_clean = 0;
3583
3584         /* Free any existing tx buffers. */
3585         txbuf = txr->tx_buffers;
3586         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3587                 if (txbuf->m_head != NULL) {
3588                         bus_dmamap_sync(txr->txtag, txbuf->map,
3589                             BUS_DMASYNC_POSTWRITE);
3590                         bus_dmamap_unload(txr->txtag, txbuf->map);
3591                         m_freem(txbuf->m_head);
3592                         txbuf->m_head = NULL;
3593                 }
3594 #ifdef DEV_NETMAP
3595                 if (slot) {
3596                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3597                         /* no need to set the address */
3598                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3599                 }
3600 #endif /* DEV_NETMAP */
3601                 /* clear the watch index */
3602                 txbuf->eop = NULL;
3603         }
3604
3605         /* Set number of descriptors available */
3606         txr->tx_avail = adapter->num_tx_desc;
3607
3608         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3609             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3610         IGB_TX_UNLOCK(txr);
3611 }
3612
3613 /*********************************************************************
3614  *
3615  *  Initialize all transmit rings.
3616  *
3617  **********************************************************************/
3618 static void
3619 igb_setup_transmit_structures(struct adapter *adapter)
3620 {
3621         struct tx_ring *txr = adapter->tx_rings;
3622
3623         for (int i = 0; i < adapter->num_queues; i++, txr++)
3624                 igb_setup_transmit_ring(txr);
3625
3626         return;
3627 }
3628
3629 /*********************************************************************
3630  *
3631  *  Enable transmit unit.
3632  *
3633  **********************************************************************/
3634 static void
3635 igb_initialize_transmit_units(struct adapter *adapter)
3636 {
3637         struct tx_ring  *txr = adapter->tx_rings;
3638         struct e1000_hw *hw = &adapter->hw;
3639         u32             tctl, txdctl;
3640
3641         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3642         tctl = txdctl = 0;
3643
3644         /* Setup the Tx Descriptor Rings */
3645         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3646                 u64 bus_addr = txr->txdma.dma_paddr;
3647
3648                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3649                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3650                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3651                     (uint32_t)(bus_addr >> 32));
3652                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3653                     (uint32_t)bus_addr);
3654
3655                 /* Setup the HW Tx Head and Tail descriptor pointers */
3656                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3657                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3658
3659                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3660                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3661                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3662
3663                 txr->queue_status = IGB_QUEUE_IDLE;
3664
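                /*
                ** The three thresholds below are packed into TXDCTL
                ** at bit offsets 0 (prefetch), 8 (host) and 16
                ** (write-back), as the shifts show.
                */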
3665                 txdctl |= IGB_TX_PTHRESH;
3666                 txdctl |= IGB_TX_HTHRESH << 8;
3667                 txdctl |= IGB_TX_WTHRESH << 16;
3668                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3669                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3670         }
3671
3672         if (adapter->vf_ifp)
3673                 return;
3674
3675         e1000_config_collision_dist(hw);
3676
3677         /* Program the Transmit Control Register */
3678         tctl = E1000_READ_REG(hw, E1000_TCTL);
3679         tctl &= ~E1000_TCTL_CT;
3680         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3681                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3682
3683         /* This write will effectively turn on the transmit unit. */
3684         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3685 }
3686
3687 /*********************************************************************
3688  *
3689  *  Free all transmit rings.
3690  *
3691  **********************************************************************/
3692 static void
3693 igb_free_transmit_structures(struct adapter *adapter)
3694 {
3695         struct tx_ring *txr = adapter->tx_rings;
3696
3697         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3698                 IGB_TX_LOCK(txr);
3699                 igb_free_transmit_buffers(txr);
3700                 igb_dma_free(adapter, &txr->txdma);
3701                 IGB_TX_UNLOCK(txr);
3702                 IGB_TX_LOCK_DESTROY(txr);
3703         }
3704         free(adapter->tx_rings, M_DEVBUF);
3705 }
3706
3707 /*********************************************************************
3708  *
3709  *  Free transmit ring related data structures.
3710  *
3711  **********************************************************************/
3712 static void
3713 igb_free_transmit_buffers(struct tx_ring *txr)
3714 {
3715         struct adapter *adapter = txr->adapter;
3716         struct igb_tx_buf *tx_buffer;
3717         int             i;
3718
3719         INIT_DEBUGOUT("free_transmit_ring: begin");
3720
3721         if (txr->tx_buffers == NULL)
3722                 return;
3723
3724         tx_buffer = txr->tx_buffers;
3725         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3726                 if (tx_buffer->m_head != NULL) {
3727                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3728                             BUS_DMASYNC_POSTWRITE);
3729                         bus_dmamap_unload(txr->txtag,
3730                             tx_buffer->map);
3731                         m_freem(tx_buffer->m_head);
3732                         tx_buffer->m_head = NULL;
3733                         if (tx_buffer->map != NULL) {
3734                                 bus_dmamap_destroy(txr->txtag,
3735                                     tx_buffer->map);
3736                                 tx_buffer->map = NULL;
3737                         }
3738                 } else if (tx_buffer->map != NULL) {
3739                         bus_dmamap_unload(txr->txtag,
3740                             tx_buffer->map);
3741                         bus_dmamap_destroy(txr->txtag,
3742                             tx_buffer->map);
3743                         tx_buffer->map = NULL;
3744                 }
3745         }
3746 #ifndef IGB_LEGACY_TX
3747         if (txr->br != NULL)
3748                 buf_ring_free(txr->br, M_DEVBUF);
3749 #endif
3750         if (txr->tx_buffers != NULL) {
3751                 free(txr->tx_buffers, M_DEVBUF);
3752                 txr->tx_buffers = NULL;
3753         }
3754         if (txr->txtag != NULL) {
3755                 bus_dma_tag_destroy(txr->txtag);
3756                 txr->txtag = NULL;
3757         }
3758         return;
3759 }
3760
3761 /**********************************************************************
3762  *
3763  *  Setup work for hardware segmentation offload (TSO) on
3764  *  adapters using advanced tx descriptors
3765  *
3766  **********************************************************************/
3767 static int
3768 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3769     u32 *cmd_type_len, u32 *olinfo_status)
3770 {
3771         struct adapter *adapter = txr->adapter;
3772         struct e1000_adv_tx_context_desc *TXD;
3773         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3774         u32 mss_l4len_idx = 0, paylen;
3775         u16 vtag = 0, eh_type;
3776         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3777         struct ether_vlan_header *eh;
3778 #ifdef INET6
3779         struct ip6_hdr *ip6;
3780 #endif
3781 #ifdef INET
3782         struct ip *ip;
3783 #endif
3784         struct tcphdr *th;
3785
3786
3787         /*
3788          * Determine where frame payload starts.
3789          * Jump over vlan headers if already present
3790          */
3791         eh = mtod(mp, struct ether_vlan_header *);
3792         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3793                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3794                 eh_type = eh->evl_proto;
3795         } else {
3796                 ehdrlen = ETHER_HDR_LEN;
3797                 eh_type = eh->evl_encap_proto;
3798         }
3799
3800         switch (ntohs(eh_type)) {
3801 #ifdef INET6
3802         case ETHERTYPE_IPV6:
3803                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3804                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3805                 if (ip6->ip6_nxt != IPPROTO_TCP)
3806                         return (ENXIO);
3807                 ip_hlen = sizeof(struct ip6_hdr);
3809                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3810                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3811                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3812                 break;
3813 #endif
3814 #ifdef INET
3815         case ETHERTYPE_IP:
3816                 ip = (struct ip *)(mp->m_data + ehdrlen);
3817                 if (ip->ip_p != IPPROTO_TCP)
3818                         return (ENXIO);
3819                 ip->ip_sum = 0;
3820                 ip_hlen = ip->ip_hl << 2;
3821                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3822                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3823                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3824                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3825                 /* Tell transmit desc to also do IPv4 checksum. */
3826                 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3827                 break;
3828 #endif
3829         default:
3830                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3831                     __func__, ntohs(eh_type));
3832                 break;
3833         }
3834
3835         ctxd = txr->next_avail_desc;
3836         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3837
3838         tcp_hlen = th->th_off << 2;
3839
3840         /* This is used in the transmit desc in encap */
3841         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
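        /* e.g. a plain TCP/IPv4 frame with no options: ehdrlen 14,
        ** ip_hlen 20, tcp_hlen 20, so paylen = m_pkthdr.len - 54 */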
3842
3843         /* VLAN MACLEN IPLEN */
3844         if (mp->m_flags & M_VLANTAG) {
3845                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3846                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3847         }
3848
3849         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3850         vlan_macip_lens |= ip_hlen;
3851         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3852
3853         /* ADV DTYPE TUCMD */
3854         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3855         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3856         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3857
3858         /* MSS L4LEN IDX */
3859         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3860         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3861         /* 82575 needs the queue index added */
3862         if (adapter->hw.mac.type == e1000_82575)
3863                 mss_l4len_idx |= txr->me << 4;
3864         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3865
3866         TXD->seqnum_seed = htole32(0);
3867
3868         if (++ctxd == txr->num_desc)
3869                 ctxd = 0;
3870
3871         txr->tx_avail--;
3872         txr->next_avail_desc = ctxd;
3873         *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3874         *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3875         *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3876         ++txr->tso_tx;
3877         return (0);
3878 }
3879
3880 /*********************************************************************
3881  *
3882  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3883  *
3884  **********************************************************************/
3885
3886 static int
3887 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3888     u32 *cmd_type_len, u32 *olinfo_status)
3889 {
3890         struct e1000_adv_tx_context_desc *TXD;
3891         struct adapter *adapter = txr->adapter;
3892         struct ether_vlan_header *eh;
3893         struct ip *ip;
3894         struct ip6_hdr *ip6;
3895         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3896         int     ehdrlen, ip_hlen = 0;
3897         u16     etype;
3898         u8      ipproto = 0;
3899         int     offload = TRUE;
3900         int     ctxd = txr->next_avail_desc;
3901         u16     vtag = 0;
3902
3903         /* First check if TSO is to be used */
3904         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3905                 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3906
3907         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3908                 offload = FALSE;
3909
3910         /* Indicate the whole packet as payload when not doing TSO */
3911         *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3912
3913         /* Now ready a context descriptor */
3914         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3915
3916         /*
3917         ** In advanced descriptors the vlan tag must 
3918         ** be placed into the context descriptor. Hence
3919         ** we need to make one even if not doing offloads.
3920         */
3921         if (mp->m_flags & M_VLANTAG) {
3922                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3923                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3924         } else if (offload == FALSE) /* ... no offload to do */
3925                 return (0);
3926
3927         /*
3928          * Determine where frame payload starts.
3929          * Jump over vlan headers if already present,
3930          * helpful for QinQ too.
3931          */
3932         eh = mtod(mp, struct ether_vlan_header *);
3933         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3934                 etype = ntohs(eh->evl_proto);
3935                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3936         } else {
3937                 etype = ntohs(eh->evl_encap_proto);
3938                 ehdrlen = ETHER_HDR_LEN;
3939         }
3940
3941         /* Set the ether header length */
3942         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3943
3944         switch (etype) {
3945                 case ETHERTYPE_IP:
3946                         ip = (struct ip *)(mp->m_data + ehdrlen);
3947                         ip_hlen = ip->ip_hl << 2;
3948                         ipproto = ip->ip_p;
3949                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3950                         break;
3951                 case ETHERTYPE_IPV6:
3952                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3953                         ip_hlen = sizeof(struct ip6_hdr);
3954                         /* XXX-BZ this will go badly in case of ext hdrs. */
3955                         ipproto = ip6->ip6_nxt;
3956                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3957                         break;
3958                 default:
3959                         offload = FALSE;
3960                         break;
3961         }
3962
3963         vlan_macip_lens |= ip_hlen;
3964         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3965
3966         switch (ipproto) {
3967                 case IPPROTO_TCP:
3968 #if __FreeBSD_version >= 1000000
3969                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
3970 #else
3971                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3972 #endif
3973                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3974                         break;
3975                 case IPPROTO_UDP:
3976 #if __FreeBSD_version >= 1000000
3977                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
3978 #else
3979                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3980 #endif
3981                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3982                         break;
3983
3984 #if __FreeBSD_version >= 800000
3985                 case IPPROTO_SCTP:
3986 #if __FreeBSD_version >= 1000000
3987                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
3988 #else
3989                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3990 #endif
3991                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3992                         break;
3993 #endif
3994                 default:
3995                         offload = FALSE;
3996                         break;
3997         }
3998
3999         if (offload) /* For the TX descriptor setup */
4000                 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4001
4002         /* 82575 needs the queue index added */
4003         if (adapter->hw.mac.type == e1000_82575)
4004                 mss_l4len_idx = txr->me << 4;
4005
4006         /* Now copy bits into descriptor */
4007         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
4008         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
4009         TXD->seqnum_seed = htole32(0);
4010         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
4011
4012         /* We've consumed the first desc, adjust counters */
4013         if (++ctxd == txr->num_desc)
4014                 ctxd = 0;
4015         txr->next_avail_desc = ctxd;
4016         --txr->tx_avail;
4017
4018         return (0);
4019 }
4020
4021 /**********************************************************************
4022  *
4023  *  Examine each tx_buffer in the used queue. If the hardware is done
4024  *  processing the packet then free associated resources. The
4025  *  tx_buffer is put back on the free queue.
4026  *
4027  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
4028  **********************************************************************/
4029 static bool
4030 igb_txeof(struct tx_ring *txr)
4031 {
4032         struct adapter          *adapter = txr->adapter;
4033 #ifdef DEV_NETMAP
4034         struct ifnet            *ifp = adapter->ifp;
4035 #endif /* DEV_NETMAP */
4036         u32                     work, processed = 0;
4037         int                     limit = adapter->tx_process_limit;
4038         struct igb_tx_buf       *buf;
4039         union e1000_adv_tx_desc *txd;
4040
4041         mtx_assert(&txr->tx_mtx, MA_OWNED);
4042
4043 #ifdef DEV_NETMAP
4044         if (netmap_tx_irq(ifp, txr->me))
4045                 return (FALSE);
4046 #endif /* DEV_NETMAP */
4047
4048         if (txr->tx_avail == txr->num_desc) {
4049                 txr->queue_status = IGB_QUEUE_IDLE;
4050                 return FALSE;
4051         }
4052
4053         /* Get work starting point */
4054         work = txr->next_to_clean;
4055         buf = &txr->tx_buffers[work];
4056         txd = &txr->tx_base[work];
4057         work -= txr->num_desc; /* The distance to ring end */
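        /*
        ** work now holds next_to_clean - num_desc modulo 2^32, i.e.
        ** a negative distance from the ring end: with 1024
        ** descriptors and next_to_clean = 100 it counts up from
        ** -924, and reaching zero below signals a ring wrap.
        */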
4058         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4059             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4060         do {
4061                 union e1000_adv_tx_desc *eop = buf->eop;
4062                 if (eop == NULL) /* No work */
4063                         break;
4064
4065                 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4066                         break;  /* I/O not complete */
4067
4068                 if (buf->m_head) {
4069                         txr->bytes +=
4070                             buf->m_head->m_pkthdr.len;
4071                         bus_dmamap_sync(txr->txtag,
4072                             buf->map,
4073                             BUS_DMASYNC_POSTWRITE);
4074                         bus_dmamap_unload(txr->txtag,
4075                             buf->map);
4076                         m_freem(buf->m_head);
4077                         buf->m_head = NULL;
4078                 }
4079                 buf->eop = NULL;
4080                 ++txr->tx_avail;
4081
4082                 /* We clean the range if multi segment */
4083                 while (txd != eop) {
4084                         ++txd;
4085                         ++buf;
4086                         ++work;
4087                         /* wrap the ring? */
4088                         if (__predict_false(!work)) {
4089                                 work -= txr->num_desc;
4090                                 buf = txr->tx_buffers;
4091                                 txd = txr->tx_base;
4092                         }
4093                         if (buf->m_head) {
4094                                 txr->bytes +=
4095                                     buf->m_head->m_pkthdr.len;
4096                                 bus_dmamap_sync(txr->txtag,
4097                                     buf->map,
4098                                     BUS_DMASYNC_POSTWRITE);
4099                                 bus_dmamap_unload(txr->txtag,
4100                                     buf->map);
4101                                 m_freem(buf->m_head);
4102                                 buf->m_head = NULL;
4103                         }
4104                         ++txr->tx_avail;
4105                         buf->eop = NULL;
4106
4107                 }
4108                 ++txr->packets;
4109                 ++processed;
4110                 txr->watchdog_time = ticks;
4111
4112                 /* Try the next packet */
4113                 ++txd;
4114                 ++buf;
4115                 ++work;
4116                 /* reset with a wrap */
4117                 if (__predict_false(!work)) {
4118                         work -= txr->num_desc;
4119                         buf = txr->tx_buffers;
4120                         txd = txr->tx_base;
4121                 }
4122                 prefetch(txd);
4123         } while (__predict_true(--limit));
4124
4125         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4126             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4127
4128         work += txr->num_desc;
4129         txr->next_to_clean = work;
4130
4131         /*
4132         ** Watchdog calculation: we know there's
4133         ** work outstanding, or the first return
4134         ** would have been taken; nothing processed
4135         ** for too long indicates a hang.
4136         */
4137         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4138                 txr->queue_status |= IGB_QUEUE_HUNG;
4139
4140         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4141                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;       
4142
4143         if (txr->tx_avail == txr->num_desc) {
4144                 txr->queue_status = IGB_QUEUE_IDLE;
4145                 return (FALSE);
4146         }
4147
4148         return (TRUE);
4149 }
4150
4151 /*********************************************************************
4152  *
4153  *  Refresh mbuf buffers for RX descriptor rings
4154  *   - now keeps its own state, so discards due to resource
4155  *     exhaustion are unnecessary; if an mbuf cannot be obtained
4156  *     it just returns, keeping its placeholder, and can simply
4157  *     be called again later to retry.
4158  *
4159  **********************************************************************/
4160 static void
4161 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4162 {
4163         struct adapter          *adapter = rxr->adapter;
4164         bus_dma_segment_t       hseg[1];
4165         bus_dma_segment_t       pseg[1];
4166         struct igb_rx_buf       *rxbuf;
4167         struct mbuf             *mh, *mp;
4168         int                     i, j, nsegs, error;
4169         bool                    refreshed = FALSE;
4170
4171         i = j = rxr->next_to_refresh;
4172         /*
4173         ** Get one descriptor beyond
4174         ** our work mark to control
4175         ** the loop.
4176         */
4177         if (++j == adapter->num_rx_desc)
4178                 j = 0;
4179
4180         while (j != limit) {
4181                 rxbuf = &rxr->rx_buffers[i];
4182                 /* No hdr mbuf used with header split off */
4183                 if (rxr->hdr_split == FALSE)
4184                         goto no_split;
4185                 if (rxbuf->m_head == NULL) {
4186                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4187                         if (mh == NULL)
4188                                 goto update;
4189                 } else
4190                         mh = rxbuf->m_head;
4191
4192                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4194                 mh->m_flags |= M_PKTHDR;
4195                 /* Get the memory mapping */
4196                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4197                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4198                 if (error != 0) {
4199                         printf("Refresh mbufs: hdr dmamap load"
4200                             " failure - %d\n", error);
4201                         m_free(mh);
4202                         rxbuf->m_head = NULL;
4203                         goto update;
4204                 }
4205                 rxbuf->m_head = mh;
4206                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4207                     BUS_DMASYNC_PREREAD);
4208                 rxr->rx_base[i].read.hdr_addr =
4209                     htole64(hseg[0].ds_addr);
4210 no_split:
4211                 if (rxbuf->m_pack == NULL) {
4212                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4213                             M_PKTHDR, adapter->rx_mbuf_sz);
4214                         if (mp == NULL)
4215                                 goto update;
4216                 } else
4217                         mp = rxbuf->m_pack;
4218
4219                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4220                 /* Get the memory mapping */
4221                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4222                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4223                 if (error != 0) {
4224                         printf("Refresh mbufs: payload dmamap load"
4225                             " failure - %d\n", error);
4226                         m_free(mp);
4227                         rxbuf->m_pack = NULL;
4228                         goto update;
4229                 }
4230                 rxbuf->m_pack = mp;
4231                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4232                     BUS_DMASYNC_PREREAD);
4233                 rxr->rx_base[i].read.pkt_addr =
4234                     htole64(pseg[0].ds_addr);
4235                 refreshed = TRUE; /* at least one buffer was refreshed */
4236
4237                 i = j; /* our next is precalculated */
4238                 rxr->next_to_refresh = i;
4239                 if (++j == adapter->num_rx_desc)
4240                         j = 0;
4241         }
4242 update:
4243         if (refreshed) /* update tail */
4244                 E1000_WRITE_REG(&adapter->hw,
4245                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4246         return;
4247 }
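
/*
 * The i/j walk in igb_refresh_mbufs() keeps j one descriptor ahead
 * of i and stops when j reaches the caller's limit, so the refresh
 * can never lap the clean point.  Worked example: with
 * num_rx_desc = 8, next_to_refresh = 6 and limit = 1, slots 6 and 7
 * are refreshed, next_to_refresh ends at 0, and that value is
 * written to RDT.
 */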
4248
4249
4250 /*********************************************************************
4251  *
4252  *  Allocate memory for rx_buffer structures. Since we use one
4253  *  rx_buffer per received packet, the maximum number of rx_buffer's
4254  *  that we'll need is equal to the number of receive descriptors
4255  *  that we've allocated.
4256  *
4257  **********************************************************************/
4258 static int
4259 igb_allocate_receive_buffers(struct rx_ring *rxr)
4260 {
4261         struct  adapter         *adapter = rxr->adapter;
4262         device_t                dev = adapter->dev;
4263         struct igb_rx_buf       *rxbuf;
4264         int                     i, bsize, error;
4265
4266         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4267         if (!(rxr->rx_buffers =
4268             (struct igb_rx_buf *) malloc(bsize,
4269             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4270                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4271                 error = ENOMEM;
4272                 goto fail;
4273         }
4274
4275         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4276                                    1, 0,                /* alignment, bounds */
4277                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4278                                    BUS_SPACE_MAXADDR,   /* highaddr */
4279                                    NULL, NULL,          /* filter, filterarg */
4280                                    MSIZE,               /* maxsize */
4281                                    1,                   /* nsegments */
4282                                    MSIZE,               /* maxsegsize */
4283                                    0,                   /* flags */
4284                                    NULL,                /* lockfunc */
4285                                    NULL,                /* lockfuncarg */
4286                                    &rxr->htag))) {
4287                 device_printf(dev, "Unable to create RX DMA tag\n");
4288                 goto fail;
4289         }
4290
4291         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4292                                    1, 0,                /* alignment, bounds */
4293                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4294                                    BUS_SPACE_MAXADDR,   /* highaddr */
4295                                    NULL, NULL,          /* filter, filterarg */
4296                                    MJUM9BYTES,          /* maxsize */
4297                                    1,                   /* nsegments */
4298                                    MJUM9BYTES,          /* maxsegsize */
4299                                    0,                   /* flags */
4300                                    NULL,                /* lockfunc */
4301                                    NULL,                /* lockfuncarg */
4302                                    &rxr->ptag))) {
4303                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4304                 goto fail;
4305         }
4306
4307         for (i = 0; i < adapter->num_rx_desc; i++) {
4308                 rxbuf = &rxr->rx_buffers[i];
4309                 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4310                 if (error) {
4311                         device_printf(dev,
4312                             "Unable to create RX head DMA maps\n");
4313                         goto fail;
4314                 }
4315                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4316                 if (error) {
4317                         device_printf(dev,
4318                             "Unable to create RX packet DMA maps\n");
4319                         goto fail;
4320                 }
4321         }
4322
4323         return (0);
4324
4325 fail:
4326         /* Frees all, but can handle partial completion */
4327         igb_free_receive_structures(adapter);
4328         return (error);
4329 }
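
/*
 * For reference, the busdma lifecycle used for these buffers:
 * bus_dma_tag_create() once per buffer class (header vs. payload
 * above), bus_dmamap_create() once per descriptor slot, then
 * bus_dmamap_load_mbuf_sg(), bus_dmamap_sync() and
 * bus_dmamap_unload() per packet, with bus_dmamap_destroy() and
 * bus_dma_tag_destroy() at teardown (see igb_free_receive_buffers()
 * below).
 */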
4330
4331
4332 static void
4333 igb_free_receive_ring(struct rx_ring *rxr)
4334 {
4335         struct  adapter         *adapter = rxr->adapter;
4336         struct igb_rx_buf       *rxbuf;
4337
4338
4339         for (int i = 0; i < adapter->num_rx_desc; i++) {
4340                 rxbuf = &rxr->rx_buffers[i];
4341                 if (rxbuf->m_head != NULL) {
4342                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4343                             BUS_DMASYNC_POSTREAD);
4344                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4345                         rxbuf->m_head->m_flags |= M_PKTHDR;
4346                         m_freem(rxbuf->m_head);
4347                 }
4348                 if (rxbuf->m_pack != NULL) {
4349                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4350                             BUS_DMASYNC_POSTREAD);
4351                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4352                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4353                         m_freem(rxbuf->m_pack);
4354                 }
4355                 rxbuf->m_head = NULL;
4356                 rxbuf->m_pack = NULL;
4357         }
4358 }
4359
4360
4361 /*********************************************************************
4362  *
4363  *  Initialize a receive ring and its buffers.
4364  *
4365  **********************************************************************/
4366 static int
4367 igb_setup_receive_ring(struct rx_ring *rxr)
4368 {
4369         struct  adapter         *adapter;
4370         struct  ifnet           *ifp;
4371         device_t                dev;
4372         struct igb_rx_buf       *rxbuf;
4373         bus_dma_segment_t       pseg[1], hseg[1];
4374         struct lro_ctrl         *lro = &rxr->lro;
4375         int                     rsize, nsegs, error = 0;
4376 #ifdef DEV_NETMAP
4377         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4378         struct netmap_slot *slot;
4379 #endif /* DEV_NETMAP */
4380
4381         adapter = rxr->adapter;
4382         dev = adapter->dev;
4383         ifp = adapter->ifp;
4384
4385         /* Clear the ring contents */
4386         IGB_RX_LOCK(rxr);
4387 #ifdef DEV_NETMAP
4388         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4389 #endif /* DEV_NETMAP */
4390         rsize = roundup2(adapter->num_rx_desc *
4391             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4392         bzero((void *)rxr->rx_base, rsize);
4393
4394         /*
4395         ** Free current RX buffer structures and their mbufs
4396         */
4397         igb_free_receive_ring(rxr);
4398
4399         /* Configure for header split? */
4400         if (igb_header_split)
4401                 rxr->hdr_split = TRUE;
4402
4403         /* Now replenish the ring mbufs */
4404         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4405                 struct mbuf     *mh, *mp;
4406
4407                 rxbuf = &rxr->rx_buffers[j];
4408 #ifdef DEV_NETMAP
4409                 if (slot) {
4410                         /* slot sj is mapped to the j-th NIC-ring entry */
4411                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4412                         uint64_t paddr;
4413                         void *addr;
4414
4415                         addr = PNMB(na, slot + sj, &paddr);
4416                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4417                         /* Update descriptor */
4418                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4419                         continue;
4420                 }
4421 #endif /* DEV_NETMAP */
4422                 if (rxr->hdr_split == FALSE)
4423                         goto skip_head;
4424
4425                 /* First the header */
4426                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4427                 if (rxbuf->m_head == NULL) {
4428                         error = ENOBUFS;
4429                         goto fail;
4430                 }
4431                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4432                 mh = rxbuf->m_head;
4433                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4434                 mh->m_flags |= M_PKTHDR;
4435                 /* Get the memory mapping */
4436                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4437                     rxbuf->hmap, rxbuf->m_head, hseg,
4438                     &nsegs, BUS_DMA_NOWAIT);
4439                 if (error != 0) /* Nothing elegant to do here */
4440                         goto fail;
4441                 bus_dmamap_sync(rxr->htag,
4442                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4443                 /* Update descriptor */
4444                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4445
4446 skip_head:
4447                 /* Now the payload cluster */
4448                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4449                     M_PKTHDR, adapter->rx_mbuf_sz);
4450                 if (rxbuf->m_pack == NULL) {
4451                         error = ENOBUFS;
4452                         goto fail;
4453                 }
4454                 mp = rxbuf->m_pack;
4455                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4456                 /* Get the memory mapping */
4457                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4458                     rxbuf->pmap, mp, pseg,
4459                     &nsegs, BUS_DMA_NOWAIT);
4460                 if (error != 0)
4461                         goto fail;
4462                 bus_dmamap_sync(rxr->ptag,
4463                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4464                 /* Update descriptor */
4465                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4466         }
4467
4468         /* Setup our descriptor indices */
4469         rxr->next_to_check = 0;
4470         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4471         rxr->lro_enabled = FALSE;
4472         rxr->rx_split_packets = 0;
4473         rxr->rx_bytes = 0;
4474
4475         rxr->fmp = NULL;
4476         rxr->lmp = NULL;
4477
4478         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4479             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4480
4481         /*
4482         ** Now set up the LRO interface; we
4483         ** also only do header split when LRO
4484         ** is enabled, since it is so often
4485         ** undesirable in other setups.
4486         */
4487         if (ifp->if_capenable & IFCAP_LRO) {
4488                 error = tcp_lro_init(lro);
4489                 if (error) {
4490                         device_printf(dev, "LRO Initialization failed!\n");
4491                         goto fail;
4492                 }
4493                 INIT_DEBUGOUT("RX LRO Initialized\n");
4494                 rxr->lro_enabled = TRUE;
4495                 lro->ifp = adapter->ifp;
4496         }
4497
4498         IGB_RX_UNLOCK(rxr);
4499         return (0);
4500
4501 fail:
4502         igb_free_receive_ring(rxr);
4503         IGB_RX_UNLOCK(rxr);
4504         return (error);
4505 }
4506
4507
4508 /*********************************************************************
4509  *
4510  *  Initialize all receive rings.
4511  *
4512  **********************************************************************/
4513 static int
4514 igb_setup_receive_structures(struct adapter *adapter)
4515 {
4516         struct rx_ring *rxr = adapter->rx_rings;
4517         int i;
4518
4519         for (i = 0; i < adapter->num_queues; i++, rxr++)
4520                 if (igb_setup_receive_ring(rxr))
4521                         goto fail;
4522
4523         return (0);
4524 fail:
4525         /*
4526          * Free RX buffers allocated so far, we will only handle
4527          * the rings that completed, the failing case will have
4528          * cleaned up for itself. 'i' is the endpoint.
4529          */
4530         for (int j = 0; j < i; ++j) {
4531                 rxr = &adapter->rx_rings[j];
4532                 IGB_RX_LOCK(rxr);
4533                 igb_free_receive_ring(rxr);
4534                 IGB_RX_UNLOCK(rxr);
4535         }
4536
4537         return (ENOBUFS);
4538 }
4539
4540 /*
4541  * Initialise the RSS mapping for NICs that support multiple transmit/
4542  * receive rings.
4543  */
4544 static void
4545 igb_initialise_rss_mapping(struct adapter *adapter)
4546 {
4547         struct e1000_hw *hw = &adapter->hw;
4548         int i;
4549         int queue_id;
4550         u32 reta;
4551         u32 rss_key[10], mrqc, shift = 0;
4552
4553         /* XXX? */
4554         if (adapter->hw.mac.type == e1000_82575)
4555                 shift = 6;
4556
4557         /*
4558          * The redirection table controls which destination
4559          * queue each bucket redirects traffic to.
4560          * Each DWORD represents four queues, with the LSB
4561          * being the first queue in the DWORD.
4562          *
4563          * This just allocates buckets to queues using round-robin
4564          * allocation.
4565          *
4566          * NOTE: It Just Happens to line up with the default
4567          * RSS allocation method.
4568          */
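
        /*
         * Worked example: with num_queues = 4 and shift = 0, buckets
         * map round-robin to queues 0..3 and the first register write
         * below is
         *
         *      E1000_RETA(0) = 0x03020100
         *
         * i.e. queue 0 in the LSB for bucket 0, queue 1 in the next
         * byte for bucket 1, and so on.
         */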
4569
4570         /* Warning FM follows */
4571         reta = 0;
4572         for (i = 0; i < 128; i++) {
4573 #ifdef  RSS
4574                 queue_id = rss_get_indirection_to_bucket(i);
4575                 /*
4576                  * If we have more queues than buckets, we'll
4577                  * end up mapping buckets to a subset of the
4578                  * queues.
4579                  *
4580                  * If we have more buckets than queues, we'll
4581                  * end up instead assigning multiple buckets
4582                  * to queues.
4583                  *
4584                  * Both are suboptimal, but we need to handle
4585                  * the case so we don't go out of bounds
4586                  * indexing arrays and such.
4587                  */
4588                 queue_id = queue_id % adapter->num_queues;
4589 #else
4590                 queue_id = (i % adapter->num_queues);
4591 #endif
4592                 /* Adjust if required */
4593                 queue_id = queue_id << shift;
4594
4595                 /*
4596                  * The low 8 bits are for hash value (n+0);
4597                  * The next 8 bits are for hash value (n+1), etc.
4598                  */
4599                 reta = reta >> 8;
4600                 reta = reta | (((uint32_t) queue_id) << 24);
4601                 if ((i & 3) == 3) {
4602                         E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4603                         reta = 0;
4604                 }
4605         }
4606
4607         /* Now fill in hash table */
4608
4609         /*
4610          * MRQC: Multiple Receive Queues Command
4611          * Set queuing to RSS control, number depends on the device.
4612          */
4613         mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4614
4615 #ifdef  RSS
4616         /* XXX ew typecasting */
4617         rss_getkey((uint8_t *) &rss_key);
4618 #else
4619         arc4rand(&rss_key, sizeof(rss_key), 0);
4620 #endif
4621         for (i = 0; i < 10; i++)
4622                 E1000_WRITE_REG_ARRAY(hw,
4623                     E1000_RSSRK(0), i, rss_key[i]);
4624
4625         /*
4626          * Configure the RSS fields to hash upon.
4627          */
4628         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4629             E1000_MRQC_RSS_FIELD_IPV4_TCP);
4630         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4631             E1000_MRQC_RSS_FIELD_IPV6_TCP);
4632         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4633             E1000_MRQC_RSS_FIELD_IPV6_UDP);
4634         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4635             E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4636
4637         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4638 }
4639
4640 /*********************************************************************
4641  *
4642  *  Enable receive unit.
4643  *
4644  **********************************************************************/
4645 static void
4646 igb_initialize_receive_units(struct adapter *adapter)
4647 {
4648         struct rx_ring  *rxr = adapter->rx_rings;
4649         struct ifnet    *ifp = adapter->ifp;
4650         struct e1000_hw *hw = &adapter->hw;
4651         u32             rctl, rxcsum, psize, srrctl = 0;
4652
4653         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4654
4655         /*
4656          * Make sure receives are disabled while setting
4657          * up the descriptor ring
4658          */
4659         rctl = E1000_READ_REG(hw, E1000_RCTL);
4660         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4661
4662         /*
4663         ** Set up for header split
4664         */
4665         if (igb_header_split) {
4666                 /* Use a standard mbuf for the header */
4667                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4668                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4669         } else
4670                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4671
4672         /*
4673         ** Set up for jumbo frames
4674         */
4675         if (ifp->if_mtu > ETHERMTU) {
4676                 rctl |= E1000_RCTL_LPE;
4677                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4678                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4679                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4680                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4681                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4682                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4683                 }
4684                 /* Set maximum packet len */
4685                 psize = adapter->max_frame_size;
4686                 /* are we on a vlan? */
4687                 if (adapter->ifp->if_vlantrunk != NULL)
4688                         psize += VLAN_TAG_SIZE;
4689                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4690         } else {
4691                 rctl &= ~E1000_RCTL_LPE;
4692                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4693                 rctl |= E1000_RCTL_SZ_2048;
4694         }
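
        /*
         * Note that the SRRCTL BSIZEPKT field programmed above is in
         * 1 KB units (E1000_SRRCTL_BSIZEPKT_SHIFT is 10 in the shared
         * code), so for example:
         *
         *      2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT == 2   (2 KB buffers)
         *      8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT == 8   (8 KB buffers)
         */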
4695
4696         /*
4697          * If TX flow control is disabled and there's >1 queue defined,
4698          * enable DROP.
4699          *
4700          * This drops frames rather than hanging the RX MAC for all queues.
4701          */
4702         if ((adapter->num_queues > 1) &&
4703             (adapter->fc == e1000_fc_none ||
4704              adapter->fc == e1000_fc_rx_pause)) {
4705                 srrctl |= E1000_SRRCTL_DROP_EN;
4706         }
4707
4708         /* Setup the Base and Length of the Rx Descriptor Rings */
4709         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4710                 u64 bus_addr = rxr->rxdma.dma_paddr;
4711                 u32 rxdctl;
4712
4713                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4714                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4715                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4716                     (uint32_t)(bus_addr >> 32));
4717                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4718                     (uint32_t)bus_addr);
4719                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4720                 /* Enable this Queue */
4721                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4722                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4723                 rxdctl &= 0xFFF00000;
4724                 rxdctl |= IGB_RX_PTHRESH;
4725                 rxdctl |= IGB_RX_HTHRESH << 8;
4726                 rxdctl |= IGB_RX_WTHRESH << 16;
4727                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4728         }
4729
4730         /*
4731         ** Setup for RX MultiQueue
4732         */
4733         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4734         if (adapter->num_queues > 1) {
4735
4736                 /* rss setup */
4737                 igb_initialise_rss_mapping(adapter);
4738
4739                 /*
4740                 ** NOTE: Receive Full-Packet Checksum Offload 
4741                 ** is mutually exclusive with Multiqueue. However
4742                 ** this is not the same as TCP/IP checksums which
4743                 ** still work.
4744                 */
4745                 rxcsum |= E1000_RXCSUM_PCSD;
4746 #if __FreeBSD_version >= 800000
4747                 /* For SCTP Offload */
4748                 if ((hw->mac.type != e1000_82575) &&
4749                     (ifp->if_capenable & IFCAP_RXCSUM))
4750                         rxcsum |= E1000_RXCSUM_CRCOFL;
4751 #endif
4752         } else {
4753                 /* Non RSS setup */
4754                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4755                         rxcsum |= E1000_RXCSUM_IPPCSE;
4756 #if __FreeBSD_version >= 800000
4757                         if (adapter->hw.mac.type != e1000_82575)
4758                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4759 #endif
4760                 } else
4761                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4762         }
4763         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4764
4765         /* Setup the Receive Control Register */
4766         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4767         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4768                    E1000_RCTL_RDMTS_HALF |
4769                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4770         /* Strip CRC bytes. */
4771         rctl |= E1000_RCTL_SECRC;
4772         /* Make sure VLAN Filters are off */
4773         rctl &= ~E1000_RCTL_VFE;
4774         /* Don't store bad packets */
4775         rctl &= ~E1000_RCTL_SBP;
4776
4777         /* Enable Receives */
4778         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4779
4780         /*
4781          * Setup the HW Rx Head and Tail Descriptor Pointers
4782          *   - needs to be after enable
4783          */
4784         for (int i = 0; i < adapter->num_queues; i++) {
4785                 rxr = &adapter->rx_rings[i];
4786                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4787 #ifdef DEV_NETMAP
4788                 /*
4789                  * an init() while a netmap client is active must
4790                  * preserve the rx buffers passed to userspace.
4791                  * In this driver it means we adjust RDT to
4792                  * something different from next_to_refresh
4793                  * (which is not used in netmap mode).
4794                  */
4795                 if (ifp->if_capenable & IFCAP_NETMAP) {
4796                         struct netmap_adapter *na = NA(adapter->ifp);
4797                         struct netmap_kring *kring = &na->rx_rings[i];
4798                         int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4799
4800                         if (t >= adapter->num_rx_desc)
4801                                 t -= adapter->num_rx_desc;
4802                         else if (t < 0)
4803                                 t += adapter->num_rx_desc;
4804                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4805                 } else
4806 #endif /* DEV_NETMAP */
4807                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4808         }
4809         return;
4810 }
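
/*
 * Ring pointer semantics after the init above (non-netmap case):
 * RDH starts at next_to_check (0 from igb_setup_receive_ring()) and
 * RDT at next_to_refresh (num_rx_desc - 1), leaving one descriptor
 * unposted; keeping head and tail from ever meeting is what lets
 * the hardware tell a full ring from an empty one.
 */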
4811
4812 /*********************************************************************
4813  *
4814  *  Free receive rings.
4815  *
4816  **********************************************************************/
4817 static void
4818 igb_free_receive_structures(struct adapter *adapter)
4819 {
4820         struct rx_ring *rxr = adapter->rx_rings;
4821
4822         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4823                 struct lro_ctrl *lro = &rxr->lro;
4824                 igb_free_receive_buffers(rxr);
4825                 tcp_lro_free(lro);
4826                 igb_dma_free(adapter, &rxr->rxdma);
4827         }
4828
4829         free(adapter->rx_rings, M_DEVBUF);
4830 }
4831
4832 /*********************************************************************
4833  *
4834  *  Free receive ring data structures.
4835  *
4836  **********************************************************************/
4837 static void
4838 igb_free_receive_buffers(struct rx_ring *rxr)
4839 {
4840         struct adapter          *adapter = rxr->adapter;
4841         struct igb_rx_buf       *rxbuf;
4842         int i;
4843
4844         INIT_DEBUGOUT("free_receive_structures: begin");
4845
4846         /* Cleanup any existing buffers */
4847         if (rxr->rx_buffers != NULL) {
4848                 for (i = 0; i < adapter->num_rx_desc; i++) {
4849                         rxbuf = &rxr->rx_buffers[i];
4850                         if (rxbuf->m_head != NULL) {
4851                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4852                                     BUS_DMASYNC_POSTREAD);
4853                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4854                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4855                                 m_freem(rxbuf->m_head);
4856                         }
4857                         if (rxbuf->m_pack != NULL) {
4858                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4859                                     BUS_DMASYNC_POSTREAD);
4860                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4861                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4862                                 m_freem(rxbuf->m_pack);
4863                         }
4864                         rxbuf->m_head = NULL;
4865                         rxbuf->m_pack = NULL;
4866                         if (rxbuf->hmap != NULL) {
4867                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4868                                 rxbuf->hmap = NULL;
4869                         }
4870                         if (rxbuf->pmap != NULL) {
4871                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4872                                 rxbuf->pmap = NULL;
4873                         }
4874                 }
4875                 if (rxr->rx_buffers != NULL) {
4876                         free(rxr->rx_buffers, M_DEVBUF);
4877                         rxr->rx_buffers = NULL;
4878                 }
4879         }
4880
4881         if (rxr->htag != NULL) {
4882                 bus_dma_tag_destroy(rxr->htag);
4883                 rxr->htag = NULL;
4884         }
4885         if (rxr->ptag != NULL) {
4886                 bus_dma_tag_destroy(rxr->ptag);
4887                 rxr->ptag = NULL;
4888         }
4889 }
4890
4891 static __inline void
4892 igb_rx_discard(struct rx_ring *rxr, int i)
4893 {
4894         struct igb_rx_buf       *rbuf;
4895
4896         rbuf = &rxr->rx_buffers[i];
4897
4898         /* Partially received? Free the chain */
4899         if (rxr->fmp != NULL) {
4900                 rxr->fmp->m_flags |= M_PKTHDR;
4901                 m_freem(rxr->fmp);
4902                 rxr->fmp = NULL;
4903                 rxr->lmp = NULL;
4904         }
4905
4906         /*
4907         ** With advanced descriptors the writeback
4908         ** clobbers the buffer addrs, so it's easier
4909         ** to just free the existing mbufs and take
4910         ** the normal refresh path to get new buffers
4911         ** and mapping.
4912         */
4913         if (rbuf->m_head) {
4914                 m_free(rbuf->m_head);
4915                 rbuf->m_head = NULL;
4916                 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4917         }
4918
4919         if (rbuf->m_pack) {
4920                 m_free(rbuf->m_pack);
4921                 rbuf->m_pack = NULL;
4922                 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4923         }
4924
4925         return;
4926 }
4927
4928 static __inline void
4929 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4930 {
4931
4932         /*
4933          * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4934          * has been computed by hardware, and which carry no VLAN tag in the
4935          * ethernet header.
4936          */
4937         if (rxr->lro_enabled &&
4938             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4939             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4940             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4941             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4942             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4943             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4944                 /*
4945                  * Send to the stack if:
4946                  *  - LRO not enabled, or
4947                  *  - no LRO resources, or
4948                  *  - lro enqueue fails
4949                  */
4950                 if (rxr->lro.lro_cnt != 0)
4951                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4952                                 return;
4953         }
4954         IGB_RX_UNLOCK(rxr);
4955         (*ifp->if_input)(ifp, m);
4956         IGB_RX_LOCK(rxr);
4957 }
4958
4959 /*********************************************************************
4960  *
4961  *  This routine executes in interrupt context. It replenishes
4962  *  the mbufs in the descriptor ring and sends data which has
4963  *  been DMA'ed into host memory to the upper layer.
4964  *
4965  *  We loop at most count times if count is > 0, or until done if
4966  *  count < 0.
4967  *
4968  *  Return TRUE if more to clean, FALSE otherwise
4969  *********************************************************************/
4970 static bool
4971 igb_rxeof(struct igb_queue *que, int count, int *done)
4972 {
4973         struct adapter          *adapter = que->adapter;
4974         struct rx_ring          *rxr = que->rxr;
4975         struct ifnet            *ifp = adapter->ifp;
4976         struct lro_ctrl         *lro = &rxr->lro;
4977         struct lro_entry        *queued;
4978         int                     i, processed = 0, rxdone = 0;
4979         u32                     ptype, staterr = 0;
4980         union e1000_adv_rx_desc *cur;
4981
4982         IGB_RX_LOCK(rxr);
4983         /* Sync the ring. */
4984         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4985             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4986
4987 #ifdef DEV_NETMAP
4988         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4989                 IGB_RX_UNLOCK(rxr);
4990                 return (FALSE);
4991         }
4992 #endif /* DEV_NETMAP */
4993
4994         /* Main clean loop */
4995         for (i = rxr->next_to_check; count != 0;) {
4996                 struct mbuf             *sendmp, *mh, *mp;
4997                 struct igb_rx_buf       *rxbuf;
4998                 u16                     hlen, plen, hdr, vtag, pkt_info;
4999                 bool                    eop = FALSE;
5000  
5001                 cur = &rxr->rx_base[i];
5002                 staterr = le32toh(cur->wb.upper.status_error);
5003                 if ((staterr & E1000_RXD_STAT_DD) == 0)
5004                         break;
5005                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
5006                         break;
5007                 count--;
5008                 sendmp = mh = mp = NULL;
5009                 cur->wb.upper.status_error = 0;
5010                 rxbuf = &rxr->rx_buffers[i];
5011                 plen = le16toh(cur->wb.upper.length);
5012                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
5013                 if (((adapter->hw.mac.type == e1000_i350) ||
5014                     (adapter->hw.mac.type == e1000_i354)) &&
5015                     (staterr & E1000_RXDEXT_STATERR_LB))
5016                         vtag = be16toh(cur->wb.upper.vlan);
5017                 else
5018                         vtag = le16toh(cur->wb.upper.vlan);
5019                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
5020                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
5021                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
5022
5023                 /*
5024                  * Free the frame (all segments) if we're at EOP and
5025                  * it's an error.
5026                  *
5027                  * The datasheet states that EOP + status is only valid for
5028                  * the final segment in a multi-segment frame.
5029                  */
5030                 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
5031                         adapter->dropped_pkts++;
5032                         ++rxr->rx_discarded;
5033                         igb_rx_discard(rxr, i);
5034                         goto next_desc;
5035                 }
5036
5037                 /*
5038                 ** The way the hardware is configured to
5039                 ** split, it will ONLY use the header buffer
5040                 ** when header split is enabled; otherwise we
5041                 ** get normal behavior, i.e., both header and
5042                 ** payload are DMA'd into the payload buffer.
5043                 **
5044                 ** The fmp test is to catch the case where a
5045                 ** packet spans multiple descriptors, in that
5046                 ** case only the first header is valid.
5047                 */
5048                 if (rxr->hdr_split && rxr->fmp == NULL) {
5049                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5050                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5051                             E1000_RXDADV_HDRBUFLEN_SHIFT;
5052                         if (hlen > IGB_HDR_BUF)
5053                                 hlen = IGB_HDR_BUF;
5054                         mh = rxr->rx_buffers[i].m_head;
5055                         mh->m_len = hlen;
5056                         /* clear buf pointer for refresh */
5057                         rxbuf->m_head = NULL;
5058                         /*
5059                         ** Get the payload length; this
5060                         ** could be zero if it's a small
5061                         ** packet.
5062                         */
5063                         if (plen > 0) {
5064                                 mp = rxr->rx_buffers[i].m_pack;
5065                                 mp->m_len = plen;
5066                                 mh->m_next = mp;
5067                                 /* clear buf pointer */
5068                                 rxbuf->m_pack = NULL;
5069                                 rxr->rx_split_packets++;
5070                         }
5071                 } else {
5072                         /*
5073                         ** Either no header split, or a
5074                         ** secondary piece of a fragmented
5075                         ** split packet.
5076                         */
5077                         mh = rxr->rx_buffers[i].m_pack;
5078                         mh->m_len = plen;
5079                         /* clear buf info for refresh */
5080                         rxbuf->m_pack = NULL;
5081                 }
5082                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5083
5084                 ++processed; /* So we know when to refresh */
5085
5086                 /* Initial frame - setup */
5087                 if (rxr->fmp == NULL) {
5088                         mh->m_pkthdr.len = mh->m_len;
5089                         /* Save the head of the chain */
5090                         rxr->fmp = mh;
5091                         rxr->lmp = mh;
5092                         if (mp != NULL) {
5093                                 /* Add payload if split */
5094                                 mh->m_pkthdr.len += mp->m_len;
5095                                 rxr->lmp = mh->m_next;
5096                         }
5097                 } else {
5098                         /* Chain mbuf's together */
5099                         rxr->lmp->m_next = mh;
5100                         rxr->lmp = rxr->lmp->m_next;
5101                         rxr->fmp->m_pkthdr.len += mh->m_len;
5102                 }
5103
5104                 if (eop) {
5105                         rxr->fmp->m_pkthdr.rcvif = ifp;
5106                         rxr->rx_packets++;
5107                         /* capture data for AIM */
5108                         rxr->packets++;
5109                         rxr->bytes += rxr->fmp->m_pkthdr.len;
5110                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5111
5112                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5113                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
5114
5115                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5116                             (staterr & E1000_RXD_STAT_VP) != 0) {
5117                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5118                                 rxr->fmp->m_flags |= M_VLANTAG;
5119                         }
5120
5121                         /*
5122                          * In case of multiqueue, we have RXCSUM.PCSD bit set
5123                          * and never cleared. This means we have RSS hash
5124                          * available to be used.
5125                          */
5126                         if (adapter->num_queues > 1) {
5127                                 rxr->fmp->m_pkthdr.flowid = 
5128                                     le32toh(cur->wb.lower.hi_dword.rss);
5129                                 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5130                                 case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5131                                         M_HASHTYPE_SET(rxr->fmp,
5132                                             M_HASHTYPE_RSS_TCP_IPV4);
5133                                         break;
5134                                 case E1000_RXDADV_RSSTYPE_IPV4:
5135                                         M_HASHTYPE_SET(rxr->fmp,
5136                                             M_HASHTYPE_RSS_IPV4);
5137                                         break;
5138                                 case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5139                                         M_HASHTYPE_SET(rxr->fmp,
5140                                             M_HASHTYPE_RSS_TCP_IPV6);
5141                                         break;
5142                                 case E1000_RXDADV_RSSTYPE_IPV6_EX:
5143                                         M_HASHTYPE_SET(rxr->fmp,
5144                                             M_HASHTYPE_RSS_IPV6_EX);
5145                                         break;
5146                                 case E1000_RXDADV_RSSTYPE_IPV6:
5147                                         M_HASHTYPE_SET(rxr->fmp,
5148                                             M_HASHTYPE_RSS_IPV6);
5149                                         break;
5150                                 case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5151                                         M_HASHTYPE_SET(rxr->fmp,
5152                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
5153                                         break;
5154                                 default:
5155                                         /* XXX fallthrough */
5156                                         M_HASHTYPE_SET(rxr->fmp,
5157                                             M_HASHTYPE_OPAQUE);
5158                                 }
5159                         } else {
5160 #ifndef IGB_LEGACY_TX
5161                                 rxr->fmp->m_pkthdr.flowid = que->msix;
5162                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5163 #endif
5164                         }
5165                         sendmp = rxr->fmp;
5166                         /* Make sure to set M_PKTHDR. */
5167                         sendmp->m_flags |= M_PKTHDR;
5168                         rxr->fmp = NULL;
5169                         rxr->lmp = NULL;
5170                 }
5171
5172 next_desc:
5173                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5174                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5175
5176                 /* Advance our pointers to the next descriptor. */
5177                 if (++i == adapter->num_rx_desc)
5178                         i = 0;
5179                 /*
5180                 ** Send to the stack or LRO
5181                 */
5182                 if (sendmp != NULL) {
5183                         rxr->next_to_check = i;
5184                         igb_rx_input(rxr, ifp, sendmp, ptype);
5185                         i = rxr->next_to_check;
5186                         rxdone++;
5187                 }
5188
5189                 /* Every 8 descriptors we go to refresh mbufs */
5190                 if (processed == 8) {
5191                         igb_refresh_mbufs(rxr, i);
5192                         processed = 0;
5193                 }
5194         }
5195
5196         /* Catch any remainders */
5197         if (igb_rx_unrefreshed(rxr))
5198                 igb_refresh_mbufs(rxr, i);
5199
5200         rxr->next_to_check = i;
5201
5202         /*
5203          * Flush any outstanding LRO work
5204          */
5205         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5206                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5207                 tcp_lro_flush(lro, queued);
5208         }
5209
5210         if (done != NULL)
5211                 *done += rxdone;
5212
5213         IGB_RX_UNLOCK(rxr);
5214         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5215 }
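
/*
 * Worked example of the split-frame handling in igb_rxeof(): a frame
 * spanning two descriptors arrives first with EOP clear, so its mbuf
 * is parked on fmp/lmp; the second descriptor has EOP set, its mbuf
 * is linked via lmp->m_next and the pkthdr length summed, and only
 * then is the chain passed to igb_rx_input().  The 'processed'
 * counter batches buffer replenishment through igb_refresh_mbufs()
 * every 8 descriptors rather than once per packet.
 */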
5216
5217 /*********************************************************************
5218  *
5219  *  Verify that the hardware indicated that the checksum is valid.
5220  *  Inform the stack about the status of the checksum so that
5221  *  the stack doesn't spend time verifying it.
5222  *
5223  *********************************************************************/
5224 static void
5225 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5226 {
5227         u16 status = (u16)staterr;
5228         u8  errors = (u8) (staterr >> 24);
5229         int sctp;
5230
5231         /* Ignore Checksum bit is set */
5232         if (status & E1000_RXD_STAT_IXSM) {
5233                 mp->m_pkthdr.csum_flags = 0;
5234                 return;
5235         }
5236
5237         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5238             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5239                 sctp = 1;
5240         else
5241                 sctp = 0;
5242         if (status & E1000_RXD_STAT_IPCS) {
5243                 /* Did it pass? */
5244                 if (!(errors & E1000_RXD_ERR_IPE)) {
5245                         /* IP Checksum Good */
5246                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5247                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5248                 } else
5249                         mp->m_pkthdr.csum_flags = 0;
5250         }
5251
5252         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5253                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5254 #if __FreeBSD_version >= 800000
5255                 if (sctp) /* reassign */
5256                         type = CSUM_SCTP_VALID;
5257 #endif
5258                 /* Did it pass? */
5259                 if (!(errors & E1000_RXD_ERR_TCPE)) {
5260                         mp->m_pkthdr.csum_flags |= type;
5261                         if (sctp == 0)
5262                                 mp->m_pkthdr.csum_data = htons(0xffff);
5263                 }
5264         }
5265         return;
5266 }
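
/*
 * Example outcome for a good TCP/IPv4 frame: IPCS and TCPCS are set
 * with no error bits, so the mbuf leaves with csum_flags =
 * CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID |
 * CSUM_PSEUDO_HDR and csum_data = 0xffff, telling the stack to skip
 * both checksum verifications.
 */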
5267
5268 /*
5269  * This routine is run via a vlan
5270  * config EVENT.
5271  */
5272 static void
5273 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5274 {
5275         struct adapter  *adapter = ifp->if_softc;
5276         u32             index, bit;
5277
5278         if (ifp->if_softc !=  arg)   /* Not our event */
5279                 return;
5280
5281         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5282                 return;
5283
5284         IGB_CORE_LOCK(adapter);
5285         index = (vtag >> 5) & 0x7F;
5286         bit = vtag & 0x1F;
5287         adapter->shadow_vfta[index] |= (1 << bit);
5288         ++adapter->num_vlans;
5289         /* Change hw filter setting */
5290         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5291                 igb_setup_vlan_hw_support(adapter);
5292         IGB_CORE_UNLOCK(adapter);
5293 }
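
/*
 * For illustration, the shadow VFTA indexing above maps a VLAN tag
 * to one bit in a 128-word table, e.g. for vtag = 100:
 *
 *      index = (100 >> 5) & 0x7F;      -> 3
 *      bit   = 100 & 0x1F;             -> 4
 *
 * so bit 4 of shadow_vfta[3] is set.
 */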
5294
5295 /*
5296  * This routine is run via a vlan
5297  * unconfig EVENT.
5298  */
5299 static void
5300 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5301 {
5302         struct adapter  *adapter = ifp->if_softc;
5303         u32             index, bit;
5304
5305         if (ifp->if_softc !=  arg)
5306                 return;
5307
5308         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5309                 return;
5310
5311         IGB_CORE_LOCK(adapter);
5312         index = (vtag >> 5) & 0x7F;
5313         bit = vtag & 0x1F;
5314         adapter->shadow_vfta[index] &= ~(1 << bit);
5315         --adapter->num_vlans;
5316         /* Change hw filter setting */
5317         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5318                 igb_setup_vlan_hw_support(adapter);
5319         IGB_CORE_UNLOCK(adapter);
5320 }
5321
5322 static void
5323 igb_setup_vlan_hw_support(struct adapter *adapter)
5324 {
5325         struct e1000_hw *hw = &adapter->hw;
5326         struct ifnet    *ifp = adapter->ifp;
5327         u32             reg;
5328
5329         if (adapter->vf_ifp) {
5330                 e1000_rlpml_set_vf(hw,
5331                     adapter->max_frame_size + VLAN_TAG_SIZE);
5332                 return;
5333         }
5334
5335         reg = E1000_READ_REG(hw, E1000_CTRL);
5336         reg |= E1000_CTRL_VME;
5337         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5338
5339         /* Enable the Filter Table */
5340         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5341                 reg = E1000_READ_REG(hw, E1000_RCTL);
5342                 reg &= ~E1000_RCTL_CFIEN;
5343                 reg |= E1000_RCTL_VFE;
5344                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5345         }
5346
5347         /* Update the frame size */
5348         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5349             adapter->max_frame_size + VLAN_TAG_SIZE);
5350
5351         /* Don't bother with table if no vlans */
5352         if ((adapter->num_vlans == 0) ||
5353             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5354                 return;
5355         /*
5356         ** A soft reset zeroes out the VFTA, so
5357         ** we need to repopulate it now.
5358         */
5359         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5360                 if (adapter->shadow_vfta[i] != 0) {
5361                         if (adapter->vf_ifp)
5362                                 e1000_vfta_set_vf(hw,
5363                                     adapter->shadow_vfta[i], TRUE);
5364                         else
5365                                 e1000_write_vfta(hw,
5366                                     i, adapter->shadow_vfta[i]);
5367                 }
5368 }
5369
5370 static void
5371 igb_enable_intr(struct adapter *adapter)
5372 {
5373         /* With RSS set up what to auto clear */
5374         if (adapter->msix_mem) {
5375                 u32 mask = (adapter->que_mask | adapter->link_mask);
5376                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5377                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5378                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5379                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5380                     E1000_IMS_LSC);
5381         } else {
5382                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5383                     IMS_ENABLE_MASK);
5384         }
5385         E1000_WRITE_FLUSH(&adapter->hw);
5386
5387         return;
5388 }
5389
5390 static void
5391 igb_disable_intr(struct adapter *adapter)
5392 {
5393         if (adapter->msix_mem) {
5394                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5395                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5396         } 
5397         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5398         E1000_WRITE_FLUSH(&adapter->hw);
5399         return;
5400 }
5401
5402 /*
5403  * Bit of a misnomer: what this really means is
5404  * to enable OS management of the system, i.e.
5405  * to disable special hardware management features.
5406  */
5407 static void
5408 igb_init_manageability(struct adapter *adapter)
5409 {
5410         if (adapter->has_manage) {
5411                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5412                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5413
5414                 /* disable hardware interception of ARP */
5415                 manc &= ~(E1000_MANC_ARP_EN);
5416
5417                 /* enable receiving management packets to the host */
5418                 manc |= E1000_MANC_EN_MNG2HOST;
5419                 manc2h |= 1 << 5;  /* Mng Port 623 */
5420                 manc2h |= 1 << 6;  /* Mng Port 664 */
5421                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5422                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5423         }
5424 }
5425
5426 /*
5427  * Give control back to hardware management
5428  * controller if there is one.
5429  */
5430 static void
5431 igb_release_manageability(struct adapter *adapter)
5432 {
5433         if (adapter->has_manage) {
5434                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5435
5436                 /* re-enable hardware interception of ARP */
5437                 manc |= E1000_MANC_ARP_EN;
5438                 manc &= ~E1000_MANC_EN_MNG2HOST;
5439
5440                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5441         }
5442 }
5443
5444 /*
5445  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5446  * For ASF and Pass Through versions of f/w this means that
5447  * the driver is loaded. 
5448  *
5449  */
5450 static void
5451 igb_get_hw_control(struct adapter *adapter)
5452 {
5453         u32 ctrl_ext;
5454
5455         if (adapter->vf_ifp)
5456                 return;
5457
5458         /* Let firmware know the driver has taken over */
5459         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5460         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5461             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5462 }
5463
5464 /*
5465  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5466  * For ASF and Pass Through versions of f/w this means that the
5467  * driver is no longer loaded.
5468  *
5469  */
5470 static void
5471 igb_release_hw_control(struct adapter *adapter)
5472 {
5473         u32 ctrl_ext;
5474
5475         if (adapter->vf_ifp)
5476                 return;
5477
5478         /* Let firmware take over control of h/w */
5479         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5480         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5481             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5482 }
5483
5484 static int
5485 igb_is_valid_ether_addr(uint8_t *addr)
5486 {
5487         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5488
5489         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5490                 return (FALSE);
5491         }
5492
5493         return (TRUE);
5494 }
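
/*
 * Examples of the check above: 00:00:00:00:00:00 fails the bcmp(),
 * and any multicast or broadcast address (e.g. 01:00:5e:00:00:01 or
 * ff:ff:ff:ff:ff:ff) fails because the low bit of the first octet is
 * the group bit.
 */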
5495
5496
5497 /*
5498  * Enable PCI Wake On Lan capability
5499  */
5500 static void
5501 igb_enable_wakeup(device_t dev)
5502 {
5503         u16     cap, status;
5504         u8      id;
5505
5506         /* First find the capabilities pointer */
5507         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5508         /* Read the PM Capabilities */
5509         id = pci_read_config(dev, cap, 1);
5510         if (id != PCIY_PMG)     /* Something wrong */
5511                 return;
5512         /* OK, we have the power capabilities, so
5513            now get the status register */
5514         cap += PCIR_POWER_STATUS;
5515         status = pci_read_config(dev, cap, 2);
5516         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5517         pci_write_config(dev, cap, status, 2);
5518         return;
5519 }
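
/*
 * A minimal alternative sketch, assuming the standard pci_find_cap(9)
 * helper: rather than reading PCIR_CAP_PTR and checking a single
 * capability id as above, the capability list can be walked for us:
 *
 *      int pmc;
 *
 *      if (pci_find_cap(dev, PCIY_PMG, &pmc) == 0) {
 *              status = pci_read_config(dev,
 *                  pmc + PCIR_POWER_STATUS, 2);
 *              status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
 *              pci_write_config(dev,
 *                  pmc + PCIR_POWER_STATUS, status, 2);
 *      }
 */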
5520
5521 static void
5522 igb_led_func(void *arg, int onoff)
5523 {
5524         struct adapter  *adapter = arg;
5525
5526         IGB_CORE_LOCK(adapter);
5527         if (onoff) {
5528                 e1000_setup_led(&adapter->hw);
5529                 e1000_led_on(&adapter->hw);
5530         } else {
5531                 e1000_led_off(&adapter->hw);
5532                 e1000_cleanup_led(&adapter->hw);
5533         }
5534         IGB_CORE_UNLOCK(adapter);
5535 }
5536
5537 static uint64_t
5538 igb_get_vf_counter(if_t ifp, ift_counter cnt)
5539 {
5540         struct adapter *adapter;
5541         struct e1000_vf_stats *stats;
5542 #ifndef IGB_LEGACY_TX
5543         struct tx_ring *txr;
5544         uint64_t rv;
5545 #endif
5546
5547         adapter = if_getsoftc(ifp);
5548         stats = (struct e1000_vf_stats *)adapter->stats;
5549
5550         switch (cnt) {
5551         case IFCOUNTER_IPACKETS:
5552                 return (stats->gprc);
5553         case IFCOUNTER_OPACKETS:
5554                 return (stats->gptc);
5555         case IFCOUNTER_IBYTES:
5556                 return (stats->gorc);
5557         case IFCOUNTER_OBYTES:
5558                 return (stats->gotc);
5559         case IFCOUNTER_IMCASTS:
5560                 return (stats->mprc);
5561         case IFCOUNTER_IERRORS:
5562                 return (adapter->dropped_pkts);
5563         case IFCOUNTER_OERRORS:
5564                 return (adapter->watchdog_events);
5565 #ifndef IGB_LEGACY_TX
5566         case IFCOUNTER_OQDROPS:
5567                 rv = 0;
5568                 txr = adapter->tx_rings;
5569                 for (int i = 0; i < adapter->num_queues; i++, txr++)
5570                         rv += txr->br->br_drops;
5571                 return (rv);
5572 #endif
5573         default:
5574                 return (if_get_counter_default(ifp, cnt));
5575         }
5576 }
5577
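     /*
      * if_get_counter method: translate generic ifnet counters into
      * the MAC statistics gathered by igb_update_stats_counters(),
      * deferring to the VF variant for virtual functions.
      */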
5578 static uint64_t
5579 igb_get_counter(if_t ifp, ift_counter cnt)
5580 {
5581         struct adapter *adapter;
5582         struct e1000_hw_stats *stats;
5583 #ifndef IGB_LEGACY_TX
5584         struct tx_ring *txr;
5585         uint64_t rv;
5586 #endif
5587
5588         adapter = if_getsoftc(ifp);
5589         if (adapter->vf_ifp)
5590                 return (igb_get_vf_counter(ifp, cnt));
5591
5592         stats = (struct e1000_hw_stats *)adapter->stats;
5593
5594         switch (cnt) {
5595         case IFCOUNTER_IPACKETS:
5596                 return (stats->gprc);
5597         case IFCOUNTER_OPACKETS:
5598                 return (stats->gptc);
5599         case IFCOUNTER_IBYTES:
5600                 return (stats->gorc);
5601         case IFCOUNTER_OBYTES:
5602                 return (stats->gotc);
5603         case IFCOUNTER_IMCASTS:
5604                 return (stats->mprc);
5605         case IFCOUNTER_OMCASTS:
5606                 return (stats->mptc);
5607         case IFCOUNTER_IERRORS:
5608                 return (adapter->dropped_pkts + stats->rxerrc +
5609                     stats->crcerrs + stats->algnerrc +
5610                     stats->ruc + stats->roc + stats->cexterr);
5611         case IFCOUNTER_OERRORS:
5612                 return (stats->ecol + stats->latecol +
5613                     adapter->watchdog_events);
5614         case IFCOUNTER_COLLISIONS:
5615                 return (stats->colc);
5616         case IFCOUNTER_IQDROPS:
5617                 return (stats->mpc);
5618 #ifndef IGB_LEGACY_TX
5619         case IFCOUNTER_OQDROPS:
5620                 rv = 0;
5621                 txr = adapter->tx_rings;
5622                 for (int i = 0; i < adapter->num_queues; i++, txr++)
5623                         rv += txr->br->br_drops;
5624                 return (rv);
5625 #endif
5626         default:
5627                 return (if_get_counter_default(ifp, cnt));
5628         }
5629 }
5630
5631 /**********************************************************************
5632  *
5633  *  Update the board statistics counters.
5634  *
5635  **********************************************************************/
5636 static void
5637 igb_update_stats_counters(struct adapter *adapter)
5638 {
5639         struct e1000_hw         *hw = &adapter->hw;
5640         struct e1000_hw_stats   *stats;
5641
5642         /*
5643         ** The virtual function adapter has only a
5644         ** small, controlled set of stats, so update
5645         ** only those and return.
5646         */
5647         if (adapter->vf_ifp) {
5648                 igb_update_vf_stats_counters(adapter);
5649                 return;
5650         }
5651
5652         stats = (struct e1000_hw_stats  *)adapter->stats;
5653
5654         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5655            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5656                 stats->symerrs +=
5657                     E1000_READ_REG(hw, E1000_SYMERRS);
5658                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5659         }
5660
5661         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5662         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5663         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5664         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5665
5666         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5667         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5668         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5669         stats->dc += E1000_READ_REG(hw, E1000_DC);
5670         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5671         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5672         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5673         /*
5674         ** For watchdog management we need to know if we have been
5675         ** paused during the last interval, so capture that here.
5676         */ 
5677         adapter->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC);
5678         stats->xoffrxc += adapter->pause_frames;
5679         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5680         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5681         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5682         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5683         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5684         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5685         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5686         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5687         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5688         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5689         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5690         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5691
5692         /* For the 64-bit byte counters the low dword must be read first. */
5693         /* Both registers clear on the read of the high dword */
5694
5695         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5696             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5697         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5698             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5699
5700         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5701         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5702         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5703         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5704         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5705
5706         stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5707         stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5708         stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5709
5710         stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5711             ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5712         stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5713             ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5714
5715         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5716         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5717         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5718         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5719         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5720         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5721         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5722         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5723         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5724         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5725
5726         /* Interrupt Counts */
5727
5728         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5729         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5730         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5731         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5732         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5733         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5734         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5735         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5736         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5737
5738         /* Host to Card Statistics */
5739
5740         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5741         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5742         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5743         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5744         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5745         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5746         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5747         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5748             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5749         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5750             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5751         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5752         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5753         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5754
5755         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5756         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5757         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5758         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5759         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5760         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5761
5762         /* Driver specific counters */
5763         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5764         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5765         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5766         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5767         adapter->packet_buf_alloc_tx =
5768             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5769         adapter->packet_buf_alloc_rx =
5770             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5771 }
5772
5773
5774 /**********************************************************************
5775  *
5776  *  Initialize the VF board statistics counters.
5777  *
5778  **********************************************************************/
5779 static void
5780 igb_vf_init_stats(struct adapter *adapter)
5781 {
5782         struct e1000_hw *hw = &adapter->hw;
5783         struct e1000_vf_stats   *stats;
5784
5785         stats = (struct e1000_vf_stats  *)adapter->stats;
5786         if (stats == NULL)
5787                 return;
5788         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5789         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5790         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5791         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5792         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5793 }
5794  
5795 /**********************************************************************
5796  *
5797  *  Update the VF board statistics counters.
5798  *
5799  **********************************************************************/
5800 static void
5801 igb_update_vf_stats_counters(struct adapter *adapter)
5802 {
5803         struct e1000_hw *hw = &adapter->hw;
5804         struct e1000_vf_stats   *stats;
5805
5806         if (adapter->link_speed == 0)
5807                 return;
5808
5809         stats = (struct e1000_vf_stats  *)adapter->stats;
5810
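             /*
              * UPDATE_VF_REG (defined earlier in the driver) widens
              * each 32-bit VF register into its 64-bit soft counter,
              * using the previously read value to detect rollover.
              */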
5811         UPDATE_VF_REG(E1000_VFGPRC,
5812             stats->last_gprc, stats->gprc);
5813         UPDATE_VF_REG(E1000_VFGORC,
5814             stats->last_gorc, stats->gorc);
5815         UPDATE_VF_REG(E1000_VFGPTC,
5816             stats->last_gptc, stats->gptc);
5817         UPDATE_VF_REG(E1000_VFGOTC,
5818             stats->last_gotc, stats->gotc);
5819         UPDATE_VF_REG(E1000_VFMPRC,
5820             stats->last_mprc, stats->mprc);
5821 }
5822
5823 /* Export a single 32-bit register via a read-only sysctl. */
5824 static int
5825 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5826 {
5827         struct adapter *adapter;
5828         u_int val;
5829
5830         adapter = oidp->oid_arg1;
5831         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5832         return (sysctl_handle_int(oidp, &val, 0, req));
5833 }
5834
5835 /*
5836 **  Interrupt rate handler: report the current per-queue rate from EITR
5837 */
5838 static int
5839 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5840 {
5841         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5842         int                     error;
5843         u32                     reg, usec, rate;
5844
5845         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5846         usec = ((reg & 0x7FFC) >> 2);
5847         if (usec > 0)
5848                 rate = 1000000 / usec;
5849         else
5850                 rate = 0;
5851         error = sysctl_handle_int(oidp, &rate, 0, req);
5852         if (error || !req->newptr)
5853                 return (error);
5854         return (0);
5855 }
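     /*
      * Example query for the handler above (a sketch; the unit and
      * queue numbers depend on the system):
      *
      *   # sysctl dev.igb.0.queue0.interrupt_rate
      */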
5856
5857 /*
5858  * Add sysctl variables, one per statistic, to the system.
5859  */
5860 static void
5861 igb_add_hw_stats(struct adapter *adapter)
5862 {
5863         device_t dev = adapter->dev;
5864
5865         struct tx_ring *txr = adapter->tx_rings;
5866         struct rx_ring *rxr = adapter->rx_rings;
5867
5868         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5869         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5870         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5871         struct e1000_hw_stats *stats = adapter->stats;
5872
5873         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5874         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5875
5876 #define QUEUE_NAME_LEN 32
5877         char namebuf[QUEUE_NAME_LEN];
5878
5879         /* Driver Statistics */
5880         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5881                         CTLFLAG_RD, &adapter->dropped_pkts,
5882                         "Driver dropped packets");
5883         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 
5884                         CTLFLAG_RD, &adapter->link_irq,
5885                         "Link MSIX IRQ Handled");
5886         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5887                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5888                         "Defragmenting mbuf chain failed");
5889         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5890                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5891                         "Driver tx dma failure in xmit");
5892         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5893                         CTLFLAG_RD, &adapter->rx_overruns,
5894                         "RX overruns");
5895         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5896                         CTLFLAG_RD, &adapter->watchdog_events,
5897                         "Watchdog timeouts");
5898
5899         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5900                         CTLFLAG_RD, &adapter->device_control,
5901                         "Device Control Register");
5902         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5903                         CTLFLAG_RD, &adapter->rx_control,
5904                         "Receiver Control Register");
5905         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5906                         CTLFLAG_RD, &adapter->int_mask,
5907                         "Interrupt Mask");
5908         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5909                         CTLFLAG_RD, &adapter->eint_mask,
5910                         "Extended Interrupt Mask");
5911         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5912                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5913                         "Transmit Buffer Packet Allocation");
5914         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5915                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5916                         "Receive Buffer Packet Allocation");
5917         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5918                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5919                         "Flow Control High Watermark");
5920         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5921                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5922                         "Flow Control Low Watermark");
5923
5924         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5925                 struct lro_ctrl *lro = &rxr->lro;
5926
5927                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5928                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5929                                             CTLFLAG_RD, NULL, "Queue Name");
5930                 queue_list = SYSCTL_CHILDREN(queue_node);
5931
5932                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5933                                 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5934                                 sizeof(&adapter->queues[i]),
5935                                 igb_sysctl_interrupt_rate_handler,
5936                                 "IU", "Interrupt Rate");
5937
5938                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5939                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5940                                 igb_sysctl_reg_handler, "IU",
5941                                 "Transmit Descriptor Head");
5942                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5943                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5944                                 igb_sysctl_reg_handler, "IU",
5945                                 "Transmit Descriptor Tail");
5946                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5947                                 CTLFLAG_RD, &txr->no_desc_avail,
5948                                 "Queue Descriptors Unavailable");
5949                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5950                                 CTLFLAG_RD, &txr->total_packets,
5951                                 "Queue Packets Transmitted");
5952
5953                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5954                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5955                                 igb_sysctl_reg_handler, "IU",
5956                                 "Receive Descriptor Head");
5957                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5958                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5959                                 igb_sysctl_reg_handler, "IU",
5960                                 "Receive Descriptor Tail");
5961                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5962                                 CTLFLAG_RD, &rxr->rx_packets,
5963                                 "Queue Packets Received");
5964                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5965                                 CTLFLAG_RD, &rxr->rx_bytes,
5966                                 "Queue Bytes Received");
5967                 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
5968                                 CTLFLAG_RD, &lro->lro_queued, 0,
5969                                 "LRO Queued");
5970                 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
5971                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5972                                 "LRO Flushed");
5973         }
5974
5975         /* MAC stats get their own sub node */
5976
5977         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5978                                     CTLFLAG_RD, NULL, "MAC Statistics");
5979         stat_list = SYSCTL_CHILDREN(stat_node);
5980
5981         /*
5982         ** The VF adapter has a very limited set of stats
5983         ** since it's not managing the metal, so to speak.
5984         */
5985         if (adapter->vf_ifp) {
5986                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5987                                 CTLFLAG_RD, &stats->gprc,
5988                                 "Good Packets Received");
5989                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5990                                 CTLFLAG_RD, &stats->gptc,
5991                                 "Good Packets Transmitted");
5992                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5993                                 CTLFLAG_RD, &stats->gorc,
5994                                 "Good Octets Received");
5995                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5996                                 CTLFLAG_RD, &stats->gotc,
5997                                 "Good Octets Transmitted");
5998                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5999                                 CTLFLAG_RD, &stats->mprc,
6000                                 "Multicast Packets Received");
6001                 return;
6002         }
6003
6004         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
6005                         CTLFLAG_RD, &stats->ecol,
6006                         "Excessive collisions");
6007         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
6008                         CTLFLAG_RD, &stats->scc,
6009                         "Single collisions");
6010         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
6011                         CTLFLAG_RD, &stats->mcc,
6012                         "Multiple collisions");
6013         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
6014                         CTLFLAG_RD, &stats->latecol,
6015                         "Late collisions");
6016         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
6017                         CTLFLAG_RD, &stats->colc,
6018                         "Collision Count");
6019         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
6020                         CTLFLAG_RD, &stats->symerrs,
6021                         "Symbol Errors");
6022         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
6023                         CTLFLAG_RD, &stats->sec,
6024                         "Sequence Errors");
6025         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
6026                         CTLFLAG_RD, &stats->dc,
6027                         "Defer Count");
6028         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
6029                         CTLFLAG_RD, &stats->mpc,
6030                         "Missed Packets");
6031         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
6032                         CTLFLAG_RD, &stats->rlec,
6033                         "Receive Length Errors");
6034         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
6035                         CTLFLAG_RD, &stats->rnbc,
6036                         "Receive No Buffers");
6037         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
6038                         CTLFLAG_RD, &stats->ruc,
6039                         "Receive Undersize");
6040         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
6041                         CTLFLAG_RD, &stats->rfc,
6042                         "Fragmented Packets Received");
6043         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
6044                         CTLFLAG_RD, &stats->roc,
6045                         "Oversized Packets Received");
6046         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
6047                         CTLFLAG_RD, &stats->rjc,
6048                         "Recevied Jabber");
6049         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6050                         CTLFLAG_RD, &stats->rxerrc,
6051                         "Receive Errors");
6052         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6053                         CTLFLAG_RD, &stats->crcerrs,
6054                         "CRC errors");
6055         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6056                         CTLFLAG_RD, &stats->algnerrc,
6057                         "Alignment Errors");
6058         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6059                         CTLFLAG_RD, &stats->tncrs,
6060                         "Transmit with No CRS");
6061         /* On 82575 these are collision counts */
6062         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6063                         CTLFLAG_RD, &stats->cexterr,
6064                         "Collision/Carrier extension errors");
6065         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6066                         CTLFLAG_RD, &stats->xonrxc,
6067                         "XON Received");
6068         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6069                         CTLFLAG_RD, &stats->xontxc,
6070                         "XON Transmitted");
6071         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6072                         CTLFLAG_RD, &stats->xoffrxc,
6073                         "XOFF Received");
6074         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6075                         CTLFLAG_RD, &stats->xofftxc,
6076                         "XOFF Transmitted");
6077         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6078                         CTLFLAG_RD, &stats->fcruc,
6079                         "Unsupported Flow Control Received");
6080         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6081                         CTLFLAG_RD, &stats->mgprc,
6082                         "Management Packets Received");
6083         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6084                         CTLFLAG_RD, &stats->mgpdc,
6085                         "Management Packets Dropped");
6086         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6087                         CTLFLAG_RD, &stats->mgptc,
6088                         "Management Packets Transmitted");
6089         /* Packet Reception Stats */
6090         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6091                         CTLFLAG_RD, &stats->tpr,
6092                         "Total Packets Received");
6093         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6094                         CTLFLAG_RD, &stats->gprc,
6095                         "Good Packets Received");
6096         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6097                         CTLFLAG_RD, &stats->bprc,
6098                         "Broadcast Packets Received");
6099         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6100                         CTLFLAG_RD, &stats->mprc,
6101                         "Multicast Packets Received");
6102         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6103                         CTLFLAG_RD, &stats->prc64,
6104                         "64 byte frames received");
6105         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6106                         CTLFLAG_RD, &stats->prc127,
6107                         "65-127 byte frames received");
6108         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6109                         CTLFLAG_RD, &stats->prc255,
6110                         "128-255 byte frames received");
6111         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6112                         CTLFLAG_RD, &stats->prc511,
6113                         "256-511 byte frames received");
6114         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6115                         CTLFLAG_RD, &stats->prc1023,
6116                         "512-1023 byte frames received");
6117         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6118                         CTLFLAG_RD, &stats->prc1522,
6119                         "1023-1522 byte frames received");
6120         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
6121                         CTLFLAG_RD, &stats->gorc, 
6122                         "Good Octets Received");
6123         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", 
6124                         CTLFLAG_RD, &stats->tor, 
6125                         "Total Octets Received");
6126
6127         /* Packet Transmission Stats */
6128         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
6129                         CTLFLAG_RD, &stats->gotc, 
6130                         "Good Octets Transmitted"); 
6131         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", 
6132                         CTLFLAG_RD, &stats->tot, 
6133                         "Total Octets Transmitted");
6134         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6135                         CTLFLAG_RD, &stats->tpt,
6136                         "Total Packets Transmitted");
6137         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6138                         CTLFLAG_RD, &stats->gptc,
6139                         "Good Packets Transmitted");
6140         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6141                         CTLFLAG_RD, &stats->bptc,
6142                         "Broadcast Packets Transmitted");
6143         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6144                         CTLFLAG_RD, &stats->mptc,
6145                         "Multicast Packets Transmitted");
6146         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6147                         CTLFLAG_RD, &stats->ptc64,
6148                         "64 byte frames transmitted");
6149         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6150                         CTLFLAG_RD, &stats->ptc127,
6151                         "65-127 byte frames transmitted");
6152         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6153                         CTLFLAG_RD, &stats->ptc255,
6154                         "128-255 byte frames transmitted");
6155         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6156                         CTLFLAG_RD, &stats->ptc511,
6157                         "256-511 byte frames transmitted");
6158         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6159                         CTLFLAG_RD, &stats->ptc1023,
6160                         "512-1023 byte frames transmitted");
6161         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6162                         CTLFLAG_RD, &stats->ptc1522,
6163                         "1024-1522 byte frames transmitted");
6164         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6165                         CTLFLAG_RD, &stats->tsctc,
6166                         "TSO Contexts Transmitted");
6167         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6168                         CTLFLAG_RD, &stats->tsctfc,
6169                         "TSO Contexts Failed");
6170
6171
6172         /* Interrupt Stats */
6173
6174         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
6175                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
6176         int_list = SYSCTL_CHILDREN(int_node);
6177
6178         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6179                         CTLFLAG_RD, &stats->iac,
6180                         "Interrupt Assertion Count");
6181
6182         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6183                         CTLFLAG_RD, &stats->icrxptc,
6184                         "Interrupt Cause Rx Pkt Timer Expire Count");
6185
6186         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6187                         CTLFLAG_RD, &stats->icrxatc,
6188                         "Interrupt Cause Rx Abs Timer Expire Count");
6189
6190         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6191                         CTLFLAG_RD, &stats->ictxptc,
6192                         "Interrupt Cause Tx Pkt Timer Expire Count");
6193
6194         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6195                         CTLFLAG_RD, &stats->ictxatc,
6196                         "Interrupt Cause Tx Abs Timer Expire Count");
6197
6198         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6199                         CTLFLAG_RD, &stats->ictxqec,
6200                         "Interrupt Cause Tx Queue Empty Count");
6201
6202         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6203                         CTLFLAG_RD, &stats->ictxqmtc,
6204                         "Interrupt Cause Tx Queue Min Thresh Count");
6205
6206         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6207                         CTLFLAG_RD, &stats->icrxdmtc,
6208                         "Interrupt Cause Rx Desc Min Thresh Count");
6209
6210         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6211                         CTLFLAG_RD, &stats->icrxoc,
6212                         "Interrupt Cause Receiver Overrun Count");
6213
6214         /* Host to Card Stats */
6215
6216         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
6217                                     CTLFLAG_RD, NULL, 
6218                                     "Host to Card Statistics");
6219
6220         host_list = SYSCTL_CHILDREN(host_node);
6221
6222         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6223                         CTLFLAG_RD, &stats->cbtmpc,
6224                         "Circuit Breaker Tx Packet Count");
6225
6226         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6227                         CTLFLAG_RD, &stats->htdpmc,
6228                         "Host Transmit Discarded Packets");
6229
6230         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6231                         CTLFLAG_RD, &stats->rpthc,
6232                         "Rx Packets To Host");
6233
6234         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6235                         CTLFLAG_RD, &stats->cbrmpc,
6236                         "Circuit Breaker Rx Packet Count");
6237
6238         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6239                         CTLFLAG_RD, &stats->cbrdpc,
6240                         "Circuit Breaker Rx Dropped Count");
6241
6242         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6243                         CTLFLAG_RD, &stats->hgptc,
6244                         "Host Good Packets Tx Count");
6245
6246         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6247                         CTLFLAG_RD, &stats->htcbdpc,
6248                         "Host Tx Circuit Breaker Dropped Count");
6249
6250         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6251                         CTLFLAG_RD, &stats->hgorc,
6252                         "Host Good Octets Received Count");
6253
6254         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6255                         CTLFLAG_RD, &stats->hgotc,
6256                         "Host Good Octets Transmit Count");
6257
6258         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6259                         CTLFLAG_RD, &stats->lenerrs,
6260                         "Length Errors");
6261
6262         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6263                         CTLFLAG_RD, &stats->scvpc,
6264                         "SerDes/SGMII Code Violation Pkt Count");
6265
6266         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6267                         CTLFLAG_RD, &stats->hrmpc,
6268                         "Header Redirection Missed Packet Count");
6269 }
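     /*
      * The tree built above hangs off the device's sysctl node, so
      * statistics can be inspected with sysctl(8), e.g. (unit number
      * is illustrative):
      *
      *   # sysctl dev.igb.0.mac_stats.good_pkts_recvd
      *   # sysctl dev.igb.0.queue0.tx_packets
      *   # sysctl dev.igb.0.interrupts.asserts
      */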
6270
6271
6272 /**********************************************************************
6273  *
6274  *  This routine provides a way to dump out the adapter EEPROM,
6275  *  often a useful debug/service tool. Only the first 32 words
6276  *  are dumped; the data that matters lives within that extent.
6277  *
6278  **********************************************************************/
6279 static int
6280 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6281 {
6282         struct adapter *adapter;
6283         int error;
6284         int result;
6285
6286         result = -1;
6287         error = sysctl_handle_int(oidp, &result, 0, req);
6288
6289         if (error || !req->newptr)
6290                 return (error);
6291
6292         /*
6293          * This value will cause a hex dump of the
6294          * first 32 16-bit words of the EEPROM to
6295          * the screen.
6296          */
6297         if (result == 1) {
6298                 adapter = (struct adapter *)arg1;
6299                 igb_print_nvm_info(adapter);
6300         }
6301
6302         return (error);
6303 }
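     /*
      * Writing 1 to the backing sysctl (registered elsewhere in the
      * driver, conventionally as dev.igb.<unit>.nvm) triggers the
      * dump, e.g.:
      *
      *   # sysctl dev.igb.0.nvm=1
      */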
6304
6305 static void
6306 igb_print_nvm_info(struct adapter *adapter)
6307 {
6308         u16     eeprom_data;
6309         int     i, j, row = 0;
6310
6311         /* It's a bit crude, but it gets the job done */
6312         printf("\nInterface EEPROM Dump:\n");
6313         printf("Offset\n0x0000  ");
6314         for (i = 0, j = 0; i < 32; i++, j++) {
6315                 if (j == 8) { /* Make the offset block */
6316                         j = 0; ++row;
6317                         printf("\n0x00%x0  ",row);
6318                 }
6319                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6320                 printf("%04x ", eeprom_data);
6321         }
6322         printf("\n");
6323 }
6324
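     /*
      * Convenience wrapper: seed *limit with the given value and
      * expose it as a read/write integer sysctl under the device's
      * sysctl tree.
      */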
6325 static void
6326 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6327         const char *description, int *limit, int value)
6328 {
6329         *limit = value;
6330         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6331             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6332             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6333 }
6334
6335 /*
6336 ** Set flow control using sysctl:
6337 ** Flow control values:
6338 **      0 - off
6339 **      1 - rx pause
6340 **      2 - tx pause
6341 **      3 - full
6342 */
6343 static int
6344 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6345 {
6346         int             error;
6347         static int      input = 3; /* default is full */
6348         struct adapter  *adapter = (struct adapter *) arg1;
6349
6350         error = sysctl_handle_int(oidp, &input, 0, req);
6351
6352         if ((error) || (req->newptr == NULL))
6353                 return (error);
6354
6355         switch (input) {
6356                 case e1000_fc_rx_pause:
6357                 case e1000_fc_tx_pause:
6358                 case e1000_fc_full:
6359                 case e1000_fc_none:
6360                         adapter->hw.fc.requested_mode = input;
6361                         adapter->fc = input;
6362                         break;
6363                 default:
6364                         /* Do nothing */
6365                         return (error);
6366         }
6367
6368         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6369         e1000_force_mac_fc(&adapter->hw);
6370         /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6371         return (error);
6372 }
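     /*
      * Example (assuming the handler is attached under its
      * conventional "fc" node):
      *
      *   # sysctl dev.igb.0.fc=3      (request full flow control)
      */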
6373
6374 /*
6375 ** Manage DMA Coalesce:
6376 ** Control values:
6377 **      0/1 - off/on
6378 **      Legal timer values are:
6379 **      250, 500, and 1000-10000 in steps of 1000
6380 */
6381 static int
6382 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6383 {
6384         struct adapter *adapter = (struct adapter *) arg1;
6385         int             error;
6386
6387         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6388
6389         if ((error) || (req->newptr == NULL))
6390                 return (error);
6391
6392         switch (adapter->dmac) {
6393                 case 0:
6394                         /* Disabling */
6395                         break;
6396                 case 1: /* Just enable and use default */
6397                         adapter->dmac = 1000;
6398                         break;
6399                 case 250:
6400                 case 500:
6401                 case 1000:
6402                 case 2000:
6403                 case 3000:
6404                 case 4000:
6405                 case 5000:
6406                 case 6000:
6407                 case 7000:
6408                 case 8000:
6409                 case 9000:
6410                 case 10000:
6411                         /* Legal values - allow */
6412                         break;
6413                 default:
6414                         /* Do nothing, illegal value */
6415                         adapter->dmac = 0;
6416                         return (EINVAL);
6417         }
6418         /* Reinit the interface */
6419         igb_init(adapter);
6420         return (error);
6421 }
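     /*
      * Example (assuming the handler is attached under its
      * conventional "dmac" node):
      *
      *   # sysctl dev.igb.0.dmac=1000  (enable with a legal timer value)
      */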
6422
6423 /*
6424 ** Manage Energy Efficient Ethernet:
6425 ** Control values:
6426 **     0 - EEE enabled, 1 - EEE disabled
6427 */
6428 static int
6429 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6430 {
6431         struct adapter  *adapter = (struct adapter *) arg1;
6432         int             error, value;
6433
6434         value = adapter->hw.dev_spec._82575.eee_disable;
6435         error = sysctl_handle_int(oidp, &value, 0, req);
6436         if (error || req->newptr == NULL)
6437                 return (error);
6438         IGB_CORE_LOCK(adapter);
6439         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6440         igb_init_locked(adapter);
6441         IGB_CORE_UNLOCK(adapter);
6442         return (0);
6443 }
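     /*
      * Example (assuming the handler is attached under its
      * conventional "eee_disabled" node; 1 disables EEE, 0 enables):
      *
      *   # sysctl dev.igb.0.eee_disabled=1
      */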