/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include "if_igb.h"
/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "2.5.3-k";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/
62
63 static igb_vendor_info_t igb_vendor_info_array[] =
64 {
65         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
66         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
67         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
68         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
69         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
70         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
71         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
72         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
73         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
74         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
75         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
76         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
77         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
78         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
79         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
80         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
81         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
82         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
83         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
84         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
85         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
86         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
87         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
88         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER,  0, 0, 0},
89         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
90         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII,  0, 0, 0},
91         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
92         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
93         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
94         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
95         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
96         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
97         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER,  0, 0, 0},
98         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
99         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII,  0, 0, 0},
100         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
101         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
102         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
103         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII,  0, 0, 0},
104         /* required last entry */
105         {0, 0, 0, 0, 0}
106 };
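
/*
 * Note: igb_probe() treats a zero subvendor or subdevice field as a
 * wildcard, so an entry such as
 *     {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0}
 * matches an I350 copper port regardless of its subsystem IDs.
 */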

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static uint64_t igb_get_counter(if_t, ift_counter);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static int      igb_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      igb_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(igb, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");

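/*
 * Since these are CTLFLAG_RDTUN tunables, they can be set from
 * /boot/loader.conf before the driver attaches; for example
 * (values illustrative only):
 *     hw.igb.rxd=2048
 *     hw.igb.txd=2048
 */
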
/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector.
*/
static int igb_enable_aim = TRUE;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

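/*
 * Because enable_aim is CTLFLAG_RWTUN, it can also be toggled at
 * runtime, e.g. (illustrative):
 *     sysctl hw.igb.enable_aim=0
 */
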
/*
 * MSI-X should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

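/*
 * At the default of 8000 interrupts per second, each vector is
 * throttled to no more than one interrupt roughly every
 * 125 us (1 s / 8000).
 */
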
#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be DMA'd into a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf, avoiding a cluster allocation. It is a
** very workload-dependent feature.
*/
static int igb_header_split = FALSE;
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSI-X messages if left at 0.
*/
static int igb_num_queues = 0;
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a CPU.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int igb_tx_process_limit = -1;
SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &igb_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, -1 means unlimited");

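/*
 * Trade-off (illustrative): raising hw.igb.rx_process_limit, or setting
 * it to -1 for unlimited, lets each taskqueue pass clean more packets
 * at the cost of holding off other work longer; the default of 100
 * bounds the time spent in igb_rxeof() per invocation.
 */
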
#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines whether the driver should be loaded on an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[256];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == 0)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTLs */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctls for limiting the amount of work done in the taskqueues */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        igb_set_sysctl_value(adapter, "tx_processing_limit",
            "max number of tx packets to process",
            &adapter->tx_process_limit, igb_tx_process_limit);

        /*
         * Validate number of transmit and receive descriptors. It
         * must not exceed hardware maximum, and must be a multiple
         * of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats = (struct e1000_vf_stats *)malloc(
                    sizeof(struct e1000_vf_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats = (struct e1000_hw_stats *)malloc(
                    sizeof(struct e1000_hw_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                        if (adapter->hw.mac.type == e1000_i354)
                                e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                        else
                                e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
                }
        }

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from the hardware.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                if (adapter->vf_ifp) {
                        u8 addr[ETHER_ADDR_LEN];
                        arc4rand(&addr, sizeof(addr), 0);
                        addr[0] &= 0xFE;
                        addr[0] |= 0x02;
                        bcopy(addr, adapter->hw.mac.addr, sizeof(addr));
                } else {
                        device_printf(dev, "Invalid MAC address\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */
                return (error);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack; it always uses the first tx ring and
 * spins for the lock.  Should not be used with
 * multiqueue tx enabled.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;
#ifdef  RSS
        uint32_t                bucket_id;
#endif

        /*
         * Which queue to use:
         *
         * When doing RSS, map it to the same outbound queue
         * as the incoming flow would be mapped to.
         *
         * If everything is set up correctly, it should be the
         * same bucket that the current CPU we're on is.
         */
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef  RSS
                if (rss_hash2bucket(m->m_pkthdr.flowid,
                    M_HASHTYPE_GET(m), &bucket_id) == 0) {
                        /* XXX TODO: spit out something if bucket_id > num_queues? */
                        i = bucket_id % adapter->num_queues;
                } else {
#endif
                        i = m->m_pkthdr.flowid % adapter->num_queues;
#ifdef  RSS
                }
#endif
        } else {
                i = curcpu % adapter->num_queues;
        }
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IGB_TX_TRYLOCK(txr)) {
                igb_mq_start_locked(ifp, txr);
                IGB_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (0);
}

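/*
 * Worked example of the queue mapping above (non-RSS case, illustrative):
 * with adapter->num_queues == 4 and m->m_pkthdr.flowid == 0x1234abcd,
 * the frame is enqueued on ring 0x1234abcd % 4 == 1, so all packets of
 * a given flow stay on the same tx ring.
 */
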
static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                if (next->m_flags & M_MCAST && adapter->vf_ifp)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

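/*
 * The peek/advance/putback pattern above keeps the mbuf on the buf-ring
 * until igb_xmit() succeeds: drbr_advance() consumes it only after a
 * successful transmit, while drbr_putback() restores the (possibly
 * modified) mbuf so nothing is lost when descriptors run out.
 */
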
/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                        igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
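        /*
         * E.g., with max_frame_size capped at 9234 bytes, the largest
         * accepted MTU is 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4)
         * = 9216 bytes.
         */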
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
                /* FALLTHROUGH */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
#if __FreeBSD_version >= 1000000
                /* HW cannot turn these on/off separately */
                if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
                        ifp->if_capenable ^= IFCAP_RXCSUM;
                        ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
                        reinit = 1;
                }
                if (mask & IFCAP_TXCSUM) {
                        ifp->if_capenable ^= IFCAP_TXCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TXCSUM_IPV6) {
                        ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
                        reinit = 1;
                }
#else
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
#endif
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO6) {
                        ifp->if_capenable ^= IFCAP_TSO6;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }
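        /*
         * Worked example of the mask logic above: if IFCAP_TXCSUM is
         * currently enabled and the request clears it, then
         * (ifr_reqcap ^ if_capenable) has the IFCAP_TXCSUM bit set, so
         * the XOR toggles it off and the interface is reinitialized.
         */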

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest MAC address; the user can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
#if __FreeBSD_version >= 1000000
                ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_IP_SCTP;
#else
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
#endif
        }

#if __FreeBSD_version >= 1000000
        if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
                ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_IP6_SCTP;
        }
#endif
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;

        /* Clear bad data from Rx FIFOs */
        e1000_rx_fifo_flush_82575(&adapter->hw);

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
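
        /*
         * E.g., a 9000-byte MTU yields max_frame_size = 9018, which
         * falls through to MJUM9BYTES (9k clusters); the default
         * 1500-byte MTU (max_frame_size 1518) uses standard MCLBYTES
         * clusters.
         */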

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                if (adapter->hw.mac.type == e1000_i354)
                        e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                else
                        e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
        }
}

1392 static void
1393 igb_init(void *arg)
1394 {
1395         struct adapter *adapter = arg;
1396
1397         IGB_CORE_LOCK(adapter);
1398         igb_init_locked(adapter);
1399         IGB_CORE_UNLOCK(adapter);
1400 }
1401
1402
1403 static void
1404 igb_handle_que(void *context, int pending)
1405 {
1406         struct igb_queue *que = context;
1407         struct adapter *adapter = que->adapter;
1408         struct tx_ring *txr = que->txr;
1409         struct ifnet    *ifp = adapter->ifp;
1410
1411         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1412                 bool    more;
1413
1414                 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1415
1416                 IGB_TX_LOCK(txr);
1417                 igb_txeof(txr);
1418 #ifndef IGB_LEGACY_TX
1419                 /* Process the stack queue only if not depleted */
1420                 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1421                     !drbr_empty(ifp, txr->br))
1422                         igb_mq_start_locked(ifp, txr);
1423 #else
1424                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1425                         igb_start_locked(txr, ifp);
1426 #endif
1427                 IGB_TX_UNLOCK(txr);
1428                 /* Do we need another? */
1429                 if (more) {
1430                         taskqueue_enqueue(que->tq, &que->que_task);
1431                         return;
1432                 }
1433         }
1434
1435 #ifdef DEVICE_POLLING
1436         if (ifp->if_capenable & IFCAP_POLLING)
1437                 return;
1438 #endif
1439         /* Reenable this queue's interrupt (MSI-X), or all interrupts (MSI/legacy) */
1440         if (que->eims)
1441                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1442         else
1443                 igb_enable_intr(adapter);
1444 }
1445
1446 /* Deal with link in a sleepable context */
1447 static void
1448 igb_handle_link(void *context, int pending)
1449 {
1450         struct adapter *adapter = context;
1451
1452         IGB_CORE_LOCK(adapter);
1453         igb_handle_link_locked(adapter);
1454         IGB_CORE_UNLOCK(adapter);
1455 }
1456
1457 static void
1458 igb_handle_link_locked(struct adapter *adapter)
1459 {
1460         struct tx_ring  *txr = adapter->tx_rings;
1461         struct ifnet *ifp = adapter->ifp;
1462
1463         IGB_CORE_LOCK_ASSERT(adapter);
1464         adapter->hw.mac.get_link_status = 1;
1465         igb_update_link_status(adapter);
1466         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1467                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1468                         IGB_TX_LOCK(txr);
1469 #ifndef IGB_LEGACY_TX
1470                         /* Process the stack queue only if not depleted */
1471                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1472                             !drbr_empty(ifp, txr->br))
1473                                 igb_mq_start_locked(ifp, txr);
1474 #else
1475                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1476                                 igb_start_locked(txr, ifp);
1477 #endif
1478                         IGB_TX_UNLOCK(txr);
1479                 }
1480         }
1481 }
1482
1483 /*********************************************************************
1484  *
1485  *  MSI/Legacy Deferred
1486  *  Interrupt Service routine  
1487  *
1488  *********************************************************************/
1489 static int
1490 igb_irq_fast(void *arg)
1491 {
1492         struct adapter          *adapter = arg;
1493         struct igb_queue        *que = adapter->queues;
1494         u32                     reg_icr;
1495
1496
1497         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1498
1499         /* Hot eject?  */
1500         if (reg_icr == 0xffffffff)
1501                 return FILTER_STRAY;
1502
1503         /* Definitely not our interrupt.  */
1504         if (reg_icr == 0x0)
1505                 return FILTER_STRAY;
1506
1507         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1508                 return FILTER_STRAY;
1509
1510         /*
1511          * Mask interrupts until the taskqueue is finished running.  This is
1512          * cheap, just assume that it is needed.  This also works around the
1513          * MSI message reordering errata on certain systems.
1514          */
1515         igb_disable_intr(adapter);
1516         taskqueue_enqueue(que->tq, &que->que_task);
1517
1518         /* Link status change */
1519         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1520                 taskqueue_enqueue(que->tq, &adapter->link_task);
1521
1522         if (reg_icr & E1000_ICR_RXO)
1523                 adapter->rx_overruns++;
1524         return FILTER_HANDLED;
1525 }
1526
1527 #ifdef DEVICE_POLLING
1528 #if __FreeBSD_version >= 800000
1529 #define POLL_RETURN_COUNT(a) (a)
1530 static int
1531 #else
1532 #define POLL_RETURN_COUNT(a)
1533 static void
1534 #endif
1535 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1536 {
1537         struct adapter          *adapter = ifp->if_softc;
1538         struct igb_queue        *que;
1539         struct tx_ring          *txr;
1540         u32                     reg_icr, rx_done = 0;
1541         u32                     loop = IGB_MAX_LOOP;
1542         bool                    more;
1543
1544         IGB_CORE_LOCK(adapter);
1545         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1546                 IGB_CORE_UNLOCK(adapter);
1547                 return POLL_RETURN_COUNT(rx_done);
1548         }
1549
1550         if (cmd == POLL_AND_CHECK_STATUS) {
1551                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1552                 /* Link status change */
1553                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1554                         igb_handle_link_locked(adapter);
1555
1556                 if (reg_icr & E1000_ICR_RXO)
1557                         adapter->rx_overruns++;
1558         }
1559         IGB_CORE_UNLOCK(adapter);
1560
1561         for (int i = 0; i < adapter->num_queues; i++) {
1562                 que = &adapter->queues[i];
1563                 txr = que->txr;
1564
1565                 igb_rxeof(que, count, &rx_done);
1566
1567                 IGB_TX_LOCK(txr);
1568                 do {
1569                         more = igb_txeof(txr);
1570                 } while (loop-- && more);
1571 #ifndef IGB_LEGACY_TX
1572                 if (!drbr_empty(ifp, txr->br))
1573                         igb_mq_start_locked(ifp, txr);
1574 #else
1575                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1576                         igb_start_locked(txr, ifp);
1577 #endif
1578                 IGB_TX_UNLOCK(txr);
1579         }
1580
1581         return POLL_RETURN_COUNT(rx_done);
1582 }
1583 #endif /* DEVICE_POLLING */
1584
1585 /*********************************************************************
1586  *
1587  *  MSIX Que Interrupt Service routine
1588  *
1589  **********************************************************************/
1590 static void
1591 igb_msix_que(void *arg)
1592 {
1593         struct igb_queue *que = arg;
1594         struct adapter *adapter = que->adapter;
1595         struct ifnet   *ifp = adapter->ifp;
1596         struct tx_ring *txr = que->txr;
1597         struct rx_ring *rxr = que->rxr;
1598         u32             newitr = 0;
1599         bool            more_rx;
1600
1601         /* Ignore spurious interrupts */
1602         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1603                 return;
1604
1605         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1606         ++que->irqs;
1607
1608         IGB_TX_LOCK(txr);
1609         igb_txeof(txr);
1610 #ifndef IGB_LEGACY_TX
1611         /* Process the stack queue only if not depleted */
1612         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1613             !drbr_empty(ifp, txr->br))
1614                 igb_mq_start_locked(ifp, txr);
1615 #else
1616         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1617                 igb_start_locked(txr, ifp);
1618 #endif
1619         IGB_TX_UNLOCK(txr);
1620
1621         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1622
1623         if (adapter->enable_aim == FALSE)
1624                 goto no_calc;
1625         /*
1626         ** Do Adaptive Interrupt Moderation:
1627         **  - Write out last calculated setting
1628         **  - Calculate based on average size over
1629         **    the last interval.
1630         */
1631         if (que->eitr_setting)
1632                 E1000_WRITE_REG(&adapter->hw,
1633                     E1000_EITR(que->msix), que->eitr_setting);
1634
1635         que->eitr_setting = 0;
1636
1637         /* Idle, do nothing */
1638         if ((txr->bytes == 0) && (rxr->bytes == 0))
1639                 goto no_calc;
1640
1641         /* Use half the default if sub-gig */
1642         if (adapter->link_speed != 1000)
1643                 newitr = IGB_DEFAULT_ITR / 2;
1644         else {
1645                 if ((txr->bytes) && (txr->packets))
1646                         newitr = txr->bytes/txr->packets;
1647                 if ((rxr->bytes) && (rxr->packets))
1648                         newitr = max(newitr,
1649                             (rxr->bytes / rxr->packets));
1650                 newitr += 24; /* account for preamble, IFG and CRC */
1651                 /* set an upper boundary */
1652                 newitr = min(newitr, 3000);
1653                 /* Be nice to the mid range */
1654                 if ((newitr > 300) && (newitr < 1200))
1655                         newitr = (newitr / 3);
1656                 else
1657                         newitr = (newitr / 2);
1658         }
1659         newitr &= 0x7FFC;  /* Mask invalid bits */
1660         if (adapter->hw.mac.type == e1000_82575)
1661                 newitr |= newitr << 16;
1662         else
1663                 newitr |= E1000_EITR_CNT_IGNR;
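        /*
        ** The 82575 mirrors the interval into EITR's high half; later
        ** MACs instead set CNT_IGNR, which by all appearances loads
        ** the new interval without waiting out the current countdown.
        **
        ** Worked example: an average of 1024 bytes/packet gives
        ** (1024 + 24) / 3 = 349 in the mid range, masked to 348; with
        ** EITR ticking 4000000 times a second (see the rate math in
        ** igb_configure_queues) that is roughly 11500 interrupts/sec.
        */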
1664
1665         /* save for next interrupt */
1666         que->eitr_setting = newitr;
1667
1668         /* Reset state */
1669         txr->bytes = 0;
1670         txr->packets = 0;
1671         rxr->bytes = 0;
1672         rxr->packets = 0;
1673
1674 no_calc:
1675         /* Schedule a clean task if needed */
1676         if (more_rx)
1677                 taskqueue_enqueue(que->tq, &que->que_task);
1678         else
1679                 /* Reenable this interrupt */
1680                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1681         return;
1682 }
1683
1684
1685 /*********************************************************************
1686  *
1687  *  MSIX Link Interrupt Service routine
1688  *
1689  **********************************************************************/
1690
1691 static void
1692 igb_msix_link(void *arg)
1693 {
1694         struct adapter  *adapter = arg;
1695         u32             icr;
1696
1697         ++adapter->link_irq;
1698         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1699         if (!(icr & E1000_ICR_LSC))
1700                 goto spurious;
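        /*
        ** Called directly rather than through the taskqueue; this
        ** handler runs in ithread context, so taking the core lock
        ** inside igb_handle_link() is safe here.
        */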
1701         igb_handle_link(adapter, 0);
1702
1703 spurious:
1704         /* Rearm */
1705         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1706         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1707         return;
1708 }
1709
1710
1711 /*********************************************************************
1712  *
1713  *  Media Ioctl callback
1714  *
1715  *  This routine is called whenever the user queries the status of
1716  *  the interface using ifconfig.
1717  *
1718  **********************************************************************/
1719 static void
1720 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1721 {
1722         struct adapter *adapter = ifp->if_softc;
1723
1724         INIT_DEBUGOUT("igb_media_status: begin");
1725
1726         IGB_CORE_LOCK(adapter);
1727         igb_update_link_status(adapter);
1728
1729         ifmr->ifm_status = IFM_AVALID;
1730         ifmr->ifm_active = IFM_ETHER;
1731
1732         if (!adapter->link_active) {
1733                 IGB_CORE_UNLOCK(adapter);
1734                 return;
1735         }
1736
1737         ifmr->ifm_status |= IFM_ACTIVE;
1738
1739         switch (adapter->link_speed) {
1740         case 10:
1741                 ifmr->ifm_active |= IFM_10_T;
1742                 break;
1743         case 100:
1744                 /*
1745                 ** Support for 100Mb SFP - these are Fiber 
1746                 ** but the media type appears as serdes
1747                 */
1748                 if (adapter->hw.phy.media_type ==
1749                     e1000_media_type_internal_serdes)
1750                         ifmr->ifm_active |= IFM_100_FX;
1751                 else
1752                         ifmr->ifm_active |= IFM_100_TX;
1753                 break;
1754         case 1000:
1755                 ifmr->ifm_active |= IFM_1000_T;
1756                 break;
1757         case 2500:
1758                 ifmr->ifm_active |= IFM_2500_SX;
1759                 break;
1760         }
1761
1762         if (adapter->link_duplex == FULL_DUPLEX)
1763                 ifmr->ifm_active |= IFM_FDX;
1764         else
1765                 ifmr->ifm_active |= IFM_HDX;
1766
1767         IGB_CORE_UNLOCK(adapter);
1768 }
1769
1770 /*********************************************************************
1771  *
1772  *  Media Ioctl callback
1773  *
1774  *  This routine is called when the user changes speed/duplex using
1775  *  media/mediaopt option with ifconfig.
1776  *
1777  **********************************************************************/
1778 static int
1779 igb_media_change(struct ifnet *ifp)
1780 {
1781         struct adapter *adapter = ifp->if_softc;
1782         struct ifmedia  *ifm = &adapter->media;
1783
1784         INIT_DEBUGOUT("igb_media_change: begin");
1785
1786         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1787                 return (EINVAL);
1788
1789         IGB_CORE_LOCK(adapter);
1790         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1791         case IFM_AUTO:
1792                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1793                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1794                 break;
1795         case IFM_1000_LX:
1796         case IFM_1000_SX:
1797         case IFM_1000_T:
1798                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1799                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1800                 break;
1801         case IFM_100_TX:
1802                 adapter->hw.mac.autoneg = FALSE;
1803                 adapter->hw.phy.autoneg_advertised = 0;
1804                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1805                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1806                 else
1807                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1808                 break;
1809         case IFM_10_T:
1810                 adapter->hw.mac.autoneg = FALSE;
1811                 adapter->hw.phy.autoneg_advertised = 0;
1812                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1813                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1814                 else
1815                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1816                 break;
1817         default:
1818                 device_printf(adapter->dev, "Unsupported media type\n");
1819         }
1820
1821         igb_init_locked(adapter);
1822         IGB_CORE_UNLOCK(adapter);
1823
1824         return (0);
1825 }
1826
1827
1828 /*********************************************************************
1829  *
1830  *  This routine maps the mbufs to Advanced TX descriptors.
1831  *  
1832  **********************************************************************/
1833 static int
1834 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1835 {
1836         struct adapter  *adapter = txr->adapter;
1837         u32             olinfo_status = 0, cmd_type_len;
1838         int             i, j, error, nsegs;
1839         int             first;
1840         bool            remap = TRUE;
1841         struct mbuf     *m_head;
1842         bus_dma_segment_t segs[IGB_MAX_SCATTER];
1843         bus_dmamap_t    map;
1844         struct igb_tx_buf *txbuf;
1845         union e1000_adv_tx_desc *txd = NULL;
1846
1847         m_head = *m_headp;
1848
1849         /* Basic descriptor defines */
1850         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1851             E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1852
1853         if (m_head->m_flags & M_VLANTAG)
1854                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1855
1856         /*
1857          * Important to capture the first descriptor
1858          * used because it will contain the index of
1859          * the one we tell the hardware to report back
1860          */
1861         first = txr->next_avail_desc;
1862         txbuf = &txr->tx_buffers[first];
1863         map = txbuf->map;
1864
1865         /*
1866          * Map the packet for DMA.
1867          */
1868 retry:
1869         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1870             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1871
1872         if (__predict_false(error)) {
1873                 struct mbuf *m;
1874
1875                 switch (error) {
1876                 case EFBIG:
1877                         /* Try it again? - one try */
1878                         if (remap == TRUE) {
1879                                 remap = FALSE;
1880                                 m = m_collapse(*m_headp, M_NOWAIT,
1881                                     IGB_MAX_SCATTER);
1882                                 if (m == NULL) {
1883                                         adapter->mbuf_defrag_failed++;
1884                                         m_freem(*m_headp);
1885                                         *m_headp = NULL;
1886                                         return (ENOBUFS);
1887                                 }
1888                                 *m_headp = m;
1889                                 goto retry;
1890                         } else
1891                                 return (error);
1892                 default:
1893                         txr->no_tx_dma_setup++;
1894                         m_freem(*m_headp);
1895                         *m_headp = NULL;
1896                         return (error);
1897                 }
1898         }
1899
1900         /* Make certain there are enough descriptors */
1901         if (txr->tx_avail < (nsegs + 2)) {
1902                 txr->no_desc_avail++;
1903                 bus_dmamap_unload(txr->txtag, map);
1904                 return (ENOBUFS);
1905         }
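        /*
        ** (nsegs data descriptors, plus one for the offload context
        ** set up below, plus one slot of headroom.)
        */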
1906         m_head = *m_headp;
1907
1908         /*
1909         ** Set up the appropriate offload context
1910         ** this will consume the first descriptor
1911         */
1912         error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1913         if (__predict_false(error)) {
1914                 m_freem(*m_headp);
1915                 *m_headp = NULL;
1916                 return (error);
1917         }
1918
1919         /* 82575 needs the queue index added */
1920         if (adapter->hw.mac.type == e1000_82575)
1921                 olinfo_status |= txr->me << 4;
1922
1923         i = txr->next_avail_desc;
1924         for (j = 0; j < nsegs; j++) {
1925                 bus_size_t seglen;
1926                 bus_addr_t segaddr;
1927
1928                 txbuf = &txr->tx_buffers[i];
1929                 txd = &txr->tx_base[i];
1930                 seglen = segs[j].ds_len;
1931                 segaddr = htole64(segs[j].ds_addr);
1932
1933                 txd->read.buffer_addr = segaddr;
1934                 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1935                     cmd_type_len | seglen);
1936                 txd->read.olinfo_status = htole32(olinfo_status);
1937
1938                 if (++i == txr->num_desc)
1939                         i = 0;
1940         }
1941
1942         txd->read.cmd_type_len |=
1943             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1944         txr->tx_avail -= nsegs;
1945         txr->next_avail_desc = i;
1946
1947         txbuf->m_head = m_head;
1948         /*
1949         ** Here we swap the map so the last descriptor,
1950         ** which gets the completion interrupt, has the
1951         ** real map, and the first descriptor gets the
1952         ** unused map from this descriptor.
1953         */
1954         txr->tx_buffers[first].map = txbuf->map;
1955         txbuf->map = map;
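        /* Flush the frame data to memory so the device sees it */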
1956         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1957
1958         /* Set the EOP descriptor that will be marked done */
1959         txbuf = &txr->tx_buffers[first];
1960         txbuf->eop = txd;
1961
1962         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1963             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1964         /*
1965          * Advance the Transmit Descriptor Tail (TDT); this tells the
1966          * hardware that this frame is available to transmit.
1967          */
1968         ++txr->total_packets;
1969         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1970
1971         return (0);
1972 }

1973 static void
1974 igb_set_promisc(struct adapter *adapter)
1975 {
1976         struct ifnet    *ifp = adapter->ifp;
1977         struct e1000_hw *hw = &adapter->hw;
1978         u32             reg;
1979
1980         if (adapter->vf_ifp) {
1981                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1982                 return;
1983         }
1984
1985         reg = E1000_READ_REG(hw, E1000_RCTL);
1986         if (ifp->if_flags & IFF_PROMISC) {
1987                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1988                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1989         } else if (ifp->if_flags & IFF_ALLMULTI) {
1990                 reg |= E1000_RCTL_MPE;
1991                 reg &= ~E1000_RCTL_UPE;
1992                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1993         }
1994 }
1995
1996 static void
1997 igb_disable_promisc(struct adapter *adapter)
1998 {
1999         struct e1000_hw *hw = &adapter->hw;
2000         struct ifnet    *ifp = adapter->ifp;
2001         u32             reg;
2002         int             mcnt = 0;
2003
2004         if (adapter->vf_ifp) {
2005                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2006                 return;
2007         }
2008         reg = E1000_READ_REG(hw, E1000_RCTL);
2009         reg &= ~E1000_RCTL_UPE;
2010         if (ifp->if_flags & IFF_ALLMULTI)
2011                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2012         else {
2013                 struct  ifmultiaddr *ifma;
2014 #if __FreeBSD_version < 800000
2015                 IF_ADDR_LOCK(ifp);
2016 #else   
2017                 if_maddr_rlock(ifp);
2018 #endif
2019                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2020                         if (ifma->ifma_addr->sa_family != AF_LINK)
2021                                 continue;
2022                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2023                                 break;
2024                         mcnt++;
2025                 }
2026 #if __FreeBSD_version < 800000
2027                 IF_ADDR_UNLOCK(ifp);
2028 #else
2029                 if_maddr_runlock(ifp);
2030 #endif
2031         }
2032         /* Don't disable multicast promisc while at MAX groups */
2033         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2034                 reg &= ~E1000_RCTL_MPE;
2035         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2036 }
2037
2038
2039 /*********************************************************************
2040  *  Multicast Update
2041  *
2042  *  This routine is called whenever multicast address list is updated.
2043  *
2044  **********************************************************************/
2045
2046 static void
2047 igb_set_multi(struct adapter *adapter)
2048 {
2049         struct ifnet    *ifp = adapter->ifp;
2050         struct ifmultiaddr *ifma;
2051         u32 reg_rctl = 0;
2052         u8  *mta;
2053
2054         int mcnt = 0;
2055
2056         IOCTL_DEBUGOUT("igb_set_multi: begin");
2057
2058         mta = adapter->mta;
2059         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2060             MAX_NUM_MULTICAST_ADDRESSES);
2061
2062 #if __FreeBSD_version < 800000
2063         IF_ADDR_LOCK(ifp);
2064 #else
2065         if_maddr_rlock(ifp);
2066 #endif
2067         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2068                 if (ifma->ifma_addr->sa_family != AF_LINK)
2069                         continue;
2070
2071                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2072                         break;
2073
2074                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2075                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2076                 mcnt++;
2077         }
2078 #if __FreeBSD_version < 800000
2079         IF_ADDR_UNLOCK(ifp);
2080 #else
2081         if_maddr_runlock(ifp);
2082 #endif
2083
2084         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2085                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2086                 reg_rctl |= E1000_RCTL_MPE;
2087                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2088         } else
2089                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2090 }
2091
2092
2093 /*********************************************************************
2094  *  Timer routine:
2095  *      This routine checks for link status,
2096  *      updates statistics, and does the watchdog.
2097  *
2098  **********************************************************************/
2099
2100 static void
2101 igb_local_timer(void *arg)
2102 {
2103         struct adapter          *adapter = arg;
2104         device_t                dev = adapter->dev;
2105         struct ifnet            *ifp = adapter->ifp;
2106         struct tx_ring          *txr = adapter->tx_rings;
2107         struct igb_queue        *que = adapter->queues;
2108         int                     hung = 0, busy = 0;
2109
2110
2111         IGB_CORE_LOCK_ASSERT(adapter);
2112
2113         igb_update_link_status(adapter);
2114         igb_update_stats_counters(adapter);
2115
2116         /*
2117         ** Check the TX queues status
2118         **      - central locked handling of OACTIVE
2119         **      - watchdog only if all queues show hung
2120         */
2121         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2122                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2123                     (adapter->pause_frames == 0))
2124                         ++hung;
2125                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2126                         ++busy;
2127                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2128                         taskqueue_enqueue(que->tq, &que->que_task);
2129         }
2130         if (hung == adapter->num_queues)
2131                 goto timeout;
2132         if (busy == adapter->num_queues)
2133                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2134         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2135             (busy < adapter->num_queues))
2136                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2137
2138         adapter->pause_frames = 0;
2139         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2140 #ifndef DEVICE_POLLING
2141         /* Fire every queue interrupt via EICS so a lost EIMS
                bit cannot wedge cleanup - deadlock protection */
2142         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2143 #endif
2144         return;
2145
2146 timeout:
2147         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2148             device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2149                 E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2150                 E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2151             device_printf(dev, "TX(%d) desc avail = %d, "
2152                 "Next TX to Clean = %d\n",
2153                 txr->me, txr->tx_avail, txr->next_to_clean);
2154         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2155         adapter->watchdog_events++;
2156         igb_init_locked(adapter);
2157 }
2158
2159 static void
2160 igb_update_link_status(struct adapter *adapter)
2161 {
2162         struct e1000_hw         *hw = &adapter->hw;
2163         struct e1000_fc_info    *fc = &hw->fc;
2164         struct ifnet            *ifp = adapter->ifp;
2165         device_t                dev = adapter->dev;
2166         struct tx_ring          *txr = adapter->tx_rings;
2167         u32                     link_check, thstat, ctrl;
2168         char                    *flowctl = NULL;
2169
2170         link_check = thstat = ctrl = 0;
2171
2172         /* Get the cached link value or read for real */
2173         switch (hw->phy.media_type) {
2174         case e1000_media_type_copper:
2175                 if (hw->mac.get_link_status) {
2176                         /* Do the work to read phy */
2177                         e1000_check_for_link(hw);
2178                         link_check = !hw->mac.get_link_status;
2179                 } else
2180                         link_check = TRUE;
2181                 break;
2182         case e1000_media_type_fiber:
2183                 e1000_check_for_link(hw);
2184                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2185                                  E1000_STATUS_LU);
2186                 break;
2187         case e1000_media_type_internal_serdes:
2188                 e1000_check_for_link(hw);
2189                 link_check = adapter->hw.mac.serdes_has_link;
2190                 break;
2191         /* VF device is type_unknown */
2192         case e1000_media_type_unknown:
2193                 e1000_check_for_link(hw);
2194                 link_check = !hw->mac.get_link_status;
2195                 /* Fall thru */
2196         default:
2197                 break;
2198         }
2199
2200         /* Check for thermal downshift or shutdown */
2201         if (hw->mac.type == e1000_i350) {
2202                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2203                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2204         }
2205
2206         /* Get the flow control for display */
2207         switch (fc->current_mode) {
2208         case e1000_fc_rx_pause:
2209                 flowctl = "RX";
2210                 break;  
2211         case e1000_fc_tx_pause:
2212                 flowctl = "TX";
2213                 break;  
2214         case e1000_fc_full:
2215                 flowctl = "Full";
2216                 break;  
2217         case e1000_fc_none:
2218         default:
2219                 flowctl = "None";
2220                 break;  
2221         }
2222
2223         /* Now we check if a transition has happened */
2224         if (link_check && (adapter->link_active == 0)) {
2225                 e1000_get_speed_and_duplex(&adapter->hw, 
2226                     &adapter->link_speed, &adapter->link_duplex);
2227                 if (bootverbose)
2228                         device_printf(dev, "Link is up %d Mbps %s,"
2229                             " Flow Control: %s\n",
2230                             adapter->link_speed,
2231                             ((adapter->link_duplex == FULL_DUPLEX) ?
2232                             "Full Duplex" : "Half Duplex"), flowctl);
2233                 adapter->link_active = 1;
2234                 ifp->if_baudrate = adapter->link_speed * 1000000;
2235                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2236                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2237                         device_printf(dev, "Link: thermal downshift\n");
2238                 /* Delay Link Up for Phy update */
2239                 if (((hw->mac.type == e1000_i210) ||
2240                     (hw->mac.type == e1000_i211)) &&
2241                     (hw->phy.id == I210_I_PHY_ID))
2242                         msec_delay(I210_LINK_DELAY);
2243                 /* Reset if the media type changed. */
2244                 if (hw->dev_spec._82575.media_changed) {
2245                         hw->dev_spec._82575.media_changed = false;
2246                         adapter->flags |= IGB_MEDIA_RESET;
2247                         igb_reset(adapter);
2248                 }       
2249                 /* This can sleep */
2250                 if_link_state_change(ifp, LINK_STATE_UP);
2251         } else if (!link_check && (adapter->link_active == 1)) {
2252                 ifp->if_baudrate = adapter->link_speed = 0;
2253                 adapter->link_duplex = 0;
2254                 if (bootverbose)
2255                         device_printf(dev, "Link is Down\n");
2256                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2257                     (thstat & E1000_THSTAT_PWR_DOWN))
2258                         device_printf(dev, "Link: thermal shutdown\n");
2259                 adapter->link_active = 0;
2260                 /* This can sleep */
2261                 if_link_state_change(ifp, LINK_STATE_DOWN);
2262                 /* Reset queue state */
2263                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2264                         txr->queue_status = IGB_QUEUE_IDLE;
2265         }
2266 }
2267
2268 /*********************************************************************
2269  *
2270  *  This routine disables all traffic on the adapter by issuing a
2271  *  global reset on the MAC and deallocates TX/RX buffers.
2272  *
2273  **********************************************************************/
2274
2275 static void
2276 igb_stop(void *arg)
2277 {
2278         struct adapter  *adapter = arg;
2279         struct ifnet    *ifp = adapter->ifp;
2280         struct tx_ring *txr = adapter->tx_rings;
2281
2282         IGB_CORE_LOCK_ASSERT(adapter);
2283
2284         INIT_DEBUGOUT("igb_stop: begin");
2285
2286         igb_disable_intr(adapter);
2287
2288         callout_stop(&adapter->timer);
2289
2290         /* Tell the stack that the interface is no longer active */
2291         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2292         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2293
2294         /* Disarm watchdog timer. */
2295         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2296                 IGB_TX_LOCK(txr);
2297                 txr->queue_status = IGB_QUEUE_IDLE;
2298                 IGB_TX_UNLOCK(txr);
2299         }
2300
2301         e1000_reset_hw(&adapter->hw);
2302         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2303
2304         e1000_led_off(&adapter->hw);
2305         e1000_cleanup_led(&adapter->hw);
2306 }
2307
2308
2309 /*********************************************************************
2310  *
2311  *  Determine hardware revision.
2312  *
2313  **********************************************************************/
2314 static void
2315 igb_identify_hardware(struct adapter *adapter)
2316 {
2317         device_t dev = adapter->dev;
2318
2319         /* Make sure our PCI config space has the necessary stuff set */
2320         pci_enable_busmaster(dev);
2321         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2322
2323         /* Save off the information about this board */
2324         adapter->hw.vendor_id = pci_get_vendor(dev);
2325         adapter->hw.device_id = pci_get_device(dev);
2326         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2327         adapter->hw.subsystem_vendor_id =
2328             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2329         adapter->hw.subsystem_device_id =
2330             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2331
2332         /* Set MAC type early for PCI setup */
2333         e1000_set_mac_type(&adapter->hw);
2334
2335         /* Are we a VF device? */
2336         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2337             (adapter->hw.mac.type == e1000_vfadapt_i350))
2338                 adapter->vf_ifp = 1;
2339         else
2340                 adapter->vf_ifp = 0;
2341 }
2342
2343 static int
2344 igb_allocate_pci_resources(struct adapter *adapter)
2345 {
2346         device_t        dev = adapter->dev;
2347         int             rid;
2348
2349         rid = PCIR_BAR(0);
2350         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2351             &rid, RF_ACTIVE);
2352         if (adapter->pci_mem == NULL) {
2353                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2354                 return (ENXIO);
2355         }
2356         adapter->osdep.mem_bus_space_tag =
2357             rman_get_bustag(adapter->pci_mem);
2358         adapter->osdep.mem_bus_space_handle =
2359             rman_get_bushandle(adapter->pci_mem);
2360         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2361
2362         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2363
2364         /* This will setup either MSI/X or MSI */
2365         adapter->msix = igb_setup_msix(adapter);
2366         adapter->hw.back = &adapter->osdep;
2367
2368         return (0);
2369 }
2370
2371 /*********************************************************************
2372  *
2373  *  Setup the Legacy or MSI Interrupt handler
2374  *
2375  **********************************************************************/
2376 static int
2377 igb_allocate_legacy(struct adapter *adapter)
2378 {
2379         device_t                dev = adapter->dev;
2380         struct igb_queue        *que = adapter->queues;
2381 #ifndef IGB_LEGACY_TX
2382         struct tx_ring          *txr = adapter->tx_rings;
2383 #endif
2384         int                     error, rid = 0;
2385
2386         /* Turn off all interrupts */
2387         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2388
2389         /* MSI RID is 1 */
2390         if (adapter->msix == 1)
2391                 rid = 1;
2392
2393         /* We allocate a single interrupt resource */
2394         adapter->res = bus_alloc_resource_any(dev,
2395             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2396         if (adapter->res == NULL) {
2397                 device_printf(dev, "Unable to allocate bus resource: "
2398                     "interrupt\n");
2399                 return (ENXIO);
2400         }
2401
2402 #ifndef IGB_LEGACY_TX
2403         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2404 #endif
2405
2406         /*
2407          * Try allocating a fast interrupt and the associated deferred
2408          * processing contexts.
2409          */
2410         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2411         /* Make tasklet for deferred link handling */
2412         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2413         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2414             taskqueue_thread_enqueue, &que->tq);
2415         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2416             device_get_nameunit(adapter->dev));
2417         if ((error = bus_setup_intr(dev, adapter->res,
2418             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2419             adapter, &adapter->tag)) != 0) {
2420                 device_printf(dev, "Failed to register fast interrupt "
2421                             "handler: %d\n", error);
2422                 taskqueue_free(que->tq);
2423                 que->tq = NULL;
2424                 return (error);
2425         }
2426
2427         return (0);
2428 }
2429
2430
2431 /*********************************************************************
2432  *
2433  *  Setup the MSIX Queue Interrupt handlers: 
2434  *
2435  **********************************************************************/
2436 static int
2437 igb_allocate_msix(struct adapter *adapter)
2438 {
2439         device_t                dev = adapter->dev;
2440         struct igb_queue        *que = adapter->queues;
2441         int                     error, rid, vector = 0;
2442         int                     cpu_id = 0;
2443 #ifdef  RSS
2444         cpuset_t cpu_mask;
2445 #endif
2446
2447         /* Be sure to start with all interrupts disabled */
2448         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2449         E1000_WRITE_FLUSH(&adapter->hw);
2450
2451 #ifdef  RSS
2452         /*
2453          * If we're doing RSS, the number of queues needs to
2454          * match the number of RSS buckets that are configured.
2455          *
2456          * + If there's more queues than RSS buckets, we'll end
2457          *   up with queues that get no traffic.
2458          *
2459          * + If there's more RSS buckets than queues, we'll end
2460          *   up having multiple RSS buckets map to the same queue,
2461          *   so there'll be some contention.
2462          */
2463         if (adapter->num_queues != rss_getnumbuckets()) {
2464                 device_printf(dev,
2465                     "%s: number of queues (%d) != number of RSS buckets (%d)"
2466                     "; performance will be impacted.\n",
2467                     __func__,
2468                     adapter->num_queues,
2469                     rss_getnumbuckets());
2470         }
2471 #endif
2472
2473         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2474                 rid = vector + 1;
2475                 que->res = bus_alloc_resource_any(dev,
2476                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2477                 if (que->res == NULL) {
2478                         device_printf(dev,
2479                             "Unable to allocate bus resource: "
2480                             "MSIX Queue Interrupt\n");
2481                         return (ENXIO);
2482                 }
2483                 error = bus_setup_intr(dev, que->res,
2484                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2485                     igb_msix_que, que, &que->tag);
2486                 if (error) {
2487                         que->res = NULL;
2488                         device_printf(dev, "Failed to register Queue handler");
2489                         return (error);
2490                 }
2491 #if __FreeBSD_version >= 800504
2492                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2493 #endif
2494                 que->msix = vector;
2495                 if (adapter->hw.mac.type == e1000_82575)
2496                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2497                 else
2498                         que->eims = 1 << vector;
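                /*
                ** The 82575 exposes per-queue RX/TX cause bits in
                ** EICR, so its eims mask is built from those; later
                ** MACs have one EIMS bit per MSI-X vector.
                */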
2499
2500 #ifdef  RSS
2501                 /*
2502                  * The queue ID is used as the RSS layer bucket ID.
2503                  * We look up the queue ID -> RSS CPU ID and select
2504                  * that.
2505                  */
2506                 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2507 #else
2508                 /*
2509                  * Bind the msix vector, and thus the
2510                  * rings to the corresponding cpu.
2511                  *
2512                  * This just happens to match the default RSS round-robin
2513                  * bucket -> queue -> CPU allocation.
2514                  */
2515                 if (adapter->num_queues > 1) {
2516                         if (igb_last_bind_cpu < 0)
2517                                 igb_last_bind_cpu = CPU_FIRST();
2518                         cpu_id = igb_last_bind_cpu;
2519                 }
2520 #endif
2521
2522                 if (adapter->num_queues > 1) {
2523                         bus_bind_intr(dev, que->res, cpu_id);
2524 #ifdef  RSS
2525                         device_printf(dev,
2526                                 "Bound queue %d to RSS bucket %d\n",
2527                                 i, cpu_id);
2528 #else
2529                         device_printf(dev,
2530                                 "Bound queue %d to cpu %d\n",
2531                                 i, cpu_id);
2532 #endif
2533                 }
2534
2535 #ifndef IGB_LEGACY_TX
2536                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2537                     que->txr);
2538 #endif
2539                 /* Make tasklet for deferred handling */
2540                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2541                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2542                     taskqueue_thread_enqueue, &que->tq);
2543                 if (adapter->num_queues > 1) {
2544                         /*
2545                          * Only pin the taskqueue thread to a CPU if
2546                          * RSS is in use.
2547                          *
2548                          * This again just happens to match the default RSS
2549                          * round-robin bucket -> queue -> CPU allocation.
2550                          */
2551 #ifdef  RSS
2552                         CPU_SETOF(cpu_id, &cpu_mask);
2553                         taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2554                             &cpu_mask,
2555                             "%s que (bucket %d)",
2556                             device_get_nameunit(adapter->dev),
2557                             cpu_id);
2558 #else
2559                         taskqueue_start_threads(&que->tq, 1, PI_NET,
2560                             "%s que (qid %d)",
2561                             device_get_nameunit(adapter->dev),
2562                             cpu_id);
2563 #endif
2564                 } else {
2565                         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2566                             device_get_nameunit(adapter->dev));
2567                 }
2568
2569                 /* Finally update the last bound CPU id */
2570                 if (adapter->num_queues > 1)
2571                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2572         }
2573
2574         /* And Link */
2575         rid = vector + 1;
2576         adapter->res = bus_alloc_resource_any(dev,
2577             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2578         if (adapter->res == NULL) {
2579                 device_printf(dev,
2580                     "Unable to allocate bus resource: "
2581                     "MSIX Link Interrupt\n");
2582                 return (ENXIO);
2583         }
2584         if ((error = bus_setup_intr(dev, adapter->res,
2585             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2586             igb_msix_link, adapter, &adapter->tag)) != 0) {
2587                 device_printf(dev, "Failed to register Link handler");
2588                 return (error);
2589         }
2590 #if __FreeBSD_version >= 800504
2591         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2592 #endif
2593         adapter->linkvec = vector;
2594
2595         return (0);
2596 }
2597
2598
2599 static void
2600 igb_configure_queues(struct adapter *adapter)
2601 {
2602         struct  e1000_hw        *hw = &adapter->hw;
2603         struct  igb_queue       *que;
2604         u32                     tmp, ivar = 0, newitr = 0;
2605
2606         /* First turn on RSS capability */
2607         if (adapter->hw.mac.type != e1000_82575)
2608                 E1000_WRITE_REG(hw, E1000_GPIE,
2609                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2610                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2611
2612         /* Turn on MSIX */
2613         switch (adapter->hw.mac.type) {
2614         case e1000_82580:
2615         case e1000_i350:
2616         case e1000_i354:
2617         case e1000_i210:
2618         case e1000_i211:
2619         case e1000_vfadapt:
2620         case e1000_vfadapt_i350:
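                /*
                ** Each 32-bit IVAR register packs four 8-bit entries
                ** (RX and TX for an even/odd queue pair); bit 7 of an
                ** entry (E1000_IVAR_VALID) activates it and the low
                ** bits select the MSI-X vector.
                */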
2621                 /* RX entries */
2622                 for (int i = 0; i < adapter->num_queues; i++) {
2623                         u32 index = i >> 1;
2624                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2625                         que = &adapter->queues[i];
2626                         if (i & 1) {
2627                                 ivar &= 0xFF00FFFF;
2628                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2629                         } else {
2630                                 ivar &= 0xFFFFFF00;
2631                                 ivar |= que->msix | E1000_IVAR_VALID;
2632                         }
2633                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2634                 }
2635                 /* TX entries */
2636                 for (int i = 0; i < adapter->num_queues; i++) {
2637                         u32 index = i >> 1;
2638                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2639                         que = &adapter->queues[i];
2640                         if (i & 1) {
2641                                 ivar &= 0x00FFFFFF;
2642                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2643                         } else {
2644                                 ivar &= 0xFFFF00FF;
2645                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2646                         }
2647                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2648                         adapter->que_mask |= que->eims;
2649                 }
2650
2651                 /* And for the link interrupt */
2652                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2653                 adapter->link_mask = 1 << adapter->linkvec;
2654                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2655                 break;
2656         case e1000_82576:
2657                 /* RX entries */
2658                 for (int i = 0; i < adapter->num_queues; i++) {
2659                         u32 index = i & 0x7; /* Each IVAR has two entries */
2660                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2661                         que = &adapter->queues[i];
2662                         if (i < 8) {
2663                                 ivar &= 0xFFFFFF00;
2664                                 ivar |= que->msix | E1000_IVAR_VALID;
2665                         } else {
2666                                 ivar &= 0xFF00FFFF;
2667                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2668                         }
2669                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2670                         adapter->que_mask |= que->eims;
2671                 }
2672                 /* TX entries */
2673                 for (int i = 0; i < adapter->num_queues; i++) {
2674                         u32 index = i & 0x7; /* Each IVAR has two entries */
2675                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2676                         que = &adapter->queues[i];
2677                         if (i < 8) {
2678                                 ivar &= 0xFFFF00FF;
2679                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2680                         } else {
2681                                 ivar &= 0x00FFFFFF;
2682                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2683                         }
2684                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2685                         adapter->que_mask |= que->eims;
2686                 }
2687
2688                 /* And for the link interrupt */
2689                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2690                 adapter->link_mask = 1 << adapter->linkvec;
2691                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2692                 break;
2693
2694         case e1000_82575:
2695                 /* Enable MSI-X support */
2696                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2697                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2698                 /* Auto-Mask interrupts upon ICR read. */
2699                 tmp |= E1000_CTRL_EXT_EIAME;
2700                 tmp |= E1000_CTRL_EXT_IRCA;
2701                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2702
2703                 /* Queues */
2704                 for (int i = 0; i < adapter->num_queues; i++) {
2705                         que = &adapter->queues[i];
2706                         tmp = E1000_EICR_RX_QUEUE0 << i;
2707                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2708                         que->eims = tmp;
2709                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2710                             i, que->eims);
2711                         adapter->que_mask |= que->eims;
2712                 }
2713
2714                 /* Link */
2715                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2716                     E1000_EIMS_OTHER);
2717                 adapter->link_mask |= E1000_EIMS_OTHER;
                break;
2718         default:
2719                 break;
2720         }
2721
2722         /* Set the starting interrupt rate */
2723         if (igb_max_interrupt_rate > 0)
2724                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
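        /*
        ** The driver treats EITR as ticking 4000000 times a second,
        ** so e.g. the default cap of 8000 interrupts/sec yields an
        ** interval value of 500.
        */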
2725
2726         if (hw->mac.type == e1000_82575)
2727                 newitr |= newitr << 16;
2728         else
2729                 newitr |= E1000_EITR_CNT_IGNR;
2730
2731         for (int i = 0; i < adapter->num_queues; i++) {
2732                 que = &adapter->queues[i];
2733                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2734         }
2735
2736         return;
2737 }
2738
2739
2740 static void
2741 igb_free_pci_resources(struct adapter *adapter)
2742 {
2743         struct          igb_queue *que = adapter->queues;
2744         device_t        dev = adapter->dev;
2745         int             rid;
2746
2747         /*
2748         ** There is a slight possibility of a failure mode
2749         ** in attach that will result in entering this function
2750         ** before interrupt resources have been initialized, and
2751         ** in that case we do not want to execute the loops below.
2752         ** We can detect this reliably by the state of the adapter
2753         ** res pointer.
2754         */
2755         if (adapter->res == NULL)
2756                 goto mem;
2757
2758         /*
2759          * First release all the interrupt resources:
2760          */
2761         for (int i = 0; i < adapter->num_queues; i++, que++) {
2762                 rid = que->msix + 1;
2763                 if (que->tag != NULL) {
2764                         bus_teardown_intr(dev, que->res, que->tag);
2765                         que->tag = NULL;
2766                 }
2767                 if (que->res != NULL)
2768                         bus_release_resource(dev,
2769                             SYS_RES_IRQ, rid, que->res);
2770         }
2771
2772         /* Clean the Legacy or Link interrupt last */
2773         if (adapter->linkvec) /* we are doing MSIX */
2774                 rid = adapter->linkvec + 1;
2775         else
2776                 rid = (adapter->msix != 0) ? 1 : 0;
2777
2778         que = adapter->queues;
2779         if (adapter->tag != NULL) {
2780                 taskqueue_drain(que->tq, &adapter->link_task);
2781                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2782                 adapter->tag = NULL;
2783         }
2784         if (adapter->res != NULL)
2785                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2786
2787         for (int i = 0; i < adapter->num_queues; i++, que++) {
2788                 if (que->tq != NULL) {
2789 #ifndef IGB_LEGACY_TX
2790                         taskqueue_drain(que->tq, &que->txr->txq_task);
2791 #endif
2792                         taskqueue_drain(que->tq, &que->que_task);
2793                         taskqueue_free(que->tq);
2794                 }
2795         }
2796 mem:
2797         if (adapter->msix)
2798                 pci_release_msi(dev);
2799
2800         if (adapter->msix_mem != NULL)
2801                 bus_release_resource(dev, SYS_RES_MEMORY,
2802                     adapter->memrid, adapter->msix_mem);
2803
2804         if (adapter->pci_mem != NULL)
2805                 bus_release_resource(dev, SYS_RES_MEMORY,
2806                     PCIR_BAR(0), adapter->pci_mem);
2807
2808 }
2809
2810 /*
2811  * Setup Either MSI/X or MSI
2812  */
2813 static int
2814 igb_setup_msix(struct adapter *adapter)
2815 {
2816         device_t        dev = adapter->dev;
2817         int             bar, want, queues, msgs, maxqueues;
2818
2819         /* tuneable override */
2820         if (igb_enable_msix == 0)
2821                 goto msi;
2822
2823         /* First try MSI/X */
2824         msgs = pci_msix_count(dev); 
2825         if (msgs == 0)
2826                 goto msi;
2827         /*
2828         ** Some newer devices, as with ixgbe, may now
2829         ** use a different BAR, so we need to keep
2830         ** track of which is used.
2831         */
2832         adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2833         bar = pci_read_config(dev, adapter->memrid, 4);
2834         if (bar == 0) /* use next bar */
2835                 adapter->memrid += 4;
2836         adapter->msix_mem = bus_alloc_resource_any(dev,
2837             SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2838         if (adapter->msix_mem == NULL) {
2839                 /* May not be enabled */
2840                 device_printf(adapter->dev,
2841                     "Unable to map MSIX table\n");
2842                 goto msi;
2843         }
2844
2845         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2846
2847         /* Override via tuneable */
2848         if (igb_num_queues != 0)
2849                 queues = igb_num_queues;
2850
2851 #ifdef  RSS
2852         /* If we're doing RSS, clamp at the number of RSS buckets */
2853         if (queues > rss_getnumbuckets())
2854                 queues = rss_getnumbuckets();
2855 #endif
2856
2857
2858         /* Sanity check based on HW */
2859         switch (adapter->hw.mac.type) {
2860                 case e1000_82575:
2861                         maxqueues = 4;
2862                         break;
2863                 case e1000_82576:
2864                 case e1000_82580:
2865                 case e1000_i350:
2866                 case e1000_i354:
2867                         maxqueues = 8;
2868                         break;
2869                 case e1000_i210:
2870                         maxqueues = 4;
2871                         break;
2872                 case e1000_i211:
2873                         maxqueues = 2;
2874                         break;
2875                 default:  /* VF interfaces */
2876                         maxqueues = 1;
2877                         break;
2878         }
2879
2880         /* Final clamp on the actual hardware capability */
2881         if (queues > maxqueues)
2882                 queues = maxqueues;
2883
2884         /*
2885         ** One vector (RX/TX pair) per queue
2886         ** plus one additional vector for the link interrupt
2887         */
2888         want = queues + 1;
2889         if (msgs >= want)
2890                 msgs = want;
2891         else {
2892                 device_printf(adapter->dev,
2893                     "MSIX Configuration Problem: "
2894                     "only %d vectors available, but %d needed!\n",
2895                     msgs, want);
2896                 goto msi;
2897         }
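        /*
         * Example with hypothetical numbers: a 4-CPU system on an i350
         * (maxqueues = 8) that advertises 10 MSI-X messages ends up with
         * queues = 4 and want = 5, i.e. one vector per RX/TX queue pair
         * plus one for the link interrupt.
         */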
2898         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2899                 device_printf(adapter->dev,
2900                     "Using MSIX interrupts with %d vectors\n", msgs);
2901                 adapter->num_queues = queues;
2902                 return (msgs);
2903         }
2904         /*
2905         ** If MSIX alloc failed or provided us with
2906         ** less than needed, free and fall through to MSI
2907         */
2908         pci_release_msi(dev);
2909
2910 msi:
2911         if (adapter->msix_mem != NULL) {
2912                 bus_release_resource(dev, SYS_RES_MEMORY,
2913                     adapter->memrid, adapter->msix_mem);
2914                 adapter->msix_mem = NULL;
2915         }
2916         msgs = 1;
2917         if (pci_alloc_msi(dev, &msgs) == 0) {
2918                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2919                 return (msgs);
2920         }
2921         device_printf(adapter->dev, "Using a Legacy interrupt\n");
2922         return (0);
2923 }
2924
2925 /*********************************************************************
2926  *
2927  *  Initialize the DMA Coalescing feature
2928  *
2929  **********************************************************************/
2930 static void
2931 igb_init_dmac(struct adapter *adapter, u32 pba)
2932 {
2933         device_t        dev = adapter->dev;
2934         struct e1000_hw *hw = &adapter->hw;
2935         u32             dmac, reg = ~E1000_DMACR_DMAC_EN;
2936         u16             hwm;
2937
2938         if (hw->mac.type == e1000_i211)
2939                 return;
2940
2941         if (hw->mac.type > e1000_82580) {
2942
2943                 if (adapter->dmac == 0) { /* Disabling it */
2944                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2945                         return;
2946                 } else
2947                         device_printf(dev, "DMA Coalescing enabled\n");
2948
2949                 /* Set starting threshold */
2950                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2951
2952                 hwm = 64 * pba - adapter->max_frame_size / 16;
2953                 if (hwm < 64 * (pba - 6))
2954                         hwm = 64 * (pba - 6);
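                /*
                 * Illustrative numbers: pba is in KB and the threshold
                 * counts 16-byte units, so 64 * pba converts KB to 16B
                 * units.  With pba = 34 (i210) and a 1522-byte max frame,
                 * hwm = 2176 - 95 = 2081, above the 64 * (34 - 6) = 1792
                 * floor, so 2081 is kept.
                 */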
2955                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2956                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2957                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2958                     & E1000_FCRTC_RTH_COAL_MASK);
2959                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2960
2961
2962                 dmac = pba - adapter->max_frame_size / 512;
2963                 if (dmac < pba - 10)
2964                         dmac = pba - 10;
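                /*
                 * Likewise (illustrative): with pba = 34 and a 1522-byte
                 * frame, dmac = 34 - (1522 / 512) = 32, which is above
                 * the pba - 10 = 24 floor.
                 */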
2965                 reg = E1000_READ_REG(hw, E1000_DMACR);
2966                 reg &= ~E1000_DMACR_DMACTHR_MASK;
2967                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2968                     & E1000_DMACR_DMACTHR_MASK);
2969
2970                 /* transition to L0s or L1 if available. */
2971                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2972
2973                 /* Check for a 2.5Gb backplane connection before
2974                 * configuring the watchdog timer: the timer field
2975                 * is expressed in units of 12.8 usec when on a
2976                 * 2.5Gb link, and in units of 32 usec on any
2977                 * other connection.
2978                 */
2979                 if (hw->mac.type == e1000_i354) {
2980                         int status = E1000_READ_REG(hw, E1000_STATUS);
2981                         if ((status & E1000_STATUS_2P5_SKU) &&
2982                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2983                                 reg |= ((adapter->dmac * 5) >> 6);
2984                         else
2985                                 reg |= (adapter->dmac >> 5);
2986                 } else {
2987                         reg |= (adapter->dmac >> 5);
2988                 }
2989
2990                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2991
2992                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2993
2994                 /* Set the interval before transition */
2995                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2996                 if (hw->mac.type == e1000_i350)
2997                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
2998                 /*
2999                 ** On a 2.5Gb connection the TTLX unit is 0.4 usec,
3000                 ** so a 4 usec delay is 4 / 0.4 = 10 = 0xA.
3001                 */
3002                 if (hw->mac.type == e1000_i354) {
3003                         int status = E1000_READ_REG(hw, E1000_STATUS);
3004                         if ((status & E1000_STATUS_2P5_SKU) &&
3005                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
3006                                 reg |= 0xA;
3007                         else
3008                                 reg |= 0x4;
3009                 } else {
3010                         reg |= 0x4;
3011                 }
3012
3013                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3014
3015                 /* free space in tx packet buffer to wake from DMA coal */
3016                 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
3017                     (2 * adapter->max_frame_size)) >> 6);
3018
3019                 /* make low power state decision controlled by DMA coal */
3020                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3021                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3022                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3023
3024         } else if (hw->mac.type == e1000_82580) {
3025                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3026                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3027                     reg & ~E1000_PCIEMISC_LX_DECISION);
3028                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3029         }
3030 }
3031
3032
3033 /*********************************************************************
3034  *
3035  *  Set up a fresh starting state
3036  *
3037  **********************************************************************/
3038 static void
3039 igb_reset(struct adapter *adapter)
3040 {
3041         device_t        dev = adapter->dev;
3042         struct e1000_hw *hw = &adapter->hw;
3043         struct e1000_fc_info *fc = &hw->fc;
3044         struct ifnet    *ifp = adapter->ifp;
3045         u32             pba = 0;
3046         u16             hwm;
3047
3048         INIT_DEBUGOUT("igb_reset: begin");
3049
3050         /* Let the firmware know the OS is in control */
3051         igb_get_hw_control(adapter);
3052
3053         /*
3054          * Packet Buffer Allocation (PBA)
3055          * Writing PBA sets the receive portion of the buffer;
3056          * the remainder is used for the transmit buffer.
3057          */
3058         switch (hw->mac.type) {
3059         case e1000_82575:
3060                 pba = E1000_PBA_32K;
3061                 break;
3062         case e1000_82576:
3063         case e1000_vfadapt:
3064                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3065                 pba &= E1000_RXPBS_SIZE_MASK_82576;
3066                 break;
3067         case e1000_82580:
3068         case e1000_i350:
3069         case e1000_i354:
3070         case e1000_vfadapt_i350:
3071                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3072                 pba = e1000_rxpbs_adjust_82580(pba);
3073                 break;
3074         case e1000_i210:
3075         case e1000_i211:
3076                 pba = E1000_PBA_34K;
3077         default:
3078                 break;
3079         }
3080
3081         /* Special needs in case of Jumbo frames */
3082         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3083                 u32 tx_space, min_tx, min_rx;
3084                 pba = E1000_READ_REG(hw, E1000_PBA);
3085                 tx_space = pba >> 16;
3086                 pba &= 0xffff;
3087                 min_tx = (adapter->max_frame_size +
3088                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3089                 min_tx = roundup2(min_tx, 1024);
3090                 min_tx >>= 10;
3091                 min_rx = adapter->max_frame_size;
3092                 min_rx = roundup2(min_rx, 1024);
3093                 min_rx >>= 10;
3094                 if (tx_space < min_tx &&
3095                     ((min_tx - tx_space) < pba)) {
3096                         pba = pba - (min_tx - tx_space);
3097                         /*
3098                          * if short on rx space, rx wins
3099                          * and must trump tx adjustment
3100                          */
3101                         if (pba < min_rx)
3102                                 pba = min_rx;
3103                 }
3104                 E1000_WRITE_REG(hw, E1000_PBA, pba);
3105         }
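        /*
         * Worked example (illustrative, assuming a 9000-byte MTU giving a
         * 9018-byte max frame): min_tx = (9018 + 16 - 4) * 2 = 18060
         * bytes, which rounds up to 18KB, and min_rx rounds up to 9KB;
         * TX space is then grown at the expense of RX only while RX
         * stays at or above min_rx.
         */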
3106
3107         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3108
3109         /*
3110          * These parameters control the automatic generation (Tx) and
3111          * response (Rx) to Ethernet PAUSE frames.
3112          * - High water mark should allow for at least two frames to be
3113          *   received after sending an XOFF.
3114          * - Low water mark works best when it is very near the high water mark.
3115          *   This allows the receiver to restart by sending XON when it has
3116          *   drained a bit.
3117          */
3118         hwm = min(((pba << 10) * 9 / 10),
3119             ((pba << 10) - 2 * adapter->max_frame_size));
3120
3121         if (hw->mac.type < e1000_82576) {
3122                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3123                 fc->low_water = fc->high_water - 8;
3124         } else {
3125                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3126                 fc->low_water = fc->high_water - 16;
3127         }
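        /*
         * Example (illustrative): with pba = 34KB and a 1522-byte max
         * frame, pba << 10 = 34816 bytes and hwm = min(31334, 31772) =
         * 31334; on 82576 and later this is masked to 16-byte
         * granularity, giving high_water = 31328 and low_water = 31312.
         */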
3128
3129         fc->pause_time = IGB_FC_PAUSE_TIME;
3130         fc->send_xon = TRUE;
3131         if (adapter->fc)
3132                 fc->requested_mode = adapter->fc;
3133         else
3134                 fc->requested_mode = e1000_fc_default;
3135
3136         /* Issue a global reset */
3137         e1000_reset_hw(hw);
3138         E1000_WRITE_REG(hw, E1000_WUC, 0);
3139
3140         /* Reset for AutoMediaDetect */
3141         if (adapter->flags & IGB_MEDIA_RESET) {
3142                 e1000_setup_init_funcs(hw, TRUE);
3143                 e1000_get_bus_info(hw);
3144                 adapter->flags &= ~IGB_MEDIA_RESET;
3145         }
3146
3147         if (e1000_init_hw(hw) < 0)
3148                 device_printf(dev, "Hardware Initialization Failed\n");
3149
3150         /* Setup DMA Coalescing */
3151         igb_init_dmac(adapter, pba);
3152
3153         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3154         e1000_get_phy_info(hw);
3155         e1000_check_for_link(hw);
3156         return;
3157 }
3158
3159 /*********************************************************************
3160  *
3161  *  Setup networking device structure and register an interface.
3162  *
3163  **********************************************************************/
3164 static int
3165 igb_setup_interface(device_t dev, struct adapter *adapter)
3166 {
3167         struct ifnet   *ifp;
3168
3169         INIT_DEBUGOUT("igb_setup_interface: begin");
3170
3171         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3172         if (ifp == NULL) {
3173                 device_printf(dev, "can not allocate ifnet structure\n");
3174                 return (-1);
3175         }
3176         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3177         ifp->if_init =  igb_init;
3178         ifp->if_softc = adapter;
3179         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3180         ifp->if_ioctl = igb_ioctl;
3181         ifp->if_get_counter = igb_get_counter;
3182
3183         /* TSO parameters */
3184         ifp->if_hw_tsomax = IP_MAXPACKET;
3185         ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
3186         ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
3187
3188 #ifndef IGB_LEGACY_TX
3189         ifp->if_transmit = igb_mq_start;
3190         ifp->if_qflush = igb_qflush;
3191 #else
3192         ifp->if_start = igb_start;
3193         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3194         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3195         IFQ_SET_READY(&ifp->if_snd);
3196 #endif
3197
3198         ether_ifattach(ifp, adapter->hw.mac.addr);
3199
3200         ifp->if_capenable = 0;
3201
3202         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3203 #if __FreeBSD_version >= 1000000
3204         ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
3205 #endif
3206         ifp->if_capabilities |= IFCAP_TSO;
3207         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3208         ifp->if_capenable = ifp->if_capabilities;
3209
3210         /* Advertise LRO capability, but leave it disabled by default */
3211         ifp->if_capabilities |= IFCAP_LRO;
3212
3213 #ifdef DEVICE_POLLING
3214         ifp->if_capabilities |= IFCAP_POLLING;
3215 #endif
3216
3217         /*
3218          * Tell the upper layer(s) we
3219          * support full VLAN capability.
3220          */
3221         ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3222         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3223                              |  IFCAP_VLAN_HWTSO
3224                              |  IFCAP_VLAN_MTU;
3225         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3226                           |  IFCAP_VLAN_HWTSO
3227                           |  IFCAP_VLAN_MTU;
3228
3229         /*
3230         ** Don't turn this on by default: if vlans are
3231         ** created on another pseudo device (e.g. lagg)
3232         ** then vlan events are not passed through, breaking
3233         ** operation, though with HW FILTER off it works. If
3234         ** you use vlans directly on the igb driver you can
3235         ** enable this and get full hardware tag filtering.
3236         */
3237         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3238
3239         /*
3240          * Specify the media types supported by this adapter and register
3241          * callbacks to update media and link information
3242          */
3243         ifmedia_init(&adapter->media, IFM_IMASK,
3244             igb_media_change, igb_media_status);
3245         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3246             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3247                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3248                             0, NULL);
3249                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3250         } else {
3251                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3252                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3253                             0, NULL);
3254                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3255                             0, NULL);
3256                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3257                             0, NULL);
3258                 if (adapter->hw.phy.type != e1000_phy_ife) {
3259                         ifmedia_add(&adapter->media,
3260                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3261                         ifmedia_add(&adapter->media,
3262                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3263                 }
3264         }
3265         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3266         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3267         return (0);
3268 }
3269
3270
3271 /*
3272  * Manage DMA'able memory.
3273  */
3274 static void
3275 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3276 {
3277         if (error)
3278                 return;
3279         *(bus_addr_t *) arg = segs[0].ds_addr;
3280 }
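/*
 * Note: when called with BUS_DMA_NOWAIT the load cannot be deferred, so
 * bus_dmamap_load() runs the callback above before returning and
 * igb_dma_malloc() can read the physical address back out of
 * dma->dma_paddr immediately.
 */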
3281
3282 static int
3283 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3284         struct igb_dma_alloc *dma, int mapflags)
3285 {
3286         int error;
3287
3288         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3289                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3290                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3291                                 BUS_SPACE_MAXADDR,      /* highaddr */
3292                                 NULL, NULL,             /* filter, filterarg */
3293                                 size,                   /* maxsize */
3294                                 1,                      /* nsegments */
3295                                 size,                   /* maxsegsize */
3296                                 0,                      /* flags */
3297                                 NULL,                   /* lockfunc */
3298                                 NULL,                   /* lockarg */
3299                                 &dma->dma_tag);
3300         if (error) {
3301                 device_printf(adapter->dev,
3302                     "%s: bus_dma_tag_create failed: %d\n",
3303                     __func__, error);
3304                 goto fail_0;
3305         }
3306
3307         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3308             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3309         if (error) {
3310                 device_printf(adapter->dev,
3311                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3312                     __func__, (uintmax_t)size, error);
3313                 goto fail_2;
3314         }
3315
3316         dma->dma_paddr = 0;
3317         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3318             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3319         if (error || dma->dma_paddr == 0) {
3320                 device_printf(adapter->dev,
3321                     "%s: bus_dmamap_load failed: %d\n",
3322                     __func__, error);
3323                 goto fail_3;
3324         }
3325
3326         return (0);
3327
3328 fail_3:
3329         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3330 fail_2:
3331         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3332         bus_dma_tag_destroy(dma->dma_tag);
3333 fail_0:
3334         dma->dma_tag = NULL;
3335
3336         return (error);
3337 }
3338
3339 static void
3340 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3341 {
3342         if (dma->dma_tag == NULL)
3343                 return;
3344         if (dma->dma_paddr != 0) {
3345                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3346                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3347                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3348                 dma->dma_paddr = 0;
3349         }
3350         if (dma->dma_vaddr != NULL) {
3351                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3352                 dma->dma_vaddr = NULL;
3353         }
3354         bus_dma_tag_destroy(dma->dma_tag);
3355         dma->dma_tag = NULL;
3356 }
3357
3358
3359 /*********************************************************************
3360  *
3361  *  Allocate memory for the transmit and receive rings, and then
3362  *  the descriptors associated with each, called only once at attach.
3363  *
3364  **********************************************************************/
3365 static int
3366 igb_allocate_queues(struct adapter *adapter)
3367 {
3368         device_t dev = adapter->dev;
3369         struct igb_queue        *que = NULL;
3370         struct tx_ring          *txr = NULL;
3371         struct rx_ring          *rxr = NULL;
3372         int rsize, tsize, error = E1000_SUCCESS;
3373         int txconf = 0, rxconf = 0;
3374
3375         /* First allocate the top level queue structs */
3376         if (!(adapter->queues =
3377             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3378             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3379                 device_printf(dev, "Unable to allocate queue memory\n");
3380                 error = ENOMEM;
3381                 goto fail;
3382         }
3383
3384         /* Next allocate the TX ring struct memory */
3385         if (!(adapter->tx_rings =
3386             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3387             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3388                 device_printf(dev, "Unable to allocate TX ring memory\n");
3389                 error = ENOMEM;
3390                 goto tx_fail;
3391         }
3392
3393         /* Now allocate the RX */
3394         if (!(adapter->rx_rings =
3395             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3396             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3397                 device_printf(dev, "Unable to allocate RX ring memory\n");
3398                 error = ENOMEM;
3399                 goto rx_fail;
3400         }
3401
3402         tsize = roundup2(adapter->num_tx_desc *
3403             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3404         /*
3405          * Now set up the TX queues, txconf is needed to handle the
3406          * possibility that things fail midcourse and we need to
3407          * undo memory gracefully
3408          */ 
3409         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3410                 /* Set up some basics */
3411                 txr = &adapter->tx_rings[i];
3412                 txr->adapter = adapter;
3413                 txr->me = i;
3414                 txr->num_desc = adapter->num_tx_desc;
3415
3416                 /* Initialize the TX lock */
3417                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3418                     device_get_nameunit(dev), txr->me);
3419                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3420
3421                 if (igb_dma_malloc(adapter, tsize,
3422                         &txr->txdma, BUS_DMA_NOWAIT)) {
3423                         device_printf(dev,
3424                             "Unable to allocate TX Descriptor memory\n");
3425                         error = ENOMEM;
3426                         goto err_tx_desc;
3427                 }
3428                 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3429                 bzero((void *)txr->tx_base, tsize);
3430
3431                 /* Now allocate transmit buffers for the ring */
3432                 if (igb_allocate_transmit_buffers(txr)) {
3433                         device_printf(dev,
3434                             "Critical Failure setting up transmit buffers\n");
3435                         error = ENOMEM;
3436                         goto err_tx_desc;
3437                 }
3438 #ifndef IGB_LEGACY_TX
3439                 /* Allocate a buf ring */
3440                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3441                     M_WAITOK, &txr->tx_mtx);
3442 #endif
3443         }
3444
3445         /*
3446          * Next the RX queues...
3447          */ 
3448         rsize = roundup2(adapter->num_rx_desc *
3449             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3450         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3451                 rxr = &adapter->rx_rings[i];
3452                 rxr->adapter = adapter;
3453                 rxr->me = i;
3454
3455                 /* Initialize the RX lock */
3456                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3457                     device_get_nameunit(dev), rxr->me);
3458                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3459
3460                 if (igb_dma_malloc(adapter, rsize,
3461                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3462                         device_printf(dev,
3463                             "Unable to allocate RX Descriptor memory\n");
3464                         error = ENOMEM;
3465                         goto err_rx_desc;
3466                 }
3467                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3468                 bzero((void *)rxr->rx_base, rsize);
3469
3470                 /* Allocate receive buffers for the ring*/
3471                 if (igb_allocate_receive_buffers(rxr)) {
3472                         device_printf(dev,
3473                             "Critical Failure setting up receive buffers\n");
3474                         error = ENOMEM;
3475                         goto err_rx_desc;
3476                 }
3477         }
3478
3479         /*
3480         ** Finally set up the queue holding structs
3481         */
3482         for (int i = 0; i < adapter->num_queues; i++) {
3483                 que = &adapter->queues[i];
3484                 que->adapter = adapter;
3485                 que->txr = &adapter->tx_rings[i];
3486                 que->rxr = &adapter->rx_rings[i];
3487         }
3488
3489         return (0);
3490
3491 err_rx_desc:
3492         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3493                 igb_dma_free(adapter, &rxr->rxdma);
3494 err_tx_desc:
3495         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3496                 igb_dma_free(adapter, &txr->txdma);
3497         free(adapter->rx_rings, M_DEVBUF);
3498 rx_fail:
3499 #ifndef IGB_LEGACY_TX
3500         buf_ring_free(txr->br, M_DEVBUF);
3501 #endif
3502         free(adapter->tx_rings, M_DEVBUF);
3503 tx_fail:
3504         free(adapter->queues, M_DEVBUF);
3505 fail:
3506         return (error);
3507 }
3508
3509 /*********************************************************************
3510  *
3511  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3512  *  the information needed to transmit a packet on the wire. This is
3513  *  called only once at attach, setup is done every reset.
3514  *
3515  **********************************************************************/
3516 static int
3517 igb_allocate_transmit_buffers(struct tx_ring *txr)
3518 {
3519         struct adapter *adapter = txr->adapter;
3520         device_t dev = adapter->dev;
3521         struct igb_tx_buf *txbuf;
3522         int error, i;
3523
3524         /*
3525          * Setup DMA descriptor areas.
3526          */
3527         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3528                                1, 0,                    /* alignment, bounds */
3529                                BUS_SPACE_MAXADDR,       /* lowaddr */
3530                                BUS_SPACE_MAXADDR,       /* highaddr */
3531                                NULL, NULL,              /* filter, filterarg */
3532                                IGB_TSO_SIZE,            /* maxsize */
3533                                IGB_MAX_SCATTER,         /* nsegments */
3534                                PAGE_SIZE,               /* maxsegsize */
3535                                0,                       /* flags */
3536                                NULL,                    /* lockfunc */
3537                                NULL,                    /* lockfuncarg */
3538                                &txr->txtag))) {
3539                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3540                 goto fail;
3541         }
3542
3543         if (!(txr->tx_buffers =
3544             (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3545             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3546                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3547                 error = ENOMEM;
3548                 goto fail;
3549         }
3550
3551         /* Create the descriptor buffer dma maps */
3552         txbuf = txr->tx_buffers;
3553         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3554                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3555                 if (error != 0) {
3556                         device_printf(dev, "Unable to create TX DMA map\n");
3557                         goto fail;
3558                 }
3559         }
3560
3561         return 0;
3562 fail:
3563         /* Free everything; this handles the case where we failed partway */
3564         igb_free_transmit_structures(adapter);
3565         return (error);
3566 }
3567
3568 /*********************************************************************
3569  *
3570  *  Initialize a transmit ring.
3571  *
3572  **********************************************************************/
3573 static void
3574 igb_setup_transmit_ring(struct tx_ring *txr)
3575 {
3576         struct adapter *adapter = txr->adapter;
3577         struct igb_tx_buf *txbuf;
3578         int i;
3579 #ifdef DEV_NETMAP
3580         struct netmap_adapter *na = NA(adapter->ifp);
3581         struct netmap_slot *slot;
3582 #endif /* DEV_NETMAP */
3583
3584         /* Clear the old descriptor contents */
3585         IGB_TX_LOCK(txr);
3586 #ifdef DEV_NETMAP
3587         slot = netmap_reset(na, NR_TX, txr->me, 0);
3588 #endif /* DEV_NETMAP */
3589         bzero((void *)txr->tx_base,
3590               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3591         /* Reset indices */
3592         txr->next_avail_desc = 0;
3593         txr->next_to_clean = 0;
3594
3595         /* Free any existing tx buffers. */
3596         txbuf = txr->tx_buffers;
3597         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3598                 if (txbuf->m_head != NULL) {
3599                         bus_dmamap_sync(txr->txtag, txbuf->map,
3600                             BUS_DMASYNC_POSTWRITE);
3601                         bus_dmamap_unload(txr->txtag, txbuf->map);
3602                         m_freem(txbuf->m_head);
3603                         txbuf->m_head = NULL;
3604                 }
3605 #ifdef DEV_NETMAP
3606                 if (slot) {
3607                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3608                         /* no need to set the address */
3609                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3610                 }
3611 #endif /* DEV_NETMAP */
3612                 /* clear the watch index */
3613                 txbuf->eop = NULL;
3614         }
3615
3616         /* Set number of descriptors available */
3617         txr->tx_avail = adapter->num_tx_desc;
3618
3619         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3620             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3621         IGB_TX_UNLOCK(txr);
3622 }
3623
3624 /*********************************************************************
3625  *
3626  *  Initialize all transmit rings.
3627  *
3628  **********************************************************************/
3629 static void
3630 igb_setup_transmit_structures(struct adapter *adapter)
3631 {
3632         struct tx_ring *txr = adapter->tx_rings;
3633
3634         for (int i = 0; i < adapter->num_queues; i++, txr++)
3635                 igb_setup_transmit_ring(txr);
3636
3637         return;
3638 }
3639
3640 /*********************************************************************
3641  *
3642  *  Enable transmit unit.
3643  *
3644  **********************************************************************/
3645 static void
3646 igb_initialize_transmit_units(struct adapter *adapter)
3647 {
3648         struct tx_ring  *txr = adapter->tx_rings;
3649         struct e1000_hw *hw = &adapter->hw;
3650         u32             tctl, txdctl;
3651
3652         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3653         tctl = txdctl = 0;
3654
3655         /* Setup the Tx Descriptor Rings */
3656         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3657                 u64 bus_addr = txr->txdma.dma_paddr;
3658
3659                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3660                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3661                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3662                     (uint32_t)(bus_addr >> 32));
3663                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3664                     (uint32_t)bus_addr);
3665
3666                 /* Setup the HW Tx Head and Tail descriptor pointers */
3667                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3668                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3669
3670                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3671                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3672                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3673
3674                 txr->queue_status = IGB_QUEUE_IDLE;
3675
3676                 txdctl |= IGB_TX_PTHRESH;
3677                 txdctl |= IGB_TX_HTHRESH << 8;
3678                 txdctl |= IGB_TX_WTHRESH << 16;
3679                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3680                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3681         }
3682
3683         if (adapter->vf_ifp)
3684                 return;
3685
3686         e1000_config_collision_dist(hw);
3687
3688         /* Program the Transmit Control Register */
3689         tctl = E1000_READ_REG(hw, E1000_TCTL);
3690         tctl &= ~E1000_TCTL_CT;
3691         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3692                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3693
3694         /* This write will effectively turn on the transmit unit. */
3695         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3696 }
3697
3698 /*********************************************************************
3699  *
3700  *  Free all transmit rings.
3701  *
3702  **********************************************************************/
3703 static void
3704 igb_free_transmit_structures(struct adapter *adapter)
3705 {
3706         struct tx_ring *txr = adapter->tx_rings;
3707
3708         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3709                 IGB_TX_LOCK(txr);
3710                 igb_free_transmit_buffers(txr);
3711                 igb_dma_free(adapter, &txr->txdma);
3712                 IGB_TX_UNLOCK(txr);
3713                 IGB_TX_LOCK_DESTROY(txr);
3714         }
3715         free(adapter->tx_rings, M_DEVBUF);
3716 }
3717
3718 /*********************************************************************
3719  *
3720  *  Free transmit ring related data structures.
3721  *
3722  **********************************************************************/
3723 static void
3724 igb_free_transmit_buffers(struct tx_ring *txr)
3725 {
3726         struct adapter *adapter = txr->adapter;
3727         struct igb_tx_buf *tx_buffer;
3728         int             i;
3729
3730         INIT_DEBUGOUT("free_transmit_ring: begin");
3731
3732         if (txr->tx_buffers == NULL)
3733                 return;
3734
3735         tx_buffer = txr->tx_buffers;
3736         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3737                 if (tx_buffer->m_head != NULL) {
3738                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3739                             BUS_DMASYNC_POSTWRITE);
3740                         bus_dmamap_unload(txr->txtag,
3741                             tx_buffer->map);
3742                         m_freem(tx_buffer->m_head);
3743                         tx_buffer->m_head = NULL;
3744                         if (tx_buffer->map != NULL) {
3745                                 bus_dmamap_destroy(txr->txtag,
3746                                     tx_buffer->map);
3747                                 tx_buffer->map = NULL;
3748                         }
3749                 } else if (tx_buffer->map != NULL) {
3750                         bus_dmamap_unload(txr->txtag,
3751                             tx_buffer->map);
3752                         bus_dmamap_destroy(txr->txtag,
3753                             tx_buffer->map);
3754                         tx_buffer->map = NULL;
3755                 }
3756         }
3757 #ifndef IGB_LEGACY_TX
3758         if (txr->br != NULL)
3759                 buf_ring_free(txr->br, M_DEVBUF);
3760 #endif
3761         if (txr->tx_buffers != NULL) {
3762                 free(txr->tx_buffers, M_DEVBUF);
3763                 txr->tx_buffers = NULL;
3764         }
3765         if (txr->txtag != NULL) {
3766                 bus_dma_tag_destroy(txr->txtag);
3767                 txr->txtag = NULL;
3768         }
3769         return;
3770 }
3771
3772 /**********************************************************************
3773  *
3774  *  Setup work for hardware segmentation offload (TSO) on
3775  *  adapters using advanced tx descriptors
3776  *
3777  **********************************************************************/
3778 static int
3779 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3780     u32 *cmd_type_len, u32 *olinfo_status)
3781 {
3782         struct adapter *adapter = txr->adapter;
3783         struct e1000_adv_tx_context_desc *TXD;
3784         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3785         u32 mss_l4len_idx = 0, paylen;
3786         u16 vtag = 0, eh_type;
3787         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3788         struct ether_vlan_header *eh;
3789 #ifdef INET6
3790         struct ip6_hdr *ip6;
3791 #endif
3792 #ifdef INET
3793         struct ip *ip;
3794 #endif
3795         struct tcphdr *th;
3796
3797
3798         /*
3799          * Determine where frame payload starts.
3800          * Jump over vlan headers if already present
3801          */
3802         eh = mtod(mp, struct ether_vlan_header *);
3803         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3804                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3805                 eh_type = eh->evl_proto;
3806         } else {
3807                 ehdrlen = ETHER_HDR_LEN;
3808                 eh_type = eh->evl_encap_proto;
3809         }
3810
3811         switch (ntohs(eh_type)) {
3812 #ifdef INET6
3813         case ETHERTYPE_IPV6:
3814                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3815                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3816                 if (ip6->ip6_nxt != IPPROTO_TCP)
3817                         return (ENXIO);
3818                 ip_hlen = sizeof(struct ip6_hdr);
3820                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3821                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3822                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3823                 break;
3824 #endif
3825 #ifdef INET
3826         case ETHERTYPE_IP:
3827                 ip = (struct ip *)(mp->m_data + ehdrlen);
3828                 if (ip->ip_p != IPPROTO_TCP)
3829                         return (ENXIO);
3830                 ip->ip_sum = 0;
3831                 ip_hlen = ip->ip_hl << 2;
3832                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3833                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3834                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3835                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3836                 /* Tell transmit desc to also do IPv4 checksum. */
3837                 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3838                 break;
3839 #endif
3840         default:
3841                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3842                     __func__, ntohs(eh_type));
3843                 break;
3844         }
3845
3846         ctxd = txr->next_avail_desc;
3847         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3848
3849         tcp_hlen = th->th_off << 2;
3850
3851         /* This is used in the transmit desc in encap */
3852         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3853
3854         /* VLAN MACLEN IPLEN */
3855         if (mp->m_flags & M_VLANTAG) {
3856                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3857                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3858         }
3859
3860         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3861         vlan_macip_lens |= ip_hlen;
3862         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3863
3864         /* ADV DTYPE TUCMD */
3865         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3866         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3867         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3868
3869         /* MSS L4LEN IDX */
3870         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3871         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3872         /* 82575 needs the queue index added */
3873         if (adapter->hw.mac.type == e1000_82575)
3874                 mss_l4len_idx |= txr->me << 4;
3875         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
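        /*
         * Field packing example (illustrative, assuming the usual
         * E1000_ADVTXD_* shifts of MACLEN 9, MSS 16 and L4LEN 8): an
         * untagged IPv4/TCP frame with 14-byte Ethernet, 20-byte IP and
         * 20-byte TCP headers gives vlan_macip_lens = (14 << 9) | 20,
         * and with a 1448-byte MSS, mss_l4len_idx = (1448 << 16) |
         * (20 << 8).
         */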
3876
3877         TXD->seqnum_seed = htole32(0);
3878
3879         if (++ctxd == txr->num_desc)
3880                 ctxd = 0;
3881
3882         txr->tx_avail--;
3883         txr->next_avail_desc = ctxd;
3884         *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3885         *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3886         *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3887         ++txr->tso_tx;
3888         return (0);
3889 }
3890
3891 /*********************************************************************
3892  *
3893  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3894  *
3895  **********************************************************************/
3896
3897 static int
3898 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3899     u32 *cmd_type_len, u32 *olinfo_status)
3900 {
3901         struct e1000_adv_tx_context_desc *TXD;
3902         struct adapter *adapter = txr->adapter;
3903         struct ether_vlan_header *eh;
3904         struct ip *ip;
3905         struct ip6_hdr *ip6;
3906         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3907         int     ehdrlen, ip_hlen = 0;
3908         u16     etype;
3909         u8      ipproto = 0;
3910         int     offload = TRUE;
3911         int     ctxd = txr->next_avail_desc;
3912         u16     vtag = 0;
3913
3914         /* First check if TSO is to be used */
3915         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3916                 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3917
3918         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3919                 offload = FALSE;
3920
3921         /* Indicate the whole packet as payload when not doing TSO */
3922         *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3923
3924         /* Now ready a context descriptor */
3925         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3926
3927         /*
3928         ** In advanced descriptors the vlan tag must 
3929         ** be placed into the context descriptor. Hence
3930         ** we need to make one even if not doing offloads.
3931         */
3932         if (mp->m_flags & M_VLANTAG) {
3933                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3934                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3935         } else if (offload == FALSE) /* ... no offload to do */
3936                 return (0);
3937
3938         /*
3939          * Determine where frame payload starts.
3940          * Jump over vlan headers if already present,
3941          * helpful for QinQ too.
3942          */
3943         eh = mtod(mp, struct ether_vlan_header *);
3944         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3945                 etype = ntohs(eh->evl_proto);
3946                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3947         } else {
3948                 etype = ntohs(eh->evl_encap_proto);
3949                 ehdrlen = ETHER_HDR_LEN;
3950         }
3951
3952         /* Set the ether header length */
3953         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3954
3955         switch (etype) {
3956                 case ETHERTYPE_IP:
3957                         ip = (struct ip *)(mp->m_data + ehdrlen);
3958                         ip_hlen = ip->ip_hl << 2;
3959                         ipproto = ip->ip_p;
3960                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3961                         break;
3962                 case ETHERTYPE_IPV6:
3963                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3964                         ip_hlen = sizeof(struct ip6_hdr);
3965                         /* XXX-BZ this will go badly in case of ext hdrs. */
3966                         ipproto = ip6->ip6_nxt;
3967                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3968                         break;
3969                 default:
3970                         offload = FALSE;
3971                         break;
3972         }
3973
3974         vlan_macip_lens |= ip_hlen;
3975         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3976
3977         switch (ipproto) {
3978                 case IPPROTO_TCP:
3979 #if __FreeBSD_version >= 1000000
3980                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
3981 #else
3982                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3983 #endif
3984                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3985                         break;
3986                 case IPPROTO_UDP:
3987 #if __FreeBSD_version >= 1000000
3988                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
3989 #else
3990                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3991 #endif
3992                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3993                         break;
3994
3995 #if __FreeBSD_version >= 800000
3996                 case IPPROTO_SCTP:
3997 #if __FreeBSD_version >= 1000000
3998                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
3999 #else
4000                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
4001 #endif
4002                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4003                         break;
4004 #endif
4005                 default:
4006                         offload = FALSE;
4007                         break;
4008         }
4009
4010         if (offload) /* For the TX descriptor setup */
4011                 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4012
4013         /* 82575 needs the queue index added */
4014         if (adapter->hw.mac.type == e1000_82575)
4015                 mss_l4len_idx = txr->me << 4;
4016
4017         /* Now copy bits into descriptor */
4018         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
4019         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
4020         TXD->seqnum_seed = htole32(0);
4021         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
4022
4023         /* We've consumed the first desc, adjust counters */
4024         if (++ctxd == txr->num_desc)
4025                 ctxd = 0;
4026         txr->next_avail_desc = ctxd;
4027         --txr->tx_avail;
4028
4029         return (0);
4030 }
4031
4032 /**********************************************************************
4033  *
4034  *  Examine each tx_buffer in the used queue. If the hardware is done
4035  *  processing the packet then free associated resources. The
4036  *  tx_buffer is put back on the free queue.
4037  *
4038  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
4039  **********************************************************************/
4040 static bool
4041 igb_txeof(struct tx_ring *txr)
4042 {
4043         struct adapter          *adapter = txr->adapter;
4044 #ifdef DEV_NETMAP
4045         struct ifnet            *ifp = adapter->ifp;
4046 #endif /* DEV_NETMAP */
4047         u32                     work, processed = 0;
4048         int                     limit = adapter->tx_process_limit;
4049         struct igb_tx_buf       *buf;
4050         union e1000_adv_tx_desc *txd;
4051
4052         mtx_assert(&txr->tx_mtx, MA_OWNED);
4053
4054 #ifdef DEV_NETMAP
4055         if (netmap_tx_irq(ifp, txr->me))
4056                 return (FALSE);
4057 #endif /* DEV_NETMAP */
4058
4059         if (txr->tx_avail == txr->num_desc) {
4060                 txr->queue_status = IGB_QUEUE_IDLE;
4061                 return FALSE;
4062         }
4063
4064         /* Get work starting point */
4065         work = txr->next_to_clean;
4066         buf = &txr->tx_buffers[work];
4067         txd = &txr->tx_base[work];
4068         work -= txr->num_desc; /* The distance to ring end */
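        /*
         * Illustrative: work now runs from a negative offset up toward
         * zero, e.g. with num_desc = 1024 and next_to_clean = 1020 it
         * starts at -4; the "!work" tests below fire exactly when the
         * scan passes the last descriptor, rewinding work by num_desc
         * while buf/txd wrap to the start of the ring.  Adding num_desc
         * back at the end yields the new next_to_clean index.
         */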
4069         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4070             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4071         do {
4072                 union e1000_adv_tx_desc *eop = buf->eop;
4073                 if (eop == NULL) /* No work */
4074                         break;
4075
4076                 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4077                         break;  /* I/O not complete */
4078
4079                 if (buf->m_head) {
4080                         txr->bytes +=
4081                             buf->m_head->m_pkthdr.len;
4082                         bus_dmamap_sync(txr->txtag,
4083                             buf->map,
4084                             BUS_DMASYNC_POSTWRITE);
4085                         bus_dmamap_unload(txr->txtag,
4086                             buf->map);
4087                         m_freem(buf->m_head);
4088                         buf->m_head = NULL;
4089                 }
4090                 buf->eop = NULL;
4091                 ++txr->tx_avail;
4092
4093                 /* For a multi-segment packet, clean the whole range */
4094                 while (txd != eop) {
4095                         ++txd;
4096                         ++buf;
4097                         ++work;
4098                         /* wrap the ring? */
4099                         if (__predict_false(!work)) {
4100                                 work -= txr->num_desc;
4101                                 buf = txr->tx_buffers;
4102                                 txd = txr->tx_base;
4103                         }
4104                         if (buf->m_head) {
4105                                 txr->bytes +=
4106                                     buf->m_head->m_pkthdr.len;
4107                                 bus_dmamap_sync(txr->txtag,
4108                                     buf->map,
4109                                     BUS_DMASYNC_POSTWRITE);
4110                                 bus_dmamap_unload(txr->txtag,
4111                                     buf->map);
4112                                 m_freem(buf->m_head);
4113                                 buf->m_head = NULL;
4114                         }
4115                         ++txr->tx_avail;
4116                         buf->eop = NULL;
4117
4118                 }
4119                 ++txr->packets;
4120                 ++processed;
4121                 txr->watchdog_time = ticks;
4122
4123                 /* Try the next packet */
4124                 ++txd;
4125                 ++buf;
4126                 ++work;
4127                 /* reset with a wrap */
4128                 if (__predict_false(!work)) {
4129                         work -= txr->num_desc;
4130                         buf = txr->tx_buffers;
4131                         txd = txr->tx_base;
4132                 }
4133                 prefetch(txd);
4134         } while (__predict_true(--limit));
4135
4136         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4137             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4138
4139         work += txr->num_desc;
4140         txr->next_to_clean = work;
4141
4142         /*
4143         ** Watchdog calculation: we know there's work
4144         ** outstanding, or the first return above would
4145         ** have been taken; so nothing processed for too
4146         ** long indicates a hang.
4147         */
4148         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4149                 txr->queue_status |= IGB_QUEUE_HUNG;
4150
4151         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4152                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;       
4153
4154         if (txr->tx_avail == txr->num_desc) {
4155                 txr->queue_status = IGB_QUEUE_IDLE;
4156                 return (FALSE);
4157         }
4158
4159         return (TRUE);
4160 }
4161
4162 /*********************************************************************
4163  *
4164  *  Refresh mbuf buffers for RX descriptor rings
4165  *   - now keeps its own state so discards due to resource
4166  *     exhaustion are unnecessary; if an mbuf cannot be obtained
4167  *     it just returns, keeping its placeholder, so it can simply
4168  *     be called again later to retry.
4169  *
4170  **********************************************************************/
4171 static void
4172 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4173 {
4174         struct adapter          *adapter = rxr->adapter;
4175         bus_dma_segment_t       hseg[1];
4176         bus_dma_segment_t       pseg[1];
4177         struct igb_rx_buf       *rxbuf;
4178         struct mbuf             *mh, *mp;
4179         int                     i, j, nsegs, error;
4180         bool                    refreshed = FALSE;
4181
4182         i = j = rxr->next_to_refresh;
4183         /*
4184         ** Get one descriptor beyond
4185         ** our work mark to control
4186         ** the loop.
4187         */
4188         if (++j == adapter->num_rx_desc)
4189                 j = 0;
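             /*
             ** For example, with num_rx_desc = 8, next_to_refresh = 6
             ** and limit = 2: buffers 6, 7 and 0 are refreshed, with j
             ** running one slot ahead of i to stop the loop, and
             ** next_to_refresh is left at 1 for the RDT write below
             ** and for the next call.
             */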
4190
4191         while (j != limit) {
4192                 rxbuf = &rxr->rx_buffers[i];
4193                 /* No hdr mbuf used with header split off */
4194                 if (rxr->hdr_split == FALSE)
4195                         goto no_split;
4196                 if (rxbuf->m_head == NULL) {
4197                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4198                         if (mh == NULL)
4199                                 goto update;
4200                 } else
4201                         mh = rxbuf->m_head;
4202
4203                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4205                 mh->m_flags |= M_PKTHDR;
4206                 /* Get the memory mapping */
4207                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4208                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4209                 if (error != 0) {
4210                         printf("Refresh mbufs: hdr dmamap load"
4211                             " failure - %d\n", error);
4212                         m_free(mh);
4213                         rxbuf->m_head = NULL;
4214                         goto update;
4215                 }
4216                 rxbuf->m_head = mh;
4217                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4218                     BUS_DMASYNC_PREREAD);
4219                 rxr->rx_base[i].read.hdr_addr =
4220                     htole64(hseg[0].ds_addr);
4221 no_split:
4222                 if (rxbuf->m_pack == NULL) {
4223                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4224                             M_PKTHDR, adapter->rx_mbuf_sz);
4225                         if (mp == NULL)
4226                                 goto update;
4227                 } else
4228                         mp = rxbuf->m_pack;
4229
4230                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4231                 /* Get the memory mapping */
4232                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4233                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4234                 if (error != 0) {
4235                         printf("Refresh mbufs: payload dmamap load"
4236                             " failure - %d\n", error);
4237                         m_free(mp);
4238                         rxbuf->m_pack = NULL;
4239                         goto update;
4240                 }
4241                 rxbuf->m_pack = mp;
4242                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4243                     BUS_DMASYNC_PREREAD);
4244                 rxr->rx_base[i].read.pkt_addr =
4245                     htole64(pseg[0].ds_addr);
4246                 refreshed = TRUE; /* at least one buffer was refreshed */
4247
4248                 i = j; /* our next is precalculated */
4249                 rxr->next_to_refresh = i;
4250                 if (++j == adapter->num_rx_desc)
4251                         j = 0;
4252         }
4253 update:
4254         if (refreshed) /* update tail */
4255                 E1000_WRITE_REG(&adapter->hw,
4256                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4257         return;
4258 }
4259
4260
4261 /*********************************************************************
4262  *
4263  *  Allocate memory for rx_buffer structures. Since we use one
4264  *  rx_buffer per received packet, the maximum number of rx_buffer's
4265  *  that we'll need is equal to the number of receive descriptors
4266  *  that we've allocated.
4267  *
4268  **********************************************************************/
4269 static int
4270 igb_allocate_receive_buffers(struct rx_ring *rxr)
4271 {
4272         struct  adapter         *adapter = rxr->adapter;
4273         device_t                dev = adapter->dev;
4274         struct igb_rx_buf       *rxbuf;
4275         int                     i, bsize, error;
4276
4277         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4278         if (!(rxr->rx_buffers =
4279             (struct igb_rx_buf *) malloc(bsize,
4280             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4281                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4282                 error = ENOMEM;
4283                 goto fail;
4284         }
4285
4286         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4287                                    1, 0,                /* alignment, bounds */
4288                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4289                                    BUS_SPACE_MAXADDR,   /* highaddr */
4290                                    NULL, NULL,          /* filter, filterarg */
4291                                    MSIZE,               /* maxsize */
4292                                    1,                   /* nsegments */
4293                                    MSIZE,               /* maxsegsize */
4294                                    0,                   /* flags */
4295                                    NULL,                /* lockfunc */
4296                                    NULL,                /* lockfuncarg */
4297                                    &rxr->htag))) {
4298                 device_printf(dev, "Unable to create RX DMA tag\n");
4299                 goto fail;
4300         }
4301
4302         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4303                                    1, 0,                /* alignment, bounds */
4304                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4305                                    BUS_SPACE_MAXADDR,   /* highaddr */
4306                                    NULL, NULL,          /* filter, filterarg */
4307                                    MJUM9BYTES,          /* maxsize */
4308                                    1,                   /* nsegments */
4309                                    MJUM9BYTES,          /* maxsegsize */
4310                                    0,                   /* flags */
4311                                    NULL,                /* lockfunc */
4312                                    NULL,                /* lockfuncarg */
4313                                    &rxr->ptag))) {
4314                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4315                 goto fail;
4316         }
4317
4318         for (i = 0; i < adapter->num_rx_desc; i++) {
4319                 rxbuf = &rxr->rx_buffers[i];
4320                 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4321                 if (error) {
4322                         device_printf(dev,
4323                             "Unable to create RX head DMA maps\n");
4324                         goto fail;
4325                 }
4326                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4327                 if (error) {
4328                         device_printf(dev,
4329                             "Unable to create RX packet DMA maps\n");
4330                         goto fail;
4331                 }
4332         }
4333
4334         return (0);
4335
4336 fail:
4337         /* Frees all, but can handle partial completion */
4338         igb_free_receive_structures(adapter);
4339         return (error);
4340 }
4341
4342
4343 static void
4344 igb_free_receive_ring(struct rx_ring *rxr)
4345 {
4346         struct  adapter         *adapter = rxr->adapter;
4347         struct igb_rx_buf       *rxbuf;
4348
4349
4350         for (int i = 0; i < adapter->num_rx_desc; i++) {
4351                 rxbuf = &rxr->rx_buffers[i];
4352                 if (rxbuf->m_head != NULL) {
4353                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4354                             BUS_DMASYNC_POSTREAD);
4355                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4356                         rxbuf->m_head->m_flags |= M_PKTHDR;
4357                         m_freem(rxbuf->m_head);
4358                 }
4359                 if (rxbuf->m_pack != NULL) {
4360                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4361                             BUS_DMASYNC_POSTREAD);
4362                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4363                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4364                         m_freem(rxbuf->m_pack);
4365                 }
4366                 rxbuf->m_head = NULL;
4367                 rxbuf->m_pack = NULL;
4368         }
4369 }
4370
4371
4372 /*********************************************************************
4373  *
4374  *  Initialize a receive ring and its buffers.
4375  *
4376  **********************************************************************/
4377 static int
4378 igb_setup_receive_ring(struct rx_ring *rxr)
4379 {
4380         struct  adapter         *adapter;
4381         struct  ifnet           *ifp;
4382         device_t                dev;
4383         struct igb_rx_buf       *rxbuf;
4384         bus_dma_segment_t       pseg[1], hseg[1];
4385         struct lro_ctrl         *lro = &rxr->lro;
4386         int                     rsize, nsegs, error = 0;
4387 #ifdef DEV_NETMAP
4388         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4389         struct netmap_slot *slot;
4390 #endif /* DEV_NETMAP */
4391
4392         adapter = rxr->adapter;
4393         dev = adapter->dev;
4394         ifp = adapter->ifp;
4395
4396         /* Clear the ring contents */
4397         IGB_RX_LOCK(rxr);
4398 #ifdef DEV_NETMAP
4399         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4400 #endif /* DEV_NETMAP */
4401         rsize = roundup2(adapter->num_rx_desc *
4402             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4403         bzero((void *)rxr->rx_base, rsize);
4404
4405         /*
4406         ** Free current RX buffer structures and their mbufs
4407         */
4408         igb_free_receive_ring(rxr);
4409
4410         /* Configure for header split? */
4411         if (igb_header_split)
4412                 rxr->hdr_split = TRUE;
4413
4414         /* Now replenish the ring mbufs */
4415         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4416                 struct mbuf     *mh, *mp;
4417
4418                 rxbuf = &rxr->rx_buffers[j];
4419 #ifdef DEV_NETMAP
4420                 if (slot) {
4421                         /* slot sj is mapped to the j-th NIC-ring entry */
4422                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4423                         uint64_t paddr;
4424                         void *addr;
4425
4426                         addr = PNMB(na, slot + sj, &paddr);
4427                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4428                         /* Update descriptor */
4429                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4430                         continue;
4431                 }
4432 #endif /* DEV_NETMAP */
4433                 if (rxr->hdr_split == FALSE)
4434                         goto skip_head;
4435
4436                 /* First the header */
4437                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4438                 if (rxbuf->m_head == NULL) {
4439                         error = ENOBUFS;
4440                         goto fail;
4441                 }
4442                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4443                 mh = rxbuf->m_head;
4444                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4445                 mh->m_flags |= M_PKTHDR;
4446                 /* Get the memory mapping */
4447                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4448                     rxbuf->hmap, rxbuf->m_head, hseg,
4449                     &nsegs, BUS_DMA_NOWAIT);
4450                 if (error != 0) /* Nothing elegant to do here */
4451                         goto fail;
4452                 bus_dmamap_sync(rxr->htag,
4453                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4454                 /* Update descriptor */
4455                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4456
4457 skip_head:
4458                 /* Now the payload cluster */
4459                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4460                     M_PKTHDR, adapter->rx_mbuf_sz);
4461                 if (rxbuf->m_pack == NULL) {
4462                         error = ENOBUFS;
4463                         goto fail;
4464                 }
4465                 mp = rxbuf->m_pack;
4466                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4467                 /* Get the memory mapping */
4468                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4469                     rxbuf->pmap, mp, pseg,
4470                     &nsegs, BUS_DMA_NOWAIT);
4471                 if (error != 0)
4472                         goto fail;
4473                 bus_dmamap_sync(rxr->ptag,
4474                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4475                 /* Update descriptor */
4476                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4477         }
4478
4479         /* Setup our descriptor indices */
4480         rxr->next_to_check = 0;
4481         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4482         rxr->lro_enabled = FALSE;
4483         rxr->rx_split_packets = 0;
4484         rxr->rx_bytes = 0;
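             /*
             ** next_to_refresh starts at the last slot, which is kept
             ** back from the hardware: the RDT write in
             ** igb_initialize_receive_units() uses this value, so the
             ** ring never appears completely full to the device.
             */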
4485
4486         rxr->fmp = NULL;
4487         rxr->lmp = NULL;
4488
4489         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4490             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4491
4492         /*
4493         ** Now set up the LRO interface; we only
4494         ** do header split when LRO is enabled,
4495         ** since header split alone is rarely
4496         ** worthwhile.
4497         */
4498         if (ifp->if_capenable & IFCAP_LRO) {
4499                 error = tcp_lro_init(lro);
4500                 if (error) {
4501                         device_printf(dev, "LRO Initialization failed!\n");
4502                         goto fail;
4503                 }
4504                 INIT_DEBUGOUT("RX LRO Initialized\n");
4505                 rxr->lro_enabled = TRUE;
4506                 lro->ifp = adapter->ifp;
4507         }
4508
4509         IGB_RX_UNLOCK(rxr);
4510         return (0);
4511
4512 fail:
4513         igb_free_receive_ring(rxr);
4514         IGB_RX_UNLOCK(rxr);
4515         return (error);
4516 }
4517
4518
4519 /*********************************************************************
4520  *
4521  *  Initialize all receive rings.
4522  *
4523  **********************************************************************/
4524 static int
4525 igb_setup_receive_structures(struct adapter *adapter)
4526 {
4527         struct rx_ring *rxr = adapter->rx_rings;
4528         int i;
4529
4530         for (i = 0; i < adapter->num_queues; i++, rxr++)
4531                 if (igb_setup_receive_ring(rxr))
4532                         goto fail;
4533
4534         return (0);
4535 fail:
4536         /*
4537          * Free RX buffers allocated so far; we only handle
4538          * the rings that completed, since the failing case
4539          * has cleaned up after itself. 'i' is the endpoint.
4540          */
4541         for (int j = 0; j < i; ++j) {
4542                 rxr = &adapter->rx_rings[j];
4543                 IGB_RX_LOCK(rxr);
4544                 igb_free_receive_ring(rxr);
4545                 IGB_RX_UNLOCK(rxr);
4546         }
4547
4548         return (ENOBUFS);
4549 }
4550
4551 /*
4552  * Initialise the RSS mapping for NICs that support multiple transmit/
4553  * receive rings.
4554  */
4555 static void
4556 igb_initialise_rss_mapping(struct adapter *adapter)
4557 {
4558         struct e1000_hw *hw = &adapter->hw;
4559         int i;
4560         int queue_id;
4561         u32 reta;
4562         u32 rss_key[10], mrqc, shift = 0;
4563
4564         /* the 82575 apparently wants the queue number in the upper bits of each RETA byte, hence the shift */
4565         if (adapter->hw.mac.type == e1000_82575)
4566                 shift = 6;
4567
4568         /*
4569          * The redirection table controls which destination
4570          * queue each bucket redirects traffic to.
4571          * Each DWORD represents four queues, with the LSB
4572          * being the first queue in the DWORD.
4573          *
4574          * This just allocates buckets to queues using round-robin
4575          * allocation.
4576          *
4577          * NOTE: It Just Happens to line up with the default
4578          * RSS allocation method.
4579          */
4580
4581         /* Fill the 128-entry redirection table, four entries per register write */
4582         reta = 0;
4583         for (i = 0; i < 128; i++) {
4584 #ifdef  RSS
4585                 queue_id = rss_get_indirection_to_bucket(i);
4586                 /*
4587                  * If we have more queues than buckets, we'll
4588                  * end up mapping buckets to a subset of the
4589                  * queues.
4590                  *
4591                  * If we have more buckets than queues, we'll
4592                  * end up instead assigning multiple buckets
4593                  * to queues.
4594                  *
4595                  * Both are suboptimal, but we need to handle
4596                  * the case so we don't go out of bounds
4597                  * indexing arrays and such.
4598                  */
4599                 queue_id = queue_id % adapter->num_queues;
4600 #else
4601                 queue_id = (i % adapter->num_queues);
4602 #endif
4603                 /* Adjust if required */
4604                 queue_id = queue_id << shift;
4605
4606                 /*
4607                  * The low 8 bits are for hash value (n+0);
4608                  * The next 8 bits are for hash value (n+1), etc.
4609                  */
4610                 reta = reta >> 8;
4611                 reta = reta | (((uint32_t)queue_id) << 24);
4612                 if ((i & 3) == 3) {
4613                         E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4614                         reta = 0;
4615                 }
4616         }
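             /*
             ** For example, with four queues and shift = 0 the first
             ** DWORD builds up as 0x00000000, 0x01000000, 0x02010000,
             ** 0x03020100, so RETA(0) = 0x03020100 maps hash buckets
             ** 0-3 to queues 0-3, LSB first.
             */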
4617
4618         /* Now fill in hash table */
4619
4620         /*
4621          * MRQC: Multiple Receive Queues Command
4622          * Set queuing to RSS control, number depends on the device.
4623          */
4624         mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4625
4626 #ifdef  RSS
4627         /* XXX ew typecasting */
4628         rss_getkey((uint8_t *) &rss_key);
4629 #else
4630         arc4rand(&rss_key, sizeof(rss_key), 0);
4631 #endif
4632         for (i = 0; i < 10; i++)
4633                 E1000_WRITE_REG_ARRAY(hw,
4634                     E1000_RSSRK(0), i, rss_key[i]);
4635
4636         /*
4637          * Configure the RSS fields to hash upon.
4638          */
4639         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4640             E1000_MRQC_RSS_FIELD_IPV4_TCP);
4641         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4642             E1000_MRQC_RSS_FIELD_IPV6_TCP);
4643         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4644             E1000_MRQC_RSS_FIELD_IPV6_UDP);
4645         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4646             E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4647
4648         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4649 }
4650
4651 /*********************************************************************
4652  *
4653  *  Enable receive unit.
4654  *
4655  **********************************************************************/
4656 static void
4657 igb_initialize_receive_units(struct adapter *adapter)
4658 {
4659         struct rx_ring  *rxr = adapter->rx_rings;
4660         struct ifnet    *ifp = adapter->ifp;
4661         struct e1000_hw *hw = &adapter->hw;
4662         u32             rctl, rxcsum, psize, srrctl = 0;
4663
4664         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4665
4666         /*
4667          * Make sure receives are disabled while setting
4668          * up the descriptor ring
4669          */
4670         rctl = E1000_READ_REG(hw, E1000_RCTL);
4671         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4672
4673         /*
4674         ** Set up for header split
4675         */
4676         if (igb_header_split) {
4677                 /* Use a standard mbuf for the header */
4678                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4679                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4680         } else
4681                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4682
4683         /*
4684         ** Set up for jumbo frames
4685         */
4686         if (ifp->if_mtu > ETHERMTU) {
4687                 rctl |= E1000_RCTL_LPE;
4688                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4689                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4690                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4691                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4692                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4693                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4694                 }
4695                 /* Set maximum packet len */
4696                 psize = adapter->max_frame_size;
4697                 /* are we on a vlan? */
4698                 if (adapter->ifp->if_vlantrunk != NULL)
4699                         psize += VLAN_TAG_SIZE;
4700                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4701         } else {
4702                 rctl &= ~E1000_RCTL_LPE;
4703                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4704                 rctl |= E1000_RCTL_SZ_2048;
4705         }
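             /*
             ** The SRRCTL buffer size field is in 1 KB units
             ** (E1000_SRRCTL_BSIZEPKT_SHIFT is 10), so 4096 >> 10 = 4
             ** above requests a 4 KB buffer and 2048 >> 10 = 2 a
             ** 2 KB one.
             */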
4706
4707         /*
4708          * If TX flow control is disabled and there's >1 queue defined,
4709          * enable DROP.
4710          *
4711          * This drops frames rather than hanging the RX MAC for all queues.
4712          */
4713         if ((adapter->num_queues > 1) &&
4714             (adapter->fc == e1000_fc_none ||
4715              adapter->fc == e1000_fc_rx_pause)) {
4716                 srrctl |= E1000_SRRCTL_DROP_EN;
4717         }
4718
4719         /* Setup the Base and Length of the Rx Descriptor Rings */
4720         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4721                 u64 bus_addr = rxr->rxdma.dma_paddr;
4722                 u32 rxdctl;
4723
4724                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4725                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4726                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4727                     (uint32_t)(bus_addr >> 32));
4728                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4729                     (uint32_t)bus_addr);
4730                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4731                 /* Enable this Queue */
4732                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4733                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4734                 rxdctl &= 0xFFF00000;
4735                 rxdctl |= IGB_RX_PTHRESH;
4736                 rxdctl |= IGB_RX_HTHRESH << 8;
4737                 rxdctl |= IGB_RX_WTHRESH << 16;
4738                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4739         }
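             /*
             ** The three RXDCTL thresholds occupy one byte each in the
             ** low three bytes of the register, hence the << 8 and
             ** << 16 above; the 0xFFF00000 mask clears the old
             ** threshold values while preserving the upper control
             ** bits, including the QUEUE_ENABLE bit just set.
             */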
4740
4741         /*
4742         ** Setup for RX MultiQueue
4743         */
4744         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4745         if (adapter->num_queues > 1) {
4746
4747                 /* rss setup */
4748                 igb_initialise_rss_mapping(adapter);
4749
4750                 /*
4751                 ** NOTE: Receive Full-Packet Checksum Offload
4752                 ** is mutually exclusive with Multiqueue; this
4753                 ** does not affect the usual TCP/IP header
4754                 ** checksums, which still work.
4755                 */
4756                 rxcsum |= E1000_RXCSUM_PCSD;
4757 #if __FreeBSD_version >= 800000
4758                 /* For SCTP Offload */
4759                 if ((hw->mac.type != e1000_82575) &&
4760                     (ifp->if_capenable & IFCAP_RXCSUM))
4761                         rxcsum |= E1000_RXCSUM_CRCOFL;
4762 #endif
4763         } else {
4764                 /* Non RSS setup */
4765                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4766                         rxcsum |= E1000_RXCSUM_IPPCSE;
4767 #if __FreeBSD_version >= 800000
4768                         if (adapter->hw.mac.type != e1000_82575)
4769                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4770 #endif
4771                 } else
4772                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4773         }
4774         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4775
4776         /* Setup the Receive Control Register */
4777         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4778         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4779                    E1000_RCTL_RDMTS_HALF |
4780                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4781         /* Strip CRC bytes. */
4782         rctl |= E1000_RCTL_SECRC;
4783         /* Make sure VLAN Filters are off */
4784         rctl &= ~E1000_RCTL_VFE;
4785         /* Don't store bad packets */
4786         rctl &= ~E1000_RCTL_SBP;
4787
4788         /* Enable Receives */
4789         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4790
4791         /*
4792          * Setup the HW Rx Head and Tail Descriptor Pointers
4793          *   - needs to be after enable
4794          */
4795         for (int i = 0; i < adapter->num_queues; i++) {
4796                 rxr = &adapter->rx_rings[i];
4797                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4798 #ifdef DEV_NETMAP
4799                 /*
4800                  * an init() while a netmap client is active must
4801                  * preserve the rx buffers passed to userspace.
4802                  * In this driver it means we adjust RDT to
4803                  * something different from next_to_refresh
4804                  * (which is not used in netmap mode).
4805                  */
4806                 if (ifp->if_capenable & IFCAP_NETMAP) {
4807                         struct netmap_adapter *na = NA(adapter->ifp);
4808                         struct netmap_kring *kring = &na->rx_rings[i];
4809                         int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4810
4811                         if (t >= adapter->num_rx_desc)
4812                                 t -= adapter->num_rx_desc;
4813                         else if (t < 0)
4814                                 t += adapter->num_rx_desc;
4815                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4816                 } else
4817 #endif /* DEV_NETMAP */
4818                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4819         }
4820         return;
4821 }
4822
4823 /*********************************************************************
4824  *
4825  *  Free receive rings.
4826  *
4827  **********************************************************************/
4828 static void
4829 igb_free_receive_structures(struct adapter *adapter)
4830 {
4831         struct rx_ring *rxr = adapter->rx_rings;
4832
4833         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4834                 struct lro_ctrl *lro = &rxr->lro;
4835                 igb_free_receive_buffers(rxr);
4836                 tcp_lro_free(lro);
4837                 igb_dma_free(adapter, &rxr->rxdma);
4838         }
4839
4840         free(adapter->rx_rings, M_DEVBUF);
4841 }
4842
4843 /*********************************************************************
4844  *
4845  *  Free receive ring data structures.
4846  *
4847  **********************************************************************/
4848 static void
4849 igb_free_receive_buffers(struct rx_ring *rxr)
4850 {
4851         struct adapter          *adapter = rxr->adapter;
4852         struct igb_rx_buf       *rxbuf;
4853         int i;
4854
4855         INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4856
4857         /* Cleanup any existing buffers */
4858         if (rxr->rx_buffers != NULL) {
4859                 for (i = 0; i < adapter->num_rx_desc; i++) {
4860                         rxbuf = &rxr->rx_buffers[i];
4861                         if (rxbuf->m_head != NULL) {
4862                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4863                                     BUS_DMASYNC_POSTREAD);
4864                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4865                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4866                                 m_freem(rxbuf->m_head);
4867                         }
4868                         if (rxbuf->m_pack != NULL) {
4869                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4870                                     BUS_DMASYNC_POSTREAD);
4871                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4872                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4873                                 m_freem(rxbuf->m_pack);
4874                         }
4875                         rxbuf->m_head = NULL;
4876                         rxbuf->m_pack = NULL;
4877                         if (rxbuf->hmap != NULL) {
4878                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4879                                 rxbuf->hmap = NULL;
4880                         }
4881                         if (rxbuf->pmap != NULL) {
4882                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4883                                 rxbuf->pmap = NULL;
4884                         }
4885                 }
4886                 if (rxr->rx_buffers != NULL) {
4887                         free(rxr->rx_buffers, M_DEVBUF);
4888                         rxr->rx_buffers = NULL;
4889                 }
4890         }
4891
4892         if (rxr->htag != NULL) {
4893                 bus_dma_tag_destroy(rxr->htag);
4894                 rxr->htag = NULL;
4895         }
4896         if (rxr->ptag != NULL) {
4897                 bus_dma_tag_destroy(rxr->ptag);
4898                 rxr->ptag = NULL;
4899         }
4900 }
4901
4902 static __inline void
4903 igb_rx_discard(struct rx_ring *rxr, int i)
4904 {
4905         struct igb_rx_buf       *rbuf;
4906
4907         rbuf = &rxr->rx_buffers[i];
4908
4909         /* Partially received? Free the chain */
4910         if (rxr->fmp != NULL) {
4911                 rxr->fmp->m_flags |= M_PKTHDR;
4912                 m_freem(rxr->fmp);
4913                 rxr->fmp = NULL;
4914                 rxr->lmp = NULL;
4915         }
4916
4917         /*
4918         ** With advanced descriptors the writeback
4919         ** clobbers the buffer addrs, so it's easier
4920         ** to just free the existing mbufs and take
4921         ** the normal refresh path to get new buffers
4922         ** and mapping.
4923         */
4924         if (rbuf->m_head) {
4925                 m_free(rbuf->m_head);
4926                 rbuf->m_head = NULL;
4927                 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4928         }
4929
4930         if (rbuf->m_pack) {
4931                 m_free(rbuf->m_pack);
4932                 rbuf->m_pack = NULL;
4933                 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4934         }
4935
4936         return;
4937 }
4938
4939 static __inline void
4940 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4941 {
4942
4943         /*
4944          * At the moment LRO is only used for IPv4/TCP packets whose
4945          * TCP checksum has been verified by hardware and which carry
4946          * no VLAN tag in the ethernet header.
4947          */
4948         if (rxr->lro_enabled &&
4949             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4950             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4951             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4952             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4953             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4954             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4955                 /*
4956                  * Send to the stack if:
4957                  *  - LRO not enabled, or
4958                  *  - no LRO resources, or
4959                  *  - lro enqueue fails
4960                  */
4961                 if (rxr->lro.lro_cnt != 0)
4962                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4963                                 return;
4964         }
4965         IGB_RX_UNLOCK(rxr);
4966         (*ifp->if_input)(ifp, m);
4967         IGB_RX_LOCK(rxr);
4968 }
4969
4970 /*********************************************************************
4971  *
4972  *  This routine executes in interrupt context. It replenishes
4973  *  the mbufs in the descriptors and passes data that has been
4974  *  DMA'd into host memory up to the upper layers.
4975  *
4976  *  We loop at most count times if count is > 0, or until done if
4977  *  count < 0.
4978  *
4979  *  Return TRUE if more to clean, FALSE otherwise
4980  *********************************************************************/
4981 static bool
4982 igb_rxeof(struct igb_queue *que, int count, int *done)
4983 {
4984         struct adapter          *adapter = que->adapter;
4985         struct rx_ring          *rxr = que->rxr;
4986         struct ifnet            *ifp = adapter->ifp;
4987         struct lro_ctrl         *lro = &rxr->lro;
4988         int                     i, processed = 0, rxdone = 0;
4989         u32                     ptype, staterr = 0;
4990         union e1000_adv_rx_desc *cur;
4991
4992         IGB_RX_LOCK(rxr);
4993         /* Sync the ring. */
4994         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4995             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4996
4997 #ifdef DEV_NETMAP
4998         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4999                 IGB_RX_UNLOCK(rxr);
5000                 return (FALSE);
5001         }
5002 #endif /* DEV_NETMAP */
5003
5004         /* Main clean loop */
5005         for (i = rxr->next_to_check; count != 0;) {
5006                 struct mbuf             *sendmp, *mh, *mp;
5007                 struct igb_rx_buf       *rxbuf;
5008                 u16                     hlen, plen, hdr, vtag, pkt_info;
5009                 bool                    eop = FALSE;
5010  
5011                 cur = &rxr->rx_base[i];
5012                 staterr = le32toh(cur->wb.upper.status_error);
5013                 if ((staterr & E1000_RXD_STAT_DD) == 0)
5014                         break;
5015                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
5016                         break;
5017                 count--;
5018                 sendmp = mh = mp = NULL;
5019                 cur->wb.upper.status_error = 0;
5020                 rxbuf = &rxr->rx_buffers[i];
5021                 plen = le16toh(cur->wb.upper.length);
5022                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
5023                 if (((adapter->hw.mac.type == e1000_i350) ||
5024                     (adapter->hw.mac.type == e1000_i354)) &&
5025                     (staterr & E1000_RXDEXT_STATERR_LB))
5026                         vtag = be16toh(cur->wb.upper.vlan);
5027                 else
5028                         vtag = le16toh(cur->wb.upper.vlan);
5029                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
5030                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
5031                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
5032
5033                 /*
5034                  * Free the frame (all segments) if we're at EOP and
5035                  * it's an error.
5036                  *
5037                  * The datasheet states that EOP + status is only valid for
5038                  * the final segment in a multi-segment frame.
5039                  */
5040                 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
5041                         adapter->dropped_pkts++;
5042                         ++rxr->rx_discarded;
5043                         igb_rx_discard(rxr, i);
5044                         goto next_desc;
5045                 }
5046
5047                 /*
5048                 ** The way the hardware is configured to
5049                 ** split, it will ONLY use the header buffer
5050                 ** when header split is enabled, otherwise we
5051                 ** get normal behavior, i.e., both header and
5052                 ** payload are DMA'd into the payload buffer.
5053                 **
5054                 ** The fmp test is to catch the case where a
5055                 ** packet spans multiple descriptors, in that
5056                 ** case only the first header is valid.
5057                 */
5058                 if (rxr->hdr_split && rxr->fmp == NULL) {
5059                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5060                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5061                             E1000_RXDADV_HDRBUFLEN_SHIFT;
5062                         if (hlen > IGB_HDR_BUF)
5063                                 hlen = IGB_HDR_BUF;
5064                         mh = rxr->rx_buffers[i].m_head;
5065                         mh->m_len = hlen;
5066                         /* clear buf pointer for refresh */
5067                         rxbuf->m_head = NULL;
5068                         /*
5069                         ** Get the payload length, this
5070                         ** could be zero if it's a small
5071                         ** packet.
5072                         */
5073                         if (plen > 0) {
5074                                 mp = rxr->rx_buffers[i].m_pack;
5075                                 mp->m_len = plen;
5076                                 mh->m_next = mp;
5077                                 /* clear buf pointer */
5078                                 rxbuf->m_pack = NULL;
5079                                 rxr->rx_split_packets++;
5080                         }
5081                 } else {
5082                         /*
5083                         ** Either no header split, or a
5084                         ** secondary piece of a fragmented
5085                         ** split packet.
5086                         */
5087                         mh = rxr->rx_buffers[i].m_pack;
5088                         mh->m_len = plen;
5089                         /* clear buf info for refresh */
5090                         rxbuf->m_pack = NULL;
5091                 }
5092                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5093
5094                 ++processed; /* So we know when to refresh */
5095
5096                 /* Initial frame - setup */
5097                 if (rxr->fmp == NULL) {
5098                         mh->m_pkthdr.len = mh->m_len;
5099                         /* Save the head of the chain */
5100                         rxr->fmp = mh;
5101                         rxr->lmp = mh;
5102                         if (mp != NULL) {
5103                                 /* Add payload if split */
5104                                 mh->m_pkthdr.len += mp->m_len;
5105                                 rxr->lmp = mh->m_next;
5106                         }
5107                 } else {
5108                         /* Chain mbuf's together */
5109                         rxr->lmp->m_next = mh;
5110                         rxr->lmp = rxr->lmp->m_next;
5111                         rxr->fmp->m_pkthdr.len += mh->m_len;
5112                 }
5113
5114                 if (eop) {
5115                         rxr->fmp->m_pkthdr.rcvif = ifp;
5116                         rxr->rx_packets++;
5117                         /* capture data for AIM */
5118                         rxr->packets++;
5119                         rxr->bytes += rxr->fmp->m_pkthdr.len;
5120                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5121
5122                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5123                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
5124
5125                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5126                             (staterr & E1000_RXD_STAT_VP) != 0) {
5127                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5128                                 rxr->fmp->m_flags |= M_VLANTAG;
5129                         }
5130
5131                         /*
5132                          * In case of multiqueue, we have RXCSUM.PCSD bit set
5133                          * and never cleared. This means we have RSS hash
5134                          * available to be used.
5135                          */
5136                         if (adapter->num_queues > 1) {
5137                                 rxr->fmp->m_pkthdr.flowid = 
5138                                     le32toh(cur->wb.lower.hi_dword.rss);
5139                                 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5140                                         case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5141                                                 M_HASHTYPE_SET(rxr->fmp,
5142                                                     M_HASHTYPE_RSS_TCP_IPV4);
5143                                         break;
5144                                         case E1000_RXDADV_RSSTYPE_IPV4:
5145                                                 M_HASHTYPE_SET(rxr->fmp,
5146                                                     M_HASHTYPE_RSS_IPV4);
5147                                         break;
5148                                         case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5149                                                 M_HASHTYPE_SET(rxr->fmp,
5150                                                     M_HASHTYPE_RSS_TCP_IPV6);
5151                                         break;
5152                                         case E1000_RXDADV_RSSTYPE_IPV6_EX:
5153                                                 M_HASHTYPE_SET(rxr->fmp,
5154                                                     M_HASHTYPE_RSS_IPV6_EX);
5155                                         break;
5156                                         case E1000_RXDADV_RSSTYPE_IPV6:
5157                                                 M_HASHTYPE_SET(rxr->fmp,
5158                                                     M_HASHTYPE_RSS_IPV6);
5159                                         break;
5160                                         case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5161                                                 M_HASHTYPE_SET(rxr->fmp,
5162                                                     M_HASHTYPE_RSS_TCP_IPV6_EX);
5163                                         break;
5164                                         default:
5165                                                 /* unknown hash type */
5166                                                 M_HASHTYPE_SET(rxr->fmp,
5167                                                     M_HASHTYPE_OPAQUE_HASH);
5168                                 }
5169                         } else {
5170 #ifndef IGB_LEGACY_TX
5171                                 rxr->fmp->m_pkthdr.flowid = que->msix;
5172                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5173 #endif
5174                         }
5175                         sendmp = rxr->fmp;
5176                         /* Make sure to set M_PKTHDR. */
5177                         sendmp->m_flags |= M_PKTHDR;
5178                         rxr->fmp = NULL;
5179                         rxr->lmp = NULL;
5180                 }
5181
5182 next_desc:
5183                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5184                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5185
5186                 /* Advance our pointers to the next descriptor. */
5187                 if (++i == adapter->num_rx_desc)
5188                         i = 0;
5189                 /*
5190                 ** Send to the stack or LRO
5191                 */
5192                 if (sendmp != NULL) {
5193                         rxr->next_to_check = i;
5194                         igb_rx_input(rxr, ifp, sendmp, ptype);
5195                         i = rxr->next_to_check;
5196                         rxdone++;
5197                 }
5198
5199                 /* Every 8 descriptors we go to refresh mbufs */
5200                 if (processed == 8) {
5201                         igb_refresh_mbufs(rxr, i);
5202                         processed = 0;
5203                 }
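                     /*
                     ** Batching the refresh means the RDT tail write at
                     ** the end of igb_refresh_mbufs() happens roughly
                     ** once per eight descriptors instead of per packet.
                     */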
5204         }
5205
5206         /* Catch any remainders */
5207         if (igb_rx_unrefreshed(rxr))
5208                 igb_refresh_mbufs(rxr, i);
5209
5210         rxr->next_to_check = i;
5211
5212         /*
5213          * Flush any outstanding LRO work
5214          */
5215         tcp_lro_flush_all(lro);
5216
5217         if (done != NULL)
5218                 *done += rxdone;
5219
5220         IGB_RX_UNLOCK(rxr);
5221         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5222 }
5223
5224 /*********************************************************************
5225  *
5226  *  Verify that the hardware indicated that the checksum is valid.
5227  *  Inform the stack of the checksum status so that it
5228  *  doesn't spend time re-verifying the checksum in software.
5229  *
5230  *********************************************************************/
5231 static void
5232 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5233 {
5234         u16 status = (u16)staterr;
5235         u8  errors = (u8) (staterr >> 24);
5236         int sctp;
5237
5238         /* Ignore Checksum bit is set */
5239         if (status & E1000_RXD_STAT_IXSM) {
5240                 mp->m_pkthdr.csum_flags = 0;
5241                 return;
5242         }
5243
5244         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5245             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5246                 sctp = 1;
5247         else
5248                 sctp = 0;
5249         if (status & E1000_RXD_STAT_IPCS) {
5250                 /* Did it pass? */
5251                 if (!(errors & E1000_RXD_ERR_IPE)) {
5252                         /* IP Checksum Good */
5253                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5254                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5255                 } else
5256                         mp->m_pkthdr.csum_flags = 0;
5257         }
5258
5259         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5260                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5261 #if __FreeBSD_version >= 800000
5262                 if (sctp) /* reassign */
5263                         type = CSUM_SCTP_VALID;
5264 #endif
5265                 /* Did it pass? */
5266                 if (!(errors & E1000_RXD_ERR_TCPE)) {
5267                         mp->m_pkthdr.csum_flags |= type;
5268                         if (sctp == 0)
5269                                 mp->m_pkthdr.csum_data = htons(0xffff);
5270                 }
5271         }
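             /*
             ** CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data set to
             ** 0xffff tells the stack the full pseudo-header checksum
             ** has already been verified, so no software check is done.
             */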
5272         return;
5273 }
5274
5275 /*
5276  * This routine is run via a vlan
5277  * config EVENT
5278  */
5279 static void
5280 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5281 {
5282         struct adapter  *adapter = ifp->if_softc;
5283         u32             index, bit;
5284
5285         if (ifp->if_softc != arg)    /* Not our event */
5286                 return;
5287
5288         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5289                 return;
5290
5291         IGB_CORE_LOCK(adapter);
5292         index = (vtag >> 5) & 0x7F;
5293         bit = vtag & 0x1F;
5294         adapter->shadow_vfta[index] |= (1 << bit);
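             /*
             ** For example, vtag 1000 gives index = (1000 >> 5) & 0x7F
             ** = 31 and bit = 1000 & 0x1F = 8, i.e. bit 8 of
             ** shadow_vfta[31].
             */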
5295         ++adapter->num_vlans;
5296         /* Change hw filter setting */
5297         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5298                 igb_setup_vlan_hw_support(adapter);
5299         IGB_CORE_UNLOCK(adapter);
5300 }
5301
5302 /*
5303  * This routine is run via a vlan
5304  * unconfig EVENT
5305  */
5306 static void
5307 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5308 {
5309         struct adapter  *adapter = ifp->if_softc;
5310         u32             index, bit;
5311
5312         if (ifp->if_softc != arg)    /* Not our event */
5313                 return;
5314
5315         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5316                 return;
5317
5318         IGB_CORE_LOCK(adapter);
5319         index = (vtag >> 5) & 0x7F;
5320         bit = vtag & 0x1F;
5321         adapter->shadow_vfta[index] &= ~(1 << bit);
5322         --adapter->num_vlans;
5323         /* Change hw filter setting */
5324         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5325                 igb_setup_vlan_hw_support(adapter);
5326         IGB_CORE_UNLOCK(adapter);
5327 }
5328
5329 static void
5330 igb_setup_vlan_hw_support(struct adapter *adapter)
5331 {
5332         struct e1000_hw *hw = &adapter->hw;
5333         struct ifnet    *ifp = adapter->ifp;
5334         u32             reg;
5335
5336         if (adapter->vf_ifp) {
5337                 e1000_rlpml_set_vf(hw,
5338                     adapter->max_frame_size + VLAN_TAG_SIZE);
5339                 return;
5340         }
5341
5342         reg = E1000_READ_REG(hw, E1000_CTRL);
5343         reg |= E1000_CTRL_VME;
5344         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5345
5346         /* Enable the Filter Table */
5347         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5348                 reg = E1000_READ_REG(hw, E1000_RCTL);
5349                 reg &= ~E1000_RCTL_CFIEN;
5350                 reg |= E1000_RCTL_VFE;
5351                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5352         }
5353
5354         /* Update the frame size */
5355         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5356             adapter->max_frame_size + VLAN_TAG_SIZE);
5357
5358         /* Don't bother with table if no vlans */
5359         if ((adapter->num_vlans == 0) ||
5360             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5361                 return;
5362         /*
5363         ** A soft reset zeroes out the VFTA, so
5364         ** we need to repopulate it now.
5365         */
5366         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5367                 if (adapter->shadow_vfta[i] != 0) {
5368                         if (adapter->vf_ifp)
5369                                 e1000_vfta_set_vf(hw,
5370                                     adapter->shadow_vfta[i], TRUE);
5371                         else
5372                                 e1000_write_vfta(hw,
5373                                     i, adapter->shadow_vfta[i]);
5374                 }
5375 }
5376
5377 static void
5378 igb_enable_intr(struct adapter *adapter)
5379 {
5380         /* With RSS set up what to auto clear */
5381         if (adapter->msix_mem) {
5382                 u32 mask = (adapter->que_mask | adapter->link_mask);
5383                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5384                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5385                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5386                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5387                     E1000_IMS_LSC);
5388         } else {
5389                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5390                     IMS_ENABLE_MASK);
5391         }
5392         E1000_WRITE_FLUSH(&adapter->hw);
5393
5394         return;
5395 }
5396
5397 static void
5398 igb_disable_intr(struct adapter *adapter)
5399 {
5400         if (adapter->msix_mem) {
5401                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5402                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5403         } 
5404         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5405         E1000_WRITE_FLUSH(&adapter->hw);
5406         return;
5407 }
5408
5409 /*
5410  * Bit of a misnomer, what this really means is
5411  * to enable OS management of the system... aka
5412  * to disable special hardware management features 
5413  */
5414 static void
5415 igb_init_manageability(struct adapter *adapter)
5416 {
5417         if (adapter->has_manage) {
5418                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5419                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5420
5421                 /* disable hardware interception of ARP */
5422                 manc &= ~(E1000_MANC_ARP_EN);
5423
5424                 /* enable receiving management packets to the host */
5425                 manc |= E1000_MANC_EN_MNG2HOST;
5426                 manc2h |= 1 << 5;  /* Mng Port 623 */
5427                 manc2h |= 1 << 6;  /* Mng Port 664 */
5428                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5429                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5430         }
5431 }
5432
5433 /*
5434  * Give control back to hardware management
5435  * controller if there is one.
5436  */
5437 static void
5438 igb_release_manageability(struct adapter *adapter)
5439 {
5440         if (adapter->has_manage) {
5441                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5442
5443                 /* re-enable hardware interception of ARP */
5444                 manc |= E1000_MANC_ARP_EN;
5445                 manc &= ~E1000_MANC_EN_MNG2HOST;
5446
5447                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5448         }
5449 }
5450
5451 /*
5452  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5453  * For ASF and Pass Through versions of f/w this means that
5454  * the driver is loaded. 
5455  *
5456  */
5457 static void
5458 igb_get_hw_control(struct adapter *adapter)
5459 {
5460         u32 ctrl_ext;
5461
5462         if (adapter->vf_ifp)
5463                 return;
5464
5465         /* Let firmware know the driver has taken over */
5466         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5467         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5468             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5469 }
5470
5471 /*
5472  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5473  * For ASF and Pass Through versions of f/w this means that the
5474  * driver is no longer loaded.
5475  *
5476  */
5477 static void
5478 igb_release_hw_control(struct adapter *adapter)
5479 {
5480         u32 ctrl_ext;
5481
5482         if (adapter->vf_ifp)
5483                 return;
5484
5485         /* Let firmware take over control of h/w */
5486         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5487         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5488             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5489 }
5490
5491 static int
5492 igb_is_valid_ether_addr(uint8_t *addr)
5493 {
5494         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5495
5496         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5497                 return (FALSE);
5498         }
5499
5500         return (TRUE);
5501 }
5502
5503
5504 /*
5505  * Enable PCI Wake On Lan capability
5506  */
5507 static void
5508 igb_enable_wakeup(device_t dev)
5509 {
5510         u16     cap, status;
5511         u8      id;
5512
5513         /* First find the capabilities pointer */
5514         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5515         /* Read the PM Capabilities */
5516         id = pci_read_config(dev, cap, 1);
5517         if (id != PCIY_PMG)     /* Something wrong */
5518                 return;
5519         /* OK, we have the power capabilities, so
5520            now get the status register */
5521         cap += PCIR_POWER_STATUS;
5522         status = pci_read_config(dev, cap, 2);
5523         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5524         pci_write_config(dev, cap, status, 2);
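             /*
             ** PME_EN arms the function to assert PME on a wake event;
             ** the PME status bit is write-one-to-clear, so writing it
             ** back here also clears any previously pending event.
             */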
5525         return;
5526 }
5527
5528 static void
5529 igb_led_func(void *arg, int onoff)
5530 {
5531         struct adapter  *adapter = arg;
5532
5533         IGB_CORE_LOCK(adapter);
5534         if (onoff) {
5535                 e1000_setup_led(&adapter->hw);
5536                 e1000_led_on(&adapter->hw);
5537         } else {
5538                 e1000_led_off(&adapter->hw);
5539                 e1000_cleanup_led(&adapter->hw);
5540         }
5541         IGB_CORE_UNLOCK(adapter);
5542 }
5543
5544 static uint64_t
5545 igb_get_vf_counter(if_t ifp, ift_counter cnt)
5546 {
5547         struct adapter *adapter;
5548         struct e1000_vf_stats *stats;
5549 #ifndef IGB_LEGACY_TX
5550         struct tx_ring *txr;
5551         uint64_t rv;
5552 #endif
5553
5554         adapter = if_getsoftc(ifp);
5555         stats = (struct e1000_vf_stats *)adapter->stats;
5556
5557         switch (cnt) {
5558         case IFCOUNTER_IPACKETS:
5559                 return (stats->gprc);
5560         case IFCOUNTER_OPACKETS:
5561                 return (stats->gptc);
5562         case IFCOUNTER_IBYTES:
5563                 return (stats->gorc);
5564         case IFCOUNTER_OBYTES:
5565                 return (stats->gotc);
5566         case IFCOUNTER_IMCASTS:
5567                 return (stats->mprc);
5568         case IFCOUNTER_IERRORS:
5569                 return (adapter->dropped_pkts);
5570         case IFCOUNTER_OERRORS:
5571                 return (adapter->watchdog_events);
5572 #ifndef IGB_LEGACY_TX
5573         case IFCOUNTER_OQDROPS:
5574                 rv = 0;
5575                 txr = adapter->tx_rings;
5576                 for (int i = 0; i < adapter->num_queues; i++, txr++)
5577                         rv += txr->br->br_drops;
5578                 return (rv);
5579 #endif
5580         default:
5581                 return (if_get_counter_default(ifp, cnt));
5582         }
5583 }
5584
5585 static uint64_t
5586 igb_get_counter(if_t ifp, ift_counter cnt)
5587 {
5588         struct adapter *adapter;
5589         struct e1000_hw_stats *stats;
5590 #ifndef IGB_LEGACY_TX
5591         struct tx_ring *txr;
5592         uint64_t rv;
5593 #endif
5594
5595         adapter = if_getsoftc(ifp);
5596         if (adapter->vf_ifp)
5597                 return (igb_get_vf_counter(ifp, cnt));
5598
5599         stats = (struct e1000_hw_stats *)adapter->stats;
5600
5601         switch (cnt) {
5602         case IFCOUNTER_IPACKETS:
5603                 return (stats->gprc);
5604         case IFCOUNTER_OPACKETS:
5605                 return (stats->gptc);
5606         case IFCOUNTER_IBYTES:
5607                 return (stats->gorc);
5608         case IFCOUNTER_OBYTES:
5609                 return (stats->gotc);
5610         case IFCOUNTER_IMCASTS:
5611                 return (stats->mprc);
5612         case IFCOUNTER_OMCASTS:
5613                 return (stats->mptc);
5614         case IFCOUNTER_IERRORS:
5615                 return (adapter->dropped_pkts + stats->rxerrc +
5616                     stats->crcerrs + stats->algnerrc +
5617                     stats->ruc + stats->roc + stats->cexterr);
5618         case IFCOUNTER_OERRORS:
5619                 return (stats->ecol + stats->latecol +
5620                     adapter->watchdog_events);
5621         case IFCOUNTER_COLLISIONS:
5622                 return (stats->colc);
5623         case IFCOUNTER_IQDROPS:
5624                 return (stats->mpc);
5625 #ifndef IGB_LEGACY_TX
5626         case IFCOUNTER_OQDROPS:
5627                 rv = 0;
5628                 txr = adapter->tx_rings;
5629                 for (int i = 0; i < adapter->num_queues; i++, txr++)
5630                         rv += txr->br->br_drops;
5631                 return (rv);
5632 #endif
5633         default:
5634                 return (if_get_counter_default(ifp, cnt));
5635         }
5636 }
5637
5638 /**********************************************************************
5639  *
5640  *  Update the board statistics counters.
5641  *
5642  **********************************************************************/
5643 static void
5644 igb_update_stats_counters(struct adapter *adapter)
5645 {
5646         struct e1000_hw         *hw = &adapter->hw;
5647         struct e1000_hw_stats   *stats;
5648
5649         /*
5650         ** The virtual function adapter has only a
5651         ** small, controlled set of stats, so update
5652         ** only those and return.
5653         */
5654         if (adapter->vf_ifp) {
5655                 igb_update_vf_stats_counters(adapter);
5656                 return;
5657         }
5658
5659         stats = (struct e1000_hw_stats  *)adapter->stats;
5660
5661         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5662            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5663                 stats->symerrs +=
5664                     E1000_READ_REG(hw, E1000_SYMERRS);
5665                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5666         }
5667
5668         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5669         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5670         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5671         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5672
5673         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5674         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5675         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5676         stats->dc += E1000_READ_REG(hw, E1000_DC);
5677         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5678         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5679         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5680         /*
5681         ** For watchdog management we need to know if we have been
5682         ** paused during the last interval, so capture that here.
5683         */ 
5684         adapter->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC);
5685         stats->xoffrxc += adapter->pause_frames;
5686         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5687         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5688         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5689         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5690         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5691         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5692         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5693         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5694         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5695         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5696         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5697         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5698
5699         /* For the 64-bit byte counters the low dword must be read first. */
5700         /* Both registers clear on the read of the high dword */
5701
5702         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5703             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5704         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5705             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5706
5707         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5708         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5709         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5710         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5711         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5712
5713         stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5714         stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5715         stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5716
5717         stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5718             ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5719         stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5720             ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5721
5722         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5723         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5724         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5725         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5726         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5727         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5728         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5729         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5730         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5731         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5732
5733         /* Interrupt Counts */
5734
5735         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5736         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5737         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5738         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5739         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5740         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5741         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5742         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5743         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5744
5745         /* Host to Card Statistics */
5746
5747         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5748         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5749         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5750         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5751         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5752         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5753         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5754         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5755             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5756         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5757             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5758         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5759         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5760         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5761
5762         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5763         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5764         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5765         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5766         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5767         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5768
5769         /* Driver specific counters */
5770         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5771         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5772         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5773         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5774         adapter->packet_buf_alloc_tx =
5775             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5776         adapter->packet_buf_alloc_rx =
5777             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5778 }
5779
5780
5781 /**********************************************************************
5782  *
5783  *  Initialize the VF board statistics counters.
5784  *
5785  **********************************************************************/
5786 static void
5787 igb_vf_init_stats(struct adapter *adapter)
5788 {
5789         struct e1000_hw *hw = &adapter->hw;
5790         struct e1000_vf_stats   *stats;
5791
5792         stats = (struct e1000_vf_stats  *)adapter->stats;
5793         if (stats == NULL)
5794                 return;
5795         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5796         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5797         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5798         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5799         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5800 }
5801  
5802 /**********************************************************************
5803  *
5804  *  Update the VF board statistics counters.
5805  *
5806  **********************************************************************/
5807 static void
5808 igb_update_vf_stats_counters(struct adapter *adapter)
5809 {
5810         struct e1000_hw *hw = &adapter->hw;
5811         struct e1000_vf_stats   *stats;
5812
5813         if (adapter->link_speed == 0)
5814                 return;
5815
5816         stats = (struct e1000_vf_stats  *)adapter->stats;
5817
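        /*
         * UPDATE_VF_REG (from if_igb.h) is assumed to fold each new 32-bit
         * hardware reading into the running 64-bit counter, using the saved
         * "last" value to account for register rollover.
         */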
5818         UPDATE_VF_REG(E1000_VFGPRC,
5819             stats->last_gprc, stats->gprc);
5820         UPDATE_VF_REG(E1000_VFGORC,
5821             stats->last_gorc, stats->gorc);
5822         UPDATE_VF_REG(E1000_VFGPTC,
5823             stats->last_gptc, stats->gptc);
5824         UPDATE_VF_REG(E1000_VFGOTC,
5825             stats->last_gotc, stats->gotc);
5826         UPDATE_VF_REG(E1000_VFMPRC,
5827             stats->last_mprc, stats->mprc);
5828 }
5829
5830 /* Export a single 32-bit register via a read-only sysctl. */
5831 static int
5832 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5833 {
5834         struct adapter *adapter;
5835         u_int val;
5836
5837         adapter = oidp->oid_arg1;
5838         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5839         return (sysctl_handle_int(oidp, &val, 0, req));
5840 }
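/*
 * The per-queue txd_head/txd_tail and rxd_head/rxd_tail OIDs registered in
 * igb_add_hw_stats() below are backed by this handler.
 */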
5841
5842 /*
5843 **  Tunable interrupt rate handler
5844 */
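/*
 * Note: the rate reported is derived from the interval field (bits 14:2)
 * of the queue's current EITR register; writes are validated by
 * sysctl_handle_int() but are not written back to the hardware here.
 */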
5845 static int
5846 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5847 {
5848         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5849         int                     error;
5850         u32                     reg, usec, rate;
5851                         
5852         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5853         usec = ((reg & 0x7FFC) >> 2);
5854         if (usec > 0)
5855                 rate = 1000000 / usec;
5856         else
5857                 rate = 0;
5858         error = sysctl_handle_int(oidp, &rate, 0, req);
5859         if (error || !req->newptr)
5860                 return (error);
5861         return (0);
5862 }
5863
5864 /*
5865  * Add sysctl variables, one per statistic, to the system.
5866  */
5867 static void
5868 igb_add_hw_stats(struct adapter *adapter)
5869 {
5870         device_t dev = adapter->dev;
5871
5872         struct tx_ring *txr = adapter->tx_rings;
5873         struct rx_ring *rxr = adapter->rx_rings;
5874
5875         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5876         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5877         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5878         struct e1000_hw_stats *stats = adapter->stats;
5879
5880         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5881         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5882
5883 #define QUEUE_NAME_LEN 32
5884         char namebuf[QUEUE_NAME_LEN];
5885
5886         /* Driver Statistics */
5887         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5888                         CTLFLAG_RD, &adapter->dropped_pkts,
5889                         "Driver dropped packets");
5890         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 
5891                         CTLFLAG_RD, &adapter->link_irq,
5892                         "Link MSIX IRQ Handled");
5893         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5894                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5895                         "Defragmenting mbuf chain failed");
5896         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5897                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5898                         "Driver tx dma failure in xmit");
5899         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5900                         CTLFLAG_RD, &adapter->rx_overruns,
5901                         "RX overruns");
5902         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5903                         CTLFLAG_RD, &adapter->watchdog_events,
5904                         "Watchdog timeouts");
5905
5906         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5907                         CTLFLAG_RD, &adapter->device_control,
5908                         "Device Control Register");
5909         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5910                         CTLFLAG_RD, &adapter->rx_control,
5911                         "Receiver Control Register");
5912         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5913                         CTLFLAG_RD, &adapter->int_mask,
5914                         "Interrupt Mask");
5915         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5916                         CTLFLAG_RD, &adapter->eint_mask,
5917                         "Extended Interrupt Mask");
5918         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5919                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5920                         "Transmit Buffer Packet Allocation");
5921         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5922                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5923                         "Receive Buffer Packet Allocation");
5924         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5925                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5926                         "Flow Control High Watermark");
5927         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5928                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5929                         "Flow Control Low Watermark");
5930
5931         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5932                 struct lro_ctrl *lro = &rxr->lro;
5933
5934                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5935                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5936                                             CTLFLAG_RD, NULL, "Queue Name");
5937                 queue_list = SYSCTL_CHILDREN(queue_node);
5938
5939                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5940                                 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5941                                 sizeof(&adapter->queues[i]),
5942                                 igb_sysctl_interrupt_rate_handler,
5943                                 "IU", "Interrupt Rate");
5944
5945                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5946                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5947                                 igb_sysctl_reg_handler, "IU",
5948                                 "Transmit Descriptor Head");
5949                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5950                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5951                                 igb_sysctl_reg_handler, "IU",
5952                                 "Transmit Descriptor Tail");
5953                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5954                                 CTLFLAG_RD, &txr->no_desc_avail,
5955                                 "Queue Descriptors Unavailable");
5956                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5957                                 CTLFLAG_RD, &txr->total_packets,
5958                                 "Queue Packets Transmitted");
5959
5960                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5961                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5962                                 igb_sysctl_reg_handler, "IU",
5963                                 "Receive Descriptor Head");
5964                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5965                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5966                                 igb_sysctl_reg_handler, "IU",
5967                                 "Receive Descriptor Tail");
5968                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5969                                 CTLFLAG_RD, &rxr->rx_packets,
5970                                 "Queue Packets Received");
5971                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5972                                 CTLFLAG_RD, &rxr->rx_bytes,
5973                                 "Queue Bytes Received");
5974                 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
5975                                 CTLFLAG_RD, &lro->lro_queued, 0,
5976                                 "LRO Queued");
5977                 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
5978                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5979                                 "LRO Flushed");
5980         }
5981
5982         /* MAC stats get their own sub node */
5983
5984         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5985                                     CTLFLAG_RD, NULL, "MAC Statistics");
5986         stat_list = SYSCTL_CHILDREN(stat_node);
5987
5988         /*
5989         ** VF adapter has a very limited set of stats
5990         ** since it's not managing the metal, so to speak.
5991         */
5992         if (adapter->vf_ifp) {
5993                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5994                                 CTLFLAG_RD, &stats->gprc,
5995                                 "Good Packets Received");
5996                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5997                                 CTLFLAG_RD, &stats->gptc,
5998                                 "Good Packets Transmitted");
5999                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6000                                 CTLFLAG_RD, &stats->gorc,
6001                                 "Good Octets Received");
6002                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6003                                 CTLFLAG_RD, &stats->gotc,
6004                                 "Good Octets Transmitted");
6005                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6006                                 CTLFLAG_RD, &stats->mprc,
6007                                 "Multicast Packets Received");
6008                 return;
6009         }
6010
6011         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
6012                         CTLFLAG_RD, &stats->ecol,
6013                         "Excessive collisions");
6014         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
6015                         CTLFLAG_RD, &stats->scc,
6016                         "Single collisions");
6017         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
6018                         CTLFLAG_RD, &stats->mcc,
6019                         "Multiple collisions");
6020         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
6021                         CTLFLAG_RD, &stats->latecol,
6022                         "Late collisions");
6023         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
6024                         CTLFLAG_RD, &stats->colc,
6025                         "Collision Count");
6026         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
6027                         CTLFLAG_RD, &stats->symerrs,
6028                         "Symbol Errors");
6029         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
6030                         CTLFLAG_RD, &stats->sec,
6031                         "Sequence Errors");
6032         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
6033                         CTLFLAG_RD, &stats->dc,
6034                         "Defer Count");
6035         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
6036                         CTLFLAG_RD, &stats->mpc,
6037                         "Missed Packets");
6038         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
6039                         CTLFLAG_RD, &stats->rlec,
6040                         "Receive Length Errors");
6041         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
6042                         CTLFLAG_RD, &stats->rnbc,
6043                         "Receive No Buffers");
6044         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
6045                         CTLFLAG_RD, &stats->ruc,
6046                         "Receive Undersize");
6047         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
6048                         CTLFLAG_RD, &stats->rfc,
6049                         "Fragmented Packets Received");
6050         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
6051                         CTLFLAG_RD, &stats->roc,
6052                         "Oversized Packets Received");
6053         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
6054                         CTLFLAG_RD, &stats->rjc,
6055                         "Received Jabber");
6056         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6057                         CTLFLAG_RD, &stats->rxerrc,
6058                         "Receive Errors");
6059         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6060                         CTLFLAG_RD, &stats->crcerrs,
6061                         "CRC errors");
6062         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6063                         CTLFLAG_RD, &stats->algnerrc,
6064                         "Alignment Errors");
6065         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6066                         CTLFLAG_RD, &stats->tncrs,
6067                         "Transmit with No CRS");
6068         /* On 82575 these are collision counts */
6069         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6070                         CTLFLAG_RD, &stats->cexterr,
6071                         "Collision/Carrier extension errors");
6072         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6073                         CTLFLAG_RD, &stats->xonrxc,
6074                         "XON Received");
6075         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6076                         CTLFLAG_RD, &stats->xontxc,
6077                         "XON Transmitted");
6078         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6079                         CTLFLAG_RD, &stats->xoffrxc,
6080                         "XOFF Received");
6081         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6082                         CTLFLAG_RD, &stats->xofftxc,
6083                         "XOFF Transmitted");
6084         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6085                         CTLFLAG_RD, &stats->fcruc,
6086                         "Unsupported Flow Control Received");
6087         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6088                         CTLFLAG_RD, &stats->mgprc,
6089                         "Management Packets Received");
6090         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6091                         CTLFLAG_RD, &stats->mgpdc,
6092                         "Management Packets Dropped");
6093         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6094                         CTLFLAG_RD, &stats->mgptc,
6095                         "Management Packets Transmitted");
6096         /* Packet Reception Stats */
6097         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6098                         CTLFLAG_RD, &stats->tpr,
6099                         "Total Packets Received");
6100         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6101                         CTLFLAG_RD, &stats->gprc,
6102                         "Good Packets Received");
6103         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6104                         CTLFLAG_RD, &stats->bprc,
6105                         "Broadcast Packets Received");
6106         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6107                         CTLFLAG_RD, &stats->mprc,
6108                         "Multicast Packets Received");
6109         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6110                         CTLFLAG_RD, &stats->prc64,
6111                         "64 byte frames received");
6112         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6113                         CTLFLAG_RD, &stats->prc127,
6114                         "65-127 byte frames received");
6115         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6116                         CTLFLAG_RD, &stats->prc255,
6117                         "128-255 byte frames received");
6118         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6119                         CTLFLAG_RD, &stats->prc511,
6120                         "256-511 byte frames received");
6121         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6122                         CTLFLAG_RD, &stats->prc1023,
6123                         "512-1023 byte frames received");
6124         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6125                         CTLFLAG_RD, &stats->prc1522,
6126                         "1024-1522 byte frames received");
6127         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
6128                         CTLFLAG_RD, &stats->gorc, 
6129                         "Good Octets Received");
6130         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", 
6131                         CTLFLAG_RD, &stats->tor, 
6132                         "Total Octets Received");
6133
6134         /* Packet Transmission Stats */
6135         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
6136                         CTLFLAG_RD, &stats->gotc, 
6137                         "Good Octets Transmitted"); 
6138         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", 
6139                         CTLFLAG_RD, &stats->tot, 
6140                         "Total Octets Transmitted");
6141         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6142                         CTLFLAG_RD, &stats->tpt,
6143                         "Total Packets Transmitted");
6144         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6145                         CTLFLAG_RD, &stats->gptc,
6146                         "Good Packets Transmitted");
6147         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6148                         CTLFLAG_RD, &stats->bptc,
6149                         "Broadcast Packets Transmitted");
6150         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6151                         CTLFLAG_RD, &stats->mptc,
6152                         "Multicast Packets Transmitted");
6153         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6154                         CTLFLAG_RD, &stats->ptc64,
6155                         "64 byte frames transmitted");
6156         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6157                         CTLFLAG_RD, &stats->ptc127,
6158                         "65-127 byte frames transmitted");
6159         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6160                         CTLFLAG_RD, &stats->ptc255,
6161                         "128-255 byte frames transmitted");
6162         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6163                         CTLFLAG_RD, &stats->ptc511,
6164                         "256-511 byte frames transmitted");
6165         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6166                         CTLFLAG_RD, &stats->ptc1023,
6167                         "512-1023 byte frames transmitted");
6168         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6169                         CTLFLAG_RD, &stats->ptc1522,
6170                         "1024-1522 byte frames transmitted");
6171         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6172                         CTLFLAG_RD, &stats->tsctc,
6173                         "TSO Contexts Transmitted");
6174         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6175                         CTLFLAG_RD, &stats->tsctfc,
6176                         "TSO Contexts Failed");
6177
6178
6179         /* Interrupt Stats */
6180
6181         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
6182                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
6183         int_list = SYSCTL_CHILDREN(int_node);
6184
6185         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6186                         CTLFLAG_RD, &stats->iac,
6187                         "Interrupt Assertion Count");
6188
6189         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6190                         CTLFLAG_RD, &stats->icrxptc,
6191                         "Interrupt Cause Rx Pkt Timer Expire Count");
6192
6193         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6194                         CTLFLAG_RD, &stats->icrxatc,
6195                         "Interrupt Cause Rx Abs Timer Expire Count");
6196
6197         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6198                         CTLFLAG_RD, &stats->ictxptc,
6199                         "Interrupt Cause Tx Pkt Timer Expire Count");
6200
6201         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6202                         CTLFLAG_RD, &stats->ictxatc,
6203                         "Interrupt Cause Tx Abs Timer Expire Count");
6204
6205         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6206                         CTLFLAG_RD, &stats->ictxqec,
6207                         "Interrupt Cause Tx Queue Empty Count");
6208
6209         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6210                         CTLFLAG_RD, &stats->ictxqmtc,
6211                         "Interrupt Cause Tx Queue Min Thresh Count");
6212
6213         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6214                         CTLFLAG_RD, &stats->icrxdmtc,
6215                         "Interrupt Cause Rx Desc Min Thresh Count");
6216
6217         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6218                         CTLFLAG_RD, &stats->icrxoc,
6219                         "Interrupt Cause Receiver Overrun Count");
6220
6221         /* Host to Card Stats */
6222
6223         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
6224                                     CTLFLAG_RD, NULL, 
6225                                     "Host to Card Statistics");
6226
6227         host_list = SYSCTL_CHILDREN(host_node);
6228
6229         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6230                         CTLFLAG_RD, &stats->cbtmpc,
6231                         "Circuit Breaker Tx Packet Count");
6232
6233         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6234                         CTLFLAG_RD, &stats->htdpmc,
6235                         "Host Transmit Discarded Packets");
6236
6237         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6238                         CTLFLAG_RD, &stats->rpthc,
6239                         "Rx Packets To Host");
6240
6241         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6242                         CTLFLAG_RD, &stats->cbrmpc,
6243                         "Circuit Breaker Rx Packet Count");
6244
6245         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6246                         CTLFLAG_RD, &stats->cbrdpc,
6247                         "Circuit Breaker Rx Dropped Count");
6248
6249         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6250                         CTLFLAG_RD, &stats->hgptc,
6251                         "Host Good Packets Tx Count");
6252
6253         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6254                         CTLFLAG_RD, &stats->htcbdpc,
6255                         "Host Tx Circuit Breaker Dropped Count");
6256
6257         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6258                         CTLFLAG_RD, &stats->hgorc,
6259                         "Host Good Octets Received Count");
6260
6261         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6262                         CTLFLAG_RD, &stats->hgotc,
6263                         "Host Good Octets Transmit Count");
6264
6265         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6266                         CTLFLAG_RD, &stats->lenerrs,
6267                         "Length Errors");
6268
6269         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6270                         CTLFLAG_RD, &stats->scvpc,
6271                         "SerDes/SGMII Code Violation Pkt Count");
6272
6273         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6274                         CTLFLAG_RD, &stats->hrmpc,
6275                         "Header Redirection Missed Packet Count");
6276 }
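/*
 * The tree built above can be browsed from userland with, e.g.
 * (assuming unit 0):
 *      sysctl dev.igb.0.mac_stats
 */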
6277
6278
6279 /**********************************************************************
6280  *
6281  *  This routine provides a way to dump out the adapter eeprom,
6282  *  often a useful debug/service tool. This only dumps the first
6283  *  32 words, stuff that matters is in that extent.
6284  *  32 words; the data that matters lies within that extent.
6285  **********************************************************************/
6286 static int
6287 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6288 {
6289         struct adapter *adapter;
6290         int error;
6291         int result;
6292
6293         result = -1;
6294         error = sysctl_handle_int(oidp, &result, 0, req);
6295
6296         if (error || !req->newptr)
6297                 return (error);
6298
6299         /*
6300          * This value will cause a hex dump of the
6301          * first 32 16-bit words of the EEPROM to
6302          * the screen.
6303          */
6304         if (result == 1) {
6305                 adapter = (struct adapter *)arg1;
6306                 igb_print_nvm_info(adapter);
6307         }
6308
6309         return (error);
6310 }
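/*
 * Example (assuming the OID is registered as "nvm" on unit 0): writing 1,
 * as in "sysctl dev.igb.0.nvm=1", dumps the first 32 EEPROM words to the
 * console.
 */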
6311
6312 static void
6313 igb_print_nvm_info(struct adapter *adapter)
6314 {
6315         u16     eeprom_data;
6316         int     i, j, row = 0;
6317
6318         /* It's a bit crude, but it gets the job done */
6319         printf("\nInterface EEPROM Dump:\n");
6320         printf("Offset\n0x0000  ");
6321         for (i = 0, j = 0; i < 32; i++, j++) {
6322                 if (j == 8) { /* Make the offset block */
6323                         j = 0; ++row;
6324                         printf("\n0x00%x0  ", row);
6325                 }
6326                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6327                 printf("%04x ", eeprom_data);
6328         }
6329         printf("\n");
6330 }
6331
6332 static void
6333 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6334         const char *description, int *limit, int value)
6335 {
6336         *limit = value;
6337         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6338             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6339             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6340 }
6341
6342 /*
6343 ** Set flow control using sysctl:
6344 ** Flow control values:
6345 **      0 - off
6346 **      1 - rx pause
6347 **      2 - tx pause
6348 **      3 - full
6349 */
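/*
 * Example (assuming the OID is registered as "fc" on unit 0):
 *      sysctl dev.igb.0.fc=3
 * requests full (rx+tx) flow control.
 */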
6350 static int
6351 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6352 {
6353         int             error;
6354         static int      input = 3; /* default is full */
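        /*
         * N.B. "input" is static: it retains the last value written, and is
         * shared by every adapter serviced through this handler.
         */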
6355         struct adapter  *adapter = (struct adapter *) arg1;
6356
6357         error = sysctl_handle_int(oidp, &input, 0, req);
6358
6359         if ((error) || (req->newptr == NULL))
6360                 return (error);
6361
6362         switch (input) {
6363                 case e1000_fc_rx_pause:
6364                 case e1000_fc_tx_pause:
6365                 case e1000_fc_full:
6366                 case e1000_fc_none:
6367                         adapter->hw.fc.requested_mode = input;
6368                         adapter->fc = input;
6369                         break;
6370                 default:
6371                         /* Do nothing */
6372                         return (error);
6373         }
6374
6375         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6376         e1000_force_mac_fc(&adapter->hw);
6377         /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6378         return (error);
6379 }
6380
6381 /*
6382 ** Manage DMA Coalesce:
6383 ** Control values:
6384 **      0/1 - off/on (1 enables with the default timer value of 1000)
6385 **      Legal timer values are:
6386 **      250, 500, and 1000-10000 in increments of 1000
6387 */
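/*
 * Example (assuming the OID is registered as "dmac" on unit 0):
 *      sysctl dev.igb.0.dmac=1000
 * enables DMA coalescing with a timer value of 1000; the interface is
 * reinitialized to apply the setting.
 */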
6388 static int
6389 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6390 {
6391         struct adapter *adapter = (struct adapter *) arg1;
6392         int             error;
6393
6394         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6395
6396         if ((error) || (req->newptr == NULL))
6397                 return (error);
6398
6399         switch (adapter->dmac) {
6400                 case 0:
6401                         /* Disabling */
6402                         break;
6403                 case 1: /* Just enable and use default */
6404                         adapter->dmac = 1000;
6405                         break;
6406                 case 250:
6407                 case 500:
6408                 case 1000:
6409                 case 2000:
6410                 case 3000:
6411                 case 4000:
6412                 case 5000:
6413                 case 6000:
6414                 case 7000:
6415                 case 8000:
6416                 case 9000:
6417                 case 10000:
6418                         /* Legal values - allow */
6419                         break;
6420                 default:
6421                         /* Do nothing, illegal value */
6422                         adapter->dmac = 0;
6423                         return (EINVAL);
6424         }
6425         /* Reinit the interface */
6426         igb_init(adapter);
6427         return (error);
6428 }
6429
6430 /*
6431 ** Manage Energy Efficient Ethernet:
6432 ** Control values:
6433 **     0 - EEE enabled, 1 - EEE disabled
6434 */
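/*
 * Example (assuming the OID is registered as "eee_disabled" on unit 0):
 *      sysctl dev.igb.0.eee_disabled=1
 * disables EEE and reinitializes the interface.
 */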
6435 static int
6436 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6437 {
6438         struct adapter  *adapter = (struct adapter *) arg1;
6439         int             error, value;
6440
6441         value = adapter->hw.dev_spec._82575.eee_disable;
6442         error = sysctl_handle_int(oidp, &value, 0, req);
6443         if (error || req->newptr == NULL)
6444                 return (error);
6445         IGB_CORE_LOCK(adapter);
6446         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6447         igb_init_locked(adapter);
6448         IGB_CORE_UNLOCK(adapter);
6449         return (0);
6450 }