1 /******************************************************************************
2
3   Copyright (c) 2001-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #include "opt_inet.h"
37 #include "opt_inet6.h"
38 #include "opt_rss.h"
39
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #include "opt_altq.h"
43 #endif
44
45 #include "if_igb.h"
46
47 /*********************************************************************
48  *  Driver version:
49  *********************************************************************/
50 char igb_driver_version[] = "2.5.2";
51
52
53 /*********************************************************************
54  *  PCI Device ID Table
55  *
56  *  Used by probe to select which devices to attach to.
57  *  The last field stores an index into igb_strings.
58  *  The last entry must be all 0s.
59  *
60  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
61  *********************************************************************/
62
63 static igb_vendor_info_t igb_vendor_info_array[] =
64 {
65         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
66         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
67         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
68         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
69         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
70         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
71         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
72         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
73         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
74         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
75         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
76         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
77         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
78         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
79         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
80         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
81         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
82         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
83         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
84         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
85         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
86         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
87         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
88         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER,  0, 0, 0},
89         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
90         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII,  0, 0, 0},
91         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
92         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
93         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
94         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
95         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
96         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
97         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER,  0, 0, 0},
98         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
99         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII,  0, 0, 0},
100         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
101         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
102         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
103         {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII,  0, 0, 0},
104         /* required last entry */
105         {0, 0, 0, 0, 0}
106 };
107
108 /*********************************************************************
109  *  Table of branding strings for all supported NICs.
110  *********************************************************************/
111
112 static char *igb_strings[] = {
113         "Intel(R) PRO/1000 Network Connection"
114 };
115
116 /*********************************************************************
117  *  Function prototypes
118  *********************************************************************/
119 static int      igb_probe(device_t);
120 static int      igb_attach(device_t);
121 static int      igb_detach(device_t);
122 static int      igb_shutdown(device_t);
123 static int      igb_suspend(device_t);
124 static int      igb_resume(device_t);
125 #ifndef IGB_LEGACY_TX
126 static int      igb_mq_start(struct ifnet *, struct mbuf *);
127 static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
128 static void     igb_qflush(struct ifnet *);
129 static void     igb_deferred_mq_start(void *, int);
130 #else
131 static void     igb_start(struct ifnet *);
132 static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
133 #endif
134 static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
135 static uint64_t igb_get_counter(if_t, ift_counter);
136 static void     igb_init(void *);
137 static void     igb_init_locked(struct adapter *);
138 static void     igb_stop(void *);
139 static void     igb_media_status(struct ifnet *, struct ifmediareq *);
140 static int      igb_media_change(struct ifnet *);
141 static void     igb_identify_hardware(struct adapter *);
142 static int      igb_allocate_pci_resources(struct adapter *);
143 static int      igb_allocate_msix(struct adapter *);
144 static int      igb_allocate_legacy(struct adapter *);
145 static int      igb_setup_msix(struct adapter *);
146 static void     igb_free_pci_resources(struct adapter *);
147 static void     igb_local_timer(void *);
148 static void     igb_reset(struct adapter *);
149 static int      igb_setup_interface(device_t, struct adapter *);
150 static int      igb_allocate_queues(struct adapter *);
151 static void     igb_configure_queues(struct adapter *);
152
153 static int      igb_allocate_transmit_buffers(struct tx_ring *);
154 static void     igb_setup_transmit_structures(struct adapter *);
155 static void     igb_setup_transmit_ring(struct tx_ring *);
156 static void     igb_initialize_transmit_units(struct adapter *);
157 static void     igb_free_transmit_structures(struct adapter *);
158 static void     igb_free_transmit_buffers(struct tx_ring *);
159
160 static int      igb_allocate_receive_buffers(struct rx_ring *);
161 static int      igb_setup_receive_structures(struct adapter *);
162 static int      igb_setup_receive_ring(struct rx_ring *);
163 static void     igb_initialize_receive_units(struct adapter *);
164 static void     igb_free_receive_structures(struct adapter *);
165 static void     igb_free_receive_buffers(struct rx_ring *);
166 static void     igb_free_receive_ring(struct rx_ring *);
167
168 static void     igb_enable_intr(struct adapter *);
169 static void     igb_disable_intr(struct adapter *);
170 static void     igb_update_stats_counters(struct adapter *);
171 static bool     igb_txeof(struct tx_ring *);
172
173 static __inline void igb_rx_discard(struct rx_ring *, int);
174 static __inline void igb_rx_input(struct rx_ring *,
175                     struct ifnet *, struct mbuf *, u32);
176
177 static bool     igb_rxeof(struct igb_queue *, int, int *);
178 static void     igb_rx_checksum(u32, struct mbuf *, u32);
179 static int      igb_tx_ctx_setup(struct tx_ring *,
180                     struct mbuf *, u32 *, u32 *);
181 static int      igb_tso_setup(struct tx_ring *,
182                     struct mbuf *, u32 *, u32 *);
183 static void     igb_set_promisc(struct adapter *);
184 static void     igb_disable_promisc(struct adapter *);
185 static void     igb_set_multi(struct adapter *);
186 static void     igb_update_link_status(struct adapter *);
187 static void     igb_refresh_mbufs(struct rx_ring *, int);
188
189 static void     igb_register_vlan(void *, struct ifnet *, u16);
190 static void     igb_unregister_vlan(void *, struct ifnet *, u16);
191 static void     igb_setup_vlan_hw_support(struct adapter *);
192
193 static int      igb_xmit(struct tx_ring *, struct mbuf **);
194 static int      igb_dma_malloc(struct adapter *, bus_size_t,
195                     struct igb_dma_alloc *, int);
196 static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
197 static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
198 static void     igb_print_nvm_info(struct adapter *);
199 static int      igb_is_valid_ether_addr(u8 *);
200 static void     igb_add_hw_stats(struct adapter *);
201
202 static void     igb_vf_init_stats(struct adapter *);
203 static void     igb_update_vf_stats_counters(struct adapter *);
204
205 /* Management and WOL Support */
206 static void     igb_init_manageability(struct adapter *);
207 static void     igb_release_manageability(struct adapter *);
208 static void     igb_get_hw_control(struct adapter *);
209 static void     igb_release_hw_control(struct adapter *);
210 static void     igb_enable_wakeup(device_t);
211 static void     igb_led_func(void *, int);
212
213 static int      igb_irq_fast(void *);
214 static void     igb_msix_que(void *);
215 static void     igb_msix_link(void *);
216 static void     igb_handle_que(void *context, int pending);
217 static void     igb_handle_link(void *context, int pending);
218 static void     igb_handle_link_locked(struct adapter *);
219
220 static void     igb_set_sysctl_value(struct adapter *, const char *,
221                     const char *, int *, int);
222 static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
223 static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
224 static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
225
226 #ifdef DEVICE_POLLING
227 static poll_handler_t igb_poll;
228 #endif /* DEVICE_POLLING */
229
230 /*********************************************************************
231  *  FreeBSD Device Interface Entry Points
232  *********************************************************************/
233
234 static device_method_t igb_methods[] = {
235         /* Device interface */
236         DEVMETHOD(device_probe, igb_probe),
237         DEVMETHOD(device_attach, igb_attach),
238         DEVMETHOD(device_detach, igb_detach),
239         DEVMETHOD(device_shutdown, igb_shutdown),
240         DEVMETHOD(device_suspend, igb_suspend),
241         DEVMETHOD(device_resume, igb_resume),
242         DEVMETHOD_END
243 };
244
245 static driver_t igb_driver = {
246         "igb", igb_methods, sizeof(struct adapter),
247 };
248
249 static devclass_t igb_devclass;
250 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
251 MODULE_DEPEND(igb, pci, 1, 1, 1);
252 MODULE_DEPEND(igb, ether, 1, 1, 1);
253 #ifdef DEV_NETMAP
254 MODULE_DEPEND(igb, netmap, 1, 1, 1);
255 #endif /* DEV_NETMAP */
256
257 /*********************************************************************
258  *  Tunable default values.
259  *********************************************************************/
260
261 static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
262
263 /* Descriptor defaults */
264 static int igb_rxd = IGB_DEFAULT_RXD;
265 static int igb_txd = IGB_DEFAULT_TXD;
266 SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
267     "Number of receive descriptors per queue");
268 SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
269     "Number of transmit descriptors per queue");
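
/*
 * Example: rxd/txd are CTLFLAG_RDTUN sysctls, so they are read once as
 * boot-time tunables, e.g. from /boot/loader.conf:
 *
 *   hw.igb.rxd="2048"   # example value only
 *   hw.igb.txd="2048"   # example value only
 *
 * Out-of-range or misaligned values are caught in igb_attach() below and
 * replaced with IGB_DEFAULT_RXD/IGB_DEFAULT_TXD.
 */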
270
271 /*
272 ** AIM: Adaptive Interrupt Moderation
273 ** which means that the interrupt rate
274 ** is varied over time based on the
275 ** traffic for that interrupt vector
276 */
277 static int igb_enable_aim = TRUE;
278 SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
279     "Enable adaptive interrupt moderation");
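
/*
 * Example: unlike the read-only tunables above, enable_aim is
 * CTLFLAG_RWTUN and can also be changed at runtime, e.g.:
 *
 *   sysctl hw.igb.enable_aim=0   # example: disable AIM
 */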
280
281 /*
282  * MSIX should be the default for best performance,
283  * but this allows it to be forced off for testing.
284  */         
285 static int igb_enable_msix = 1;
286 SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
287     "Enable MSI-X interrupts");
288
289 /*
290 ** Tuneable Interrupt rate
291 */
292 static int igb_max_interrupt_rate = 8000;
293 SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
294     &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
295
296 #ifndef IGB_LEGACY_TX
297 /*
298 ** Tuneable number of buffers in the buf-ring (drbr_xxx)
299 */
300 static int igb_buf_ring_size = IGB_BR_SIZE;
301 SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
302     &igb_buf_ring_size, 0, "Size of the bufring");
303 #endif
304
305 /*
306 ** Header split causes the packet header to
307 ** be DMA'd into a separate mbuf from the payload.
308 ** This can have memory alignment benefits. But
309 ** another plus is that small packets often fit
310 ** into the header mbuf and thus use no cluster. It is
311 ** a very workload-dependent feature.
312 */
313 static int igb_header_split = FALSE;
314 SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
315     "Enable receive mbuf header split");
316
317 /*
318 ** This will autoconfigure based on the
319 ** number of CPUs and max supported
320 ** MSIX messages if left at 0.
321 */
322 static int igb_num_queues = 0;
323 SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
324     "Number of queues to configure, 0 indicates autoconfigure");
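
/*
 * Example: a nonzero boot-time setting such as hw.igb.num_queues="1"
 * (example value) in /boot/loader.conf requests that many queues instead
 * of the autoconfigured count.
 */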
325
326 /*
327 ** Global variable to store last used CPU when binding queues
328 ** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
329 ** queue is bound to a cpu.
330 */
331 static int igb_last_bind_cpu = -1;
332
333 /* How many packets rxeof tries to clean at a time */
334 static int igb_rx_process_limit = 100;
335 SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
336     &igb_rx_process_limit, 0,
337     "Maximum number of received packets to process at a time, -1 means unlimited");
338
339 #ifdef DEV_NETMAP       /* see ixgbe.c for details */
340 #include <dev/netmap/if_igb_netmap.h>
341 #endif /* DEV_NETMAP */
342 /*********************************************************************
343  *  Device identification routine
344  *
345  *  igb_probe determines if the driver should be loaded on the
346  *  adapter, based on the PCI vendor/device ID of the adapter.
347  *
348  *  return BUS_PROBE_DEFAULT on success, positive on failure
349  *********************************************************************/
350
351 static int
352 igb_probe(device_t dev)
353 {
354         char            adapter_name[256];
355         uint16_t        pci_vendor_id = 0;
356         uint16_t        pci_device_id = 0;
357         uint16_t        pci_subvendor_id = 0;
358         uint16_t        pci_subdevice_id = 0;
359         igb_vendor_info_t *ent;
360
361         INIT_DEBUGOUT("igb_probe: begin");
362
363         pci_vendor_id = pci_get_vendor(dev);
364         if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
365                 return (ENXIO);
366
367         pci_device_id = pci_get_device(dev);
368         pci_subvendor_id = pci_get_subvendor(dev);
369         pci_subdevice_id = pci_get_subdevice(dev);
370
371         ent = igb_vendor_info_array;
372         while (ent->vendor_id != 0) {
373                 if ((pci_vendor_id == ent->vendor_id) &&
374                     (pci_device_id == ent->device_id) &&
375
376                     ((pci_subvendor_id == ent->subvendor_id) ||
377                     (ent->subvendor_id == 0)) &&
378
379                     ((pci_subdevice_id == ent->subdevice_id) ||
380                     (ent->subdevice_id == 0))) {
381                         sprintf(adapter_name, "%s, Version - %s",
382                                 igb_strings[ent->index],
383                                 igb_driver_version);
384                         device_set_desc_copy(dev, adapter_name);
385                         return (BUS_PROBE_DEFAULT);
386                 }
387                 ent++;
388         }
389         return (ENXIO);
390 }
391
392 /*********************************************************************
393  *  Device initialization routine
394  *
395  *  The attach entry point is called when the driver is being loaded.
396  *  This routine identifies the type of hardware, allocates all resources
397  *  and initializes the hardware.
398  *
399  *  return 0 on success, positive on failure
400  *********************************************************************/
401
402 static int
403 igb_attach(device_t dev)
404 {
405         struct adapter  *adapter;
406         int             error = 0;
407         u16             eeprom_data;
408
409         INIT_DEBUGOUT("igb_attach: begin");
410
411         if (resource_disabled("igb", device_get_unit(dev))) {
412                 device_printf(dev, "Disabled by device hint\n");
413                 return (ENXIO);
414         }
415
416         adapter = device_get_softc(dev);
417         adapter->dev = adapter->osdep.dev = dev;
418         IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
419
420         /* SYSCTLs */
421         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
422             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
423             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
424             igb_sysctl_nvm_info, "I", "NVM Information");
425
426         igb_set_sysctl_value(adapter, "enable_aim",
427             "Interrupt Moderation", &adapter->enable_aim,
428             igb_enable_aim);
429
430         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
431             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
432             OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
433             adapter, 0, igb_set_flowcntl, "I", "Flow Control");
434
435         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
436
437         /* Determine hardware and mac info */
438         igb_identify_hardware(adapter);
439
440         /* Setup PCI resources */
441         if (igb_allocate_pci_resources(adapter)) {
442                 device_printf(dev, "Allocation of PCI resources failed\n");
443                 error = ENXIO;
444                 goto err_pci;
445         }
446
447         /* Do Shared Code initialization */
448         if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
449                 device_printf(dev, "Setup of Shared code failed\n");
450                 error = ENXIO;
451                 goto err_pci;
452         }
453
454         e1000_get_bus_info(&adapter->hw);
455
456         /* Sysctl for limiting the amount of work done in the taskqueue */
457         igb_set_sysctl_value(adapter, "rx_processing_limit",
458             "max number of rx packets to process",
459             &adapter->rx_process_limit, igb_rx_process_limit);
460
461         /*
462          * Validate the number of transmit and receive descriptors. It
463          * must not exceed the hardware maximum, and must be a multiple
464          * of IGB_DBA_ALIGN.
465          */
466         if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
467             (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
468                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
469                     IGB_DEFAULT_TXD, igb_txd);
470                 adapter->num_tx_desc = IGB_DEFAULT_TXD;
471         } else
472                 adapter->num_tx_desc = igb_txd;
473         if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
474             (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
475                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
476                     IGB_DEFAULT_RXD, igb_rxd);
477                 adapter->num_rx_desc = IGB_DEFAULT_RXD;
478         } else
479                 adapter->num_rx_desc = igb_rxd;
480
481         adapter->hw.mac.autoneg = DO_AUTO_NEG;
482         adapter->hw.phy.autoneg_wait_to_complete = FALSE;
483         adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
484
485         /* Copper options */
486         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
487                 adapter->hw.phy.mdix = AUTO_ALL_MODES;
488                 adapter->hw.phy.disable_polarity_correction = FALSE;
489                 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
490         }
491
492         /*
493          * Set the frame limits assuming
494          * standard ethernet sized frames.
495          */
496         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
497
498         /*
499         ** Allocate and Setup Queues
500         */
501         if (igb_allocate_queues(adapter)) {
502                 error = ENOMEM;
503                 goto err_pci;
504         }
505
506         /* Allocate the appropriate stats memory */
507         if (adapter->vf_ifp) {
508                 adapter->stats =
509                     (struct e1000_vf_stats *)malloc(sizeof \
510                     (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
511                 igb_vf_init_stats(adapter);
512         } else
513                 adapter->stats =
514                     (struct e1000_hw_stats *)malloc(sizeof \
515                     (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
516         if (adapter->stats == NULL) {
517                 device_printf(dev, "Can not allocate stats memory\n");
518                 error = ENOMEM;
519                 goto err_late;
520         }
521
522         /* Allocate multicast array memory. */
523         adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
524             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
525         if (adapter->mta == NULL) {
526                 device_printf(dev, "Can not allocate multicast setup array\n");
527                 error = ENOMEM;
528                 goto err_late;
529         }
530
531         /* Some adapter-specific advanced features */
532         if (adapter->hw.mac.type >= e1000_i350) {
533                 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
534                     SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
535                     OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
536                     adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
537                 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
538                     SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
539                     OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
540                     adapter, 0, igb_sysctl_eee, "I",
541                     "Disable Energy Efficient Ethernet");
542                 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
543                         if (adapter->hw.mac.type == e1000_i354)
544                                 e1000_set_eee_i354(&adapter->hw);
545                         else
546                                 e1000_set_eee_i350(&adapter->hw);
547                 }
548         }
549
550         /*
551         ** Start from a known state; this is
552         ** important when reading the NVM and
553         ** MAC address afterwards.
554         */
555         e1000_reset_hw(&adapter->hw);
556
557         /* Make sure we have a good EEPROM before we read from it */
558         if (((adapter->hw.mac.type != e1000_i210) &&
559             (adapter->hw.mac.type != e1000_i211)) &&
560             (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
561                 /*
562                 ** Some PCI-E parts fail the first check due to
563                 ** the link being in a sleep state; call it again, and
564                 ** if it fails a second time it is a real issue.
565                 */
566                 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
567                         device_printf(dev,
568                             "The EEPROM Checksum Is Not Valid\n");
569                         error = EIO;
570                         goto err_late;
571                 }
572         }
573
574         /*
575         ** Copy the permanent MAC address out of the EEPROM
576         */
577         if (e1000_read_mac_addr(&adapter->hw) < 0) {
578                 device_printf(dev, "EEPROM read error while reading MAC"
579                     " address\n");
580                 error = EIO;
581                 goto err_late;
582         }
583         /* Check its sanity */
584         if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
585                 device_printf(dev, "Invalid MAC address\n");
586                 error = EIO;
587                 goto err_late;
588         }
589
590         /* Setup OS specific network interface */
591         if (igb_setup_interface(dev, adapter) != 0)
592                 goto err_late;
593
594         /* Now get a good starting state */
595         igb_reset(adapter);
596
597         /* Initialize statistics */
598         igb_update_stats_counters(adapter);
599
600         adapter->hw.mac.get_link_status = 1;
601         igb_update_link_status(adapter);
602
603         /* Indicate SOL/IDER usage */
604         if (e1000_check_reset_block(&adapter->hw))
605                 device_printf(dev,
606                     "PHY reset is blocked due to SOL/IDER session.\n");
607
608         /* Determine if we have to control management hardware */
609         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
610
611         /*
612          * Setup Wake-on-Lan
613          */
614         /* APME bit in EEPROM is mapped to WUC.APME */
615         eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
616         if (eeprom_data)
617                 adapter->wol = E1000_WUFC_MAG;
618
619         /* Register for VLAN events */
620         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
621              igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
622         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
623              igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
624
625         igb_add_hw_stats(adapter);
626
627         /* Tell the stack that the interface is not active */
628         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
629         adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
630
631         adapter->led_dev = led_create(igb_led_func, adapter,
632             device_get_nameunit(dev));
633
634         /* 
635         ** Configure Interrupts
636         */
637         if ((adapter->msix > 1) && (igb_enable_msix))
638                 error = igb_allocate_msix(adapter);
639         else /* MSI or Legacy */
640                 error = igb_allocate_legacy(adapter);
641         if (error)
642                 goto err_late;
643
644 #ifdef DEV_NETMAP
645         igb_netmap_attach(adapter);
646 #endif /* DEV_NETMAP */
647         INIT_DEBUGOUT("igb_attach: end");
648
649         return (0);
650
651 err_late:
652         igb_detach(dev);
653         igb_free_transmit_structures(adapter);
654         igb_free_receive_structures(adapter);
655         igb_release_hw_control(adapter);
656 err_pci:
657         igb_free_pci_resources(adapter);
658         if (adapter->ifp != NULL)
659                 if_free(adapter->ifp);
660         free(adapter->mta, M_DEVBUF);
661         IGB_CORE_LOCK_DESTROY(adapter);
662
663         return (error);
664 }
665
666 /*********************************************************************
667  *  Device removal routine
668  *
669  *  The detach entry point is called when the driver is being removed.
670  *  This routine stops the adapter and deallocates all the resources
671  *  that were allocated for driver operation.
672  *
673  *  return 0 on success, positive on failure
674  *********************************************************************/
675
676 static int
677 igb_detach(device_t dev)
678 {
679         struct adapter  *adapter = device_get_softc(dev);
680         struct ifnet    *ifp = adapter->ifp;
681
682         INIT_DEBUGOUT("igb_detach: begin");
683
684         /* Make sure VLANS are not using driver */
685         if (adapter->ifp->if_vlantrunk != NULL) {
686                 device_printf(dev,"Vlan in use, detach first\n");
687                 return (EBUSY);
688         }
689
690         ether_ifdetach(adapter->ifp);
691
692         if (adapter->led_dev != NULL)
693                 led_destroy(adapter->led_dev);
694
695 #ifdef DEVICE_POLLING
696         if (ifp->if_capenable & IFCAP_POLLING)
697                 ether_poll_deregister(ifp);
698 #endif
699
700         IGB_CORE_LOCK(adapter);
701         adapter->in_detach = 1;
702         igb_stop(adapter);
703         IGB_CORE_UNLOCK(adapter);
704
705         e1000_phy_hw_reset(&adapter->hw);
706
707         /* Give control back to firmware */
708         igb_release_manageability(adapter);
709         igb_release_hw_control(adapter);
710
711         if (adapter->wol) {
712                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
713                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
714                 igb_enable_wakeup(dev);
715         }
716
717         /* Unregister VLAN events */
718         if (adapter->vlan_attach != NULL)
719                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
720         if (adapter->vlan_detach != NULL)
721                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
722
723         callout_drain(&adapter->timer);
724
725 #ifdef DEV_NETMAP
726         netmap_detach(adapter->ifp);
727 #endif /* DEV_NETMAP */
728         igb_free_pci_resources(adapter);
729         bus_generic_detach(dev);
730         if_free(ifp);
731
732         igb_free_transmit_structures(adapter);
733         igb_free_receive_structures(adapter);
734         if (adapter->mta != NULL)
735                 free(adapter->mta, M_DEVBUF);
736
737         IGB_CORE_LOCK_DESTROY(adapter);
738
739         return (0);
740 }
741
742 /*********************************************************************
743  *
744  *  Shutdown entry point
745  *
746  **********************************************************************/
747
748 static int
749 igb_shutdown(device_t dev)
750 {
751         return igb_suspend(dev);
752 }
753
754 /*
755  * Suspend/resume device methods.
756  */
757 static int
758 igb_suspend(device_t dev)
759 {
760         struct adapter *adapter = device_get_softc(dev);
761
762         IGB_CORE_LOCK(adapter);
763
764         igb_stop(adapter);
765
766         igb_release_manageability(adapter);
767         igb_release_hw_control(adapter);
768
769         if (adapter->wol) {
770                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
771                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
772                 igb_enable_wakeup(dev);
773         }
774
775         IGB_CORE_UNLOCK(adapter);
776
777         return bus_generic_suspend(dev);
778 }
779
780 static int
781 igb_resume(device_t dev)
782 {
783         struct adapter *adapter = device_get_softc(dev);
784         struct tx_ring  *txr = adapter->tx_rings;
785         struct ifnet *ifp = adapter->ifp;
786
787         IGB_CORE_LOCK(adapter);
788         igb_init_locked(adapter);
789         igb_init_manageability(adapter);
790
791         if ((ifp->if_flags & IFF_UP) &&
792             (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
793                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
794                         IGB_TX_LOCK(txr);
795 #ifndef IGB_LEGACY_TX
796                         /* Process the stack queue only if not depleted */
797                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
798                             !drbr_empty(ifp, txr->br))
799                                 igb_mq_start_locked(ifp, txr);
800 #else
801                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
802                                 igb_start_locked(txr, ifp);
803 #endif
804                         IGB_TX_UNLOCK(txr);
805                 }
806         }
807         IGB_CORE_UNLOCK(adapter);
808
809         return bus_generic_resume(dev);
810 }
811
812
813 #ifdef IGB_LEGACY_TX
814
815 /*********************************************************************
816  *  Transmit entry point
817  *
818  *  igb_start is called by the stack to initiate a transmit.
819  *  The driver will remain in this routine as long as there are
820  *  packets to transmit and transmit resources are available.
821  *  In case resources are not available, the stack is notified and
822  *  the packet is requeued.
823  **********************************************************************/
824
825 static void
826 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
827 {
828         struct adapter  *adapter = ifp->if_softc;
829         struct mbuf     *m_head;
830
831         IGB_TX_LOCK_ASSERT(txr);
832
833         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
834             IFF_DRV_RUNNING)
835                 return;
836         if (!adapter->link_active)
837                 return;
838
839         /* Call cleanup if number of TX descriptors low */
840         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
841                 igb_txeof(txr);
842
843         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
844                 if (txr->tx_avail <= IGB_MAX_SCATTER) {
845                         txr->queue_status |= IGB_QUEUE_DEPLETED;
846                         break;
847                 }
848                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
849                 if (m_head == NULL)
850                         break;
851                 /*
852                  *  Encapsulation can modify our pointer, and/or make it
853                  *  NULL on failure.  In that event, we can't requeue.
854                  */
855                 if (igb_xmit(txr, &m_head)) {
856                         if (m_head != NULL)
857                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
858                         if (txr->tx_avail <= IGB_MAX_SCATTER)
859                                 txr->queue_status |= IGB_QUEUE_DEPLETED;
860                         break;
861                 }
862
863                 /* Send a copy of the frame to the BPF listener */
864                 ETHER_BPF_MTAP(ifp, m_head);
865
866                 /* Set watchdog on */
867                 txr->watchdog_time = ticks;
868                 txr->queue_status |= IGB_QUEUE_WORKING;
869         }
870 }
871  
872 /*
873  * Legacy TX driver routine, called from the
874  * stack, always uses tx[0], and spins for it.
875  * Should not be used with multiqueue TX.
876  */
877 static void
878 igb_start(struct ifnet *ifp)
879 {
880         struct adapter  *adapter = ifp->if_softc;
881         struct tx_ring  *txr = adapter->tx_rings;
882
883         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
884                 IGB_TX_LOCK(txr);
885                 igb_start_locked(txr, ifp);
886                 IGB_TX_UNLOCK(txr);
887         }
888         return;
889 }
890
891 #else /* ~IGB_LEGACY_TX */
892
893 /*
894 ** Multiqueue Transmit Entry:
895 **  quick turnaround to the stack
896 **
897 */
898 static int
899 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
900 {
901         struct adapter          *adapter = ifp->if_softc;
902         struct igb_queue        *que;
903         struct tx_ring          *txr;
904         int                     i, err = 0;
905 #ifdef  RSS
906         uint32_t                bucket_id;
907 #endif
908
909         /* Which queue to use */
910         /*
911          * When doing RSS, map it to the same outbound queue
912          * as the incoming flow would be mapped to.
913          *
914          * If everything is set up correctly, it should be the
915          * bucket associated with the CPU we are currently on.
916          */
917         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
918 #ifdef  RSS
919                 if (rss_hash2bucket(m->m_pkthdr.flowid,
920                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
921                         /* XXX TODO: spit out something if bucket_id > num_queues? */
922                         i = bucket_id % adapter->num_queues;
923                 } else {
924 #endif
925                         i = m->m_pkthdr.flowid % adapter->num_queues;
926 #ifdef  RSS
927                 }
928 #endif
929         } else {
930                 i = curcpu % adapter->num_queues;
931         }
932         txr = &adapter->tx_rings[i];
933         que = &adapter->queues[i];
934
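        /*
         * The frame is always enqueued on the ring's buf_ring first; if
         * the TX lock cannot be taken it is not dropped, a task is queued
         * instead and igb_deferred_mq_start() drains the ring later.
         */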
935         err = drbr_enqueue(ifp, txr->br, m);
936         if (err)
937                 return (err);
938         if (IGB_TX_TRYLOCK(txr)) {
939                 igb_mq_start_locked(ifp, txr);
940                 IGB_TX_UNLOCK(txr);
941         } else
942                 taskqueue_enqueue(que->tq, &txr->txq_task);
943
944         return (0);
945 }
946
947 static int
948 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
949 {
950         struct adapter  *adapter = txr->adapter;
951         struct mbuf     *next;
952         int             err = 0, enq = 0;
953
954         IGB_TX_LOCK_ASSERT(txr);
955
956         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
957             adapter->link_active == 0)
958                 return (ENETDOWN);
959
960         /* Process the queue */
961         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
962                 if ((err = igb_xmit(txr, &next)) != 0) {
963                         if (next == NULL) {
964                                 /* It was freed, move forward */
965                                 drbr_advance(ifp, txr->br);
966                         } else {
967                                 /* 
968                                  * Still have one left; it may not be
969                                  * the same since the transmit function
970                                  * may have changed it.
971                                  */
972                                 drbr_putback(ifp, txr->br, next);
973                         }
974                         break;
975                 }
976                 drbr_advance(ifp, txr->br);
977                 enq++;
978                 if (next->m_flags & M_MCAST && adapter->vf_ifp)
979                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
980                 ETHER_BPF_MTAP(ifp, next);
981                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
982                         break;
983         }
984         if (enq > 0) {
985                 /* Set the watchdog */
986                 txr->queue_status |= IGB_QUEUE_WORKING;
987                 txr->watchdog_time = ticks;
988         }
989         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
990                 igb_txeof(txr);
991         if (txr->tx_avail <= IGB_MAX_SCATTER)
992                 txr->queue_status |= IGB_QUEUE_DEPLETED;
993         return (err);
994 }
995
996 /*
997  * Called from a taskqueue to drain queued transmit packets.
998  */
999 static void
1000 igb_deferred_mq_start(void *arg, int pending)
1001 {
1002         struct tx_ring *txr = arg;
1003         struct adapter *adapter = txr->adapter;
1004         struct ifnet *ifp = adapter->ifp;
1005
1006         IGB_TX_LOCK(txr);
1007         if (!drbr_empty(ifp, txr->br))
1008                 igb_mq_start_locked(ifp, txr);
1009         IGB_TX_UNLOCK(txr);
1010 }
1011
1012 /*
1013 ** Flush all ring buffers
1014 */
1015 static void
1016 igb_qflush(struct ifnet *ifp)
1017 {
1018         struct adapter  *adapter = ifp->if_softc;
1019         struct tx_ring  *txr = adapter->tx_rings;
1020         struct mbuf     *m;
1021
1022         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1023                 IGB_TX_LOCK(txr);
1024                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1025                         m_freem(m);
1026                 IGB_TX_UNLOCK(txr);
1027         }
1028         if_qflush(ifp);
1029 }
1030 #endif /* ~IGB_LEGACY_TX */
1031
1032 /*********************************************************************
1033  *  Ioctl entry point
1034  *
1035  *  igb_ioctl is called when the user wants to configure the
1036  *  interface.
1037  *
1038  *  return 0 on success, positive on failure
1039  **********************************************************************/
1040
1041 static int
1042 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1043 {
1044         struct adapter  *adapter = ifp->if_softc;
1045         struct ifreq    *ifr = (struct ifreq *)data;
1046 #if defined(INET) || defined(INET6)
1047         struct ifaddr   *ifa = (struct ifaddr *)data;
1048 #endif
1049         bool            avoid_reset = FALSE;
1050         int             error = 0;
1051
1052         if (adapter->in_detach)
1053                 return (error);
1054
1055         switch (command) {
1056         case SIOCSIFADDR:
1057 #ifdef INET
1058                 if (ifa->ifa_addr->sa_family == AF_INET)
1059                         avoid_reset = TRUE;
1060 #endif
1061 #ifdef INET6
1062                 if (ifa->ifa_addr->sa_family == AF_INET6)
1063                         avoid_reset = TRUE;
1064 #endif
1065                 /*
1066                 ** Calling init results in link renegotiation,
1067                 ** so we avoid doing it when possible.
1068                 */
1069                 if (avoid_reset) {
1070                         ifp->if_flags |= IFF_UP;
1071                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1072                                 igb_init(adapter);
1073 #ifdef INET
1074                         if (!(ifp->if_flags & IFF_NOARP))
1075                                 arp_ifinit(ifp, ifa);
1076 #endif
1077                 } else
1078                         error = ether_ioctl(ifp, command, data);
1079                 break;
1080         case SIOCSIFMTU:
1081             {
1082                 int max_frame_size;
1083
1084                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1085
1086                 IGB_CORE_LOCK(adapter);
1087                 max_frame_size = 9234;
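                /*
                 * 9234 appears to be a 9216-byte (9KB) jumbo payload plus
                 * ETHER_HDR_LEN (14) and ETHER_CRC_LEN (4); the MTU check
                 * below subtracts the header and CRC back out before
                 * comparing against the requested value.
                 */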
1088                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1089                     ETHER_CRC_LEN) {
1090                         IGB_CORE_UNLOCK(adapter);
1091                         error = EINVAL;
1092                         break;
1093                 }
1094
1095                 ifp->if_mtu = ifr->ifr_mtu;
1096                 adapter->max_frame_size =
1097                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1098                 igb_init_locked(adapter);
1099                 IGB_CORE_UNLOCK(adapter);
1100                 break;
1101             }
1102         case SIOCSIFFLAGS:
1103                 IOCTL_DEBUGOUT("ioctl rcv'd:\
1104                     SIOCSIFFLAGS (Set Interface Flags)");
1105                 IGB_CORE_LOCK(adapter);
1106                 if (ifp->if_flags & IFF_UP) {
1107                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1108                                 if ((ifp->if_flags ^ adapter->if_flags) &
1109                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1110                                         igb_disable_promisc(adapter);
1111                                         igb_set_promisc(adapter);
1112                                 }
1113                         } else
1114                                 igb_init_locked(adapter);
1115                 } else
1116                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1117                                 igb_stop(adapter);
1118                 adapter->if_flags = ifp->if_flags;
1119                 IGB_CORE_UNLOCK(adapter);
1120                 break;
1121         case SIOCADDMULTI:
1122         case SIOCDELMULTI:
1123                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1124                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1125                         IGB_CORE_LOCK(adapter);
1126                         igb_disable_intr(adapter);
1127                         igb_set_multi(adapter);
1128 #ifdef DEVICE_POLLING
1129                         if (!(ifp->if_capenable & IFCAP_POLLING))
1130 #endif
1131                                 igb_enable_intr(adapter);
1132                         IGB_CORE_UNLOCK(adapter);
1133                 }
1134                 break;
1135         case SIOCSIFMEDIA:
1136                 /* Check SOL/IDER usage */
1137                 IGB_CORE_LOCK(adapter);
1138                 if (e1000_check_reset_block(&adapter->hw)) {
1139                         IGB_CORE_UNLOCK(adapter);
1140                         device_printf(adapter->dev, "Media change is"
1141                             " blocked due to SOL/IDER session.\n");
1142                         break;
1143                 }
1144                 IGB_CORE_UNLOCK(adapter);
1145         case SIOCGIFMEDIA:
1146                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1147                     SIOCxIFMEDIA (Get/Set Interface Media)");
1148                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1149                 break;
1150         case SIOCSIFCAP:
1151             {
1152                 int mask, reinit;
1153
1154                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1155                 reinit = 0;
1156                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1157 #ifdef DEVICE_POLLING
1158                 if (mask & IFCAP_POLLING) {
1159                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1160                                 error = ether_poll_register(igb_poll, ifp);
1161                                 if (error)
1162                                         return (error);
1163                                 IGB_CORE_LOCK(adapter);
1164                                 igb_disable_intr(adapter);
1165                                 ifp->if_capenable |= IFCAP_POLLING;
1166                                 IGB_CORE_UNLOCK(adapter);
1167                         } else {
1168                                 error = ether_poll_deregister(ifp);
1169                                 /* Enable interrupt even in error case */
1170                                 IGB_CORE_LOCK(adapter);
1171                                 igb_enable_intr(adapter);
1172                                 ifp->if_capenable &= ~IFCAP_POLLING;
1173                                 IGB_CORE_UNLOCK(adapter);
1174                         }
1175                 }
1176 #endif
1177                 if (mask & IFCAP_HWCSUM) {
1178                         ifp->if_capenable ^= IFCAP_HWCSUM;
1179                         reinit = 1;
1180                 }
1181                 if (mask & IFCAP_TSO4) {
1182                         ifp->if_capenable ^= IFCAP_TSO4;
1183                         reinit = 1;
1184                 }
1185                 if (mask & IFCAP_TSO6) {
1186                         ifp->if_capenable ^= IFCAP_TSO6;
1187                         reinit = 1;
1188                 }
1189                 if (mask & IFCAP_VLAN_HWTAGGING) {
1190                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1191                         reinit = 1;
1192                 }
1193                 if (mask & IFCAP_VLAN_HWFILTER) {
1194                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1195                         reinit = 1;
1196                 }
1197                 if (mask & IFCAP_VLAN_HWTSO) {
1198                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1199                         reinit = 1;
1200                 }
1201                 if (mask & IFCAP_LRO) {
1202                         ifp->if_capenable ^= IFCAP_LRO;
1203                         reinit = 1;
1204                 }
1205                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1206                         igb_init(adapter);
1207                 VLAN_CAPABILITIES(ifp);
1208                 break;
1209             }
1210
1211         default:
1212                 error = ether_ioctl(ifp, command, data);
1213                 break;
1214         }
1215
1216         return (error);
1217 }
1218
1219
1220 /*********************************************************************
1221  *  Init entry point
1222  *
1223  *  This routine is used in two ways. It is used by the stack as
1224  *  the init entry point in the network interface structure. It is also used
1225  *  by the driver as a hw/sw initialization routine to get to a
1226  *  consistent state.
1227  *
1228  *  return 0 on success, positive on failure
1229  **********************************************************************/
1230
1231 static void
1232 igb_init_locked(struct adapter *adapter)
1233 {
1234         struct ifnet    *ifp = adapter->ifp;
1235         device_t        dev = adapter->dev;
1236
1237         INIT_DEBUGOUT("igb_init: begin");
1238
1239         IGB_CORE_LOCK_ASSERT(adapter);
1240
1241         igb_disable_intr(adapter);
1242         callout_stop(&adapter->timer);
1243
1244         /* Get the latest MAC address; the user can set a LAA */
1245         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1246               ETHER_ADDR_LEN);
1247
1248         /* Put the address into the Receive Address Array */
1249         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1250
1251         igb_reset(adapter);
1252         igb_update_link_status(adapter);
1253
1254         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1255
1256         /* Set hardware offload abilities */
1257         ifp->if_hwassist = 0;
1258         if (ifp->if_capenable & IFCAP_TXCSUM) {
1259                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1260 #if __FreeBSD_version >= 800000
1261                 if (adapter->hw.mac.type == e1000_82576)
1262                         ifp->if_hwassist |= CSUM_SCTP;
1263 #endif
1264         }
1265
1266         if (ifp->if_capenable & IFCAP_TSO)
1267                 ifp->if_hwassist |= CSUM_TSO;
1268
1269         /* Configure for OS presence */
1270         igb_init_manageability(adapter);
1271
1272         /* Prepare transmit descriptors and buffers */
1273         igb_setup_transmit_structures(adapter);
1274         igb_initialize_transmit_units(adapter);
1275
1276         /* Setup Multicast table */
1277         igb_set_multi(adapter);
1278
1279         /*
1280         ** Figure out the desired mbuf pool
1281         ** for doing jumbo/packetsplit
1282         */
1283         if (adapter->max_frame_size <= 2048)
1284                 adapter->rx_mbuf_sz = MCLBYTES;
1285         else if (adapter->max_frame_size <= 4096)
1286                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1287         else
1288                 adapter->rx_mbuf_sz = MJUM9BYTES;
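        /*
         * MCLBYTES, MJUMPAGESIZE and MJUM9BYTES are the standard 2KB,
         * page-sized and 9KB mbuf cluster zones; the smallest zone that
         * can hold a full frame is chosen for the receive buffers.
         */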
1289
1290         /* Prepare receive descriptors and buffers */
1291         if (igb_setup_receive_structures(adapter)) {
1292                 device_printf(dev, "Could not setup receive structures\n");
1293                 return;
1294         }
1295         igb_initialize_receive_units(adapter);
1296         e1000_rx_fifo_flush_82575(&adapter->hw);
1297
1298         /* Enable VLAN support */
1299         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1300                 igb_setup_vlan_hw_support(adapter);
1301                                 
1302         /* Don't lose promiscuous settings */
1303         igb_set_promisc(adapter);
1304
1305         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1306         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1307
1308         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1309         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1310
1311         if (adapter->msix > 1) /* Set up queue routing */
1312                 igb_configure_queues(adapter);
1313
1314         /* this clears any pending interrupts */
1315         E1000_READ_REG(&adapter->hw, E1000_ICR);
1316 #ifdef DEVICE_POLLING
1317         /*
1318          * Only enable interrupts if we are not polling; make sure
1319          * they are off otherwise.
1320          */
1321         if (ifp->if_capenable & IFCAP_POLLING)
1322                 igb_disable_intr(adapter);
1323         else
1324 #endif /* DEVICE_POLLING */
1325         {
1326                 igb_enable_intr(adapter);
1327                 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1328         }
1329
1330         /* Set Energy Efficient Ethernet */
1331         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1332                 if (adapter->hw.mac.type == e1000_i354)
1333                         e1000_set_eee_i354(&adapter->hw);
1334                 else
1335                         e1000_set_eee_i350(&adapter->hw);
1336         }
1337 }
1338
1339 static void
1340 igb_init(void *arg)
1341 {
1342         struct adapter *adapter = arg;
1343
1344         IGB_CORE_LOCK(adapter);
1345         igb_init_locked(adapter);
1346         IGB_CORE_UNLOCK(adapter);
1347 }
1348
1349
1350 static void
1351 igb_handle_que(void *context, int pending)
1352 {
1353         struct igb_queue *que = context;
1354         struct adapter *adapter = que->adapter;
1355         struct tx_ring *txr = que->txr;
1356         struct ifnet    *ifp = adapter->ifp;
1357
1358         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1359                 bool    more;
1360
1361                 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1362
1363                 IGB_TX_LOCK(txr);
1364                 igb_txeof(txr);
1365 #ifndef IGB_LEGACY_TX
1366                 /* Process the stack queue only if not depleted */
1367                 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1368                     !drbr_empty(ifp, txr->br))
1369                         igb_mq_start_locked(ifp, txr);
1370 #else
1371                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1372                         igb_start_locked(txr, ifp);
1373 #endif
1374                 IGB_TX_UNLOCK(txr);
1375                 /* Do we need another? */
1376                 if (more) {
1377                         taskqueue_enqueue(que->tq, &que->que_task);
1378                         return;
1379                 }
1380         }
1381
1382 #ifdef DEVICE_POLLING
1383         if (ifp->if_capenable & IFCAP_POLLING)
1384                 return;
1385 #endif
1386         /* Reenable this interrupt */
1387         if (que->eims)
1388                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1389         else
1390                 igb_enable_intr(adapter);
1391 }
1392
1393 /* Deal with link in a sleepable context */
1394 static void
1395 igb_handle_link(void *context, int pending)
1396 {
1397         struct adapter *adapter = context;
1398
1399         IGB_CORE_LOCK(adapter);
1400         igb_handle_link_locked(adapter);
1401         IGB_CORE_UNLOCK(adapter);
1402 }
1403
1404 static void
1405 igb_handle_link_locked(struct adapter *adapter)
1406 {
1407         struct tx_ring  *txr = adapter->tx_rings;
1408         struct ifnet *ifp = adapter->ifp;
1409
1410         IGB_CORE_LOCK_ASSERT(adapter);
1411         adapter->hw.mac.get_link_status = 1;
1412         igb_update_link_status(adapter);
1413         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1414                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1415                         IGB_TX_LOCK(txr);
1416 #ifndef IGB_LEGACY_TX
1417                         /* Process the stack queue only if not depleted */
1418                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1419                             !drbr_empty(ifp, txr->br))
1420                                 igb_mq_start_locked(ifp, txr);
1421 #else
1422                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1423                                 igb_start_locked(txr, ifp);
1424 #endif
1425                         IGB_TX_UNLOCK(txr);
1426                 }
1427         }
1428 }
1429
1430 /*********************************************************************
1431  *
1432  *  MSI/Legacy Deferred
1433  *  Interrupt Service routine  
1434  *
1435  *********************************************************************/
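/*
 * igb_irq_fast() is registered as an interrupt filter, so it must not
 * sleep or acquire sleepable locks: it only reads ICR, masks further
 * interrupts, and defers the RX/TX and link work to the taskqueue
 * contexts set up at attach time.
 */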
1436 static int
1437 igb_irq_fast(void *arg)
1438 {
1439         struct adapter          *adapter = arg;
1440         struct igb_queue        *que = adapter->queues;
1441         u32                     reg_icr;
1442
1443
1444         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1445
1446         /* Hot eject?  */
1447         if (reg_icr == 0xffffffff)
1448                 return FILTER_STRAY;
1449
1450         /* Definitely not our interrupt.  */
1451         if (reg_icr == 0x0)
1452                 return FILTER_STRAY;
1453
1454         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1455                 return FILTER_STRAY;
1456
1457         /*
1458          * Mask interrupts until the taskqueue is finished running.  This is
1459          * cheap, just assume that it is needed.  This also works around the
1460          * MSI message reordering errata on certain systems.
1461          */
1462         igb_disable_intr(adapter);
1463         taskqueue_enqueue(que->tq, &que->que_task);
1464
1465         /* Link status change */
1466         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1467                 taskqueue_enqueue(que->tq, &adapter->link_task);
1468
1469         if (reg_icr & E1000_ICR_RXO)
1470                 adapter->rx_overruns++;
1471         return FILTER_HANDLED;
1472 }
1473
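/*
 * DEVICE_POLLING entry point.  On FreeBSD 8.0 and later a poll handler
 * returns the number of packets it processed, hence the
 * POLL_RETURN_COUNT() dance around the return type below.
 */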
1474 #ifdef DEVICE_POLLING
1475 #if __FreeBSD_version >= 800000
1476 #define POLL_RETURN_COUNT(a) (a)
1477 static int
1478 #else
1479 #define POLL_RETURN_COUNT(a)
1480 static void
1481 #endif
1482 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1483 {
1484         struct adapter          *adapter = ifp->if_softc;
1485         struct igb_queue        *que;
1486         struct tx_ring          *txr;
1487         u32                     reg_icr, rx_done = 0;
1488         u32                     loop = IGB_MAX_LOOP;
1489         bool                    more;
1490
1491         IGB_CORE_LOCK(adapter);
1492         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1493                 IGB_CORE_UNLOCK(adapter);
1494                 return POLL_RETURN_COUNT(rx_done);
1495         }
1496
1497         if (cmd == POLL_AND_CHECK_STATUS) {
1498                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1499                 /* Link status change */
1500                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1501                         igb_handle_link_locked(adapter);
1502
1503                 if (reg_icr & E1000_ICR_RXO)
1504                         adapter->rx_overruns++;
1505         }
1506         IGB_CORE_UNLOCK(adapter);
1507
1508         for (int i = 0; i < adapter->num_queues; i++) {
1509                 que = &adapter->queues[i];
1510                 txr = que->txr;
1511
1512                 igb_rxeof(que, count, &rx_done);
1513
1514                 IGB_TX_LOCK(txr);
1515                 do {
1516                         more = igb_txeof(txr);
1517                 } while (loop-- && more);
1518 #ifndef IGB_LEGACY_TX
1519                 if (!drbr_empty(ifp, txr->br))
1520                         igb_mq_start_locked(ifp, txr);
1521 #else
1522                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1523                         igb_start_locked(txr, ifp);
1524 #endif
1525                 IGB_TX_UNLOCK(txr);
1526         }
1527
1528         return POLL_RETURN_COUNT(rx_done);
1529 }
1530 #endif /* DEVICE_POLLING */
1531
1532 /*********************************************************************
1533  *
1534  *  MSIX Que Interrupt Service routine
1535  *
1536  **********************************************************************/
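/*
 * Per-queue MSI-X handler: mask this queue's EIMS bit, clean the TX
 * ring, service RX up to the process limit, update the adaptive ITR
 * setting if enabled, then either hand leftover RX work to the
 * taskqueue or unmask the queue interrupt.
 */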
1537 static void
1538 igb_msix_que(void *arg)
1539 {
1540         struct igb_queue *que = arg;
1541         struct adapter *adapter = que->adapter;
1542         struct ifnet   *ifp = adapter->ifp;
1543         struct tx_ring *txr = que->txr;
1544         struct rx_ring *rxr = que->rxr;
1545         u32             newitr = 0;
1546         bool            more_rx;
1547
1548         /* Ignore spurious interrupts */
1549         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1550                 return;
1551
1552         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1553         ++que->irqs;
1554
1555         IGB_TX_LOCK(txr);
1556         igb_txeof(txr);
1557 #ifndef IGB_LEGACY_TX
1558         /* Process the stack queue only if not depleted */
1559         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1560             !drbr_empty(ifp, txr->br))
1561                 igb_mq_start_locked(ifp, txr);
1562 #else
1563         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1564                 igb_start_locked(txr, ifp);
1565 #endif
1566         IGB_TX_UNLOCK(txr);
1567
1568         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1569
1570         if (adapter->enable_aim == FALSE)
1571                 goto no_calc;
1572         /*
1573         ** Do Adaptive Interrupt Moderation:
1574         **  - Write out last calculated setting
1575         **  - Calculate based on average size over
1576         **    the last interval.
1577         */
1578         if (que->eitr_setting)
1579                 E1000_WRITE_REG(&adapter->hw,
1580                     E1000_EITR(que->msix), que->eitr_setting);
1581  
1582         que->eitr_setting = 0;
1583
1584         /* Idle, do nothing */
1585         if ((txr->bytes == 0) && (rxr->bytes == 0))
1586                 goto no_calc;
1587                                 
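        /*
         * The new EITR value approximates the average packet size
         * (bytes per packet) seen on this queue since the last
         * interrupt; a larger average yields a longer interval and
         * fewer interrupts.  It is clamped, masked to the bits EITR
         * implements, and written out at the top of the next
         * interrupt via que->eitr_setting.
         */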
1588         /* Use half the default if the link is sub-gigabit */
1589         if (adapter->link_speed != 1000)
1590                 newitr = IGB_DEFAULT_ITR / 2;
1591         else {
1592                 if ((txr->bytes) && (txr->packets))
1593                         newitr = txr->bytes/txr->packets;
1594                 if ((rxr->bytes) && (rxr->packets))
1595                         newitr = max(newitr,
1596                             (rxr->bytes / rxr->packets));
1597                 newitr += 24; /* account for hardware frame, crc */
1598                 /* set an upper boundary */
1599                 newitr = min(newitr, 3000);
1600                 /* Be nice to the mid range */
1601                 if ((newitr > 300) && (newitr < 1200))
1602                         newitr = (newitr / 3);
1603                 else
1604                         newitr = (newitr / 2);
1605         }
1606         newitr &= 0x7FFC;  /* Mask invalid bits */
1607         if (adapter->hw.mac.type == e1000_82575)
1608                 newitr |= newitr << 16;
1609         else
1610                 newitr |= E1000_EITR_CNT_IGNR;
1611                  
1612         /* save for next interrupt */
1613         que->eitr_setting = newitr;
1614
1615         /* Reset state */
1616         txr->bytes = 0;
1617         txr->packets = 0;
1618         rxr->bytes = 0;
1619         rxr->packets = 0;
1620
1621 no_calc:
1622         /* Schedule a clean task if needed */
1623         if (more_rx)
1624                 taskqueue_enqueue(que->tq, &que->que_task);
1625         else
1626                 /* Reenable this interrupt */
1627                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1628         return;
1629 }
1630
1631
1632 /*********************************************************************
1633  *
1634  *  MSIX Link Interrupt Service routine
1635  *
1636  **********************************************************************/
1637
1638 static void
1639 igb_msix_link(void *arg)
1640 {
1641         struct adapter  *adapter = arg;
1642         u32             icr;
1643
1644         ++adapter->link_irq;
1645         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1646         if (!(icr & E1000_ICR_LSC))
1647                 goto spurious;
1648         igb_handle_link(adapter, 0);
1649
1650 spurious:
1651         /* Rearm */
1652         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1653         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1654         return;
1655 }
1656
1657
1658 /*********************************************************************
1659  *
1660  *  Media Ioctl callback
1661  *
1662  *  This routine is called whenever the user queries the status of
1663  *  the interface using ifconfig.
1664  *
1665  **********************************************************************/
1666 static void
1667 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1668 {
1669         struct adapter *adapter = ifp->if_softc;
1670
1671         INIT_DEBUGOUT("igb_media_status: begin");
1672
1673         IGB_CORE_LOCK(adapter);
1674         igb_update_link_status(adapter);
1675
1676         ifmr->ifm_status = IFM_AVALID;
1677         ifmr->ifm_active = IFM_ETHER;
1678
1679         if (!adapter->link_active) {
1680                 IGB_CORE_UNLOCK(adapter);
1681                 return;
1682         }
1683
1684         ifmr->ifm_status |= IFM_ACTIVE;
1685
1686         switch (adapter->link_speed) {
1687         case 10:
1688                 ifmr->ifm_active |= IFM_10_T;
1689                 break;
1690         case 100:
1691                 /*
1692                 ** Support for 100Mb SFP - these are Fiber 
1693                 ** but the media type appears as serdes
1694                 */
1695                 if (adapter->hw.phy.media_type ==
1696                     e1000_media_type_internal_serdes)
1697                         ifmr->ifm_active |= IFM_100_FX;
1698                 else
1699                         ifmr->ifm_active |= IFM_100_TX;
1700                 break;
1701         case 1000:
1702                 ifmr->ifm_active |= IFM_1000_T;
1703                 break;
1704         case 2500:
1705                 ifmr->ifm_active |= IFM_2500_SX;
1706                 break;
1707         }
1708
1709         if (adapter->link_duplex == FULL_DUPLEX)
1710                 ifmr->ifm_active |= IFM_FDX;
1711         else
1712                 ifmr->ifm_active |= IFM_HDX;
1713
1714         IGB_CORE_UNLOCK(adapter);
1715 }
1716
1717 /*********************************************************************
1718  *
1719  *  Media Ioctl callback
1720  *
1721  *  This routine is called when the user changes speed/duplex using
1722  *  media/mediaopt option with ifconfig.
1723  *
1724  **********************************************************************/
1725 static int
1726 igb_media_change(struct ifnet *ifp)
1727 {
1728         struct adapter *adapter = ifp->if_softc;
1729         struct ifmedia  *ifm = &adapter->media;
1730
1731         INIT_DEBUGOUT("igb_media_change: begin");
1732
1733         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1734                 return (EINVAL);
1735
1736         IGB_CORE_LOCK(adapter);
1737         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1738         case IFM_AUTO:
1739                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1740                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1741                 break;
1742         case IFM_1000_LX:
1743         case IFM_1000_SX:
1744         case IFM_1000_T:
1745                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1746                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1747                 break;
1748         case IFM_100_TX:
1749                 adapter->hw.mac.autoneg = FALSE;
1750                 adapter->hw.phy.autoneg_advertised = 0;
1751                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1752                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1753                 else
1754                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1755                 break;
1756         case IFM_10_T:
1757                 adapter->hw.mac.autoneg = FALSE;
1758                 adapter->hw.phy.autoneg_advertised = 0;
1759                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1760                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1761                 else
1762                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1763                 break;
1764         default:
1765                 device_printf(adapter->dev, "Unsupported media type\n");
1766         }
1767
1768         igb_init_locked(adapter);
1769         IGB_CORE_UNLOCK(adapter);
1770
1771         return (0);
1772 }
1773
1774
1775 /*********************************************************************
1776  *
1777  *  This routine maps the mbufs to Advanced TX descriptors.
1778  *  
1779  **********************************************************************/
1780 static int
1781 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1782 {
1783         struct adapter  *adapter = txr->adapter;
1784         u32             olinfo_status = 0, cmd_type_len;
1785         int             i, j, error, nsegs;
1786         int             first;
1787         bool            remap = TRUE;
1788         struct mbuf     *m_head;
1789         bus_dma_segment_t segs[IGB_MAX_SCATTER];
1790         bus_dmamap_t    map;
1791         struct igb_tx_buf *txbuf;
1792         union e1000_adv_tx_desc *txd = NULL;
1793
1794         m_head = *m_headp;
1795
1796         /* Basic descriptor defines */
1797         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1798             E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1799
1800         if (m_head->m_flags & M_VLANTAG)
1801                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1802
1803         /*
1804          * Important to capture the first descriptor
1805          * used because it will contain the index of
1806          * the one we tell the hardware to report back
1807          */
1808         first = txr->next_avail_desc;
1809         txbuf = &txr->tx_buffers[first];
1810         map = txbuf->map;
1811
1812         /*
1813          * Map the packet for DMA.
1814          */
1815 retry:
1816         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1817             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1818
1819         if (__predict_false(error)) {
1820                 struct mbuf *m;
1821
1822                 switch (error) {
1823                 case EFBIG:
1824                         /* Try it again? - one try */
1825                         if (remap == TRUE) {
1826                                 remap = FALSE;
1827                                 m = m_defrag(*m_headp, M_NOWAIT);
1828                                 if (m == NULL) {
1829                                         adapter->mbuf_defrag_failed++;
1830                                         m_freem(*m_headp);
1831                                         *m_headp = NULL;
1832                                         return (ENOBUFS);
1833                                 }
1834                                 *m_headp = m;
1835                                 goto retry;
1836                         } else
1837                                 return (error);
1838                 default:
1839                         txr->no_tx_dma_setup++;
1840                         m_freem(*m_headp);
1841                         *m_headp = NULL;
1842                         return (error);
1843                 }
1844         }
1845
1846         /* Make certain there are enough descriptors */
1847         if (nsegs > txr->tx_avail - 2) {
1848                 txr->no_desc_avail++;
1849                 bus_dmamap_unload(txr->txtag, map);
1850                 return (ENOBUFS);
1851         }
1852         m_head = *m_headp;
1853
1854         /*
1855         ** Set up the appropriate offload context
1856         ** this will consume the first descriptor
1857         */
1858         error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1859         if (__predict_false(error)) {
1860                 m_freem(*m_headp);
1861                 *m_headp = NULL;
1862                 return (error);
1863         }
1864
1865         /* 82575 needs the queue index added */
1866         if (adapter->hw.mac.type == e1000_82575)
1867                 olinfo_status |= txr->me << 4;
1868
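        /*
         * Write one advanced data descriptor per DMA segment; every
         * descriptor carries the same cmd_type_len flags and
         * olinfo_status, and EOP/RS are OR'd into the last one below
         * so completion is reported once per frame.
         */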
1869         i = txr->next_avail_desc;
1870         for (j = 0; j < nsegs; j++) {
1871                 bus_size_t seglen;
1872                 bus_addr_t segaddr;
1873
1874                 txbuf = &txr->tx_buffers[i];
1875                 txd = &txr->tx_base[i];
1876                 seglen = segs[j].ds_len;
1877                 segaddr = htole64(segs[j].ds_addr);
1878
1879                 txd->read.buffer_addr = segaddr;
1880                 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1881                     cmd_type_len | seglen);
1882                 txd->read.olinfo_status = htole32(olinfo_status);
1883
1884                 if (++i == txr->num_desc)
1885                         i = 0;
1886         }
1887
1888         txd->read.cmd_type_len |=
1889             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1890         txr->tx_avail -= nsegs;
1891         txr->next_avail_desc = i;
1892
1893         txbuf->m_head = m_head;
1894         /*
1895         ** Here we swap the maps so that the last descriptor,
1896         ** which gets the completion interrupt, has the
1897         ** real map, and the first descriptor gets the
1898         ** unused map from this descriptor.
1899         */
1900         txr->tx_buffers[first].map = txbuf->map;
1901         txbuf->map = map;
1902         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1903
1904         /* Set the EOP descriptor that will be marked done */
1905         txbuf = &txr->tx_buffers[first];
1906         txbuf->eop = txd;
1907
1908         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1909             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1910         /*
1911          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1912          * hardware that this frame is available to transmit.
1913          */
1914         ++txr->total_packets;
1915         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1916
1917         return (0);
1918 }

1919 static void
1920 igb_set_promisc(struct adapter *adapter)
1921 {
1922         struct ifnet    *ifp = adapter->ifp;
1923         struct e1000_hw *hw = &adapter->hw;
1924         u32             reg;
1925
1926         if (adapter->vf_ifp) {
1927                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1928                 return;
1929         }
1930
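        /*
         * RCTL.UPE accepts all unicast frames and RCTL.MPE all
         * multicast frames: IFF_PROMISC sets both, IFF_ALLMULTI
         * only MPE.
         */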
1931         reg = E1000_READ_REG(hw, E1000_RCTL);
1932         if (ifp->if_flags & IFF_PROMISC) {
1933                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1934                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1935         } else if (ifp->if_flags & IFF_ALLMULTI) {
1936                 reg |= E1000_RCTL_MPE;
1937                 reg &= ~E1000_RCTL_UPE;
1938                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1939         }
1940 }
1941
1942 static void
1943 igb_disable_promisc(struct adapter *adapter)
1944 {
1945         struct e1000_hw *hw = &adapter->hw;
1946         struct ifnet    *ifp = adapter->ifp;
1947         u32             reg;
1948         int             mcnt = 0;
1949
1950         if (adapter->vf_ifp) {
1951                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1952                 return;
1953         }
1954         reg = E1000_READ_REG(hw, E1000_RCTL);
1955         reg &=  (~E1000_RCTL_UPE);
1956         if (ifp->if_flags & IFF_ALLMULTI)
1957                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1958         else {
1959                 struct  ifmultiaddr *ifma;
1960 #if __FreeBSD_version < 800000
1961                 IF_ADDR_LOCK(ifp);
1962 #else   
1963                 if_maddr_rlock(ifp);
1964 #endif
1965                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1966                         if (ifma->ifma_addr->sa_family != AF_LINK)
1967                                 continue;
1968                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1969                                 break;
1970                         mcnt++;
1971                 }
1972 #if __FreeBSD_version < 800000
1973                 IF_ADDR_UNLOCK(ifp);
1974 #else
1975                 if_maddr_runlock(ifp);
1976 #endif
1977         }
1978         /* Don't disable if in MAX groups */
1979         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1980                 reg &=  (~E1000_RCTL_MPE);
1981         E1000_WRITE_REG(hw, E1000_RCTL, reg);
1982 }
1983
1984
1985 /*********************************************************************
1986  *  Multicast Update
1987  *
1988  *  This routine is called whenever multicast address list is updated.
1989  *
1990  **********************************************************************/
1991
1992 static void
1993 igb_set_multi(struct adapter *adapter)
1994 {
1995         struct ifnet    *ifp = adapter->ifp;
1996         struct ifmultiaddr *ifma;
1997         u32 reg_rctl = 0;
1998         u8  *mta;
1999
2000         int mcnt = 0;
2001
2002         IOCTL_DEBUGOUT("igb_set_multi: begin");
2003
2004         mta = adapter->mta;
2005         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2006             MAX_NUM_MULTICAST_ADDRESSES);
2007
2008 #if __FreeBSD_version < 800000
2009         IF_ADDR_LOCK(ifp);
2010 #else
2011         if_maddr_rlock(ifp);
2012 #endif
2013         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2014                 if (ifma->ifma_addr->sa_family != AF_LINK)
2015                         continue;
2016
2017                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2018                         break;
2019
2020                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2021                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2022                 mcnt++;
2023         }
2024 #if __FreeBSD_version < 800000
2025         IF_ADDR_UNLOCK(ifp);
2026 #else
2027         if_maddr_runlock(ifp);
2028 #endif
2029
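        /*
         * If the list overflowed, fall back to multicast promiscuous
         * mode; otherwise program the collected addresses into the
         * hardware multicast table array.
         */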
2030         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2031                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2032                 reg_rctl |= E1000_RCTL_MPE;
2033                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2034         } else
2035                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2036 }
2037
2038
2039 /*********************************************************************
2040  *  Timer routine:
2041  *      This routine checks for link status,
2042  *      updates statistics, and does the watchdog.
2043  *
2044  **********************************************************************/
2045
2046 static void
2047 igb_local_timer(void *arg)
2048 {
2049         struct adapter          *adapter = arg;
2050         device_t                dev = adapter->dev;
2051         struct ifnet            *ifp = adapter->ifp;
2052         struct tx_ring          *txr = adapter->tx_rings;
2053         struct igb_queue        *que = adapter->queues;
2054         int                     hung = 0, busy = 0;
2055
2056
2057         IGB_CORE_LOCK_ASSERT(adapter);
2058
2059         igb_update_link_status(adapter);
2060         igb_update_stats_counters(adapter);
2061
2062         /*
2063         ** Check the TX queues status
2064         **      - central locked handling of OACTIVE
2065         **      - watchdog only if all queues show hung
2066         */
2067         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2068                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2069                     (adapter->pause_frames == 0))
2070                         ++hung;
2071                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2072                         ++busy;
2073                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2074                         taskqueue_enqueue(que->tq, &que->que_task);
2075         }
2076         if (hung == adapter->num_queues)
2077                 goto timeout;
2078         if (busy == adapter->num_queues)
2079                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2080         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2081             (busy < adapter->num_queues))
2082                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2083
2084         adapter->pause_frames = 0;
2085         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2086 #ifndef DEVICE_POLLING
2087         /* Schedule all queue interrupts - deadlock protection */
2088         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2089 #endif
2090         return;
2091
2092 timeout:
2093         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2094         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2095             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2096             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2097         device_printf(dev, "TX(%d) desc avail = %d, "
2098             "Next TX to Clean = %d\n",
2099             txr->me, txr->tx_avail, txr->next_to_clean);
2100         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2101         adapter->watchdog_events++;
2102         igb_init_locked(adapter);
2103 }
2104
2105 static void
2106 igb_update_link_status(struct adapter *adapter)
2107 {
2108         struct e1000_hw         *hw = &adapter->hw;
2109         struct e1000_fc_info    *fc = &hw->fc;
2110         struct ifnet            *ifp = adapter->ifp;
2111         device_t                dev = adapter->dev;
2112         struct tx_ring          *txr = adapter->tx_rings;
2113         u32                     link_check, thstat, ctrl;
2114         char                    *flowctl = NULL;
2115
2116         link_check = thstat = ctrl = 0;
2117
2118         /* Get the cached link value or read for real */
2119         switch (hw->phy.media_type) {
2120         case e1000_media_type_copper:
2121                 if (hw->mac.get_link_status) {
2122                         /* Do the work to read phy */
2123                         e1000_check_for_link(hw);
2124                         link_check = !hw->mac.get_link_status;
2125                 } else
2126                         link_check = TRUE;
2127                 break;
2128         case e1000_media_type_fiber:
2129                 e1000_check_for_link(hw);
2130                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2131                                  E1000_STATUS_LU);
2132                 break;
2133         case e1000_media_type_internal_serdes:
2134                 e1000_check_for_link(hw);
2135                 link_check = adapter->hw.mac.serdes_has_link;
2136                 break;
2137         /* VF device is type_unknown */
2138         case e1000_media_type_unknown:
2139                 e1000_check_for_link(hw);
2140                 link_check = !hw->mac.get_link_status;
2141                 /* Fall thru */
2142         default:
2143                 break;
2144         }
2145
2146         /* Check for thermal downshift or shutdown */
2147         if (hw->mac.type == e1000_i350) {
2148                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2149                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2150         }
2151
2152         /* Get the flow control for display */
2153         switch (fc->current_mode) {
2154         case e1000_fc_rx_pause:
2155                 flowctl = "RX";
2156                 break;  
2157         case e1000_fc_tx_pause:
2158                 flowctl = "TX";
2159                 break;  
2160         case e1000_fc_full:
2161                 flowctl = "Full";
2162                 break;  
2163         case e1000_fc_none:
2164         default:
2165                 flowctl = "None";
2166                 break;  
2167         }
2168
2169         /* Now we check if a transition has happened */
2170         if (link_check && (adapter->link_active == 0)) {
2171                 e1000_get_speed_and_duplex(&adapter->hw, 
2172                     &adapter->link_speed, &adapter->link_duplex);
2173                 if (bootverbose)
2174                         device_printf(dev, "Link is up %d Mbps %s,"
2175                             " Flow Control: %s\n",
2176                             adapter->link_speed,
2177                             ((adapter->link_duplex == FULL_DUPLEX) ?
2178                             "Full Duplex" : "Half Duplex"), flowctl);
2179                 adapter->link_active = 1;
2180                 ifp->if_baudrate = (u64)adapter->link_speed * 1000000;
2181                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2182                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2183                         device_printf(dev, "Link: thermal downshift\n");
2184                 /* Delay Link Up for Phy update */
2185                 if (((hw->mac.type == e1000_i210) ||
2186                     (hw->mac.type == e1000_i211)) &&
2187                     (hw->phy.id == I210_I_PHY_ID))
2188                         msec_delay(I210_LINK_DELAY);
2189                 /* Reset if the media type changed. */
2190                 if (hw->dev_spec._82575.media_changed) {
2191                         hw->dev_spec._82575.media_changed = false;
2192                         adapter->flags |= IGB_MEDIA_RESET;
2193                         igb_reset(adapter);
2194                 }       
2195                 /* This can sleep */
2196                 if_link_state_change(ifp, LINK_STATE_UP);
2197         } else if (!link_check && (adapter->link_active == 1)) {
2198                 ifp->if_baudrate = adapter->link_speed = 0;
2199                 adapter->link_duplex = 0;
2200                 if (bootverbose)
2201                         device_printf(dev, "Link is Down\n");
2202                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2203                     (thstat & E1000_THSTAT_PWR_DOWN))
2204                         device_printf(dev, "Link: thermal shutdown\n");
2205                 adapter->link_active = 0;
2206                 /* This can sleep */
2207                 if_link_state_change(ifp, LINK_STATE_DOWN);
2208                 /* Reset queue state */
2209                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2210                         txr->queue_status = IGB_QUEUE_IDLE;
2211         }
2212 }
2213
2214 /*********************************************************************
2215  *
2216  *  This routine disables all traffic on the adapter by issuing a
2217  *  global reset on the MAC and deallocates TX/RX buffers.
2218  *
2219  **********************************************************************/
2220
2221 static void
2222 igb_stop(void *arg)
2223 {
2224         struct adapter  *adapter = arg;
2225         struct ifnet    *ifp = adapter->ifp;
2226         struct tx_ring *txr = adapter->tx_rings;
2227
2228         IGB_CORE_LOCK_ASSERT(adapter);
2229
2230         INIT_DEBUGOUT("igb_stop: begin");
2231
2232         igb_disable_intr(adapter);
2233
2234         callout_stop(&adapter->timer);
2235
2236         /* Tell the stack that the interface is no longer active */
2237         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2238         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2239
2240         /* Disarm watchdog timer. */
2241         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2242                 IGB_TX_LOCK(txr);
2243                 txr->queue_status = IGB_QUEUE_IDLE;
2244                 IGB_TX_UNLOCK(txr);
2245         }
2246
2247         e1000_reset_hw(&adapter->hw);
2248         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2249
2250         e1000_led_off(&adapter->hw);
2251         e1000_cleanup_led(&adapter->hw);
2252 }
2253
2254
2255 /*********************************************************************
2256  *
2257  *  Determine hardware revision.
2258  *
2259  **********************************************************************/
2260 static void
2261 igb_identify_hardware(struct adapter *adapter)
2262 {
2263         device_t dev = adapter->dev;
2264
2265         /* Make sure our PCI config space has the necessary stuff set */
2266         pci_enable_busmaster(dev);
2267         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2268
2269         /* Save off the information about this board */
2270         adapter->hw.vendor_id = pci_get_vendor(dev);
2271         adapter->hw.device_id = pci_get_device(dev);
2272         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2273         adapter->hw.subsystem_vendor_id =
2274             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2275         adapter->hw.subsystem_device_id =
2276             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2277
2278         /* Set MAC type early for PCI setup */
2279         e1000_set_mac_type(&adapter->hw);
2280
2281         /* Are we a VF device? */
2282         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2283             (adapter->hw.mac.type == e1000_vfadapt_i350))
2284                 adapter->vf_ifp = 1;
2285         else
2286                 adapter->vf_ifp = 0;
2287 }
2288
2289 static int
2290 igb_allocate_pci_resources(struct adapter *adapter)
2291 {
2292         device_t        dev = adapter->dev;
2293         int             rid;
2294
2295         rid = PCIR_BAR(0);
2296         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2297             &rid, RF_ACTIVE);
2298         if (adapter->pci_mem == NULL) {
2299                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2300                 return (ENXIO);
2301         }
2302         adapter->osdep.mem_bus_space_tag =
2303             rman_get_bustag(adapter->pci_mem);
2304         adapter->osdep.mem_bus_space_handle =
2305             rman_get_bushandle(adapter->pci_mem);
2306         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2307
2308         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2309
2310         /* This will setup either MSI/X or MSI */
2311         adapter->msix = igb_setup_msix(adapter);
2312         adapter->hw.back = &adapter->osdep;
2313
2314         return (0);
2315 }
2316
2317 /*********************************************************************
2318  *
2319  *  Setup the Legacy or MSI Interrupt handler
2320  *
2321  **********************************************************************/
2322 static int
2323 igb_allocate_legacy(struct adapter *adapter)
2324 {
2325         device_t                dev = adapter->dev;
2326         struct igb_queue        *que = adapter->queues;
2327 #ifndef IGB_LEGACY_TX
2328         struct tx_ring          *txr = adapter->tx_rings;
2329 #endif
2330         int                     error, rid = 0;
2331
2332         /* Turn off all interrupts */
2333         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2334
2335         /* MSI RID is 1 */
2336         if (adapter->msix == 1)
2337                 rid = 1;
2338
2339         /* We allocate a single interrupt resource */
2340         adapter->res = bus_alloc_resource_any(dev,
2341             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2342         if (adapter->res == NULL) {
2343                 device_printf(dev, "Unable to allocate bus resource: "
2344                     "interrupt\n");
2345                 return (ENXIO);
2346         }
2347
2348 #ifndef IGB_LEGACY_TX
2349         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2350 #endif
2351
2352         /*
2353          * Try allocating a fast interrupt and the associated deferred
2354          * processing contexts.
2355          */
2356         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2357         /* Make tasklet for deferred link handling */
2358         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2359         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2360             taskqueue_thread_enqueue, &que->tq);
2361         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2362             device_get_nameunit(adapter->dev));
2363         if ((error = bus_setup_intr(dev, adapter->res,
2364             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2365             adapter, &adapter->tag)) != 0) {
2366                 device_printf(dev, "Failed to register fast interrupt "
2367                             "handler: %d\n", error);
2368                 taskqueue_free(que->tq);
2369                 que->tq = NULL;
2370                 return (error);
2371         }
2372
2373         return (0);
2374 }
2375
2376
2377 /*********************************************************************
2378  *
2379  *  Setup the MSIX Queue Interrupt handlers: 
2380  *
2381  **********************************************************************/
2382 static int
2383 igb_allocate_msix(struct adapter *adapter)
2384 {
2385         device_t                dev = adapter->dev;
2386         struct igb_queue        *que = adapter->queues;
2387         int                     error, rid, vector = 0;
2388         int                     cpu_id = 0;
2389 #ifdef  RSS
2390         cpuset_t cpu_mask;
2391 #endif
2392
2393         /* Be sure to start with all interrupts disabled */
2394         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2395         E1000_WRITE_FLUSH(&adapter->hw);
2396
2397 #ifdef  RSS
2398         /*
2399          * If we're doing RSS, the number of queues needs to
2400          * match the number of RSS buckets that are configured.
2401          *
2402          * + If there's more queues than RSS buckets, we'll end
2403          *   up with queues that get no traffic.
2404          *
2405          * + If there's more RSS buckets than queues, we'll end
2406          *   up having multiple RSS buckets map to the same queue,
2407          *   so there'll be some contention.
2408          */
2409         if (adapter->num_queues != rss_getnumbuckets()) {
2410                 device_printf(dev,
2411                     "%s: number of queues (%d) != number of RSS buckets (%d)"
2412                     "; performance will be impacted.\n",
2413                     __func__,
2414                     adapter->num_queues,
2415                     rss_getnumbuckets());
2416         }
2417 #endif
2418
2419         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2420                 rid = vector + 1;
2421                 que->res = bus_alloc_resource_any(dev,
2422                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2423                 if (que->res == NULL) {
2424                         device_printf(dev,
2425                             "Unable to allocate bus resource: "
2426                             "MSIX Queue Interrupt\n");
2427                         return (ENXIO);
2428                 }
2429                 error = bus_setup_intr(dev, que->res,
2430                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2431                     igb_msix_que, que, &que->tag);
2432                 if (error) {
2433                         que->res = NULL;
2434                         device_printf(dev, "Failed to register Queue handler\n");
2435                         return (error);
2436                 }
2437 #if __FreeBSD_version >= 800504
2438                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2439 #endif
2440                 que->msix = vector;
2441                 if (adapter->hw.mac.type == e1000_82575)
2442                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2443                 else
2444                         que->eims = 1 << vector;
2445
2446 #ifdef  RSS
2447                 /*
2448                  * The queue ID is used as the RSS layer bucket ID.
2449                  * We look up the queue ID -> RSS CPU ID and select
2450                  * that.
2451                  */
2452                 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2453 #else
2454                 /*
2455                  * Bind the msix vector, and thus the
2456                  * rings to the corresponding cpu.
2457                  *
2458                  * This just happens to match the default RSS round-robin
2459                  * bucket -> queue -> CPU allocation.
2460                  */
2461                 if (adapter->num_queues > 1) {
2462                         if (igb_last_bind_cpu < 0)
2463                                 igb_last_bind_cpu = CPU_FIRST();
2464                         cpu_id = igb_last_bind_cpu;
2465                 }
2466 #endif
2467
2468                 if (adapter->num_queues > 1) {
2469                         bus_bind_intr(dev, que->res, cpu_id);
2470 #ifdef  RSS
2471                         device_printf(dev,
2472                                 "Bound queue %d to RSS bucket %d\n",
2473                                 i, cpu_id);
2474 #else
2475                         device_printf(dev,
2476                                 "Bound queue %d to cpu %d\n",
2477                                 i, cpu_id);
2478 #endif
2479                 }
2480
2481 #ifndef IGB_LEGACY_TX
2482                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2483                     que->txr);
2484 #endif
2485                 /* Make tasklet for deferred handling */
2486                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2487                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2488                     taskqueue_thread_enqueue, &que->tq);
2489                 if (adapter->num_queues > 1) {
2490                         /*
2491                          * Only pin the taskqueue thread to a CPU if
2492                          * RSS is in use.
2493                          *
2494                          * This again just happens to match the default RSS
2495                          * round-robin bucket -> queue -> CPU allocation.
2496                          */
2497 #ifdef  RSS
2498                         CPU_SETOF(cpu_id, &cpu_mask);
2499                         taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2500                             &cpu_mask,
2501                             "%s que (bucket %d)",
2502                             device_get_nameunit(adapter->dev),
2503                             cpu_id);
2504 #else
2505                         taskqueue_start_threads(&que->tq, 1, PI_NET,
2506                             "%s que (qid %d)",
2507                             device_get_nameunit(adapter->dev),
2508                             cpu_id);
2509 #endif
2510                 } else {
2511                         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2512                             device_get_nameunit(adapter->dev));
2513                 }
2514
2515                 /* Finally update the last bound CPU id */
2516                 if (adapter->num_queues > 1)
2517                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2518         }
2519
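        /*
         * The final MSI-X vector is dedicated to link-status and other
         * non-queue causes; it gets its own handler here and is wired
         * to the hardware "other" cause in igb_configure_queues().
         */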
2520         /* And Link */
2521         rid = vector + 1;
2522         adapter->res = bus_alloc_resource_any(dev,
2523             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2524         if (adapter->res == NULL) {
2525                 device_printf(dev,
2526                     "Unable to allocate bus resource: "
2527                     "MSIX Link Interrupt\n");
2528                 return (ENXIO);
2529         }
2530         if ((error = bus_setup_intr(dev, adapter->res,
2531             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2532             igb_msix_link, adapter, &adapter->tag)) != 0) {
2533                 device_printf(dev, "Failed to register Link handler\n");
2534                 return (error);
2535         }
2536 #if __FreeBSD_version >= 800504
2537         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2538 #endif
2539         adapter->linkvec = vector;
2540
2541         return (0);
2542 }
2543
2544
2545 static void
2546 igb_configure_queues(struct adapter *adapter)
2547 {
2548         struct  e1000_hw        *hw = &adapter->hw;
2549         struct  igb_queue       *que;
2550         u32                     tmp, ivar = 0, newitr = 0;
2551
2552         /* First turn on RSS capability */
2553         if (adapter->hw.mac.type != e1000_82575)
2554                 E1000_WRITE_REG(hw, E1000_GPIE,
2555                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2556                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2557
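        /*
         * Each 32-bit IVAR register holds four 8-bit allocation
         * entries (bit 7 of an entry is the valid bit).  The per-MAC
         * cases below place each queue's RX and TX causes in the
         * proper byte and accumulate the resulting EIMS bits in
         * que_mask.
         */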
2558         /* Turn on MSIX */
2559         switch (adapter->hw.mac.type) {
2560         case e1000_82580:
2561         case e1000_i350:
2562         case e1000_i354:
2563         case e1000_i210:
2564         case e1000_i211:
2565         case e1000_vfadapt:
2566         case e1000_vfadapt_i350:
2567                 /* RX entries */
2568                 for (int i = 0; i < adapter->num_queues; i++) {
2569                         u32 index = i >> 1;
2570                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2571                         que = &adapter->queues[i];
2572                         if (i & 1) {
2573                                 ivar &= 0xFF00FFFF;
2574                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2575                         } else {
2576                                 ivar &= 0xFFFFFF00;
2577                                 ivar |= que->msix | E1000_IVAR_VALID;
2578                         }
2579                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2580                 }
2581                 /* TX entries */
2582                 for (int i = 0; i < adapter->num_queues; i++) {
2583                         u32 index = i >> 1;
2584                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2585                         que = &adapter->queues[i];
2586                         if (i & 1) {
2587                                 ivar &= 0x00FFFFFF;
2588                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2589                         } else {
2590                                 ivar &= 0xFFFF00FF;
2591                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2592                         }
2593                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2594                         adapter->que_mask |= que->eims;
2595                 }
2596
2597                 /* And for the link interrupt */
2598                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2599                 adapter->link_mask = 1 << adapter->linkvec;
2600                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2601                 break;
2602         case e1000_82576:
2603                 /* RX entries */
2604                 for (int i = 0; i < adapter->num_queues; i++) {
2605                         u32 index = i & 0x7; /* Each IVAR has two entries */
2606                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2607                         que = &adapter->queues[i];
2608                         if (i < 8) {
2609                                 ivar &= 0xFFFFFF00;
2610                                 ivar |= que->msix | E1000_IVAR_VALID;
2611                         } else {
2612                                 ivar &= 0xFF00FFFF;
2613                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2614                         }
2615                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2616                         adapter->que_mask |= que->eims;
2617                 }
2618                 /* TX entries */
2619                 for (int i = 0; i < adapter->num_queues; i++) {
2620                         u32 index = i & 0x7; /* Each IVAR has two entries */
2621                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2622                         que = &adapter->queues[i];
2623                         if (i < 8) {
2624                                 ivar &= 0xFFFF00FF;
2625                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2626                         } else {
2627                                 ivar &= 0x00FFFFFF;
2628                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2629                         }
2630                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2631                         adapter->que_mask |= que->eims;
2632                 }
2633
2634                 /* And for the link interrupt */
2635                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2636                 adapter->link_mask = 1 << adapter->linkvec;
2637                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2638                 break;
2639
2640         case e1000_82575:
2641                 /* Enable MSI-X support */
2642                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2643                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2644                 /* Auto-Mask interrupts upon ICR read. */
2645                 tmp |= E1000_CTRL_EXT_EIAME;
2646                 tmp |= E1000_CTRL_EXT_IRCA;
2647                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2648
2649                 /* Queues */
2650                 for (int i = 0; i < adapter->num_queues; i++) {
2651                         que = &adapter->queues[i];
2652                         tmp = E1000_EICR_RX_QUEUE0 << i;
2653                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2654                         que->eims = tmp;
2655                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2656                             i, que->eims);
2657                         adapter->que_mask |= que->eims;
2658                 }
2659
2660                 /* Link */
2661                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2662                     E1000_EIMS_OTHER);
2663                 adapter->link_mask |= E1000_EIMS_OTHER;
2664         default:
2665                 break;
2666         }
2667
2668         /* Set the starting interrupt rate */
2669         if (igb_max_interrupt_rate > 0)
2670                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2671
2672         if (hw->mac.type == e1000_82575)
2673                 newitr |= newitr << 16;
2674         else
2675                 newitr |= E1000_EITR_CNT_IGNR;
2676
2677         for (int i = 0; i < adapter->num_queues; i++) {
2678                 que = &adapter->queues[i];
2679                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2680         }
2681
2682         return;
2683 }
2684
2685
2686 static void
2687 igb_free_pci_resources(struct adapter *adapter)
2688 {
2689         struct          igb_queue *que = adapter->queues;
2690         device_t        dev = adapter->dev;
2691         int             rid;
2692
2693         /*
2694         ** There is a slight possibility of a failure mode
2695         ** in attach that will result in entering this function
2696         ** before interrupt resources have been initialized; in
2697         ** that case we do not want to execute the loops below.
2698         ** We can detect this reliably by the state of the
2699         ** adapter's res pointer.
2700         */
2701         if (adapter->res == NULL)
2702                 goto mem;
2703
2704         /*
2705          * First release all the interrupt resources:
2706          */
2707         for (int i = 0; i < adapter->num_queues; i++, que++) {
2708                 rid = que->msix + 1;
2709                 if (que->tag != NULL) {
2710                         bus_teardown_intr(dev, que->res, que->tag);
2711                         que->tag = NULL;
2712                 }
2713                 if (que->res != NULL)
2714                         bus_release_resource(dev,
2715                             SYS_RES_IRQ, rid, que->res);
2716         }
2717
2718         /* Clean the Legacy or Link interrupt last */
2719         if (adapter->linkvec) /* we are doing MSIX */
2720                 rid = adapter->linkvec + 1;
2721         else
2722                 rid = (adapter->msix != 0) ? 1 : 0;
2723
2724         que = adapter->queues;
2725         if (adapter->tag != NULL) {
2726                 taskqueue_drain(que->tq, &adapter->link_task);
2727                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2728                 adapter->tag = NULL;
2729         }
2730         if (adapter->res != NULL)
2731                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2732
2733         for (int i = 0; i < adapter->num_queues; i++, que++) {
2734                 if (que->tq != NULL) {
2735 #ifndef IGB_LEGACY_TX
2736                         taskqueue_drain(que->tq, &que->txr->txq_task);
2737 #endif
2738                         taskqueue_drain(que->tq, &que->que_task);
2739                         taskqueue_free(que->tq);
2740                 }
2741         }
2742 mem:
2743         if (adapter->msix)
2744                 pci_release_msi(dev);
2745
2746         if (adapter->msix_mem != NULL)
2747                 bus_release_resource(dev, SYS_RES_MEMORY,
2748                     adapter->memrid, adapter->msix_mem);
2749
2750         if (adapter->pci_mem != NULL)
2751                 bus_release_resource(dev, SYS_RES_MEMORY,
2752                     PCIR_BAR(0), adapter->pci_mem);
2753
2754 }
2755
2756 /*
2757  * Setup Either MSI/X or MSI
2758  */
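/*
 * Returns the number of vectors actually allocated: more than one for
 * MSI-X, one for MSI, and zero to fall back to a legacy INTx
 * interrupt.  The queue count is clamped by the CPU count, tunables,
 * RSS bucket count, and the per-MAC limits below.
 */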
2759 static int
2760 igb_setup_msix(struct adapter *adapter)
2761 {
2762         device_t        dev = adapter->dev;
2763         int             bar, want, queues, msgs, maxqueues;
2764
2765         /* tuneable override */
2766         if (igb_enable_msix == 0)
2767                 goto msi;
2768
2769         /* First try MSI/X */
2770         msgs = pci_msix_count(dev); 
2771         if (msgs == 0)
2772                 goto msi;
2773         /*
2774         ** Some new devices, as with ixgbe, now may
2775         ** use a different BAR, so we need to keep
2776         ** track of which is used.
2777         */
2778         adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2779         bar = pci_read_config(dev, adapter->memrid, 4);
2780         if (bar == 0) /* use next bar */
2781                 adapter->memrid += 4;
2782         adapter->msix_mem = bus_alloc_resource_any(dev,
2783             SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2784         if (adapter->msix_mem == NULL) {
2785                 /* May not be enabled */
2786                 device_printf(adapter->dev,
2787                     "Unable to map MSIX table\n");
2788                 goto msi;
2789         }
2790
2791         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2792
2793         /* Override via tuneable */
2794         if (igb_num_queues != 0)
2795                 queues = igb_num_queues;
2796
2797 #ifdef  RSS
2798         /* If we're doing RSS, clamp at the number of RSS buckets */
2799         if (queues > rss_getnumbuckets())
2800                 queues = rss_getnumbuckets();
2801 #endif
2802
2803
2804         /* Sanity check based on HW */
2805         switch (adapter->hw.mac.type) {
2806                 case e1000_82575:
2807                         maxqueues = 4;
2808                         break;
2809                 case e1000_82576:
2810                 case e1000_82580:
2811                 case e1000_i350:
2812                 case e1000_i354:
2813                         maxqueues = 8;
2814                         break;
2815                 case e1000_i210:
2816                         maxqueues = 4;
2817                         break;
2818                 case e1000_i211:
2819                         maxqueues = 2;
2820                         break;
2821                 default:  /* VF interfaces */
2822                         maxqueues = 1;
2823                         break;
2824         }
2825
2826         /* Final clamp on the actual hardware capability */
2827         if (queues > maxqueues)
2828                 queues = maxqueues;
2829
2830         /*
2831         ** One vector (RX/TX pair) per queue
2832         ** plus an additional for Link interrupt
2833         */
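        /*
        ** Worked example (illustrative values only): on an i350 with
        ** 8 CPUs and 10 MSI-X messages advertised, queues becomes
        ** min(8, 10 - 1) = 8, within the 8-queue hardware limit above,
        ** so want = 8 + 1 = 9 vectors are requested below.
        */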
2834         want = queues + 1;
2835         if (msgs >= want)
2836                 msgs = want;
2837         else {
2838                 device_printf(adapter->dev,
2839                     "MSIX Configuration Problem, "
2840                     "%d vectors configured, but %d vectors wanted!\n",
2841                     msgs, want);
2842                 goto msi;
2843         }
2844         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2845                 device_printf(adapter->dev,
2846                     "Using MSIX interrupts with %d vectors\n", msgs);
2847                 adapter->num_queues = queues;
2848                 return (msgs);
2849         }
2850         /*
2851         ** If MSIX alloc failed or provided us with
2852         ** less than needed, free and fall through to MSI
2853         */
2854         pci_release_msi(dev);
2855
2856 msi:
2857         if (adapter->msix_mem != NULL) {
2858                 bus_release_resource(dev, SYS_RES_MEMORY,
2859                     adapter->memrid, adapter->msix_mem);
2860                 adapter->msix_mem = NULL;
2861         }
2862         msgs = 1;
2863         if (pci_alloc_msi(dev, &msgs) == 0) {
2864                 device_printf(adapter->dev," Using an MSI interrupt\n");
2865                 return (msgs);
2866         }
2867         device_printf(adapter->dev," Using a Legacy interrupt\n");
2868         return (0);
2869 }
2870
2871 /*********************************************************************
2872  *
2873  *  Initialize the DMA Coalescing feature
2874  *
2875  **********************************************************************/
2876 static void
2877 igb_init_dmac(struct adapter *adapter, u32 pba)
2878 {
2879         device_t        dev = adapter->dev;
2880         struct e1000_hw *hw = &adapter->hw;
2881         u32             dmac, reg = ~E1000_DMACR_DMAC_EN;
2882         u16             hwm;
2883
2884         if (hw->mac.type == e1000_i211)
2885                 return;
2886
2887         if (hw->mac.type > e1000_82580) {
2888
2889                 if (adapter->dmac == 0) { /* Disabling it */
2890                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2891                         return;
2892                 } else
2893                         device_printf(dev, "DMA Coalescing enabled\n");
2894
2895                 /* Set starting threshold */
2896                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2897
2898                 hwm = 64 * pba - adapter->max_frame_size / 16;
2899                 if (hwm < 64 * (pba - 6))
2900                         hwm = 64 * (pba - 6);
2901                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2902                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2903                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2904                     & E1000_FCRTC_RTH_COAL_MASK);
2905                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2906
2907
2908                 dmac = pba - adapter->max_frame_size / 512;
2909                 if (dmac < pba - 10)
2910                         dmac = pba - 10;
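                /*
                ** Purely arithmetic illustration (values assumed): with
                ** pba = 34 and max_frame_size = 1522, dmac = 34 - 2 = 32,
                ** which is already above the pba - 10 = 24 floor.
                */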
2911                 reg = E1000_READ_REG(hw, E1000_DMACR);
2912                 reg &= ~E1000_DMACR_DMACTHR_MASK;
2913                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2914                     & E1000_DMACR_DMACTHR_MASK);
2915
2916                 /* transition to L0s or L1 if available.. */
2917                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2918
2919                 /* Check for a 2.5Gb backplane connection before
2920                 * configuring the watchdog timer: the timer value
2921                 * is expressed in 12.8 usec units on a 2.5Gb link
2922                 * and in 32 usec units otherwise, so the requested
2923                 * delay must be scaled accordingly.
2924                 */
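                /*
                * A sketch with an assumed adapter->dmac value of 1000:
                * the 2.5Gb path below programs (1000 * 5) >> 6 = 78 timer
                * units, while the standard path programs 1000 >> 5 = 31.
                */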
2925                 if (hw->mac.type == e1000_i354) {
2926                         int status = E1000_READ_REG(hw, E1000_STATUS);
2927                         if ((status & E1000_STATUS_2P5_SKU) &&
2928                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2929                                 reg |= ((adapter->dmac * 5) >> 6);
2930                         else
2931                                 reg |= (adapter->dmac >> 5);
2932                 } else {
2933                         reg |= (adapter->dmac >> 5);
2934                 }
2935
2936                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2937
2938                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2939
2940                 /* Set the interval before transition */
2941                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2942                 if (hw->mac.type == e1000_i350)
2943                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
2944                 /*
2945                 ** On a 2.5Gb connection the TTLX unit is 0.4 usec,
2946                 ** so the 4 usec delay is programmed as 4 / 0.4 = 10 = 0xA.
2947                 */
2948                 if (hw->mac.type == e1000_i354) {
2949                         int status = E1000_READ_REG(hw, E1000_STATUS);
2950                         if ((status & E1000_STATUS_2P5_SKU) &&
2951                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2952                                 reg |= 0xA;
2953                         else
2954                                 reg |= 0x4;
2955                 } else {
2956                         reg |= 0x4;
2957                 }
2958
2959                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2960
2961                 /* free space in tx packet buffer to wake from DMA coal */
2962                 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2963                     (2 * adapter->max_frame_size)) >> 6);
2964
2965                 /* make low power state decision controlled by DMA coal */
2966                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2967                 reg &= ~E1000_PCIEMISC_LX_DECISION;
2968                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2969
2970         } else if (hw->mac.type == e1000_82580) {
2971                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2972                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2973                     reg & ~E1000_PCIEMISC_LX_DECISION);
2974                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
2975         }
2976 }
2977
2978
2979 /*********************************************************************
2980  *
2981  *  Set up a fresh starting state
2982  *
2983  **********************************************************************/
2984 static void
2985 igb_reset(struct adapter *adapter)
2986 {
2987         device_t        dev = adapter->dev;
2988         struct e1000_hw *hw = &adapter->hw;
2989         struct e1000_fc_info *fc = &hw->fc;
2990         struct ifnet    *ifp = adapter->ifp;
2991         u32             pba = 0;
2992         u16             hwm;
2993
2994         INIT_DEBUGOUT("igb_reset: begin");
2995
2996         /* Let the firmware know the OS is in control */
2997         igb_get_hw_control(adapter);
2998
2999         /*
3000          * Packet Buffer Allocation (PBA)
3001          * Writing PBA sets the receive portion of the buffer;
3002          * the remainder is used for the transmit buffer.
3003          */
3004         switch (hw->mac.type) {
3005         case e1000_82575:
3006                 pba = E1000_PBA_32K;
3007                 break;
3008         case e1000_82576:
3009         case e1000_vfadapt:
3010                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3011                 pba &= E1000_RXPBS_SIZE_MASK_82576;
3012                 break;
3013         case e1000_82580:
3014         case e1000_i350:
3015         case e1000_i354:
3016         case e1000_vfadapt_i350:
3017                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3018                 pba = e1000_rxpbs_adjust_82580(pba);
3019                 break;
3020         case e1000_i210:
3021         case e1000_i211:
3022                 pba = E1000_PBA_34K;
3023         default:
3024                 break;
3025         }
3026
3027         /* Special needs in case of Jumbo frames */
3028         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3029                 u32 tx_space, min_tx, min_rx;
3030                 pba = E1000_READ_REG(hw, E1000_PBA);
3031                 tx_space = pba >> 16;
3032                 pba &= 0xffff;
3033                 min_tx = (adapter->max_frame_size +
3034                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3035                 min_tx = roundup2(min_tx, 1024);
3036                 min_tx >>= 10;
3037                 min_rx = adapter->max_frame_size;
3038                 min_rx = roundup2(min_rx, 1024);
3039                 min_rx >>= 10;
3040                 if (tx_space < min_tx &&
3041                     ((min_tx - tx_space) < pba)) {
3042                         pba = pba - (min_tx - tx_space);
3043                         /*
3044                          * if short on rx space, rx wins
3045                          * and must trump tx adjustment
3046                          */
3047                         if (pba < min_rx)
3048                                 pba = min_rx;
3049                 }
3050                 E1000_WRITE_REG(hw, E1000_PBA, pba);
3051         }
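        /*
         * Worked example (assuming a ~9KB jumbo max_frame_size of 9234 and
         * a 16-byte legacy TX descriptor): min_tx = (9234 + 16 - 4) * 2 =
         * 18492 bytes, rounded up to 19KB; min_rx = 9234 bytes, rounded up
         * to 10KB.  A TX space below 19KB shrinks the RX PBA, but never
         * below the 10KB RX minimum.
         */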
3052
3053         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3054
3055         /*
3056          * These parameters control the automatic generation (Tx) and
3057          * response (Rx) to Ethernet PAUSE frames.
3058          * - High water mark should allow for at least two frames to be
3059          *   received after sending an XOFF.
3060          * - Low water mark works best when it is very near the high water mark.
3061          *   This allows the receiver to restart by sending XON when it has
3062          *   drained a bit.
3063          */
3064         hwm = min(((pba << 10) * 9 / 10),
3065             ((pba << 10) - 2 * adapter->max_frame_size));
3066
3067         if (hw->mac.type < e1000_82576) {
3068                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3069                 fc->low_water = fc->high_water - 8;
3070         } else {
3071                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3072                 fc->low_water = fc->high_water - 16;
3073         }
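        /*
         * Worked example (assuming pba = 32KB and a 1522-byte max frame):
         * hwm = min(32768 * 9 / 10, 32768 - 2 * 1522) = min(29491, 29724)
         * = 29491; on 82576 and later this is rounded down to 16-byte
         * granularity, giving high_water = 29488 and low_water = 29472.
         */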
3074
3075         fc->pause_time = IGB_FC_PAUSE_TIME;
3076         fc->send_xon = TRUE;
3077         if (adapter->fc)
3078                 fc->requested_mode = adapter->fc;
3079         else
3080                 fc->requested_mode = e1000_fc_default;
3081
3082         /* Issue a global reset */
3083         e1000_reset_hw(hw);
3084         E1000_WRITE_REG(hw, E1000_WUC, 0);
3085
3086         /* Reset for AutoMediaDetect */
3087         if (adapter->flags & IGB_MEDIA_RESET) {
3088                 e1000_setup_init_funcs(hw, TRUE);
3089                 e1000_get_bus_info(hw);
3090                 adapter->flags &= ~IGB_MEDIA_RESET;
3091         }
3092
3093         if (e1000_init_hw(hw) < 0)
3094                 device_printf(dev, "Hardware Initialization Failed\n");
3095
3096         /* Setup DMA Coalescing */
3097         igb_init_dmac(adapter, pba);
3098
3099         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3100         e1000_get_phy_info(hw);
3101         e1000_check_for_link(hw);
3102         return;
3103 }
3104
3105 /*********************************************************************
3106  *
3107  *  Setup networking device structure and register an interface.
3108  *
3109  **********************************************************************/
3110 static int
3111 igb_setup_interface(device_t dev, struct adapter *adapter)
3112 {
3113         struct ifnet   *ifp;
3114
3115         INIT_DEBUGOUT("igb_setup_interface: begin");
3116
3117         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3118         if (ifp == NULL) {
3119                 device_printf(dev, "can not allocate ifnet structure\n");
3120                 return (-1);
3121         }
3122         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3123         ifp->if_init =  igb_init;
3124         ifp->if_softc = adapter;
3125         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3126         ifp->if_ioctl = igb_ioctl;
3127         ifp->if_get_counter = igb_get_counter;
3128 #ifndef IGB_LEGACY_TX
3129         ifp->if_transmit = igb_mq_start;
3130         ifp->if_qflush = igb_qflush;
3131 #else
3132         ifp->if_start = igb_start;
3133         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3134         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3135         IFQ_SET_READY(&ifp->if_snd);
3136 #endif
3137
3138         ether_ifattach(ifp, adapter->hw.mac.addr);
3139
3140         ifp->if_capabilities = ifp->if_capenable = 0;
3141
3142         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3143         ifp->if_capabilities |= IFCAP_TSO;
3144         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3145         ifp->if_capenable = ifp->if_capabilities;
3146
3147         /* Advertise LRO, but don't enable it by default */
3148         ifp->if_capabilities |= IFCAP_LRO;
3149
3150 #ifdef DEVICE_POLLING
3151         ifp->if_capabilities |= IFCAP_POLLING;
3152 #endif
3153
3154         /*
3155          * Tell the upper layer(s) we
3156          * support full VLAN capability.
3157          */
3158         ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3159         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3160                              |  IFCAP_VLAN_HWTSO
3161                              |  IFCAP_VLAN_MTU;
3162         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3163                           |  IFCAP_VLAN_HWTSO
3164                           |  IFCAP_VLAN_MTU;
3165
3166         /*
3167         ** Don't enable this by default: if vlans are created on
3168         ** another pseudo device (e.g. lagg), vlan events are not
3169         ** passed through, which breaks operation, whereas with
3170         ** HW FILTER off it works.  If vlans are used directly on
3171         ** the igb interface you can enable this and get full
3172         ** hardware tag filtering.
3173         */
3174         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3175
3176         /*
3177          * Specify the media types supported by this adapter and register
3178          * callbacks to update media and link information
3179          */
3180         ifmedia_init(&adapter->media, IFM_IMASK,
3181             igb_media_change, igb_media_status);
3182         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3183             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3184                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3185                             0, NULL);
3186                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3187         } else {
3188                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3189                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3190                             0, NULL);
3191                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3192                             0, NULL);
3193                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3194                             0, NULL);
3195                 if (adapter->hw.phy.type != e1000_phy_ife) {
3196                         ifmedia_add(&adapter->media,
3197                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3198                         ifmedia_add(&adapter->media,
3199                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3200                 }
3201         }
3202         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3203         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3204         return (0);
3205 }
3206
3207
3208 /*
3209  * Manage DMA'able memory.
3210  */
3211 static void
3212 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3213 {
3214         if (error)
3215                 return;
3216         *(bus_addr_t *) arg = segs[0].ds_addr;
3217 }
3218
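/*
 * Note on the flow (a sketch): the tag below is created with a single
 * segment and loaded with BUS_DMA_NOWAIT, so igb_dmamap_cb() is invoked
 * synchronously from bus_dmamap_load() and simply records the lone
 * segment's physical address into dma->dma_paddr.
 */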
3219 static int
3220 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3221         struct igb_dma_alloc *dma, int mapflags)
3222 {
3223         int error;
3224
3225         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3226                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3227                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3228                                 BUS_SPACE_MAXADDR,      /* highaddr */
3229                                 NULL, NULL,             /* filter, filterarg */
3230                                 size,                   /* maxsize */
3231                                 1,                      /* nsegments */
3232                                 size,                   /* maxsegsize */
3233                                 0,                      /* flags */
3234                                 NULL,                   /* lockfunc */
3235                                 NULL,                   /* lockarg */
3236                                 &dma->dma_tag);
3237         if (error) {
3238                 device_printf(adapter->dev,
3239                     "%s: bus_dma_tag_create failed: %d\n",
3240                     __func__, error);
3241                 goto fail_0;
3242         }
3243
3244         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3245             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3246         if (error) {
3247                 device_printf(adapter->dev,
3248                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3249                     __func__, (uintmax_t)size, error);
3250                 goto fail_2;
3251         }
3252
3253         dma->dma_paddr = 0;
3254         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3255             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3256         if (error || dma->dma_paddr == 0) {
3257                 device_printf(adapter->dev,
3258                     "%s: bus_dmamap_load failed: %d\n",
3259                     __func__, error);
3260                 goto fail_3;
3261         }
3262
3263         return (0);
3264
3265 fail_3:
3266         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3267 fail_2:
3268         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3269         bus_dma_tag_destroy(dma->dma_tag);
3270 fail_0:
3271         dma->dma_tag = NULL;
3272
3273         return (error);
3274 }
3275
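/*
 * Teardown mirrors igb_dma_malloc() in reverse: sync and unload the map,
 * free the DMA memory (bus_dmamem_free also releases the map created by
 * bus_dmamem_alloc), and finally destroy the tag.
 */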
3276 static void
3277 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3278 {
3279         if (dma->dma_tag == NULL)
3280                 return;
3281         if (dma->dma_paddr != 0) {
3282                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3283                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3284                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3285                 dma->dma_paddr = 0;
3286         }
3287         if (dma->dma_vaddr != NULL) {
3288                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3289                 dma->dma_vaddr = NULL;
3290         }
3291         bus_dma_tag_destroy(dma->dma_tag);
3292         dma->dma_tag = NULL;
3293 }
3294
3295
3296 /*********************************************************************
3297  *
3298  *  Allocate memory for the transmit and receive rings, and then
3299  *  the descriptors associated with each, called only once at attach.
3300  *
3301  **********************************************************************/
3302 static int
3303 igb_allocate_queues(struct adapter *adapter)
3304 {
3305         device_t dev = adapter->dev;
3306         struct igb_queue        *que = NULL;
3307         struct tx_ring          *txr = NULL;
3308         struct rx_ring          *rxr = NULL;
3309         int rsize, tsize, error = E1000_SUCCESS;
3310         int txconf = 0, rxconf = 0;
3311
3312         /* First allocate the top level queue structs */
3313         if (!(adapter->queues =
3314             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3315             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3316                 device_printf(dev, "Unable to allocate queue memory\n");
3317                 error = ENOMEM;
3318                 goto fail;
3319         }
3320
3321         /* Next allocate the TX ring struct memory */
3322         if (!(adapter->tx_rings =
3323             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3324             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3325                 device_printf(dev, "Unable to allocate TX ring memory\n");
3326                 error = ENOMEM;
3327                 goto tx_fail;
3328         }
3329
3330         /* Now allocate the RX */
3331         if (!(adapter->rx_rings =
3332             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3333             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3334                 device_printf(dev, "Unable to allocate RX ring memory\n");
3335                 error = ENOMEM;
3336                 goto rx_fail;
3337         }
3338
3339         tsize = roundup2(adapter->num_tx_desc *
3340             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3341         /*
3342          * Now set up the TX queues, txconf is needed to handle the
3343          * possibility that things fail midcourse and we need to
3344          * undo memory gracefully
3345          */ 
3346         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3347                 /* Set up some basics */
3348                 txr = &adapter->tx_rings[i];
3349                 txr->adapter = adapter;
3350                 txr->me = i;
3351                 txr->num_desc = adapter->num_tx_desc;
3352
3353                 /* Initialize the TX lock */
3354                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3355                     device_get_nameunit(dev), txr->me);
3356                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3357
3358                 if (igb_dma_malloc(adapter, tsize,
3359                         &txr->txdma, BUS_DMA_NOWAIT)) {
3360                         device_printf(dev,
3361                             "Unable to allocate TX Descriptor memory\n");
3362                         error = ENOMEM;
3363                         goto err_tx_desc;
3364                 }
3365                 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3366                 bzero((void *)txr->tx_base, tsize);
3367
3368                 /* Now allocate transmit buffers for the ring */
3369                 if (igb_allocate_transmit_buffers(txr)) {
3370                         device_printf(dev,
3371                             "Critical Failure setting up transmit buffers\n");
3372                         error = ENOMEM;
3373                         goto err_tx_desc;
3374                 }
3375 #ifndef IGB_LEGACY_TX
3376                 /* Allocate a buf ring */
3377                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3378                     M_WAITOK, &txr->tx_mtx);
3379 #endif
3380         }
3381
3382         /*
3383          * Next the RX queues...
3384          */ 
3385         rsize = roundup2(adapter->num_rx_desc *
3386             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3387         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3388                 rxr = &adapter->rx_rings[i];
3389                 rxr->adapter = adapter;
3390                 rxr->me = i;
3391
3392                 /* Initialize the RX lock */
3393                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3394                     device_get_nameunit(dev), rxr->me);
3395                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3396
3397                 if (igb_dma_malloc(adapter, rsize,
3398                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3399                         device_printf(dev,
3400                             "Unable to allocate RX Descriptor memory\n");
3401                         error = ENOMEM;
3402                         goto err_rx_desc;
3403                 }
3404                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3405                 bzero((void *)rxr->rx_base, rsize);
3406
3407                 /* Allocate receive buffers for the ring*/
3408                 if (igb_allocate_receive_buffers(rxr)) {
3409                         device_printf(dev,
3410                             "Critical Failure setting up receive buffers\n");
3411                         error = ENOMEM;
3412                         goto err_rx_desc;
3413                 }
3414         }
3415
3416         /*
3417         ** Finally set up the queue holding structs
3418         */
3419         for (int i = 0; i < adapter->num_queues; i++) {
3420                 que = &adapter->queues[i];
3421                 que->adapter = adapter;
3422                 que->txr = &adapter->tx_rings[i];
3423                 que->rxr = &adapter->rx_rings[i];
3424         }
3425
3426         return (0);
3427
3428 err_rx_desc:
3429         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3430                 igb_dma_free(adapter, &rxr->rxdma);
3431 err_tx_desc:
3432         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3433                 igb_dma_free(adapter, &txr->txdma);
3434         free(adapter->rx_rings, M_DEVBUF);
3435 rx_fail:
3436 #ifndef IGB_LEGACY_TX
3437         buf_ring_free(txr->br, M_DEVBUF);
3438 #endif
3439         free(adapter->tx_rings, M_DEVBUF);
3440 tx_fail:
3441         free(adapter->queues, M_DEVBUF);
3442 fail:
3443         return (error);
3444 }
3445
3446 /*********************************************************************
3447  *
3448  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3449  *  the information needed to transmit a packet on the wire. This is
3450  *  called only once at attach; setup is done on every reset.
3451  *
3452  **********************************************************************/
3453 static int
3454 igb_allocate_transmit_buffers(struct tx_ring *txr)
3455 {
3456         struct adapter *adapter = txr->adapter;
3457         device_t dev = adapter->dev;
3458         struct igb_tx_buf *txbuf;
3459         int error, i;
3460
3461         /*
3462          * Setup DMA descriptor areas.
3463          */
3464         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3465                                1, 0,                    /* alignment, bounds */
3466                                BUS_SPACE_MAXADDR,       /* lowaddr */
3467                                BUS_SPACE_MAXADDR,       /* highaddr */
3468                                NULL, NULL,              /* filter, filterarg */
3469                                IGB_TSO_SIZE,            /* maxsize */
3470                                IGB_MAX_SCATTER,         /* nsegments */
3471                                PAGE_SIZE,               /* maxsegsize */
3472                                0,                       /* flags */
3473                                NULL,                    /* lockfunc */
3474                                NULL,                    /* lockfuncarg */
3475                                &txr->txtag))) {
3476                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3477                 goto fail;
3478         }
3479
3480         if (!(txr->tx_buffers =
3481             (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3482             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3483                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3484                 error = ENOMEM;
3485                 goto fail;
3486         }
3487
3488         /* Create the descriptor buffer dma maps */
3489         txbuf = txr->tx_buffers;
3490         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3491                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3492                 if (error != 0) {
3493                         device_printf(dev, "Unable to create TX DMA map\n");
3494                         goto fail;
3495                 }
3496         }
3497
3498         return 0;
3499 fail:
3500         /* We free all, it handles case where we are in the middle */
3501         igb_free_transmit_structures(adapter);
3502         return (error);
3503 }
3504
3505 /*********************************************************************
3506  *
3507  *  Initialize a transmit ring.
3508  *
3509  **********************************************************************/
3510 static void
3511 igb_setup_transmit_ring(struct tx_ring *txr)
3512 {
3513         struct adapter *adapter = txr->adapter;
3514         struct igb_tx_buf *txbuf;
3515         int i;
3516 #ifdef DEV_NETMAP
3517         struct netmap_adapter *na = NA(adapter->ifp);
3518         struct netmap_slot *slot;
3519 #endif /* DEV_NETMAP */
3520
3521         /* Clear the old descriptor contents */
3522         IGB_TX_LOCK(txr);
3523 #ifdef DEV_NETMAP
3524         slot = netmap_reset(na, NR_TX, txr->me, 0);
3525 #endif /* DEV_NETMAP */
3526         bzero((void *)txr->tx_base,
3527               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3528         /* Reset indices */
3529         txr->next_avail_desc = 0;
3530         txr->next_to_clean = 0;
3531
3532         /* Free any existing tx buffers. */
3533         txbuf = txr->tx_buffers;
3534         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3535                 if (txbuf->m_head != NULL) {
3536                         bus_dmamap_sync(txr->txtag, txbuf->map,
3537                             BUS_DMASYNC_POSTWRITE);
3538                         bus_dmamap_unload(txr->txtag, txbuf->map);
3539                         m_freem(txbuf->m_head);
3540                         txbuf->m_head = NULL;
3541                 }
3542 #ifdef DEV_NETMAP
3543                 if (slot) {
3544                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3545                         /* no need to set the address */
3546                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3547                 }
3548 #endif /* DEV_NETMAP */
3549                 /* clear the watch index */
3550                 txbuf->eop = NULL;
3551         }
3552
3553         /* Set number of descriptors available */
3554         txr->tx_avail = adapter->num_tx_desc;
3555
3556         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3557             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3558         IGB_TX_UNLOCK(txr);
3559 }
3560
3561 /*********************************************************************
3562  *
3563  *  Initialize all transmit rings.
3564  *
3565  **********************************************************************/
3566 static void
3567 igb_setup_transmit_structures(struct adapter *adapter)
3568 {
3569         struct tx_ring *txr = adapter->tx_rings;
3570
3571         for (int i = 0; i < adapter->num_queues; i++, txr++)
3572                 igb_setup_transmit_ring(txr);
3573
3574         return;
3575 }
3576
3577 /*********************************************************************
3578  *
3579  *  Enable transmit unit.
3580  *
3581  **********************************************************************/
3582 static void
3583 igb_initialize_transmit_units(struct adapter *adapter)
3584 {
3585         struct tx_ring  *txr = adapter->tx_rings;
3586         struct e1000_hw *hw = &adapter->hw;
3587         u32             tctl, txdctl;
3588
3589         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3590         tctl = txdctl = 0;
3591
3592         /* Setup the Tx Descriptor Rings */
3593         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3594                 u64 bus_addr = txr->txdma.dma_paddr;
3595
3596                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3597                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3598                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3599                     (uint32_t)(bus_addr >> 32));
3600                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3601                     (uint32_t)bus_addr);
3602
3603                 /* Setup the HW Tx Head and Tail descriptor pointers */
3604                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3605                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3606
3607                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3608                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3609                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3610
3611                 txr->queue_status = IGB_QUEUE_IDLE;
3612
3613                 txdctl |= IGB_TX_PTHRESH;
3614                 txdctl |= IGB_TX_HTHRESH << 8;
3615                 txdctl |= IGB_TX_WTHRESH << 16;
3616                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3617                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3618         }
3619
3620         if (adapter->vf_ifp)
3621                 return;
3622
3623         e1000_config_collision_dist(hw);
3624
3625         /* Program the Transmit Control Register */
3626         tctl = E1000_READ_REG(hw, E1000_TCTL);
3627         tctl &= ~E1000_TCTL_CT;
3628         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3629                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3630
3631         /* This write will effectively turn on the transmit unit. */
3632         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3633 }
3634
3635 /*********************************************************************
3636  *
3637  *  Free all transmit rings.
3638  *
3639  **********************************************************************/
3640 static void
3641 igb_free_transmit_structures(struct adapter *adapter)
3642 {
3643         struct tx_ring *txr = adapter->tx_rings;
3644
3645         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3646                 IGB_TX_LOCK(txr);
3647                 igb_free_transmit_buffers(txr);
3648                 igb_dma_free(adapter, &txr->txdma);
3649                 IGB_TX_UNLOCK(txr);
3650                 IGB_TX_LOCK_DESTROY(txr);
3651         }
3652         free(adapter->tx_rings, M_DEVBUF);
3653 }
3654
3655 /*********************************************************************
3656  *
3657  *  Free transmit ring related data structures.
3658  *
3659  **********************************************************************/
3660 static void
3661 igb_free_transmit_buffers(struct tx_ring *txr)
3662 {
3663         struct adapter *adapter = txr->adapter;
3664         struct igb_tx_buf *tx_buffer;
3665         int             i;
3666
3667         INIT_DEBUGOUT("free_transmit_ring: begin");
3668
3669         if (txr->tx_buffers == NULL)
3670                 return;
3671
3672         tx_buffer = txr->tx_buffers;
3673         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3674                 if (tx_buffer->m_head != NULL) {
3675                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3676                             BUS_DMASYNC_POSTWRITE);
3677                         bus_dmamap_unload(txr->txtag,
3678                             tx_buffer->map);
3679                         m_freem(tx_buffer->m_head);
3680                         tx_buffer->m_head = NULL;
3681                         if (tx_buffer->map != NULL) {
3682                                 bus_dmamap_destroy(txr->txtag,
3683                                     tx_buffer->map);
3684                                 tx_buffer->map = NULL;
3685                         }
3686                 } else if (tx_buffer->map != NULL) {
3687                         bus_dmamap_unload(txr->txtag,
3688                             tx_buffer->map);
3689                         bus_dmamap_destroy(txr->txtag,
3690                             tx_buffer->map);
3691                         tx_buffer->map = NULL;
3692                 }
3693         }
3694 #ifndef IGB_LEGACY_TX
3695         if (txr->br != NULL)
3696                 buf_ring_free(txr->br, M_DEVBUF);
3697 #endif
3698         if (txr->tx_buffers != NULL) {
3699                 free(txr->tx_buffers, M_DEVBUF);
3700                 txr->tx_buffers = NULL;
3701         }
3702         if (txr->txtag != NULL) {
3703                 bus_dma_tag_destroy(txr->txtag);
3704                 txr->txtag = NULL;
3705         }
3706         return;
3707 }
3708
3709 /**********************************************************************
3710  *
3711  *  Setup work for hardware segmentation offload (TSO) on
3712  *  adapters using advanced tx descriptors
3713  *
3714  **********************************************************************/
3715 static int
3716 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3717     u32 *cmd_type_len, u32 *olinfo_status)
3718 {
3719         struct adapter *adapter = txr->adapter;
3720         struct e1000_adv_tx_context_desc *TXD;
3721         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3722         u32 mss_l4len_idx = 0, paylen;
3723         u16 vtag = 0, eh_type;
3724         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3725         struct ether_vlan_header *eh;
3726 #ifdef INET6
3727         struct ip6_hdr *ip6;
3728 #endif
3729 #ifdef INET
3730         struct ip *ip;
3731 #endif
3732         struct tcphdr *th;
3733
3734
3735         /*
3736          * Determine where frame payload starts.
3737          * Jump over vlan headers if already present
3738          */
3739         eh = mtod(mp, struct ether_vlan_header *);
3740         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3741                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3742                 eh_type = eh->evl_proto;
3743         } else {
3744                 ehdrlen = ETHER_HDR_LEN;
3745                 eh_type = eh->evl_encap_proto;
3746         }
3747
3748         switch (ntohs(eh_type)) {
3749 #ifdef INET6
3750         case ETHERTYPE_IPV6:
3751                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3752                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3753                 if (ip6->ip6_nxt != IPPROTO_TCP)
3754                         return (ENXIO);
3755                 ip_hlen = sizeof(struct ip6_hdr);
3757                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3758                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3759                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3760                 break;
3761 #endif
3762 #ifdef INET
3763         case ETHERTYPE_IP:
3764                 ip = (struct ip *)(mp->m_data + ehdrlen);
3765                 if (ip->ip_p != IPPROTO_TCP)
3766                         return (ENXIO);
3767                 ip->ip_sum = 0;
3768                 ip_hlen = ip->ip_hl << 2;
3769                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3770                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3771                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3772                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3773                 /* Tell transmit desc to also do IPv4 checksum. */
3774                 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3775                 break;
3776 #endif
3777         default:
3778                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3779                     __func__, ntohs(eh_type));
3780                 break;
3781         }
3782
3783         ctxd = txr->next_avail_desc;
3784         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3785
3786         tcp_hlen = th->th_off << 2;
3787
3788         /* This is used in the transmit desc in encap */
3789         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
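        /*
         * e.g. (values assumed): a 7306-byte TSO chain with a 14-byte
         * Ethernet header, a 20-byte IPv4 header and a 32-byte TCP
         * header yields paylen = 7306 - 14 - 20 - 32 = 7240.
         */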
3790
3791         /* VLAN MACLEN IPLEN */
3792         if (mp->m_flags & M_VLANTAG) {
3793                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3794                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3795         }
3796
3797         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3798         vlan_macip_lens |= ip_hlen;
3799         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3800
3801         /* ADV DTYPE TUCMD */
3802         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3803         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3804         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3805
3806         /* MSS L4LEN IDX */
3807         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3808         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3809         /* 82575 needs the queue index added */
3810         if (adapter->hw.mac.type == e1000_82575)
3811                 mss_l4len_idx |= txr->me << 4;
3812         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
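        /*
         * Note: this context descriptor consumes a ring slot of its own,
         * which is why tx_avail is decremented and next_avail_desc is
         * advanced below before the caller fills in the data descriptors.
         */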
3813
3814         TXD->seqnum_seed = htole32(0);
3815
3816         if (++ctxd == txr->num_desc)
3817                 ctxd = 0;
3818
3819         txr->tx_avail--;
3820         txr->next_avail_desc = ctxd;
3821         *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3822         *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3823         *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3824         ++txr->tso_tx;
3825         return (0);
3826 }
3827
3828 /*********************************************************************
3829  *
3830  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3831  *
3832  **********************************************************************/
3833
3834 static int
3835 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3836     u32 *cmd_type_len, u32 *olinfo_status)
3837 {
3838         struct e1000_adv_tx_context_desc *TXD;
3839         struct adapter *adapter = txr->adapter;
3840         struct ether_vlan_header *eh;
3841         struct ip *ip;
3842         struct ip6_hdr *ip6;
3843         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3844         int     ehdrlen, ip_hlen = 0;
3845         u16     etype;
3846         u8      ipproto = 0;
3847         int     offload = TRUE;
3848         int     ctxd = txr->next_avail_desc;
3849         u16     vtag = 0;
3850
3851         /* First check if TSO is to be used */
3852         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3853                 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3854
3855         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3856                 offload = FALSE;
3857
3858         /* Indicate the whole packet as payload when not doing TSO */
3859         *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3860
3861         /* Now ready a context descriptor */
3862         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3863
3864         /*
3865         ** In advanced descriptors the vlan tag must 
3866         ** be placed into the context descriptor. Hence
3867         ** we need to make one even if not doing offloads.
3868         */
3869         if (mp->m_flags & M_VLANTAG) {
3870                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3871                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3872         } else if (offload == FALSE) /* ... no offload to do */
3873                 return (0);
3874
3875         /*
3876          * Determine where frame payload starts.
3877          * Jump over vlan headers if already present,
3878          * helpful for QinQ too.
3879          */
3880         eh = mtod(mp, struct ether_vlan_header *);
3881         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3882                 etype = ntohs(eh->evl_proto);
3883                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3884         } else {
3885                 etype = ntohs(eh->evl_encap_proto);
3886                 ehdrlen = ETHER_HDR_LEN;
3887         }
3888
3889         /* Set the ether header length */
3890         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3891
3892         switch (etype) {
3893                 case ETHERTYPE_IP:
3894                         ip = (struct ip *)(mp->m_data + ehdrlen);
3895                         ip_hlen = ip->ip_hl << 2;
3896                         ipproto = ip->ip_p;
3897                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3898                         break;
3899                 case ETHERTYPE_IPV6:
3900                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3901                         ip_hlen = sizeof(struct ip6_hdr);
3902                         /* XXX-BZ this will go badly in case of ext hdrs. */
3903                         ipproto = ip6->ip6_nxt;
3904                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3905                         break;
3906                 default:
3907                         offload = FALSE;
3908                         break;
3909         }
3910
3911         vlan_macip_lens |= ip_hlen;
3912         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3913
3914         switch (ipproto) {
3915                 case IPPROTO_TCP:
3916                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3917                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3918                         break;
3919                 case IPPROTO_UDP:
3920                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3921                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3922                         break;
3923
3924 #if __FreeBSD_version >= 800000
3925                 case IPPROTO_SCTP:
3926                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3927                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3928                         break;
3929 #endif
3930                 default:
3931                         offload = FALSE;
3932                         break;
3933         }
3934
3935         if (offload) /* For the TX descriptor setup */
3936                 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3937
3938         /* 82575 needs the queue index added */
3939         if (adapter->hw.mac.type == e1000_82575)
3940                 mss_l4len_idx = txr->me << 4;
3941
3942         /* Now copy bits into descriptor */
3943         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3944         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3945         TXD->seqnum_seed = htole32(0);
3946         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3947
3948         /* We've consumed the first desc, adjust counters */
3949         if (++ctxd == txr->num_desc)
3950                 ctxd = 0;
3951         txr->next_avail_desc = ctxd;
3952         --txr->tx_avail;
3953
3954         return (0);
3955 }
3956
3957 /**********************************************************************
3958  *
3959  *  Examine each tx_buffer in the used queue. If the hardware is done
3960  *  processing the packet then free associated resources. The
3961  *  tx_buffer is put back on the free queue.
3962  *
3963  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3964  **********************************************************************/
3965 static bool
3966 igb_txeof(struct tx_ring *txr)
3967 {
3968         struct adapter          *adapter = txr->adapter;
3969 #ifdef DEV_NETMAP
3970         struct ifnet            *ifp = adapter->ifp;
3971 #endif /* DEV_NETMAP */
3972         u32                     work, processed = 0;
3973         u16                     limit = txr->process_limit;
3974         struct igb_tx_buf       *buf;
3975         union e1000_adv_tx_desc *txd;
3976
3977         mtx_assert(&txr->tx_mtx, MA_OWNED);
3978
3979 #ifdef DEV_NETMAP
3980         if (netmap_tx_irq(ifp, txr->me))
3981                 return (FALSE);
3982 #endif /* DEV_NETMAP */
3983
3984         if (txr->tx_avail == txr->num_desc) {
3985                 txr->queue_status = IGB_QUEUE_IDLE;
3986                 return FALSE;
3987         }
3988
3989         /* Get work starting point */
3990         work = txr->next_to_clean;
3991         buf = &txr->tx_buffers[work];
3992         txd = &txr->tx_base[work];
3993         work -= txr->num_desc; /* The distance to ring end */
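        /*
         * work is kept as a negative offset from the ring end (relying on
         * unsigned wraparound): incrementing it to zero means the scan has
         * stepped past the last descriptor and must wrap, which is what
         * the "!work" tests below detect; the true index is recovered at
         * the end by adding num_desc back.
         */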
3994         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3995             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3996         do {
3997                 union e1000_adv_tx_desc *eop = buf->eop;
3998                 if (eop == NULL) /* No work */
3999                         break;
4000
4001                 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4002                         break;  /* I/O not complete */
4003
4004                 if (buf->m_head) {
4005                         txr->bytes +=
4006                             buf->m_head->m_pkthdr.len;
4007                         bus_dmamap_sync(txr->txtag,
4008                             buf->map,
4009                             BUS_DMASYNC_POSTWRITE);
4010                         bus_dmamap_unload(txr->txtag,
4011                             buf->map);
4012                         m_freem(buf->m_head);
4013                         buf->m_head = NULL;
4014                 }
4015                 buf->eop = NULL;
4016                 ++txr->tx_avail;
4017
4018                 /* We clean the range if multi segment */
4019                 while (txd != eop) {
4020                         ++txd;
4021                         ++buf;
4022                         ++work;
4023                         /* wrap the ring? */
4024                         if (__predict_false(!work)) {
4025                                 work -= txr->num_desc;
4026                                 buf = txr->tx_buffers;
4027                                 txd = txr->tx_base;
4028                         }
4029                         if (buf->m_head) {
4030                                 txr->bytes +=
4031                                     buf->m_head->m_pkthdr.len;
4032                                 bus_dmamap_sync(txr->txtag,
4033                                     buf->map,
4034                                     BUS_DMASYNC_POSTWRITE);
4035                                 bus_dmamap_unload(txr->txtag,
4036                                     buf->map);
4037                                 m_freem(buf->m_head);
4038                                 buf->m_head = NULL;
4039                         }
4040                         ++txr->tx_avail;
4041                         buf->eop = NULL;
4042
4043                 }
4044                 ++txr->packets;
4045                 ++processed;
4046                 txr->watchdog_time = ticks;
4047
4048                 /* Try the next packet */
4049                 ++txd;
4050                 ++buf;
4051                 ++work;
4052                 /* reset with a wrap */
4053                 if (__predict_false(!work)) {
4054                         work -= txr->num_desc;
4055                         buf = txr->tx_buffers;
4056                         txd = txr->tx_base;
4057                 }
4058                 prefetch(txd);
4059         } while (__predict_true(--limit));
4060
4061         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4062             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4063
4064         work += txr->num_desc;
4065         txr->next_to_clean = work;
4066
4067         /*
4068         ** Watchdog calculation: we know there's
4069         ** work outstanding or the first return
4070         ** would have been taken, so no progress
4071         ** for too long indicates a hang.
4072         */
4073         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4074                 txr->queue_status |= IGB_QUEUE_HUNG;
4075
4076         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4077                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;       
4078
4079         if (txr->tx_avail == txr->num_desc) {
4080                 txr->queue_status = IGB_QUEUE_IDLE;
4081                 return (FALSE);
4082         }
4083
4084         return (TRUE);
4085 }
4086
4087 /*********************************************************************
4088  *
4089  *  Refresh mbuf buffers for RX descriptor rings
4090  *   - now keeps its own state so discards due to resource
4091  *     exhaustion are unnecessary; if an mbuf cannot be obtained
4092  *     it just returns, keeping its placeholder, so it can simply
4093  *     be called again to retry.
4094  *
4095  **********************************************************************/
4096 static void
4097 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4098 {
4099         struct adapter          *adapter = rxr->adapter;
4100         bus_dma_segment_t       hseg[1];
4101         bus_dma_segment_t       pseg[1];
4102         struct igb_rx_buf       *rxbuf;
4103         struct mbuf             *mh, *mp;
4104         int                     i, j, nsegs, error;
4105         bool                    refreshed = FALSE;
4106
4107         i = j = rxr->next_to_refresh;
4108         /*
4109         ** Get one descriptor beyond
4110         ** our work mark to control
4111         ** the loop.
4112         */
4113         if (++j == adapter->num_rx_desc)
4114                 j = 0;
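        /*
         * Illustrative trace (values assumed): with num_rx_desc = 1024,
         * next_to_refresh = 1022 and limit = 3, the loop below refreshes
         * descriptors 1022, 1023, 0 and 1, leaves next_to_refresh at 2,
         * and never touches the slot at "limit" itself.
         */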
4115
4116         while (j != limit) {
4117                 rxbuf = &rxr->rx_buffers[i];
4118                 /* No hdr mbuf used with header split off */
4119                 if (rxr->hdr_split == FALSE)
4120                         goto no_split;
4121                 if (rxbuf->m_head == NULL) {
4122                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4123                         if (mh == NULL)
4124                                 goto update;
4125                 } else
4126                         mh = rxbuf->m_head;
4127
4128                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4130                 mh->m_flags |= M_PKTHDR;
4131                 /* Get the memory mapping */
4132                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4133                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4134                 if (error != 0) {
4135                         printf("Refresh mbufs: hdr dmamap load"
4136                             " failure - %d\n", error);
4137                         m_free(mh);
4138                         rxbuf->m_head = NULL;
4139                         goto update;
4140                 }
4141                 rxbuf->m_head = mh;
4142                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4143                     BUS_DMASYNC_PREREAD);
4144                 rxr->rx_base[i].read.hdr_addr =
4145                     htole64(hseg[0].ds_addr);
4146 no_split:
4147                 if (rxbuf->m_pack == NULL) {
4148                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4149                             M_PKTHDR, adapter->rx_mbuf_sz);
4150                         if (mp == NULL)
4151                                 goto update;
4152                 } else
4153                         mp = rxbuf->m_pack;
4154
4155                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4156                 /* Get the memory mapping */
4157                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4158                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4159                 if (error != 0) {
4160                         printf("Refresh mbufs: payload dmamap load"
4161                             " failure - %d\n", error);
4162                         m_free(mp);
4163                         rxbuf->m_pack = NULL;
4164                         goto update;
4165                 }
4166                 rxbuf->m_pack = mp;
4167                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4168                     BUS_DMASYNC_PREREAD);
4169                 rxr->rx_base[i].read.pkt_addr =
4170                     htole64(pseg[0].ds_addr);
4171                 refreshed = TRUE; /* I feel wefreshed :) */
4172
4173                 i = j; /* our next is precalculated */
4174                 rxr->next_to_refresh = i;
4175                 if (++j == adapter->num_rx_desc)
4176                         j = 0;
4177         }
4178 update:
4179         if (refreshed) /* update tail */
4180                 E1000_WRITE_REG(&adapter->hw,
4181                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4182         return;
4183 }
4184
4185
4186 /*********************************************************************
4187  *
4188  *  Allocate memory for rx_buffer structures. Since we use one
4189  *  rx_buffer per received packet, the maximum number of rx_buffer's
4190  *  that we'll need is equal to the number of receive descriptors
4191  *  that we've allocated.
4192  *
4193  **********************************************************************/
4194 static int
4195 igb_allocate_receive_buffers(struct rx_ring *rxr)
4196 {
4197         struct  adapter         *adapter = rxr->adapter;
4198         device_t                dev = adapter->dev;
4199         struct igb_rx_buf       *rxbuf;
4200         int                     i, bsize, error;
4201
4202         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4203         if (!(rxr->rx_buffers =
4204             (struct igb_rx_buf *) malloc(bsize,
4205             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4206                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4207                 error = ENOMEM;
4208                 goto fail;
4209         }
4210
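             /*
             ** Two DMA tags are used below: 'htag' maps the small header
             ** mbufs (at most MSIZE bytes) and 'ptag' maps the payload
             ** clusters (up to a 9K jumbo buffer).
             */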
4211         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4212                                    1, 0,                /* alignment, bounds */
4213                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4214                                    BUS_SPACE_MAXADDR,   /* highaddr */
4215                                    NULL, NULL,          /* filter, filterarg */
4216                                    MSIZE,               /* maxsize */
4217                                    1,                   /* nsegments */
4218                                    MSIZE,               /* maxsegsize */
4219                                    0,                   /* flags */
4220                                    NULL,                /* lockfunc */
4221                                    NULL,                /* lockfuncarg */
4222                                    &rxr->htag))) {
4223                 device_printf(dev, "Unable to create RX DMA tag\n");
4224                 goto fail;
4225         }
4226
4227         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4228                                    1, 0,                /* alignment, bounds */
4229                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4230                                    BUS_SPACE_MAXADDR,   /* highaddr */
4231                                    NULL, NULL,          /* filter, filterarg */
4232                                    MJUM9BYTES,          /* maxsize */
4233                                    1,                   /* nsegments */
4234                                    MJUM9BYTES,          /* maxsegsize */
4235                                    0,                   /* flags */
4236                                    NULL,                /* lockfunc */
4237                                    NULL,                /* lockfuncarg */
4238                                    &rxr->ptag))) {
4239                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4240                 goto fail;
4241         }
4242
4243         for (i = 0; i < adapter->num_rx_desc; i++) {
4244                 rxbuf = &rxr->rx_buffers[i];
4245                 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4246                 if (error) {
4247                         device_printf(dev,
4248                             "Unable to create RX head DMA maps\n");
4249                         goto fail;
4250                 }
4251                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4252                 if (error) {
4253                         device_printf(dev,
4254                             "Unable to create RX packet DMA maps\n");
4255                         goto fail;
4256                 }
4257         }
4258
4259         return (0);
4260
4261 fail:
4262         /* Frees all, but can handle partial completion */
4263         igb_free_receive_structures(adapter);
4264         return (error);
4265 }
4266
4267
4268 static void
4269 igb_free_receive_ring(struct rx_ring *rxr)
4270 {
4271         struct  adapter         *adapter = rxr->adapter;
4272         struct igb_rx_buf       *rxbuf;
4273
4274
4275         for (int i = 0; i < adapter->num_rx_desc; i++) {
4276                 rxbuf = &rxr->rx_buffers[i];
4277                 if (rxbuf->m_head != NULL) {
4278                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4279                             BUS_DMASYNC_POSTREAD);
4280                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4281                         rxbuf->m_head->m_flags |= M_PKTHDR;
4282                         m_freem(rxbuf->m_head);
4283                 }
4284                 if (rxbuf->m_pack != NULL) {
4285                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4286                             BUS_DMASYNC_POSTREAD);
4287                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4288                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4289                         m_freem(rxbuf->m_pack);
4290                 }
4291                 rxbuf->m_head = NULL;
4292                 rxbuf->m_pack = NULL;
4293         }
4294 }
4295
4296
4297 /*********************************************************************
4298  *
4299  *  Initialize a receive ring and its buffers.
4300  *
4301  **********************************************************************/
4302 static int
4303 igb_setup_receive_ring(struct rx_ring *rxr)
4304 {
4305         struct  adapter         *adapter;
4306         struct  ifnet           *ifp;
4307         device_t                dev;
4308         struct igb_rx_buf       *rxbuf;
4309         bus_dma_segment_t       pseg[1], hseg[1];
4310         struct lro_ctrl         *lro = &rxr->lro;
4311         int                     rsize, nsegs, error = 0;
4312 #ifdef DEV_NETMAP
4313         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4314         struct netmap_slot *slot;
4315 #endif /* DEV_NETMAP */
4316
4317         adapter = rxr->adapter;
4318         dev = adapter->dev;
4319         ifp = adapter->ifp;
4320
4321         /* Clear the ring contents */
4322         IGB_RX_LOCK(rxr);
4323 #ifdef DEV_NETMAP
4324         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4325 #endif /* DEV_NETMAP */
4326         rsize = roundup2(adapter->num_rx_desc *
4327             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4328         bzero((void *)rxr->rx_base, rsize);
4329
4330         /*
4331         ** Free current RX buffer structures and their mbufs
4332         */
4333         igb_free_receive_ring(rxr);
4334
4335         /* Configure for header split? */
4336         if (igb_header_split)
4337                 rxr->hdr_split = TRUE;
4338
4339         /* Now replenish the ring mbufs */
4340         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4341                 struct mbuf     *mh, *mp;
4342
4343                 rxbuf = &rxr->rx_buffers[j];
4344 #ifdef DEV_NETMAP
4345                 if (slot) {
4346                         /* slot sj is mapped to the j-th NIC-ring entry */
4347                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4348                         uint64_t paddr;
4349                         void *addr;
4350
4351                         addr = PNMB(na, slot + sj, &paddr);
4352                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4353                         /* Update descriptor */
4354                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4355                         continue;
4356                 }
4357 #endif /* DEV_NETMAP */
4358                 if (rxr->hdr_split == FALSE)
4359                         goto skip_head;
4360
4361                 /* First the header */
4362                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4363                 if (rxbuf->m_head == NULL) {
4364                         error = ENOBUFS;
4365                         goto fail;
4366                 }
4367                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4368                 mh = rxbuf->m_head;
4369                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4370                 mh->m_flags |= M_PKTHDR;
4371                 /* Get the memory mapping */
4372                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4373                     rxbuf->hmap, rxbuf->m_head, hseg,
4374                     &nsegs, BUS_DMA_NOWAIT);
4375                 if (error != 0) /* Nothing elegant to do here */
4376                         goto fail;
4377                 bus_dmamap_sync(rxr->htag,
4378                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4379                 /* Update descriptor */
4380                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4381
4382 skip_head:
4383                 /* Now the payload cluster */
4384                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4385                     M_PKTHDR, adapter->rx_mbuf_sz);
4386                 if (rxbuf->m_pack == NULL) {
4387                         error = ENOBUFS;
4388                         goto fail;
4389                 }
4390                 mp = rxbuf->m_pack;
4391                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4392                 /* Get the memory mapping */
4393                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4394                     rxbuf->pmap, mp, pseg,
4395                     &nsegs, BUS_DMA_NOWAIT);
4396                 if (error != 0)
4397                         goto fail;
4398                 bus_dmamap_sync(rxr->ptag,
4399                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4400                 /* Update descriptor */
4401                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4402         }
4403
4404         /* Setup our descriptor indices */
4405         rxr->next_to_check = 0;
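             /*
             ** The ring is fully populated, so the refresh index starts
             ** one slot behind the first slot to be checked.
             */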
4406         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4407         rxr->lro_enabled = FALSE;
4408         rxr->rx_split_packets = 0;
4409         rxr->rx_bytes = 0;
4410
4411         rxr->fmp = NULL;
4412         rxr->lmp = NULL;
4413
4414         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4415             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4416
4417         /*
4418         ** Now set up the LRO interface; header
4419         ** split is normally only worthwhile when
4420         ** LRO is enabled, since it has little
4421         ** benefit on its own.
4422         */
4423         if (ifp->if_capenable & IFCAP_LRO) {
4424                 error = tcp_lro_init(lro);
4425                 if (error) {
4426                         device_printf(dev, "LRO Initialization failed!\n");
4427                         goto fail;
4428                 }
4429                 INIT_DEBUGOUT("RX LRO Initialized\n");
4430                 rxr->lro_enabled = TRUE;
4431                 lro->ifp = adapter->ifp;
4432         }
4433
4434         IGB_RX_UNLOCK(rxr);
4435         return (0);
4436
4437 fail:
4438         igb_free_receive_ring(rxr);
4439         IGB_RX_UNLOCK(rxr);
4440         return (error);
4441 }
4442
4443
4444 /*********************************************************************
4445  *
4446  *  Initialize all receive rings.
4447  *
4448  **********************************************************************/
4449 static int
4450 igb_setup_receive_structures(struct adapter *adapter)
4451 {
4452         struct rx_ring *rxr = adapter->rx_rings;
4453         int i;
4454
4455         for (i = 0; i < adapter->num_queues; i++, rxr++)
4456                 if (igb_setup_receive_ring(rxr))
4457                         goto fail;
4458
4459         return (0);
4460 fail:
4461         /*
4462          * Free RX buffers allocated so far; we only handle
4463          * the rings that completed, since the failing ring
4464          * has already cleaned up after itself. 'i' is the endpoint.
4465          */
4466         for (int j = 0; j < i; ++j) {
4467                 rxr = &adapter->rx_rings[j];
4468                 IGB_RX_LOCK(rxr);
4469                 igb_free_receive_ring(rxr);
4470                 IGB_RX_UNLOCK(rxr);
4471         }
4472
4473         return (ENOBUFS);
4474 }
4475
4476 /*
4477  * Initialise the RSS mapping for NICs that support multiple transmit/
4478  * receive rings.
4479  */
4480 static void
4481 igb_initialise_rss_mapping(struct adapter *adapter)
4482 {
4483         struct e1000_hw *hw = &adapter->hw;
4484         int i;
4485         int queue_id;
4486         u32 reta;
4487         u32 rss_key[10], mrqc, shift = 0;
4488
4489         /* The 82575 expects the queue index in the upper bits of each RETA entry */
4490         if (adapter->hw.mac.type == e1000_82575)
4491                 shift = 6;
4492
4493         /*
4494          * The redirection table controls which destination
4495          * queue each bucket redirects traffic to.
4496          * Each DWORD represents four queues, with the LSB
4497          * being the first queue in the DWORD.
4498          *
4499          * This just allocates buckets to queues using round-robin
4500          * allocation.
4501          *
4502          * NOTE: It Just Happens to line up with the default
4503          * RSS allocation method.
4504          */
4505
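             /*
             ** Example (assuming 4 queues and shift 0): buckets 0..3 map to
             ** queues 0,1,2,3; the bytes accumulate LSB-first, so the loop
             ** below writes RETA(0) = 0x03020100.
             */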
4506         /* Warning FM follows */
4507         reta = 0;
4508         for (i = 0; i < 128; i++) {
4509 #ifdef  RSS
4510                 queue_id = rss_get_indirection_to_bucket(i);
4511                 /*
4512                  * If we have more queues than buckets, we'll
4513                  * end up mapping buckets to a subset of the
4514                  * queues.
4515                  *
4516                  * If we have more buckets than queues, we'll
4517                  * end up instead assigning multiple buckets
4518                  * to queues.
4519                  *
4520                  * Both are suboptimal, but we need to handle
4521                  * the case so we don't go out of bounds
4522                  * indexing arrays and such.
4523                  */
4524                 queue_id = queue_id % adapter->num_queues;
4525 #else
4526                 queue_id = (i % adapter->num_queues);
4527 #endif
4528                 /* Adjust if required */
4529                 queue_id = queue_id << shift;
4530
4531                 /*
4532                  * The low 8 bits are for hash value (n+0);
4533                  * The next 8 bits are for hash value (n+1), etc.
4534                  */
4535                 reta = reta >> 8;
4536                 reta = reta | (((uint32_t) queue_id) << 24);
4537                 if ((i & 3) == 3) {
4538                         E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4539                         reta = 0;
4540                 }
4541         }
4542
4543         /* Now fill in hash table */
4544
4545         /*
4546          * MRQC: Multiple Receive Queues Command
4547          * Set queuing to RSS control, number depends on the device.
4548          */
4549         mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4550
4551 #ifdef  RSS
4552         /* XXX ew typecasting */
4553         rss_getkey((uint8_t *) &rss_key);
4554 #else
4555         arc4rand(&rss_key, sizeof(rss_key), 0);
4556 #endif
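             /* The 40-byte RSS key is programmed as ten 32-bit RSSRK registers. */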
4557         for (i = 0; i < 10; i++)
4558                 E1000_WRITE_REG_ARRAY(hw,
4559                     E1000_RSSRK(0), i, rss_key[i]);
4560
4561         /*
4562          * Configure the RSS fields to hash upon.
4563          */
4564         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4565             E1000_MRQC_RSS_FIELD_IPV4_TCP);
4566         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4567             E1000_MRQC_RSS_FIELD_IPV6_TCP);
4568         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4569             E1000_MRQC_RSS_FIELD_IPV6_UDP);
4570         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4571             E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4572
4573         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4574 }
4575
4576 /*********************************************************************
4577  *
4578  *  Enable receive unit.
4579  *
4580  **********************************************************************/
4581 static void
4582 igb_initialize_receive_units(struct adapter *adapter)
4583 {
4584         struct rx_ring  *rxr = adapter->rx_rings;
4585         struct ifnet    *ifp = adapter->ifp;
4586         struct e1000_hw *hw = &adapter->hw;
4587         u32             rctl, rxcsum, psize, srrctl = 0;
4588
4589         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4590
4591         /*
4592          * Make sure receives are disabled while setting
4593          * up the descriptor ring
4594          */
4595         rctl = E1000_READ_REG(hw, E1000_RCTL);
4596         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4597
4598         /*
4599         ** Set up for header split
4600         */
4601         if (igb_header_split) {
4602                 /* Use a standard mbuf for the header */
4603                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4604                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4605         } else
4606                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4607
4608         /*
4609         ** Set up for jumbo frames
4610         */
4611         if (ifp->if_mtu > ETHERMTU) {
4612                 rctl |= E1000_RCTL_LPE;
4613                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4614                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4615                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4616                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4617                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4618                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4619                 }
4620                 /* Set maximum packet len */
4621                 psize = adapter->max_frame_size;
4622                 /* are we on a vlan? */
4623                 if (adapter->ifp->if_vlantrunk != NULL)
4624                         psize += VLAN_TAG_SIZE;
4625                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4626         } else {
4627                 rctl &= ~E1000_RCTL_LPE;
4628                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4629                 rctl |= E1000_RCTL_SZ_2048;
4630         }
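             /*
             ** The SRRCTL BSIZEPKT field set above expresses the receive
             ** buffer size in 1 KB units.
             */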
4631
4632         /*
4633          * If TX flow control is disabled and there's >1 queue defined,
4634          * enable DROP.
4635          *
4636          * This drops frames rather than hanging the RX MAC for all queues.
4637          */
4638         if ((adapter->num_queues > 1) &&
4639             (adapter->fc == e1000_fc_none ||
4640              adapter->fc == e1000_fc_rx_pause)) {
4641                 srrctl |= E1000_SRRCTL_DROP_EN;
4642         }
4643
4644         /* Setup the Base and Length of the Rx Descriptor Rings */
4645         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4646                 u64 bus_addr = rxr->rxdma.dma_paddr;
4647                 u32 rxdctl;
4648
4649                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4650                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4651                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4652                     (uint32_t)(bus_addr >> 32));
4653                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4654                     (uint32_t)bus_addr);
4655                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4656                 /* Enable this Queue */
4657                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4658                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4659                 rxdctl &= 0xFFF00000;
4660                 rxdctl |= IGB_RX_PTHRESH;
4661                 rxdctl |= IGB_RX_HTHRESH << 8;
4662                 rxdctl |= IGB_RX_WTHRESH << 16;
4663                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4664         }
4665
4666         /*
4667         ** Setup for RX MultiQueue
4668         */
4669         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4670         if (adapter->num_queues > 1) {
4671
4672                 /* rss setup */
4673                 igb_initialise_rss_mapping(adapter);
4674
4675                 /*
4676                 ** NOTE: Receive Full-Packet Checksum Offload 
4677                 ** is mutually exclusive with Multiqueue. However
4678                 ** this is not the same as TCP/IP checksums which
4679                 ** still work.
4680                 */
4681                 rxcsum |= E1000_RXCSUM_PCSD;
4682 #if __FreeBSD_version >= 800000
4683                 /* For SCTP Offload */
4684                 if ((hw->mac.type == e1000_82576)
4685                     && (ifp->if_capenable & IFCAP_RXCSUM))
4686                         rxcsum |= E1000_RXCSUM_CRCOFL;
4687 #endif
4688         } else {
4689                 /* Non RSS setup */
4690                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4691                         rxcsum |= E1000_RXCSUM_IPPCSE;
4692 #if __FreeBSD_version >= 800000
4693                         if (adapter->hw.mac.type == e1000_82576)
4694                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4695 #endif
4696                 } else
4697                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4698         }
4699         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4700
4701         /* Setup the Receive Control Register */
4702         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4703         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4704                    E1000_RCTL_RDMTS_HALF |
4705                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4706         /* Strip CRC bytes. */
4707         rctl |= E1000_RCTL_SECRC;
4708         /* Make sure VLAN Filters are off */
4709         rctl &= ~E1000_RCTL_VFE;
4710         /* Don't store bad packets */
4711         rctl &= ~E1000_RCTL_SBP;
4712
4713         /* Enable Receives */
4714         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4715
4716         /*
4717          * Setup the HW Rx Head and Tail Descriptor Pointers
4718          *   - needs to be after enable
4719          */
4720         for (int i = 0; i < adapter->num_queues; i++) {
4721                 rxr = &adapter->rx_rings[i];
4722                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4723 #ifdef DEV_NETMAP
4724                 /*
4725                  * an init() while a netmap client is active must
4726                  * preserve the rx buffers passed to userspace.
4727                  * In this driver it means we adjust RDT to
4728                  * something different from next_to_refresh
4729                  * (which is not used in netmap mode).
4730                  */
4731                 if (ifp->if_capenable & IFCAP_NETMAP) {
4732                         struct netmap_adapter *na = NA(adapter->ifp);
4733                         struct netmap_kring *kring = &na->rx_rings[i];
4734                         int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4735
4736                         if (t >= adapter->num_rx_desc)
4737                                 t -= adapter->num_rx_desc;
4738                         else if (t < 0)
4739                                 t += adapter->num_rx_desc;
4740                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4741                 } else
4742 #endif /* DEV_NETMAP */
4743                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4744         }
4745         return;
4746 }
4747
4748 /*********************************************************************
4749  *
4750  *  Free receive rings.
4751  *
4752  **********************************************************************/
4753 static void
4754 igb_free_receive_structures(struct adapter *adapter)
4755 {
4756         struct rx_ring *rxr = adapter->rx_rings;
4757
4758         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4759                 struct lro_ctrl *lro = &rxr->lro;
4760                 igb_free_receive_buffers(rxr);
4761                 tcp_lro_free(lro);
4762                 igb_dma_free(adapter, &rxr->rxdma);
4763         }
4764
4765         free(adapter->rx_rings, M_DEVBUF);
4766 }
4767
4768 /*********************************************************************
4769  *
4770  *  Free receive ring data structures.
4771  *
4772  **********************************************************************/
4773 static void
4774 igb_free_receive_buffers(struct rx_ring *rxr)
4775 {
4776         struct adapter          *adapter = rxr->adapter;
4777         struct igb_rx_buf       *rxbuf;
4778         int i;
4779
4780         INIT_DEBUGOUT("free_receive_structures: begin");
4781
4782         /* Cleanup any existing buffers */
4783         if (rxr->rx_buffers != NULL) {
4784                 for (i = 0; i < adapter->num_rx_desc; i++) {
4785                         rxbuf = &rxr->rx_buffers[i];
4786                         if (rxbuf->m_head != NULL) {
4787                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4788                                     BUS_DMASYNC_POSTREAD);
4789                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4790                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4791                                 m_freem(rxbuf->m_head);
4792                         }
4793                         if (rxbuf->m_pack != NULL) {
4794                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4795                                     BUS_DMASYNC_POSTREAD);
4796                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4797                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4798                                 m_freem(rxbuf->m_pack);
4799                         }
4800                         rxbuf->m_head = NULL;
4801                         rxbuf->m_pack = NULL;
4802                         if (rxbuf->hmap != NULL) {
4803                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4804                                 rxbuf->hmap = NULL;
4805                         }
4806                         if (rxbuf->pmap != NULL) {
4807                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4808                                 rxbuf->pmap = NULL;
4809                         }
4810                 }
4811                 if (rxr->rx_buffers != NULL) {
4812                         free(rxr->rx_buffers, M_DEVBUF);
4813                         rxr->rx_buffers = NULL;
4814                 }
4815         }
4816
4817         if (rxr->htag != NULL) {
4818                 bus_dma_tag_destroy(rxr->htag);
4819                 rxr->htag = NULL;
4820         }
4821         if (rxr->ptag != NULL) {
4822                 bus_dma_tag_destroy(rxr->ptag);
4823                 rxr->ptag = NULL;
4824         }
4825 }
4826
4827 static __inline void
4828 igb_rx_discard(struct rx_ring *rxr, int i)
4829 {
4830         struct igb_rx_buf       *rbuf;
4831
4832         rbuf = &rxr->rx_buffers[i];
4833
4834         /* Partially received? Free the chain */
4835         if (rxr->fmp != NULL) {
4836                 rxr->fmp->m_flags |= M_PKTHDR;
4837                 m_freem(rxr->fmp);
4838                 rxr->fmp = NULL;
4839                 rxr->lmp = NULL;
4840         }
4841
4842         /*
4843         ** With advanced descriptors the writeback
4844         ** clobbers the buffer addrs, so it's easier
4845         ** to just free the existing mbufs and take
4846         ** the normal refresh path to get new buffers
4847         ** and mapping.
4848         */
4849         if (rbuf->m_head) {
4850                 m_free(rbuf->m_head);
4851                 rbuf->m_head = NULL;
4852                 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4853         }
4854
4855         if (rbuf->m_pack) {
4856                 m_free(rbuf->m_pack);
4857                 rbuf->m_pack = NULL;
4858                 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4859         }
4860
4861         return;
4862 }
4863
4864 static __inline void
4865 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4866 {
4867
4868         /*
4869          * At the moment LRO is only for IPv4/TCP packets and the TCP checksum
4870          * of the packet should be computed by hardware. Also it should not have
4871          * a VLAN tag in the ethernet header.
4872          */
4873         if (rxr->lro_enabled &&
4874             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4875             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4876             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4877             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4878             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4879             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4880                 /*
4881                  * Send to the stack if:
4882                  *  - LRO not enabled, or
4883                  *  - no LRO resources, or
4884                  *  - lro enqueue fails
4885                  */
4886                 if (rxr->lro.lro_cnt != 0)
4887                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4888                                 return;
4889         }
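             /*
             ** Drop the RX lock while the packet is handed to the stack
             ** so the driver lock is not held across if_input().
             */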
4890         IGB_RX_UNLOCK(rxr);
4891         (*ifp->if_input)(ifp, m);
4892         IGB_RX_LOCK(rxr);
4893 }
4894
4895 /*********************************************************************
4896  *
4897  *  This routine executes in interrupt context. It replenishes
4898  *  the mbufs in the descriptor ring and sends data which has been
4899  *  DMA'd into host memory to the upper layer.
4900  *
4901  *  We loop at most count times if count is > 0, or until done if
4902  *  count < 0.
4903  *
4904  *  Return TRUE if more to clean, FALSE otherwise
4905  *********************************************************************/
4906 static bool
4907 igb_rxeof(struct igb_queue *que, int count, int *done)
4908 {
4909         struct adapter          *adapter = que->adapter;
4910         struct rx_ring          *rxr = que->rxr;
4911         struct ifnet            *ifp = adapter->ifp;
4912         struct lro_ctrl         *lro = &rxr->lro;
4913         struct lro_entry        *queued;
4914         int                     i, processed = 0, rxdone = 0;
4915         u32                     ptype, staterr = 0;
4916         union e1000_adv_rx_desc *cur;
4917
4918         IGB_RX_LOCK(rxr);
4919         /* Sync the ring. */
4920         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4921             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4922
4923 #ifdef DEV_NETMAP
4924         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4925                 IGB_RX_UNLOCK(rxr);
4926                 return (FALSE);
4927         }
4928 #endif /* DEV_NETMAP */
4929
4930         /* Main clean loop */
4931         for (i = rxr->next_to_check; count != 0;) {
4932                 struct mbuf             *sendmp, *mh, *mp;
4933                 struct igb_rx_buf       *rxbuf;
4934                 u16                     hlen, plen, hdr, vtag, pkt_info;
4935                 bool                    eop = FALSE;
4936  
4937                 cur = &rxr->rx_base[i];
4938                 staterr = le32toh(cur->wb.upper.status_error);
4939                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4940                         break;
4941                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4942                         break;
4943                 count--;
4944                 sendmp = mh = mp = NULL;
4945                 cur->wb.upper.status_error = 0;
4946                 rxbuf = &rxr->rx_buffers[i];
4947                 plen = le16toh(cur->wb.upper.length);
4948                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
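                     /*
                     ** On i350/i354 the VLAN tag of loopback (LB) packets
                     ** is reported in network byte order, hence the
                     ** byte-order difference below.
                     */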
4949                 if (((adapter->hw.mac.type == e1000_i350) ||
4950                     (adapter->hw.mac.type == e1000_i354)) &&
4951                     (staterr & E1000_RXDEXT_STATERR_LB))
4952                         vtag = be16toh(cur->wb.upper.vlan);
4953                 else
4954                         vtag = le16toh(cur->wb.upper.vlan);
4955                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4956                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
4957                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4958
4959                 /*
4960                  * Free the frame (all segments) if we're at EOP and
4961                  * it's an error.
4962                  *
4963                  * The datasheet states that EOP + status is only valid for
4964                  * the final segment in a multi-segment frame.
4965                  */
4966                 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4967                         adapter->dropped_pkts++;
4968                         ++rxr->rx_discarded;
4969                         igb_rx_discard(rxr, i);
4970                         goto next_desc;
4971                 }
4972
4973                 /*
4974                 ** The way the hardware is configured to
4975                 ** split, it will ONLY use the header buffer
4976                 ** when header split is enabled, otherwise we
4977                 ** get normal behavior, i.e., both header and
4978                 ** payload are DMA'd into the payload buffer.
4979                 **
4980                 ** The fmp test is to catch the case where a
4981                 ** packet spans multiple descriptors, in that
4982                 ** case only the first header is valid.
4983                 */
4984                 if (rxr->hdr_split && rxr->fmp == NULL) {
4985                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4986                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4987                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4988                         if (hlen > IGB_HDR_BUF)
4989                                 hlen = IGB_HDR_BUF;
4990                         mh = rxr->rx_buffers[i].m_head;
4991                         mh->m_len = hlen;
4992                         /* clear buf pointer for refresh */
4993                         rxbuf->m_head = NULL;
4994                         /*
4995                         ** Get the payload length; this
4996                         ** could be zero if it's a small
4997                         ** packet.
4998                         */
4999                         if (plen > 0) {
5000                                 mp = rxr->rx_buffers[i].m_pack;
5001                                 mp->m_len = plen;
5002                                 mh->m_next = mp;
5003                                 /* clear buf pointer */
5004                                 rxbuf->m_pack = NULL;
5005                                 rxr->rx_split_packets++;
5006                         }
5007                 } else {
5008                         /*
5009                         ** Either no header split, or a
5010                         ** secondary piece of a fragmented
5011                         ** split packet.
5012                         */
5013                         mh = rxr->rx_buffers[i].m_pack;
5014                         mh->m_len = plen;
5015                         /* clear buf info for refresh */
5016                         rxbuf->m_pack = NULL;
5017                 }
5018                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5019
5020                 ++processed; /* So we know when to refresh */
5021
5022                 /* Initial frame - setup */
5023                 if (rxr->fmp == NULL) {
5024                         mh->m_pkthdr.len = mh->m_len;
5025                         /* Save the head of the chain */
5026                         rxr->fmp = mh;
5027                         rxr->lmp = mh;
5028                         if (mp != NULL) {
5029                                 /* Add payload if split */
5030                                 mh->m_pkthdr.len += mp->m_len;
5031                                 rxr->lmp = mh->m_next;
5032                         }
5033                 } else {
5034                         /* Chain mbuf's together */
5035                         rxr->lmp->m_next = mh;
5036                         rxr->lmp = rxr->lmp->m_next;
5037                         rxr->fmp->m_pkthdr.len += mh->m_len;
5038                 }
5039
5040                 if (eop) {
5041                         rxr->fmp->m_pkthdr.rcvif = ifp;
5042                         rxr->rx_packets++;
5043                         /* capture data for AIM */
5044                         rxr->packets++;
5045                         rxr->bytes += rxr->fmp->m_pkthdr.len;
5046                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5047
5048                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5049                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
5050
5051                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5052                             (staterr & E1000_RXD_STAT_VP) != 0) {
5053                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5054                                 rxr->fmp->m_flags |= M_VLANTAG;
5055                         }
5056
5057                         /*
5058                          * In case of multiqueue, we have RXCSUM.PCSD bit set
5059                          * and never cleared. This means we have RSS hash
5060                          * available to be used.
5061                          */
5062                         if (adapter->num_queues > 1) {
5063                                 rxr->fmp->m_pkthdr.flowid = 
5064                                     le32toh(cur->wb.lower.hi_dword.rss);
5065                                 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5066                                         case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5067                                                 M_HASHTYPE_SET(rxr->fmp,
5068                                                     M_HASHTYPE_RSS_TCP_IPV4);
5069                                         break;
5070                                         case E1000_RXDADV_RSSTYPE_IPV4:
5071                                                 M_HASHTYPE_SET(rxr->fmp,
5072                                                     M_HASHTYPE_RSS_IPV4);
5073                                         break;
5074                                         case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5075                                                 M_HASHTYPE_SET(rxr->fmp,
5076                                                     M_HASHTYPE_RSS_TCP_IPV6);
5077                                         break;
5078                                         case E1000_RXDADV_RSSTYPE_IPV6_EX:
5079                                                 M_HASHTYPE_SET(rxr->fmp,
5080                                                     M_HASHTYPE_RSS_IPV6_EX);
5081                                         break;
5082                                         case E1000_RXDADV_RSSTYPE_IPV6:
5083                                                 M_HASHTYPE_SET(rxr->fmp,
5084                                                     M_HASHTYPE_RSS_IPV6);
5085                                         break;
5086                                         case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5087                                                 M_HASHTYPE_SET(rxr->fmp,
5088                                                     M_HASHTYPE_RSS_TCP_IPV6_EX);
5089                                         break;
5090                                         default:
5091                                                 /* XXX fallthrough */
5092                                                 M_HASHTYPE_SET(rxr->fmp,
5093                                                     M_HASHTYPE_OPAQUE);
5094                                 }
5095                         } else {
5096 #ifndef IGB_LEGACY_TX
5097                                 rxr->fmp->m_pkthdr.flowid = que->msix;
5098                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5099 #endif
5100                         }
5101                         sendmp = rxr->fmp;
5102                         /* Make sure to set M_PKTHDR. */
5103                         sendmp->m_flags |= M_PKTHDR;
5104                         rxr->fmp = NULL;
5105                         rxr->lmp = NULL;
5106                 }
5107
5108 next_desc:
5109                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5110                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5111
5112                 /* Advance our pointers to the next descriptor. */
5113                 if (++i == adapter->num_rx_desc)
5114                         i = 0;
5115                 /*
5116                 ** Send to the stack or LRO
5117                 */
5118                 if (sendmp != NULL) {
5119                         rxr->next_to_check = i;
5120                         igb_rx_input(rxr, ifp, sendmp, ptype);
5121                         i = rxr->next_to_check;
5122                         rxdone++;
5123                 }
5124
5125                 /* Every 8 descriptors we go to refresh mbufs */
5126                 if (processed == 8) {
5127                         igb_refresh_mbufs(rxr, i);
5128                         processed = 0;
5129                 }
5130         }
5131
5132         /* Catch any remainders */
5133         if (igb_rx_unrefreshed(rxr))
5134                 igb_refresh_mbufs(rxr, i);
5135
5136         rxr->next_to_check = i;
5137
5138         /*
5139          * Flush any outstanding LRO work
5140          */
5141         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5142                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5143                 tcp_lro_flush(lro, queued);
5144         }
5145
5146         if (done != NULL)
5147                 *done += rxdone;
5148
5149         IGB_RX_UNLOCK(rxr);
5150         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5151 }
5152
5153 /*********************************************************************
5154  *
5155  *  Verify that the hardware indicated that the checksum is valid.
5156  *  Inform the stack about the status of the checksum so that the
5157  *  stack doesn't spend time verifying the checksum itself.
5158  *
5159  *********************************************************************/
5160 static void
5161 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5162 {
5163         u16 status = (u16)staterr;
5164         u8  errors = (u8) (staterr >> 24);
5165         int sctp;
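             /* 'status' above holds the low 16 status bits; 'errors' holds bits 31:24 of the writeback word. */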
5166
5167         /* Ignore Checksum bit is set */
5168         if (status & E1000_RXD_STAT_IXSM) {
5169                 mp->m_pkthdr.csum_flags = 0;
5170                 return;
5171         }
5172
5173         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5174             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5175                 sctp = 1;
5176         else
5177                 sctp = 0;
5178         if (status & E1000_RXD_STAT_IPCS) {
5179                 /* Did it pass? */
5180                 if (!(errors & E1000_RXD_ERR_IPE)) {
5181                         /* IP Checksum Good */
5182                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5183                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5184                 } else
5185                         mp->m_pkthdr.csum_flags = 0;
5186         }
5187
5188         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5189                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5190 #if __FreeBSD_version >= 800000
5191                 if (sctp) /* reassign */
5192                         type = CSUM_SCTP_VALID;
5193 #endif
5194                 /* Did it pass? */
5195                 if (!(errors & E1000_RXD_ERR_TCPE)) {
5196                         mp->m_pkthdr.csum_flags |= type;
5197                         if (sctp == 0)
5198                                 mp->m_pkthdr.csum_data = htons(0xffff);
5199                 }
5200         }
5201         return;
5202 }
5203
5204 /*
5205  * This routine is run via a vlan
5206  * config EVENT
5207  */
5208 static void
5209 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5210 {
5211         struct adapter  *adapter = ifp->if_softc;
5212         u32             index, bit;
5213
5214         if (ifp->if_softc !=  arg)   /* Not our event */
5215                 return;
5216
5217         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5218                 return;
5219
5220         IGB_CORE_LOCK(adapter);
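             /*
             ** The shadow VFTA is 128 32-bit words: bits 11:5 of the
             ** VLAN tag select the word, the low 5 bits select the bit.
             */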
5221         index = (vtag >> 5) & 0x7F;
5222         bit = vtag & 0x1F;
5223         adapter->shadow_vfta[index] |= (1 << bit);
5224         ++adapter->num_vlans;
5225         /* Change hw filter setting */
5226         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5227                 igb_setup_vlan_hw_support(adapter);
5228         IGB_CORE_UNLOCK(adapter);
5229 }
5230
5231 /*
5232  * This routine is run via a vlan
5233  * unconfig EVENT
5234  */
5235 static void
5236 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5237 {
5238         struct adapter  *adapter = ifp->if_softc;
5239         u32             index, bit;
5240
5241         if (ifp->if_softc !=  arg)
5242                 return;
5243
5244         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5245                 return;
5246
5247         IGB_CORE_LOCK(adapter);
5248         index = (vtag >> 5) & 0x7F;
5249         bit = vtag & 0x1F;
5250         adapter->shadow_vfta[index] &= ~(1 << bit);
5251         --adapter->num_vlans;
5252         /* Change hw filter setting */
5253         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5254                 igb_setup_vlan_hw_support(adapter);
5255         IGB_CORE_UNLOCK(adapter);
5256 }
5257
5258 static void
5259 igb_setup_vlan_hw_support(struct adapter *adapter)
5260 {
5261         struct e1000_hw *hw = &adapter->hw;
5262         struct ifnet    *ifp = adapter->ifp;
5263         u32             reg;
5264
5265         if (adapter->vf_ifp) {
5266                 e1000_rlpml_set_vf(hw,
5267                     adapter->max_frame_size + VLAN_TAG_SIZE);
5268                 return;
5269         }
5270
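             /* CTRL.VME turns on 802.1Q VLAN processing in the MAC. */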
5271         reg = E1000_READ_REG(hw, E1000_CTRL);
5272         reg |= E1000_CTRL_VME;
5273         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5274
5275         /* Enable the Filter Table */
5276         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5277                 reg = E1000_READ_REG(hw, E1000_RCTL);
5278                 reg &= ~E1000_RCTL_CFIEN;
5279                 reg |= E1000_RCTL_VFE;
5280                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5281         }
5282
5283         /* Update the frame size */
5284         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5285             adapter->max_frame_size + VLAN_TAG_SIZE);
5286
5287         /* Don't bother with table if no vlans */
5288         if ((adapter->num_vlans == 0) ||
5289             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5290                 return;
5291         /*
5292         ** A soft reset zeroes out the VFTA, so
5293         ** we need to repopulate it now.
5294         */
5295         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5296                 if (adapter->shadow_vfta[i] != 0) {
5297                         if (adapter->vf_ifp)
5298                                 e1000_vfta_set_vf(hw,
5299                                     adapter->shadow_vfta[i], TRUE);
5300                         else
5301                                 e1000_write_vfta(hw,
5302                                     i, adapter->shadow_vfta[i]);
5303                 }
5304 }
5305
5306 static void
5307 igb_enable_intr(struct adapter *adapter)
5308 {
5309         /* With RSS set up what to auto clear */
5310         if (adapter->msix_mem) {
5311                 u32 mask = (adapter->que_mask | adapter->link_mask);
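                     /*
                     ** EIAC: extended interrupt auto-clear, EIAM: auto-mask,
                     ** EIMS: interrupt enable for the queue and link vectors.
                     */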
5312                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5313                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5314                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5315                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5316                     E1000_IMS_LSC);
5317         } else {
5318                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5319                     IMS_ENABLE_MASK);
5320         }
5321         E1000_WRITE_FLUSH(&adapter->hw);
5322
5323         return;
5324 }
5325
5326 static void
5327 igb_disable_intr(struct adapter *adapter)
5328 {
5329         if (adapter->msix_mem) {
5330                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5331                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5332         } 
5333         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5334         E1000_WRITE_FLUSH(&adapter->hw);
5335         return;
5336 }
5337
5338 /*
5339  * Bit of a misnomer: what this really means is
5340  * to enable OS management of the system, i.e.
5341  * to disable special hardware management features.
5342  */
5343 static void
5344 igb_init_manageability(struct adapter *adapter)
5345 {
5346         if (adapter->has_manage) {
5347                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5348                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5349
5350                 /* disable hardware interception of ARP */
5351                 manc &= ~(E1000_MANC_ARP_EN);
5352
5353                 /* enable receiving management packets to the host */
5354                 manc |= E1000_MANC_EN_MNG2HOST;
5355                 manc2h |= 1 << 5;  /* Mng Port 623 */
5356                 manc2h |= 1 << 6;  /* Mng Port 664 */
5357                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5358                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5359         }
5360 }
5361
5362 /*
5363  * Give control back to hardware management
5364  * controller if there is one.
5365  */
5366 static void
5367 igb_release_manageability(struct adapter *adapter)
5368 {
5369         if (adapter->has_manage) {
5370                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5371
5372                 /* re-enable hardware interception of ARP */
5373                 manc |= E1000_MANC_ARP_EN;
5374                 manc &= ~E1000_MANC_EN_MNG2HOST;
5375
5376                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5377         }
5378 }
5379
5380 /*
5381  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5382  * For ASF and Pass Through versions of f/w this means that
5383  * the driver is loaded. 
5384  *
5385  */
5386 static void
5387 igb_get_hw_control(struct adapter *adapter)
5388 {
5389         u32 ctrl_ext;
5390
5391         if (adapter->vf_ifp)
5392                 return;
5393
5394         /* Let firmware know the driver has taken over */
5395         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5396         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5397             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5398 }
5399
5400 /*
5401  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5402  * For ASF and Pass Through versions of f/w this means that the
5403  * driver is no longer loaded.
5404  *
5405  */
5406 static void
5407 igb_release_hw_control(struct adapter *adapter)
5408 {
5409         u32 ctrl_ext;
5410
5411         if (adapter->vf_ifp)
5412                 return;
5413
5414         /* Let firmware take over control of h/w */
5415         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5416         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5417             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5418 }
5419
5420 static int
5421 igb_is_valid_ether_addr(uint8_t *addr)
5422 {
5423         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5424
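             /* Reject multicast addresses (low bit of the first octet set) and the all-zero address. */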
5425         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5426                 return (FALSE);
5427         }
5428
5429         return (TRUE);
5430 }
5431
5432
5433 /*
5434  * Enable PCI Wake On Lan capability
5435  */
5436 static void
5437 igb_enable_wakeup(device_t dev)
5438 {
5439         u16     cap, status;
5440         u8      id;
5441
5442         /* First find the capabilities pointer */
5443         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5444         /* Read the PM Capabilities */
5445         id = pci_read_config(dev, cap, 1);
5446         if (id != PCIY_PMG)     /* Something wrong */
5447                 return;
5448         /* OK, we have the power capabilities, so
5449            now get the status register */
5450         cap += PCIR_POWER_STATUS;
5451         status = pci_read_config(dev, cap, 2);
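             /*
             ** PME_Status is write-one-to-clear, so setting it here clears
             ** any stale wake event while PME_En arms wake-on-LAN.
             */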
5452         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5453         pci_write_config(dev, cap, status, 2);
5454         return;
5455 }
5456
5457 static void
5458 igb_led_func(void *arg, int onoff)
5459 {
5460         struct adapter  *adapter = arg;
5461
5462         IGB_CORE_LOCK(adapter);
5463         if (onoff) {
5464                 e1000_setup_led(&adapter->hw);
5465                 e1000_led_on(&adapter->hw);
5466         } else {
5467                 e1000_led_off(&adapter->hw);
5468                 e1000_cleanup_led(&adapter->hw);
5469         }
5470         IGB_CORE_UNLOCK(adapter);
5471 }
5472
5473 static uint64_t
5474 igb_get_vf_counter(if_t ifp, ift_counter cnt)
5475 {
5476         struct adapter *adapter;
5477         struct e1000_vf_stats *stats;
5478 #ifndef IGB_LEGACY_TX
5479         struct tx_ring *txr;
5480         uint64_t rv;
5481 #endif
5482
5483         adapter = if_getsoftc(ifp);
5484         stats = (struct e1000_vf_stats *)adapter->stats;
5485
5486         switch (cnt) {
5487         case IFCOUNTER_IPACKETS:
5488                 return (stats->gprc);
5489         case IFCOUNTER_OPACKETS:
5490                 return (stats->gptc);
5491         case IFCOUNTER_IBYTES:
5492                 return (stats->gorc);
5493         case IFCOUNTER_OBYTES:
5494                 return (stats->gotc);
5495         case IFCOUNTER_IMCASTS:
5496                 return (stats->mprc);
5497         case IFCOUNTER_IERRORS:
5498                 return (adapter->dropped_pkts);
5499         case IFCOUNTER_OERRORS:
5500                 return (adapter->watchdog_events);
5501 #ifndef IGB_LEGACY_TX
5502         case IFCOUNTER_OQDROPS:
5503                 rv = 0;
5504                 txr = adapter->tx_rings;
5505                 for (int i = 0; i < adapter->num_queues; i++, txr++)
5506                         rv += txr->br->br_drops;
5507                 return (rv);
5508 #endif
5509         default:
5510                 return (if_get_counter_default(ifp, cnt));
5511         }
5512 }
5513
5514 static uint64_t
5515 igb_get_counter(if_t ifp, ift_counter cnt)
5516 {
5517         struct adapter *adapter;
5518         struct e1000_hw_stats *stats;
5519 #ifndef IGB_LEGACY_TX
5520         struct tx_ring *txr;
5521         uint64_t rv;
5522 #endif
5523
5524         adapter = if_getsoftc(ifp);
5525         if (adapter->vf_ifp)
5526                 return (igb_get_vf_counter(ifp, cnt));
5527
5528         stats = (struct e1000_hw_stats *)adapter->stats;
5529
5530         switch (cnt) {
5531         case IFCOUNTER_IPACKETS:
5532                 return (stats->gprc);
5533         case IFCOUNTER_OPACKETS:
5534                 return (stats->gptc);
5535         case IFCOUNTER_IBYTES:
5536                 return (stats->gorc);
5537         case IFCOUNTER_OBYTES:
5538                 return (stats->gotc);
5539         case IFCOUNTER_IMCASTS:
5540                 return (stats->mprc);
5541         case IFCOUNTER_OMCASTS:
5542                 return (stats->mptc);
5543         case IFCOUNTER_IERRORS:
5544                 return (adapter->dropped_pkts + stats->rxerrc +
5545                     stats->crcerrs + stats->algnerrc +
5546                     stats->ruc + stats->roc + stats->cexterr);
5547         case IFCOUNTER_OERRORS:
5548                 return (stats->ecol + stats->latecol +
5549                     adapter->watchdog_events);
5550         case IFCOUNTER_COLLISIONS:
5551                 return (stats->colc);
5552         case IFCOUNTER_IQDROPS:
5553                 return (stats->mpc);
5554 #ifndef IGB_LEGACY_TX
5555         case IFCOUNTER_OQDROPS:
5556                 rv = 0;
5557                 txr = adapter->tx_rings;
5558                 for (int i = 0; i < adapter->num_queues; i++, txr++)
5559                         rv += txr->br->br_drops;
5560                 return (rv);
5561 #endif
5562         default:
5563                 return (if_get_counter_default(ifp, cnt));
5564         }
5565 }
5566
5567 /**********************************************************************
5568  *
5569  *  Update the board statistics counters.
5570  *
5571  **********************************************************************/
5572 static void
5573 igb_update_stats_counters(struct adapter *adapter)
5574 {
5575         struct e1000_hw         *hw = &adapter->hw;
5576         struct e1000_hw_stats   *stats;
5577
5578         /*
5579         ** The virtual function adapter has only a small,
5580         ** controlled set of stats; update only those
5581         ** and return.
5582         */
5583         if (adapter->vf_ifp) {
5584                 igb_update_vf_stats_counters(adapter);
5585                 return;
5586         }
5587
5588         stats = (struct e1000_hw_stats  *)adapter->stats;
5589
5590         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5591            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5592                 stats->symerrs +=
5593                     E1000_READ_REG(hw, E1000_SYMERRS);
5594                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5595         }
5596
5597         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5598         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5599         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5600         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5601
5602         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5603         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5604         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5605         stats->dc += E1000_READ_REG(hw, E1000_DC);
5606         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5607         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5608         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5609         /*
5610         ** For watchdog management we need to know if we have been
5611         ** paused during the last interval, so capture that here.
5612         */ 
5613         adapter->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC);
5614         stats->xoffrxc += adapter->pause_frames;
5615         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5616         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5617         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5618         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5619         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5620         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5621         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5622         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5623         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5624         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5625         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5626         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5627
5628         /* For the 64-bit byte counters the low dword must be read first. */
5629         /* Both registers clear on the read of the high dword */
5630
5631         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5632             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5633         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5634             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5635
5636         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5637         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5638         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5639         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5640         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5641
5642         stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5643         stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5644         stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5645
5646         stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5647             ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5648         stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5649             ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5650
5651         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5652         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5653         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5654         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5655         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5656         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5657         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5658         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5659         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5660         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5661
5662         /* Interrupt Counts */
5663
5664         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5665         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5666         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5667         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5668         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5669         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5670         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5671         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5672         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5673
5674         /* Host to Card Statistics */
5675
5676         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5677         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5678         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5679         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5680         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5681         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5682         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5683         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5684             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5685         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5686             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5687         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5688         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5689         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5690
5691         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5692         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5693         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5694         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5695         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5696         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5697
5698         /* Driver specific counters */
5699         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5700         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5701         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5702         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5703         adapter->packet_buf_alloc_tx =
5704             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5705         adapter->packet_buf_alloc_rx =
5706             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5707 }
5708
5709
5710 /**********************************************************************
5711  *
5712  *  Initialize the VF board statistics counters.
5713  *
5714  **********************************************************************/
5715 static void
5716 igb_vf_init_stats(struct adapter *adapter)
5717 {
5718         struct e1000_hw *hw = &adapter->hw;
5719         struct e1000_vf_stats   *stats;
5720
5721         stats = (struct e1000_vf_stats  *)adapter->stats;
5722         if (stats == NULL)
5723                 return;
5724         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5725         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5726         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5727         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5728         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5729 }
5730  
5731 /**********************************************************************
5732  *
5733  *  Update the VF board statistics counters.
5734  *
5735  **********************************************************************/
5736 static void
5737 igb_update_vf_stats_counters(struct adapter *adapter)
5738 {
5739         struct e1000_hw *hw = &adapter->hw;
5740         struct e1000_vf_stats   *stats;
5741
5742         if (adapter->link_speed == 0)
5743                 return;
5744
5745         stats = (struct e1000_vf_stats  *)adapter->stats;
5746
5747         UPDATE_VF_REG(E1000_VFGPRC,
5748             stats->last_gprc, stats->gprc);
5749         UPDATE_VF_REG(E1000_VFGORC,
5750             stats->last_gorc, stats->gorc);
5751         UPDATE_VF_REG(E1000_VFGPTC,
5752             stats->last_gptc, stats->gptc);
5753         UPDATE_VF_REG(E1000_VFGOTC,
5754             stats->last_gotc, stats->gotc);
5755         UPDATE_VF_REG(E1000_VFMPRC,
5756             stats->last_mprc, stats->mprc);
5757 }
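
/*
 * For illustration only: UPDATE_VF_REG is defined in if_igb.h, outside this
 * excerpt.  The last_* values seeded in igb_vf_init_stats() suggest the VF
 * counters are rolling 32-bit registers rather than clear-on-read, so the
 * macro presumably folds each raw reading into the 64-bit software counter
 * using the previously sampled value, roughly along these lines (a hedged
 * sketch, not the actual definition):
 *
 *	u32 cur = E1000_READ_REG(hw, reg);
 *	count += (u32)(cur - last);	(unsigned math absorbs a 32-bit wrap)
 *	last = cur;
 */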
5758
5759 /* Export a single 32-bit register via a read-only sysctl. */
5760 static int
5761 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5762 {
5763         struct adapter *adapter;
5764         u_int val;
5765
5766         adapter = oidp->oid_arg1;
5767         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5768         return (sysctl_handle_int(oidp, &val, 0, req));
5769 }
5770
5771 /*
5772 **  Tuneable interrupt rate handler
5773 */
5774 static int
5775 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5776 {
5777         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5778         int                     error;
5779         u32                     reg, usec, rate;
5780                         
5781         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5782         usec = ((reg & 0x7FFC) >> 2);
5783         if (usec > 0)
5784                 rate = 1000000 / usec;
5785         else
5786                 rate = 0;
5787         error = sysctl_handle_int(oidp, &rate, 0, req);
5788         if (error || !req->newptr)
5789                 return (error);
5790         return (0);
5791 }
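
/*
 * Worked example of the conversion above: the EITR interval field
 * (bits 14:2, mask 0x7FFC) is treated here as microseconds between
 * interrupts, so a field value of 125 reports a rate of
 * 1000000 / 125 = 8000 interrupts per second, while a field value of 0
 * reports a rate of 0 (throttling effectively off).
 */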
5792
5793 /*
5794  * Add sysctl variables, one per statistic, to the system.
5795  */
5796 static void
5797 igb_add_hw_stats(struct adapter *adapter)
5798 {
5799         device_t dev = adapter->dev;
5800
5801         struct tx_ring *txr = adapter->tx_rings;
5802         struct rx_ring *rxr = adapter->rx_rings;
5803
5804         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5805         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5806         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5807         struct e1000_hw_stats *stats = adapter->stats;
5808
5809         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5810         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5811
5812 #define QUEUE_NAME_LEN 32
5813         char namebuf[QUEUE_NAME_LEN];
5814
5815         /* Driver Statistics */
5816         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 
5817                         CTLFLAG_RD, &adapter->link_irq,
5818                         "Link MSIX IRQ Handled");
5819         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5820                         CTLFLAG_RD, &adapter->dropped_pkts,
5821                         "Driver dropped packets");
5822         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5823                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5824                         "Driver tx dma failure in xmit");
5825         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5826                         CTLFLAG_RD, &adapter->rx_overruns,
5827                         "RX overruns");
5828         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5829                         CTLFLAG_RD, &adapter->watchdog_events,
5830                         "Watchdog timeouts");
5831
5832         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5833                         CTLFLAG_RD, &adapter->device_control,
5834                         "Device Control Register");
5835         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5836                         CTLFLAG_RD, &adapter->rx_control,
5837                         "Receiver Control Register");
5838         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5839                         CTLFLAG_RD, &adapter->int_mask,
5840                         "Interrupt Mask");
5841         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5842                         CTLFLAG_RD, &adapter->eint_mask,
5843                         "Extended Interrupt Mask");
5844         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5845                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5846                         "Transmit Buffer Packet Allocation");
5847         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5848                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5849                         "Receive Buffer Packet Allocation");
5850         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5851                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5852                         "Flow Control High Watermark");
5853         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5854                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5855                         "Flow Control Low Watermark");
5856
5857         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5858                 struct lro_ctrl *lro = &rxr->lro;
5859
5860                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5861                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5862                                             CTLFLAG_RD, NULL, "Queue Name");
5863                 queue_list = SYSCTL_CHILDREN(queue_node);
5864
5865                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5866                                 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5867                                 sizeof(&adapter->queues[i]),
5868                                 igb_sysctl_interrupt_rate_handler,
5869                                 "IU", "Interrupt Rate");
5870
5871                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5872                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5873                                 igb_sysctl_reg_handler, "IU",
5874                                 "Transmit Descriptor Head");
5875                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5876                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5877                                 igb_sysctl_reg_handler, "IU",
5878                                 "Transmit Descriptor Tail");
5879                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5880                                 CTLFLAG_RD, &txr->no_desc_avail,
5881                                 "Queue Descriptors Unavailable");
5882                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5883                                 CTLFLAG_RD, &txr->total_packets,
5884                                 "Queue Packets Transmitted");
5885
5886                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5887                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5888                                 igb_sysctl_reg_handler, "IU",
5889                                 "Receive Descriptor Head");
5890                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5891                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5892                                 igb_sysctl_reg_handler, "IU",
5893                                 "Receive Descriptor Tail");
5894                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5895                                 CTLFLAG_RD, &rxr->rx_packets,
5896                                 "Queue Packets Received");
5897                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5898                                 CTLFLAG_RD, &rxr->rx_bytes,
5899                                 "Queue Bytes Received");
5900                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5901                                 CTLFLAG_RD, &lro->lro_queued, 0,
5902                                 "LRO Queued");
5903                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5904                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5905                                 "LRO Flushed");
5906         }
5907
5908         /* MAC stats get their own sub node */
5909
5910         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5911                                     CTLFLAG_RD, NULL, "MAC Statistics");
5912         stat_list = SYSCTL_CHILDREN(stat_node);
5913
5914         /*
5915         ** VF adapter has a very limited set of stats
5916         ** since it's not managing the metal, so to speak.
5917         */
5918         if (adapter->vf_ifp) {
5919                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5920                                 CTLFLAG_RD, &stats->gprc,
5921                                 "Good Packets Received");
5922                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5923                                 CTLFLAG_RD, &stats->gptc,
5924                                 "Good Packets Transmitted");
5925                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5926                                 CTLFLAG_RD, &stats->gorc,
5927                                 "Good Octets Received");
5928                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5929                                 CTLFLAG_RD, &stats->gotc,
5930                                 "Good Octets Transmitted");
5931                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5932                                 CTLFLAG_RD, &stats->mprc,
5933                                 "Multicast Packets Received");
5934                 return;
5935         }
5936
5937         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5938                         CTLFLAG_RD, &stats->ecol,
5939                         "Excessive collisions");
5940         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5941                         CTLFLAG_RD, &stats->scc,
5942                         "Single collisions");
5943         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5944                         CTLFLAG_RD, &stats->mcc,
5945                         "Multiple collisions");
5946         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5947                         CTLFLAG_RD, &stats->latecol,
5948                         "Late collisions");
5949         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5950                         CTLFLAG_RD, &stats->colc,
5951                         "Collision Count");
5952         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5953                         CTLFLAG_RD, &stats->symerrs,
5954                         "Symbol Errors");
5955         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5956                         CTLFLAG_RD, &stats->sec,
5957                         "Sequence Errors");
5958         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5959                         CTLFLAG_RD, &stats->dc,
5960                         "Defer Count");
5961         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5962                         CTLFLAG_RD, &stats->mpc,
5963                         "Missed Packets");
5964         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
5965                         CTLFLAG_RD, &stats->rlec,
5966                         "Receive Length Errors");
5967         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5968                         CTLFLAG_RD, &stats->rnbc,
5969                         "Receive No Buffers");
5970         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5971                         CTLFLAG_RD, &stats->ruc,
5972                         "Receive Undersize");
5973         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5974                         CTLFLAG_RD, &stats->rfc,
5975                         "Fragmented Packets Received");
5976         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5977                         CTLFLAG_RD, &stats->roc,
5978                         "Oversized Packets Received");
5979         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5980                         CTLFLAG_RD, &stats->rjc,
5981                         "Received Jabber");
5982         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5983                         CTLFLAG_RD, &stats->rxerrc,
5984                         "Receive Errors");
5985         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5986                         CTLFLAG_RD, &stats->crcerrs,
5987                         "CRC errors");
5988         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5989                         CTLFLAG_RD, &stats->algnerrc,
5990                         "Alignment Errors");
5991         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
5992                         CTLFLAG_RD, &stats->tncrs,
5993                         "Transmit with No CRS");
5994         /* On 82575 these are collision counts */
5995         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5996                         CTLFLAG_RD, &stats->cexterr,
5997                         "Collision/Carrier extension errors");
5998         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5999                         CTLFLAG_RD, &stats->xonrxc,
6000                         "XON Received");
6001         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6002                         CTLFLAG_RD, &stats->xontxc,
6003                         "XON Transmitted");
6004         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6005                         CTLFLAG_RD, &stats->xoffrxc,
6006                         "XOFF Received");
6007         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6008                         CTLFLAG_RD, &stats->xofftxc,
6009                         "XOFF Transmitted");
6010         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6011                         CTLFLAG_RD, &stats->fcruc,
6012                         "Unsupported Flow Control Received");
6013         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6014                         CTLFLAG_RD, &stats->mgprc,
6015                         "Management Packets Received");
6016         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6017                         CTLFLAG_RD, &stats->mgpdc,
6018                         "Management Packets Dropped");
6019         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6020                         CTLFLAG_RD, &stats->mgptc,
6021                         "Management Packets Transmitted");
6022         /* Packet Reception Stats */
6023         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6024                         CTLFLAG_RD, &stats->tpr,
6025                         "Total Packets Received");
6026         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6027                         CTLFLAG_RD, &stats->gprc,
6028                         "Good Packets Received");
6029         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6030                         CTLFLAG_RD, &stats->bprc,
6031                         "Broadcast Packets Received");
6032         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6033                         CTLFLAG_RD, &stats->mprc,
6034                         "Multicast Packets Received");
6035         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6036                         CTLFLAG_RD, &stats->prc64,
6037                         "64 byte frames received");
6038         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6039                         CTLFLAG_RD, &stats->prc127,
6040                         "65-127 byte frames received");
6041         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6042                         CTLFLAG_RD, &stats->prc255,
6043                         "128-255 byte frames received");
6044         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6045                         CTLFLAG_RD, &stats->prc511,
6046                         "256-511 byte frames received");
6047         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6048                         CTLFLAG_RD, &stats->prc1023,
6049                         "512-1023 byte frames received");
6050         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6051                         CTLFLAG_RD, &stats->prc1522,
6052                         "1024-1522 byte frames received");
6053         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
6054                         CTLFLAG_RD, &stats->gorc, 
6055                         "Good Octets Received");
6056         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", 
6057                         CTLFLAG_RD, &stats->tor, 
6058                         "Total Octets Received");
6059
6060         /* Packet Transmission Stats */
6061         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
6062                         CTLFLAG_RD, &stats->gotc, 
6063                         "Good Octets Transmitted"); 
6064         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", 
6065                         CTLFLAG_RD, &stats->tot, 
6066                         "Total Octets Transmitted");
6067         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6068                         CTLFLAG_RD, &stats->tpt,
6069                         "Total Packets Transmitted");
6070         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6071                         CTLFLAG_RD, &stats->gptc,
6072                         "Good Packets Transmitted");
6073         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6074                         CTLFLAG_RD, &stats->bptc,
6075                         "Broadcast Packets Transmitted");
6076         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6077                         CTLFLAG_RD, &stats->mptc,
6078                         "Multicast Packets Transmitted");
6079         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6080                         CTLFLAG_RD, &stats->ptc64,
6081                         "64 byte frames transmitted");
6082         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6083                         CTLFLAG_RD, &stats->ptc127,
6084                         "65-127 byte frames transmitted");
6085         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6086                         CTLFLAG_RD, &stats->ptc255,
6087                         "128-255 byte frames transmitted");
6088         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6089                         CTLFLAG_RD, &stats->ptc511,
6090                         "256-511 byte frames transmitted");
6091         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6092                         CTLFLAG_RD, &stats->ptc1023,
6093                         "512-1023 byte frames transmitted");
6094         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6095                         CTLFLAG_RD, &stats->ptc1522,
6096                         "1024-1522 byte frames transmitted");
6097         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6098                         CTLFLAG_RD, &stats->tsctc,
6099                         "TSO Contexts Transmitted");
6100         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6101                         CTLFLAG_RD, &stats->tsctfc,
6102                         "TSO Contexts Failed");
6103
6104
6105         /* Interrupt Stats */
6106
6107         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
6108                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
6109         int_list = SYSCTL_CHILDREN(int_node);
6110
6111         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6112                         CTLFLAG_RD, &stats->iac,
6113                         "Interrupt Assertion Count");
6114
6115         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6116                         CTLFLAG_RD, &stats->icrxptc,
6117                         "Interrupt Cause Rx Pkt Timer Expire Count");
6118
6119         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6120                         CTLFLAG_RD, &stats->icrxatc,
6121                         "Interrupt Cause Rx Abs Timer Expire Count");
6122
6123         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6124                         CTLFLAG_RD, &stats->ictxptc,
6125                         "Interrupt Cause Tx Pkt Timer Expire Count");
6126
6127         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6128                         CTLFLAG_RD, &stats->ictxatc,
6129                         "Interrupt Cause Tx Abs Timer Expire Count");
6130
6131         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6132                         CTLFLAG_RD, &stats->ictxqec,
6133                         "Interrupt Cause Tx Queue Empty Count");
6134
6135         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6136                         CTLFLAG_RD, &stats->ictxqmtc,
6137                         "Interrupt Cause Tx Queue Min Thresh Count");
6138
6139         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6140                         CTLFLAG_RD, &stats->icrxdmtc,
6141                         "Interrupt Cause Rx Desc Min Thresh Count");
6142
6143         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6144                         CTLFLAG_RD, &stats->icrxoc,
6145                         "Interrupt Cause Receiver Overrun Count");
6146
6147         /* Host to Card Stats */
6148
6149         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
6150                                     CTLFLAG_RD, NULL, 
6151                                     "Host to Card Statistics");
6152
6153         host_list = SYSCTL_CHILDREN(host_node);
6154
6155         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6156                         CTLFLAG_RD, &stats->cbtmpc,
6157                         "Circuit Breaker Tx Packet Count");
6158
6159         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6160                         CTLFLAG_RD, &stats->htdpmc,
6161                         "Host Transmit Discarded Packets");
6162
6163         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6164                         CTLFLAG_RD, &stats->rpthc,
6165                         "Rx Packets To Host");
6166
6167         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6168                         CTLFLAG_RD, &stats->cbrmpc,
6169                         "Circuit Breaker Rx Packet Count");
6170
6171         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6172                         CTLFLAG_RD, &stats->cbrdpc,
6173                         "Circuit Breaker Rx Dropped Count");
6174
6175         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6176                         CTLFLAG_RD, &stats->hgptc,
6177                         "Host Good Packets Tx Count");
6178
6179         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6180                         CTLFLAG_RD, &stats->htcbdpc,
6181                         "Host Tx Circuit Breaker Dropped Count");
6182
6183         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6184                         CTLFLAG_RD, &stats->hgorc,
6185                         "Host Good Octets Received Count");
6186
6187         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6188                         CTLFLAG_RD, &stats->hgotc,
6189                         "Host Good Octets Transmit Count");
6190
6191         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6192                         CTLFLAG_RD, &stats->lenerrs,
6193                         "Length Errors");
6194
6195         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6196                         CTLFLAG_RD, &stats->scvpc,
6197                         "SerDes/SGMII Code Violation Pkt Count");
6198
6199         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6200                         CTLFLAG_RD, &stats->hrmpc,
6201                         "Header Redirection Missed Packet Count");
6202 }
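
/*
 * Usage sketch: the nodes added above hang off the device's sysctl tree,
 * which for this driver is normally dev.igb.<unit>.  Assuming unit 0, a
 * few of the statistics could be read from userland as:
 *
 *	# sysctl dev.igb.0.mac_stats.good_pkts_recvd
 *	# sysctl dev.igb.0.queue0.tx_packets
 *	# sysctl dev.igb.0.dropped
 *
 * The unit number depends on probe order; the leaf names match the strings
 * passed to SYSCTL_ADD_* above.
 */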
6203
6204
6205 /**********************************************************************
6206  *
6207  *  This routine provides a way to dump out the adapter eeprom,
6208  *  often a useful debug/service tool. Only the first 32 words are
6209  *  dumped; the data that matters lies within that range.
6210  *
6211  **********************************************************************/
6212 static int
6213 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6214 {
6215         struct adapter *adapter;
6216         int error;
6217         int result;
6218
6219         result = -1;
6220         error = sysctl_handle_int(oidp, &result, 0, req);
6221
6222         if (error || !req->newptr)
6223                 return (error);
6224
6225         /*
6226          * This value will cause a hex dump of the
6227          * first 32 16-bit words of the EEPROM to
6228          * the screen.
6229          */
6230         if (result == 1) {
6231                 adapter = (struct adapter *)arg1;
6232                 igb_print_nvm_info(adapter);
6233         }
6234
6235         return (error);
6236 }
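
/*
 * Usage sketch: writing 1 to the sysctl node backed by this handler triggers
 * the EEPROM dump below.  Assuming the node is registered as "nvm" under the
 * device tree (the registration is outside this excerpt):
 *
 *	# sysctl dev.igb.0.nvm=1
 */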
6237
6238 static void
6239 igb_print_nvm_info(struct adapter *adapter)
6240 {
6241         u16     eeprom_data;
6242         int     i, j, row = 0;
6243
6244         /* It's a bit crude, but it gets the job done */
6245         printf("\nInterface EEPROM Dump:\n");
6246         printf("Offset\n0x0000  ");
6247         for (i = 0, j = 0; i < 32; i++, j++) {
6248                 if (j == 8) { /* Make the offset block */
6249                         j = 0; ++row;
6250                         printf("\n0x00%x0  ", row);
6251                 }
6252                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6253                 printf("%04x ", eeprom_data);
6254         }
6255         printf("\n");
6256 }
6257
6258 static void
6259 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6260         const char *description, int *limit, int value)
6261 {
6262         *limit = value;
6263         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6264             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6265             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6266 }
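
/*
 * Illustrative call (hypothetical names): this helper both seeds a driver
 * limit and exposes it as a read/write integer under the device tree, e.g.
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max descriptors processed per rx interrupt",
 *	    &adapter->rx_process_limit, 100);
 *
 * would create dev.igb.<unit>.rx_processing_limit initialized to 100.
 */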
6267
6268 /*
6269 ** Set flow control using sysctl:
6270 ** Flow control values:
6271 **      0 - off
6272 **      1 - rx pause
6273 **      2 - tx pause
6274 **      3 - full
6275 */
6276 static int
6277 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6278 {
6279         struct adapter  *adapter = (struct adapter *) arg1;
6280         int             input = adapter->fc; /* report the current per-adapter mode */
6281         int             error;
6282
6283         error = sysctl_handle_int(oidp, &input, 0, req);
6284
6285         if ((error) || (req->newptr == NULL))
6286                 return (error);
6287
6288         switch (input) {
6289                 case e1000_fc_rx_pause:
6290                 case e1000_fc_tx_pause:
6291                 case e1000_fc_full:
6292                 case e1000_fc_none:
6293                         adapter->hw.fc.requested_mode = input;
6294                         adapter->fc = input;
6295                         break;
6296                 default:
6297                         /* Do nothing */
6298                         return (error);
6299         }
6300
6301         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6302         e1000_force_mac_fc(&adapter->hw);
6303         /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6304         return (error);
6305 }
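
/*
 * Usage sketch: assuming this handler is registered under the device tree
 * with the node name "fc" (the registration is outside this excerpt), the
 * flow control mode could be changed and read back from userland with:
 *
 *	# sysctl dev.igb.0.fc=3		(request full flow control)
 *	# sysctl dev.igb.0.fc
 *
 * or programmatically via sysctlbyname(3):
 *
 *	int full = 3;
 *	sysctlbyname("dev.igb.0.fc", NULL, NULL, &full, sizeof(full));
 */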
6306
6307 /*
6308 ** Manage DMA Coalesce:
6309 ** Control values:
6310 **      0/1 - off/on
6311 **      Legal timer values are:
6312 **      Legal timer values are:
6313 **      250, 500, and 1000-10000 in steps of 1000
6314 static int
6315 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6316 {
6317         struct adapter *adapter = (struct adapter *) arg1;
6318         int             error;
6319
6320         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6321
6322         if ((error) || (req->newptr == NULL))
6323                 return (error);
6324
6325         switch (adapter->dmac) {
6326                 case 0:
6327                         /* Disabling */
6328                         break;
6329                 case 1: /* Just enable and use default */
6330                         adapter->dmac = 1000;
6331                         break;
6332                 case 250:
6333                 case 500:
6334                 case 1000:
6335                 case 2000:
6336                 case 3000:
6337                 case 4000:
6338                 case 5000:
6339                 case 6000:
6340                 case 7000:
6341                 case 8000:
6342                 case 9000:
6343                 case 10000:
6344                         /* Legal values - allow */
6345                         break;
6346                 default:
6347                         /* Illegal value, reset to disabled and reject */
6348                         adapter->dmac = 0;
6349                         return (EINVAL);
6350         }
6351         /* Reinit the interface */
6352         igb_init(adapter);
6353         return (error);
6354 }
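
/*
 * Usage sketch: assuming the node is registered as "dmac" under the device
 * tree (registration not shown here), DMA coalescing could be enabled with
 * the default timer, given an explicit value, or disabled:
 *
 *	# sysctl dev.igb.0.dmac=1	(enable, handler substitutes 1000)
 *	# sysctl dev.igb.0.dmac=250	(enable with an explicit value)
 *	# sysctl dev.igb.0.dmac=0	(disable)
 *
 * Any other value is rejected with EINVAL, as coded above.
 */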
6355
6356 /*
6357 ** Manage Energy Efficient Ethernet:
6358 ** Control values (the value handled is the EEE disable flag):
6359 **     0 - EEE enabled, 1 - EEE disabled
6360 */
6361 static int
6362 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6363 {
6364         struct adapter  *adapter = (struct adapter *) arg1;
6365         int             error, value;
6366
6367         value = adapter->hw.dev_spec._82575.eee_disable;
6368         error = sysctl_handle_int(oidp, &value, 0, req);
6369         if (error || req->newptr == NULL)
6370                 return (error);
6371         IGB_CORE_LOCK(adapter);
6372         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6373         igb_init_locked(adapter);
6374         IGB_CORE_UNLOCK(adapter);
6375         return (0);
6376 }
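
/*
 * Usage sketch: since the value above is the hardware "eee_disable" flag,
 * writing 1 turns Energy Efficient Ethernet off and 0 turns it back on, with
 * the interface reinitialized under the core lock.  Assuming the node is
 * registered as "eee_disabled" under the device tree (registration not shown
 * here):
 *
 *	# sysctl dev.igb.0.eee_disabled=1
 */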