/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include "if_igb.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "2.5.3-k";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on
 *  Last field stores an index into igb_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII,  0, 0, 0},
        /* required last entry */
        {0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static int      igb_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      igb_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
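
/*
** Illustrative note (not from the original sources): when built as a
** module this driver is if_igb.ko; it can be loaded at boot from
** /boot/loader.conf with if_igb_load="YES", or at runtime with
** kldload(8).
*/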

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");

/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate is
** varied over time based on the traffic
** for that interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be DMA'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf and thus use no cluster. It is a very
** workload-dependent feature.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int igb_tx_process_limit = -1;
TUNABLE_INT("hw.igb.tx_process_limit", &igb_tx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &igb_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[256];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == 0)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTLs */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctls for limiting the amount of work done in the taskqueues */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        igb_set_sysctl_value(adapter, "tx_processing_limit",
            "max number of tx packets to process",
            &adapter->tx_process_limit, igb_tx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. It
         * must not exceed the hardware maximum, and must be a multiple
         * of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats = (struct e1000_vf_stats *)malloc(
                    sizeof(struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats = (struct e1000_hw_stats *)malloc(
                    sizeof(struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                        if (adapter->hw.mac.type == e1000_i354)
                                e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                        else
                                e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
                }
        }
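
        /*
        ** Usage sketch (unit number is hypothetical): the per-device
        ** sysctls added above appear under the device's sysctl tree
        ** at runtime, e.g.:
        **
        **   sysctl dev.igb.0.fc=3           # request full flow control
        **   sysctl dev.igb.0.eee_disabled=1 # turn EEE off
        */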

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-LAN
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack; always uses tx[0], and spins for it.
 * Should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;

        /* Which queue to use: the flowid if it is valid, else the current CPU */
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IGB_TX_TRYLOCK(txr)) {
                igb_mq_start_locked(ifp, txr);
                IGB_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (0);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left; it may not be
                                 * the same, since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
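                /*
                 * 9234 allows a 9216-byte jumbo MTU plus the Ethernet
                 * header (14 bytes) and CRC (4 bytes).
                 */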
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
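                /* FALLTHROUGH */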
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
#if __FreeBSD_version >= 1000000
                /* HW cannot turn these on/off separately */
                if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
                        ifp->if_capenable ^= IFCAP_RXCSUM;
                        ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
                        reinit = 1;
                }
                if (mask & IFCAP_TXCSUM) {
                        ifp->if_capenable ^= IFCAP_TXCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TXCSUM_IPV6) {
                        ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
                        reinit = 1;
                }
#else
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
#endif
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO6) {
                        ifp->if_capenable ^= IFCAP_TSO6;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest MAC address; the user can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
#if __FreeBSD_version >= 1000000
                ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_IP_SCTP;
#else
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
#endif
        }

#if __FreeBSD_version >= 1000000
        if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
                ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_IP6_SCTP;
        }
#endif
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;

        /* Clear bad data from Rx FIFOs */
        e1000_rx_fifo_flush_82575(&adapter->hw);

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling; make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                if (adapter->hw.mac.type == e1000_i354)
                        e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                else
                        e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
        }
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
        struct igb_queue *que = context;
        struct adapter *adapter = que->adapter;
        struct tx_ring *txr = que->txr;
        struct ifnet    *ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                bool    more;

                more = igb_rxeof(que, adapter->rx_process_limit, NULL);

                IGB_TX_LOCK(txr);
                igb_txeof(txr);
#ifndef IGB_LEGACY_TX
                /* Process the stack queue only if not depleted */
                if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                    !drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
                /* Do we need another? */
                if (more) {
                        taskqueue_enqueue(que->tq, &que->que_task);
                        return;
                }
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                return;
#endif
        /* Reenable this interrupt */
        if (que->eims)
                E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
        else
                igb_enable_intr(adapter);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
        struct adapter *adapter = context;

        IGB_CORE_LOCK(adapter);
        igb_handle_link_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_link_locked(struct adapter *adapter)
{
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK_ASSERT(adapter);
        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
        struct adapter          *adapter = arg;
        struct igb_queue        *que = adapter->queues;
        u32                     reg_icr;

        reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

        /* Hot eject?  */
        if (reg_icr == 0xffffffff)
                return FILTER_STRAY;

        /* Definitely not our interrupt.  */
        if (reg_icr == 0x0)
                return FILTER_STRAY;

        if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
                return FILTER_STRAY;

        /*
         * Mask interrupts until the taskqueue is finished running.  This is
1487          * cheap, just assume that it is needed.  This also works around the
1488          * MSI message reordering errata on certain systems.
1489          */
1490         igb_disable_intr(adapter);
1491         taskqueue_enqueue(que->tq, &que->que_task);
1492
1493         /* Link status change */
1494         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1495                 taskqueue_enqueue(que->tq, &adapter->link_task);
1496
1497         if (reg_icr & E1000_ICR_RXO)
1498                 adapter->rx_overruns++;
1499         return FILTER_HANDLED;
1500 }
1501
1502 #ifdef DEVICE_POLLING
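/*
 * On FreeBSD 8 and later the polling handler returns the number of
 * packets processed, while older kernels used a void handler; the
 * POLL_RETURN_COUNT() macro below papers over that difference.
 */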
1503 #if __FreeBSD_version >= 800000
1504 #define POLL_RETURN_COUNT(a) (a)
1505 static int
1506 #else
1507 #define POLL_RETURN_COUNT(a)
1508 static void
1509 #endif
1510 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1511 {
1512         struct adapter          *adapter = ifp->if_softc;
1513         struct igb_queue        *que;
1514         struct tx_ring          *txr;
1515         u32                     reg_icr, rx_done = 0;
1516         u32                     loop = IGB_MAX_LOOP;
1517         bool                    more;
1518
1519         IGB_CORE_LOCK(adapter);
1520         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1521                 IGB_CORE_UNLOCK(adapter);
1522                 return POLL_RETURN_COUNT(rx_done);
1523         }
1524
1525         if (cmd == POLL_AND_CHECK_STATUS) {
1526                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1527                 /* Link status change */
1528                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1529                         igb_handle_link_locked(adapter);
1530
1531                 if (reg_icr & E1000_ICR_RXO)
1532                         adapter->rx_overruns++;
1533         }
1534         IGB_CORE_UNLOCK(adapter);
1535
1536         for (int i = 0; i < adapter->num_queues; i++) {
1537                 que = &adapter->queues[i];
1538                 txr = que->txr;
1539
1540                 igb_rxeof(que, count, &rx_done);
1541
1542                 IGB_TX_LOCK(txr);
1543                 do {
1544                         more = igb_txeof(txr);
1545                 } while (loop-- && more);
1546 #ifndef IGB_LEGACY_TX
1547                 if (!drbr_empty(ifp, txr->br))
1548                         igb_mq_start_locked(ifp, txr);
1549 #else
1550                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1551                         igb_start_locked(txr, ifp);
1552 #endif
1553                 IGB_TX_UNLOCK(txr);
1554         }
1555
1556         return POLL_RETURN_COUNT(rx_done);
1557 }
1558 #endif /* DEVICE_POLLING */
1559
1560 /*********************************************************************
1561  *
1562  *  MSIX Que Interrupt Service routine
1563  *
1564  **********************************************************************/
1565 static void
1566 igb_msix_que(void *arg)
1567 {
1568         struct igb_queue *que = arg;
1569         struct adapter *adapter = que->adapter;
1570         struct ifnet   *ifp = adapter->ifp;
1571         struct tx_ring *txr = que->txr;
1572         struct rx_ring *rxr = que->rxr;
1573         u32             newitr = 0;
1574         bool            more_rx;
1575
1576         /* Ignore spurious interrupts */
1577         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1578                 return;
1579
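        /* Mask this queue's vector (via EIMC) until servicing completes */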
1580         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1581         ++que->irqs;
1582
1583         IGB_TX_LOCK(txr);
1584         igb_txeof(txr);
1585 #ifndef IGB_LEGACY_TX
1586         /* Process the stack queue only if not depleted */
1587         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1588             !drbr_empty(ifp, txr->br))
1589                 igb_mq_start_locked(ifp, txr);
1590 #else
1591         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1592                 igb_start_locked(txr, ifp);
1593 #endif
1594         IGB_TX_UNLOCK(txr);
1595
1596         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1597
1598         if (adapter->enable_aim == FALSE)
1599                 goto no_calc;
1600         /*
1601         ** Do Adaptive Interrupt Moderation:
1602         **  - Write out last calculated setting
1603         **  - Calculate based on average size over
1604         **    the last interval.
1605         */
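        /*
        ** (newitr below approximates the average frame size in bytes
        ** over the last interval; the value is saved and written to
        ** EITR at the top of the next interrupt.)
        */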
1606         if (que->eitr_setting)
1607                 E1000_WRITE_REG(&adapter->hw,
1608                     E1000_EITR(que->msix), que->eitr_setting);
1609  
1610         que->eitr_setting = 0;
1611
1612         /* Idle, do nothing */
1613         if ((txr->bytes == 0) && (rxr->bytes == 0))
1614                 goto no_calc;
1615                                 
1616         /* Use half the default if sub-gig */
1617         if (adapter->link_speed != 1000)
1618                 newitr = IGB_DEFAULT_ITR / 2;
1619         else {
1620                 if ((txr->bytes) && (txr->packets))
1621                         newitr = txr->bytes/txr->packets;
1622                 if ((rxr->bytes) && (rxr->packets))
1623                         newitr = max(newitr,
1624                             (rxr->bytes / rxr->packets));
1625                 newitr += 24; /* account for hardware frame, crc */
1626                 /* set an upper boundary */
1627                 newitr = min(newitr, 3000);
1628                 /* Be nice to the mid range */
1629                 if ((newitr > 300) && (newitr < 1200))
1630                         newitr = (newitr / 3);
1631                 else
1632                         newitr = (newitr / 2);
1633         }
1634         newitr &= 0x7FFC;  /* Mask invalid bits */
1635         if (adapter->hw.mac.type == e1000_82575)
1636                 newitr |= newitr << 16;
1637         else
1638                 newitr |= E1000_EITR_CNT_IGNR;
1639                  
1640         /* save for next interrupt */
1641         que->eitr_setting = newitr;
1642
1643         /* Reset state */
1644         txr->bytes = 0;
1645         txr->packets = 0;
1646         rxr->bytes = 0;
1647         rxr->packets = 0;
1648
1649 no_calc:
1650         /* Schedule a clean task if needed */
1651         if (more_rx)
1652                 taskqueue_enqueue(que->tq, &que->que_task);
1653         else
1654                 /* Reenable this interrupt */
1655                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1656         return;
1657 }
1658
1659
1660 /*********************************************************************
1661  *
1662  *  MSIX Link Interrupt Service routine
1663  *
1664  **********************************************************************/
1665
1666 static void
1667 igb_msix_link(void *arg)
1668 {
1669         struct adapter  *adapter = arg;
1670         u32             icr;
1671
1672         ++adapter->link_irq;
1673         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1674         if (!(icr & E1000_ICR_LSC))
1675                 goto spurious;
1676         igb_handle_link(adapter, 0);
1677
1678 spurious:
1679         /* Rearm */
1680         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1681         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1682         return;
1683 }
1684
1685
1686 /*********************************************************************
1687  *
1688  *  Media Ioctl callback
1689  *
1690  *  This routine is called whenever the user queries the status of
1691  *  the interface using ifconfig.
1692  *
1693  **********************************************************************/
1694 static void
1695 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1696 {
1697         struct adapter *adapter = ifp->if_softc;
1698
1699         INIT_DEBUGOUT("igb_media_status: begin");
1700
1701         IGB_CORE_LOCK(adapter);
1702         igb_update_link_status(adapter);
1703
1704         ifmr->ifm_status = IFM_AVALID;
1705         ifmr->ifm_active = IFM_ETHER;
1706
1707         if (!adapter->link_active) {
1708                 IGB_CORE_UNLOCK(adapter);
1709                 return;
1710         }
1711
1712         ifmr->ifm_status |= IFM_ACTIVE;
1713
1714         switch (adapter->link_speed) {
1715         case 10:
1716                 ifmr->ifm_active |= IFM_10_T;
1717                 break;
1718         case 100:
1719                 /*
1720                 ** Support for 100Mb SFP - these are Fiber 
1721                 ** but the media type appears as serdes
1722                 */
1723                 if (adapter->hw.phy.media_type ==
1724                     e1000_media_type_internal_serdes)
1725                         ifmr->ifm_active |= IFM_100_FX;
1726                 else
1727                         ifmr->ifm_active |= IFM_100_TX;
1728                 break;
1729         case 1000:
1730                 ifmr->ifm_active |= IFM_1000_T;
1731                 break;
1732         case 2500:
1733                 ifmr->ifm_active |= IFM_2500_SX;
1734                 break;
1735         }
1736
1737         if (adapter->link_duplex == FULL_DUPLEX)
1738                 ifmr->ifm_active |= IFM_FDX;
1739         else
1740                 ifmr->ifm_active |= IFM_HDX;
1741
1742         IGB_CORE_UNLOCK(adapter);
1743 }
1744
1745 /*********************************************************************
1746  *
1747  *  Media Ioctl callback
1748  *
1749  *  This routine is called when the user changes speed/duplex using
1750  *  media/mediaopt option with ifconfig.
1751  *
1752  **********************************************************************/
1753 static int
1754 igb_media_change(struct ifnet *ifp)
1755 {
1756         struct adapter *adapter = ifp->if_softc;
1757         struct ifmedia  *ifm = &adapter->media;
1758
1759         INIT_DEBUGOUT("igb_media_change: begin");
1760
1761         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1762                 return (EINVAL);
1763
1764         IGB_CORE_LOCK(adapter);
1765         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1766         case IFM_AUTO:
1767                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1768                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1769                 break;
1770         case IFM_1000_LX:
1771         case IFM_1000_SX:
1772         case IFM_1000_T:
1773                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1774                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1775                 break;
1776         case IFM_100_TX:
1777                 adapter->hw.mac.autoneg = FALSE;
1778                 adapter->hw.phy.autoneg_advertised = 0;
1779                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1781                 else
1782                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1783                 break;
1784         case IFM_10_T:
1785                 adapter->hw.mac.autoneg = FALSE;
1786                 adapter->hw.phy.autoneg_advertised = 0;
1787                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1788                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1789                 else
1790                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1791                 break;
1792         default:
1793                 device_printf(adapter->dev, "Unsupported media type\n");
1794         }
1795
1796         igb_init_locked(adapter);
1797         IGB_CORE_UNLOCK(adapter);
1798
1799         return (0);
1800 }
1801
1802
1803 /*********************************************************************
1804  *
1805  *  This routine maps the mbufs to Advanced TX descriptors.
1806  *  
1807  **********************************************************************/
1808 static int
1809 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1810 {
1811         struct adapter  *adapter = txr->adapter;
1812         u32             olinfo_status = 0, cmd_type_len;
1813         int             i, j, error, nsegs;
1814         int             first;
1815         bool            remap = TRUE;
1816         struct mbuf     *m_head;
1817         bus_dma_segment_t segs[IGB_MAX_SCATTER];
1818         bus_dmamap_t    map;
1819         struct igb_tx_buf *txbuf;
1820         union e1000_adv_tx_desc *txd = NULL;
1821
1822         m_head = *m_headp;
1823
1824         /* Basic descriptor defines */
1825         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1826             E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1827
1828         if (m_head->m_flags & M_VLANTAG)
1829                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1830
1831         /*
1832          * Important to capture the first descriptor
1833          * used because it will contain the index of
1834          * the one we tell the hardware to report back
1835          */
1836         first = txr->next_avail_desc;
1837         txbuf = &txr->tx_buffers[first];
1838         map = txbuf->map;
1839
1840         /*
1841          * Map the packet for DMA.
1842          */
1843 retry:
1844         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1845             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1846
1847         if (__predict_false(error)) {
1848                 struct mbuf *m;
1849
1850                 switch (error) {
1851                 case EFBIG:
1852                         /* Try it again? - one try */
1853                         if (remap == TRUE) {
1854                                 remap = FALSE;
1855                                 m = m_collapse(*m_headp, M_NOWAIT,
1856                                     IGB_MAX_SCATTER);
1857                                 if (m == NULL) {
1858                                         adapter->mbuf_defrag_failed++;
1859                                         m_freem(*m_headp);
1860                                         *m_headp = NULL;
1861                                         return (ENOBUFS);
1862                                 }
1863                                 *m_headp = m;
1864                                 goto retry;
1865                         } else
1866                                 return (error);
1867                 default:
1868                         txr->no_tx_dma_setup++;
1869                         m_freem(*m_headp);
1870                         *m_headp = NULL;
1871                         return (error);
1872                 }
1873         }
1874
1875         /* Make certain there are enough descriptors */
1876         if (txr->tx_avail < (nsegs + 2)) {
1877                 txr->no_desc_avail++;
1878                 bus_dmamap_unload(txr->txtag, map);
1879                 return (ENOBUFS);
1880         }
1881         m_head = *m_headp;
1882
1883         /*
1884         ** Set up the appropriate offload context
1885         ** this will consume the first descriptor
1886         */
1887         error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1888         if (__predict_false(error)) {
1889                 m_freem(*m_headp);
1890                 *m_headp = NULL;
1891                 return (error);
1892         }
1893
1894         /* 82575 needs the queue index added */
1895         if (adapter->hw.mac.type == e1000_82575)
1896                 olinfo_status |= txr->me << 4;
1897
1898         i = txr->next_avail_desc;
1899         for (j = 0; j < nsegs; j++) {
1900                 bus_size_t seglen;
1901                 bus_addr_t segaddr;
1902
1903                 txbuf = &txr->tx_buffers[i];
1904                 txd = &txr->tx_base[i];
1905                 seglen = segs[j].ds_len;
1906                 segaddr = htole64(segs[j].ds_addr);
1907
1908                 txd->read.buffer_addr = segaddr;
1909                 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1910                     cmd_type_len | seglen);
1911                 txd->read.olinfo_status = htole32(olinfo_status);
1912
1913                 if (++i == txr->num_desc)
1914                         i = 0;
1915         }
1916
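        /* EOP ends the frame; RS asks hardware to write back status */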
1917         txd->read.cmd_type_len |=
1918             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1919         txr->tx_avail -= nsegs;
1920         txr->next_avail_desc = i;
1921
1922         txbuf->m_head = m_head;
1923         /*
1924         ** Here we swap the map so the last descriptor,
1925         ** which gets the completion interrupt has the
1926         ** real map, and the first descriptor gets the
1927         ** unused map from this descriptor.
1928         */
1929         txr->tx_buffers[first].map = txbuf->map;
1930         txbuf->map = map;
1931         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1932
1933         /* Set the EOP descriptor that will be marked done */
1934         txbuf = &txr->tx_buffers[first];
1935         txbuf->eop = txd;
1936
1937         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1938             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1939         /*
1940          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1941          * hardware that this frame is available to transmit.
1942          */
1943         ++txr->total_packets;
1944         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1945
1946         return (0);
1947 }

1948 static void
1949 igb_set_promisc(struct adapter *adapter)
1950 {
1951         struct ifnet    *ifp = adapter->ifp;
1952         struct e1000_hw *hw = &adapter->hw;
1953         u32             reg;
1954
1955         if (adapter->vf_ifp) {
1956                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1957                 return;
1958         }
1959
1960         reg = E1000_READ_REG(hw, E1000_RCTL);
1961         if (ifp->if_flags & IFF_PROMISC) {
1962                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1963                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1964         } else if (ifp->if_flags & IFF_ALLMULTI) {
1965                 reg |= E1000_RCTL_MPE;
1966                 reg &= ~E1000_RCTL_UPE;
1967                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1968         }
1969 }
1970
1971 static void
1972 igb_disable_promisc(struct adapter *adapter)
1973 {
1974         struct e1000_hw *hw = &adapter->hw;
1975         struct ifnet    *ifp = adapter->ifp;
1976         u32             reg;
1977         int             mcnt = 0;
1978
1979         if (adapter->vf_ifp) {
1980                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1981                 return;
1982         }
1983         reg = E1000_READ_REG(hw, E1000_RCTL);
1984         reg &=  (~E1000_RCTL_UPE);
1985         if (ifp->if_flags & IFF_ALLMULTI)
1986                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1987         else {
1988                 struct  ifmultiaddr *ifma;
1989 #if __FreeBSD_version < 800000
1990                 IF_ADDR_LOCK(ifp);
1991 #else   
1992                 if_maddr_rlock(ifp);
1993 #endif
1994                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1995                         if (ifma->ifma_addr->sa_family != AF_LINK)
1996                                 continue;
1997                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1998                                 break;
1999                         mcnt++;
2000                 }
2001 #if __FreeBSD_version < 800000
2002                 IF_ADDR_UNLOCK(ifp);
2003 #else
2004                 if_maddr_runlock(ifp);
2005 #endif
2006         }
2007         /* Don't disable if in MAX groups */
2008         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2009                 reg &=  (~E1000_RCTL_MPE);
2010         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2011 }
2012
2013
2014 /*********************************************************************
2015  *  Multicast Update
2016  *
2017  *  This routine is called whenever multicast address list is updated.
2018  *
2019  **********************************************************************/
2020
2021 static void
2022 igb_set_multi(struct adapter *adapter)
2023 {
2024         struct ifnet    *ifp = adapter->ifp;
2025         struct ifmultiaddr *ifma;
2026         u32 reg_rctl = 0;
2027         u8  *mta;
2028
2029         int mcnt = 0;
2030
2031         IOCTL_DEBUGOUT("igb_set_multi: begin");
2032
2033         mta = adapter->mta;
2034         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2035             MAX_NUM_MULTICAST_ADDRESSES);
2036
2037 #if __FreeBSD_version < 800000
2038         IF_ADDR_LOCK(ifp);
2039 #else
2040         if_maddr_rlock(ifp);
2041 #endif
2042         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2043                 if (ifma->ifma_addr->sa_family != AF_LINK)
2044                         continue;
2045
2046                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2047                         break;
2048
2049                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2050                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2051                 mcnt++;
2052         }
2053 #if __FreeBSD_version < 800000
2054         IF_ADDR_UNLOCK(ifp);
2055 #else
2056         if_maddr_runlock(ifp);
2057 #endif
2058
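        /*
        ** Too many groups for the address table,
        ** so accept all multicast traffic (MPE).
        */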
2059         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2060                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2061                 reg_rctl |= E1000_RCTL_MPE;
2062                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2063         } else
2064                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2065 }
2066
2067
2068 /*********************************************************************
2069  *  Timer routine:
2070  *      This routine checks for link status,
2071  *      updates statistics, and does the watchdog.
2072  *
2073  **********************************************************************/
2074
2075 static void
2076 igb_local_timer(void *arg)
2077 {
2078         struct adapter          *adapter = arg;
2079         device_t                dev = adapter->dev;
2080         struct ifnet            *ifp = adapter->ifp;
2081         struct tx_ring          *txr = adapter->tx_rings;
2082         struct igb_queue        *que = adapter->queues;
2083         int                     hung = 0, busy = 0;
2084
2085
2086         IGB_CORE_LOCK_ASSERT(adapter);
2087
2088         igb_update_link_status(adapter);
2089         igb_update_stats_counters(adapter);
2090
2091         /*
2092         ** Check the TX queues status
2093         **      - central locked handling of OACTIVE
2094         **      - watchdog only if all queues show hung
2095         */
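        /*
        ** (A queue counts as hung only when no pause frames were
        ** received, so flow-control backpressure is not mistaken
        ** for a stalled ring.)
        */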
2096         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2097                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2098                     (adapter->pause_frames == 0))
2099                         ++hung;
2100                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2101                         ++busy;
2102                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2103                         taskqueue_enqueue(que->tq, &que->que_task);
2104         }
2105         if (hung == adapter->num_queues)
2106                 goto timeout;
2107         if (busy == adapter->num_queues)
2108                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2109         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2110             (busy < adapter->num_queues))
2111                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2112
2113         adapter->pause_frames = 0;
2114         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2115 #ifndef DEVICE_POLLING
2116         /* Schedule all queue interrupts - deadlock protection */
2117         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2118 #endif
2119         return;
2120
2121 timeout:
2122         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2123         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2124             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2125             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2126         device_printf(dev, "TX(%d) desc avail = %d, "
2127             "Next TX to Clean = %d\n",
2128             txr->me, txr->tx_avail, txr->next_to_clean);
2129         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2130         adapter->watchdog_events++;
2131         igb_init_locked(adapter);
2132 }
2133
2134 static void
2135 igb_update_link_status(struct adapter *adapter)
2136 {
2137         struct e1000_hw         *hw = &adapter->hw;
2138         struct e1000_fc_info    *fc = &hw->fc;
2139         struct ifnet            *ifp = adapter->ifp;
2140         device_t                dev = adapter->dev;
2141         struct tx_ring          *txr = adapter->tx_rings;
2142         u32                     link_check, thstat, ctrl;
2143         char                    *flowctl = NULL;
2144
2145         link_check = thstat = ctrl = 0;
2146
2147         /* Get the cached link value or read for real */
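        /*
        ** (hw->mac.get_link_status is set by the LSC interrupt path
        ** in igb_handle_link_locked() to force a fresh PHY read.)
        */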
2148         switch (hw->phy.media_type) {
2149         case e1000_media_type_copper:
2150                 if (hw->mac.get_link_status) {
2151                         /* Do the work to read phy */
2152                         e1000_check_for_link(hw);
2153                         link_check = !hw->mac.get_link_status;
2154                 } else
2155                         link_check = TRUE;
2156                 break;
2157         case e1000_media_type_fiber:
2158                 e1000_check_for_link(hw);
2159                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2160                                  E1000_STATUS_LU);
2161                 break;
2162         case e1000_media_type_internal_serdes:
2163                 e1000_check_for_link(hw);
2164                 link_check = adapter->hw.mac.serdes_has_link;
2165                 break;
2166         /* VF device is type_unknown */
2167         case e1000_media_type_unknown:
2168                 e1000_check_for_link(hw);
2169                 link_check = !hw->mac.get_link_status;
2170                 /* Fall thru */
2171         default:
2172                 break;
2173         }
2174
2175         /* Check for thermal downshift or shutdown */
2176         if (hw->mac.type == e1000_i350) {
2177                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2178                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2179         }
2180
2181         /* Get the flow control for display */
2182         switch (fc->current_mode) {
2183         case e1000_fc_rx_pause:
2184                 flowctl = "RX";
2185                 break;  
2186         case e1000_fc_tx_pause:
2187                 flowctl = "TX";
2188                 break;  
2189         case e1000_fc_full:
2190                 flowctl = "Full";
2191                 break;  
2192         case e1000_fc_none:
2193         default:
2194                 flowctl = "None";
2195                 break;  
2196         }
2197
2198         /* Now we check if a transition has happened */
2199         if (link_check && (adapter->link_active == 0)) {
2200                 e1000_get_speed_and_duplex(&adapter->hw, 
2201                     &adapter->link_speed, &adapter->link_duplex);
2202                 if (bootverbose)
2203                         device_printf(dev, "Link is up %d Mbps %s,"
2204                             " Flow Control: %s\n",
2205                             adapter->link_speed,
2206                             ((adapter->link_duplex == FULL_DUPLEX) ?
2207                             "Full Duplex" : "Half Duplex"), flowctl);
2208                 adapter->link_active = 1;
2209                 ifp->if_baudrate = adapter->link_speed * 1000000;
2210                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2211                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2212                         device_printf(dev, "Link: thermal downshift\n");
2213                 /* Delay Link Up for Phy update */
2214                 if (((hw->mac.type == e1000_i210) ||
2215                     (hw->mac.type == e1000_i211)) &&
2216                     (hw->phy.id == I210_I_PHY_ID))
2217                         msec_delay(I210_LINK_DELAY);
2218                 /* Reset if the media type changed. */
2219                 if (hw->dev_spec._82575.media_changed) {
2220                         hw->dev_spec._82575.media_changed = false;
2221                         adapter->flags |= IGB_MEDIA_RESET;
2222                         igb_reset(adapter);
2223                 }       
2224                 /* This can sleep */
2225                 if_link_state_change(ifp, LINK_STATE_UP);
2226         } else if (!link_check && (adapter->link_active == 1)) {
2227                 ifp->if_baudrate = adapter->link_speed = 0;
2228                 adapter->link_duplex = 0;
2229                 if (bootverbose)
2230                         device_printf(dev, "Link is Down\n");
2231                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2232                     (thstat & E1000_THSTAT_PWR_DOWN))
2233                         device_printf(dev, "Link: thermal shutdown\n");
2234                 adapter->link_active = 0;
2235                 /* This can sleep */
2236                 if_link_state_change(ifp, LINK_STATE_DOWN);
2237                 /* Reset queue state */
2238                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2239                         txr->queue_status = IGB_QUEUE_IDLE;
2240         }
2241 }
2242
2243 /*********************************************************************
2244  *
2245  *  This routine disables all traffic on the adapter by issuing a
2246  *  global reset on the MAC and deallocates TX/RX buffers.
2247  *
2248  **********************************************************************/
2249
2250 static void
2251 igb_stop(void *arg)
2252 {
2253         struct adapter  *adapter = arg;
2254         struct ifnet    *ifp = adapter->ifp;
2255         struct tx_ring *txr = adapter->tx_rings;
2256
2257         IGB_CORE_LOCK_ASSERT(adapter);
2258
2259         INIT_DEBUGOUT("igb_stop: begin");
2260
2261         igb_disable_intr(adapter);
2262
2263         callout_stop(&adapter->timer);
2264
2265         /* Tell the stack that the interface is no longer active */
2266         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2267         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2268
2269         /* Disarm watchdog timer. */
2270         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2271                 IGB_TX_LOCK(txr);
2272                 txr->queue_status = IGB_QUEUE_IDLE;
2273                 IGB_TX_UNLOCK(txr);
2274         }
2275
2276         e1000_reset_hw(&adapter->hw);
2277         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2278
2279         e1000_led_off(&adapter->hw);
2280         e1000_cleanup_led(&adapter->hw);
2281 }
2282
2283
2284 /*********************************************************************
2285  *
2286  *  Determine hardware revision.
2287  *
2288  **********************************************************************/
2289 static void
2290 igb_identify_hardware(struct adapter *adapter)
2291 {
2292         device_t dev = adapter->dev;
2293
2294         /* Make sure our PCI config space has the necessary stuff set */
2295         pci_enable_busmaster(dev);
2296         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2297
2298         /* Save off the information about this board */
2299         adapter->hw.vendor_id = pci_get_vendor(dev);
2300         adapter->hw.device_id = pci_get_device(dev);
2301         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2302         adapter->hw.subsystem_vendor_id =
2303             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2304         adapter->hw.subsystem_device_id =
2305             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2306
2307         /* Set MAC type early for PCI setup */
2308         e1000_set_mac_type(&adapter->hw);
2309
2310         /* Are we a VF device? */
2311         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2312             (adapter->hw.mac.type == e1000_vfadapt_i350))
2313                 adapter->vf_ifp = 1;
2314         else
2315                 adapter->vf_ifp = 0;
2316 }
2317
2318 static int
2319 igb_allocate_pci_resources(struct adapter *adapter)
2320 {
2321         device_t        dev = adapter->dev;
2322         int             rid;
2323
2324         rid = PCIR_BAR(0);
2325         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2326             &rid, RF_ACTIVE);
2327         if (adapter->pci_mem == NULL) {
2328                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2329                 return (ENXIO);
2330         }
2331         adapter->osdep.mem_bus_space_tag =
2332             rman_get_bustag(adapter->pci_mem);
2333         adapter->osdep.mem_bus_space_handle =
2334             rman_get_bushandle(adapter->pci_mem);
2335         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2336
2337         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2338
2339         /* This will setup either MSI/X or MSI */
2340         adapter->msix = igb_setup_msix(adapter);
2341         adapter->hw.back = &adapter->osdep;
2342
2343         return (0);
2344 }
2345
2346 /*********************************************************************
2347  *
2348  *  Setup the Legacy or MSI Interrupt handler
2349  *
2350  **********************************************************************/
2351 static int
2352 igb_allocate_legacy(struct adapter *adapter)
2353 {
2354         device_t                dev = adapter->dev;
2355         struct igb_queue        *que = adapter->queues;
2356 #ifndef IGB_LEGACY_TX
2357         struct tx_ring          *txr = adapter->tx_rings;
2358 #endif
2359         int                     error, rid = 0;
2360
2361         /* Turn off all interrupts */
2362         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2363
2364         /* MSI RID is 1 */
2365         if (adapter->msix == 1)
2366                 rid = 1;
2367
2368         /* We allocate a single interrupt resource */
2369         adapter->res = bus_alloc_resource_any(dev,
2370             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2371         if (adapter->res == NULL) {
2372                 device_printf(dev, "Unable to allocate bus resource: "
2373                     "interrupt\n");
2374                 return (ENXIO);
2375         }
2376
2377 #ifndef IGB_LEGACY_TX
2378         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2379 #endif
2380
2381         /*
2382          * Try allocating a fast interrupt and the associated deferred
2383          * processing contexts.
2384          */
2385         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2386         /* Make tasklet for deferred link handling */
2387         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2388         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2389             taskqueue_thread_enqueue, &que->tq);
2390         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2391             device_get_nameunit(adapter->dev));
2392         if ((error = bus_setup_intr(dev, adapter->res,
2393             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2394             adapter, &adapter->tag)) != 0) {
2395                 device_printf(dev, "Failed to register fast interrupt "
2396                             "handler: %d\n", error);
2397                 taskqueue_free(que->tq);
2398                 que->tq = NULL;
2399                 return (error);
2400         }
2401
2402         return (0);
2403 }
2404
2405
2406 /*********************************************************************
2407  *
2408  *  Setup the MSIX Queue Interrupt handlers: 
2409  *
2410  **********************************************************************/
2411 static int
2412 igb_allocate_msix(struct adapter *adapter)
2413 {
2414         device_t                dev = adapter->dev;
2415         struct igb_queue        *que = adapter->queues;
2416         int                     error, rid, vector = 0;
2417
2418         /* Be sure to start with all interrupts disabled */
2419         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2420         E1000_WRITE_FLUSH(&adapter->hw);
2421
2422         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2423                 rid = vector + 1;
2424                 que->res = bus_alloc_resource_any(dev,
2425                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2426                 if (que->res == NULL) {
2427                         device_printf(dev,
2428                             "Unable to allocate bus resource: "
2429                             "MSIX Queue Interrupt\n");
2430                         return (ENXIO);
2431                 }
2432                 error = bus_setup_intr(dev, que->res,
2433                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2434                     igb_msix_que, que, &que->tag);
2435                 if (error) {
2436                         que->res = NULL;
2437                         device_printf(dev, "Failed to register Queue handler\n");
2438                         return (error);
2439                 }
2440 #if __FreeBSD_version >= 800504
2441                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2442 #endif
2443                 que->msix = vector;
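                /*
                ** 82575 exposes separate per-queue TX/RX cause bits;
                ** later MACs use one EICR bit per MSI-X vector.
                */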
2444                 if (adapter->hw.mac.type == e1000_82575)
2445                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2446                 else
2447                         que->eims = 1 << vector;
2448                 /*
2449                 ** Bind the msix vector, and thus the
2450                 ** rings to the corresponding cpu.
2451                 */
2452                 if (adapter->num_queues > 1) {
2453                         if (igb_last_bind_cpu < 0)
2454                                 igb_last_bind_cpu = CPU_FIRST();
2455                         bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2456                         device_printf(dev,
2457                                 "Bound queue %d to cpu %d\n",
2458                                 i, igb_last_bind_cpu);
2459                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2460                 }
2461 #ifndef IGB_LEGACY_TX
2462                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2463                     que->txr);
2464 #endif
2465                 /* Make tasklet for deferred handling */
2466                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2467                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2468                     taskqueue_thread_enqueue, &que->tq);
2469                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2470                     device_get_nameunit(adapter->dev));
2471         }
2472
2473         /* And Link */
2474         rid = vector + 1;
2475         adapter->res = bus_alloc_resource_any(dev,
2476             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2477         if (adapter->res == NULL) {
2478                 device_printf(dev,
2479                     "Unable to allocate bus resource: "
2480                     "MSIX Link Interrupt\n");
2481                 return (ENXIO);
2482         }
2483         if ((error = bus_setup_intr(dev, adapter->res,
2484             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2485             igb_msix_link, adapter, &adapter->tag)) != 0) {
2486                 device_printf(dev, "Failed to register Link handler");
2487                 return (error);
2488         }
2489 #if __FreeBSD_version >= 800504
2490         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2491 #endif
2492         adapter->linkvec = vector;
2493
2494         return (0);
2495 }
2496
2497
2498 static void
2499 igb_configure_queues(struct adapter *adapter)
2500 {
2501         struct  e1000_hw        *hw = &adapter->hw;
2502         struct  igb_queue       *que;
2503         u32                     tmp, ivar = 0, newitr = 0;
2504
2505         /* First turn on RSS capability */
2506         if (adapter->hw.mac.type != e1000_82575)
2507                 E1000_WRITE_REG(hw, E1000_GPIE,
2508                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2509                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2510
2511         /* Turn on MSIX */
2512         switch (adapter->hw.mac.type) {
2513         case e1000_82580:
2514         case e1000_i350:
2515         case e1000_i354:
2516         case e1000_i210:
2517         case e1000_i211:
2518         case e1000_vfadapt:
2519         case e1000_vfadapt_i350:
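                /*
                ** Each 32-bit IVAR register holds four 8-bit entries:
                ** even queues use bits 7:0 (RX) and 15:8 (TX), odd
                ** queues bits 23:16 (RX) and 31:24 (TX).
                */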
2520                 /* RX entries */
2521                 for (int i = 0; i < adapter->num_queues; i++) {
2522                         u32 index = i >> 1;
2523                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2524                         que = &adapter->queues[i];
2525                         if (i & 1) {
2526                                 ivar &= 0xFF00FFFF;
2527                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2528                         } else {
2529                                 ivar &= 0xFFFFFF00;
2530                                 ivar |= que->msix | E1000_IVAR_VALID;
2531                         }
2532                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2533                 }
2534                 /* TX entries */
2535                 for (int i = 0; i < adapter->num_queues; i++) {
2536                         u32 index = i >> 1;
2537                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2538                         que = &adapter->queues[i];
2539                         if (i & 1) {
2540                                 ivar &= 0x00FFFFFF;
2541                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2542                         } else {
2543                                 ivar &= 0xFFFF00FF;
2544                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2545                         }
2546                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2547                         adapter->que_mask |= que->eims;
2548                 }
2549
2550                 /* And for the link interrupt */
2551                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2552                 adapter->link_mask = 1 << adapter->linkvec;
2553                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2554                 break;
2555         case e1000_82576:
2556                 /* RX entries */
2557                 for (int i = 0; i < adapter->num_queues; i++) {
2558                         u32 index = i & 0x7; /* Each IVAR has two entries */
2559                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2560                         que = &adapter->queues[i];
2561                         if (i < 8) {
2562                                 ivar &= 0xFFFFFF00;
2563                                 ivar |= que->msix | E1000_IVAR_VALID;
2564                         } else {
2565                                 ivar &= 0xFF00FFFF;
2566                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2567                         }
2568                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2569                         adapter->que_mask |= que->eims;
2570                 }
2571                 /* TX entries */
2572                 for (int i = 0; i < adapter->num_queues; i++) {
2573                         u32 index = i & 0x7; /* Each IVAR has two entries */
2574                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2575                         que = &adapter->queues[i];
2576                         if (i < 8) {
2577                                 ivar &= 0xFFFF00FF;
2578                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2579                         } else {
2580                                 ivar &= 0x00FFFFFF;
2581                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2582                         }
2583                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2584                         adapter->que_mask |= que->eims;
2585                 }
2586
2587                 /* And for the link interrupt */
2588                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2589                 adapter->link_mask = 1 << adapter->linkvec;
2590                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2591                 break;
2592
2593         case e1000_82575:
2594                 /* Enable MSI-X support */
2595                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2596                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2597                 /* Auto-Mask interrupts upon ICR read. */
2598                 tmp |= E1000_CTRL_EXT_EIAME;
2599                 tmp |= E1000_CTRL_EXT_IRCA;
2600                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2601
2602                 /* Queues */
2603                 for (int i = 0; i < adapter->num_queues; i++) {
2604                         que = &adapter->queues[i];
2605                         tmp = E1000_EICR_RX_QUEUE0 << i;
2606                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2607                         que->eims = tmp;
2608                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2609                             i, que->eims);
2610                         adapter->que_mask |= que->eims;
2611                 }
2612
2613                 /* Link */
2614                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2615                     E1000_EIMS_OTHER);
2616                 adapter->link_mask |= E1000_EIMS_OTHER;
2617         default:
2618                 break;
2619         }
2620
2621         /* Set the starting interrupt rate */
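        /* (controlled by the hw.igb.max_interrupt_rate tunable;
        ** the EITR interval written is 4000000 / rate) */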
2622         if (igb_max_interrupt_rate > 0)
2623                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2624
2625         if (hw->mac.type == e1000_82575)
2626                 newitr |= newitr << 16;
2627         else
2628                 newitr |= E1000_EITR_CNT_IGNR;
2629
2630         for (int i = 0; i < adapter->num_queues; i++) {
2631                 que = &adapter->queues[i];
2632                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2633         }
2634
2635         return;
2636 }
2637
2638
2639 static void
2640 igb_free_pci_resources(struct adapter *adapter)
2641 {
2642         struct          igb_queue *que = adapter->queues;
2643         device_t        dev = adapter->dev;
2644         int             rid;
2645
2646         /*
2647         ** There is a slight possibility of a failure mode
2648         ** in attach that will result in entering this function
2649         ** before interrupt resources have been initialized, and
2650         ** in that case we do not want to execute the loops below.
2651         ** We can detect this reliably by the state of the adapter's
2652         ** res pointer.
2653         */
2654         if (adapter->res == NULL)
2655                 goto mem;
2656
2657         /*
2658          * First release all the interrupt resources:
2659          */
2660         for (int i = 0; i < adapter->num_queues; i++, que++) {
2661                 rid = que->msix + 1;
2662                 if (que->tag != NULL) {
2663                         bus_teardown_intr(dev, que->res, que->tag);
2664                         que->tag = NULL;
2665                 }
2666                 if (que->res != NULL)
2667                         bus_release_resource(dev,
2668                             SYS_RES_IRQ, rid, que->res);
2669         }
2670
2671         /* Clean the Legacy or Link interrupt last */
2672         if (adapter->linkvec) /* we are doing MSIX */
2673                 rid = adapter->linkvec + 1;
2674         else
2675                 rid = (adapter->msix != 0) ? 1 : 0;
2676
2677         que = adapter->queues;
2678         if (adapter->tag != NULL) {
2679                 taskqueue_drain(que->tq, &adapter->link_task);
2680                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2681                 adapter->tag = NULL;
2682         }
2683         if (adapter->res != NULL)
2684                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2685
2686         for (int i = 0; i < adapter->num_queues; i++, que++) {
2687                 if (que->tq != NULL) {
2688 #ifndef IGB_LEGACY_TX
2689                         taskqueue_drain(que->tq, &que->txr->txq_task);
2690 #endif
2691                         taskqueue_drain(que->tq, &que->que_task);
2692                         taskqueue_free(que->tq);
2693                 }
2694         }
2695 mem:
2696         if (adapter->msix)
2697                 pci_release_msi(dev);
2698
2699         if (adapter->msix_mem != NULL)
2700                 bus_release_resource(dev, SYS_RES_MEMORY,
2701                     adapter->memrid, adapter->msix_mem);
2702
2703         if (adapter->pci_mem != NULL)
2704                 bus_release_resource(dev, SYS_RES_MEMORY,
2705                     PCIR_BAR(0), adapter->pci_mem);
2706
2707 }
2708
2709 /*
2710  * Setup Either MSI/X or MSI
2711  */
2712 static int
2713 igb_setup_msix(struct adapter *adapter)
2714 {
2715         device_t        dev = adapter->dev;
2716         int             bar, want, queues, msgs, maxqueues;
2717
2718         /* tuneable override */
2719         if (igb_enable_msix == 0)
2720                 goto msi;
2721
2722         /* First try MSI/X */
2723         msgs = pci_msix_count(dev); 
2724         if (msgs == 0)
2725                 goto msi;
2726         /*
2727         ** Some newer devices, as with ixgbe, may now
2728         ** use a different BAR, so we need to keep
2729         ** track of which one is used.
2730         */
2731         adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2732         bar = pci_read_config(dev, adapter->memrid, 4);
2733         if (bar == 0) /* use next bar */
2734                 adapter->memrid += 4;
2735         adapter->msix_mem = bus_alloc_resource_any(dev,
2736             SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2737         if (adapter->msix_mem == NULL) {
2738                 /* May not be enabled */
2739                 device_printf(adapter->dev,
2740                     "Unable to map MSIX table \n");
2741                 goto msi;
2742         }
2743
2744         /* Figure out a reasonable auto config value */
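        /* (i.e. min(mp_ncpus, msgs - 1), reserving one vector for link;
        ** the count can be forced with the hw.igb.num_queues tunable,
        ** e.g. hw.igb.num_queues=4 in loader.conf) */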
2745         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2746
2751         /* Sanity check based on HW */
2752         switch (adapter->hw.mac.type) {
2753                 case e1000_82575:
2754                         maxqueues = 4;
2755                         break;
2756                 case e1000_82576:
2757                 case e1000_82580:
2758                 case e1000_i350:
2759                 case e1000_i354:
2760                         maxqueues = 8;
2761                         break;
2762                 case e1000_i210:
2763                         maxqueues = 4;
2764                         break;
2765                 case e1000_i211:
2766                         maxqueues = 2;
2767                         break;
2768                 default:  /* VF interfaces */
2769                         maxqueues = 1;
2770                         break;
2771         }
2772         if (queues > maxqueues)
2773                 queues = maxqueues;
2774
2775         /* Manual override */
2776         if (igb_num_queues != 0)
2777                 queues = igb_num_queues;
2778
2779         /*
2780         ** One vector (RX/TX pair) per queue
2781         ** plus an additional for Link interrupt
2782         */
2783         want = queues + 1;
2784         if (msgs >= want)
2785                 msgs = want;
2786         else {
2787                 device_printf(adapter->dev,
2788                     "MSIX Configuration Problem, "
2789                     "%d vectors configured, but %d queues wanted!\n",
2790                     msgs, want);
2791                 goto msi;
2792         }
2793         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2794                 device_printf(adapter->dev,
2795                     "Using MSIX interrupts with %d vectors\n", msgs);
2796                 adapter->num_queues = queues;
2797                 return (msgs);
2798         }
2799         /*
2800         ** If MSIX alloc failed or provided us with
2801         ** less than needed, free and fall through to MSI
2802         */
2803         pci_release_msi(dev);
2804
2805 msi:
2806         if (adapter->msix_mem != NULL) {
2807                 bus_release_resource(dev, SYS_RES_MEMORY,
2808                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2809                 adapter->msix_mem = NULL;
2810         }
2811         msgs = 1;
2812         if (pci_alloc_msi(dev, &msgs) == 0) {
2813                 device_printf(adapter->dev," Using an MSI interrupt\n");
2814                 return (msgs);
2815         }
2816         device_printf(adapter->dev," Using a Legacy interrupt\n");
2817         return (0);
2818 }
2819
2820 /*********************************************************************
2821  *
2822  *  Initialize the DMA Coalescing feature
2823  *
2824  **********************************************************************/
2825 static void
2826 igb_init_dmac(struct adapter *adapter, u32 pba)
2827 {
2828         device_t        dev = adapter->dev;
2829         struct e1000_hw *hw = &adapter->hw;
2830         u32             dmac, reg = ~E1000_DMACR_DMAC_EN;
2831         u16             hwm;
2832
2833         if (hw->mac.type == e1000_i211)
2834                 return;
2835
2836         if (hw->mac.type > e1000_82580) {
2837
2838                 if (adapter->dmac == 0) { /* Disabling it */
2839                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2840                         return;
2841                 } else
2842                         device_printf(dev, "DMA Coalescing enabled\n");
2843
2844                 /* Set starting threshold */
2845                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2846
2847                 hwm = 64 * pba - adapter->max_frame_size / 16;
2848                 if (hwm < 64 * (pba - 6))
2849                         hwm = 64 * (pba - 6);
2850                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2851                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2852                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2853                     & E1000_FCRTC_RTH_COAL_MASK);
2854                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2855
2856
2857                 dmac = pba - adapter->max_frame_size / 512;
2858                 if (dmac < pba - 10)
2859                         dmac = pba - 10;
2860                 reg = E1000_READ_REG(hw, E1000_DMACR);
2861                 reg &= ~E1000_DMACR_DMACTHR_MASK;
2862                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2863                     & E1000_DMACR_DMACTHR_MASK);
2864
2865                 /* transition to L0x or L1 if available..*/
2866                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2867
2868                 /* Check for a 2.5Gb backplane connection
2869                 * before configuring the watchdog timer:
2870                 * the timer value counts in units of
2871                 * 12.8 usec on a 2.5Gb link and in units
2872                 * of 32 usec on all other connections.
2873                 */
2874                 if (hw->mac.type == e1000_i354) {
2875                         int status = E1000_READ_REG(hw, E1000_STATUS);
2876                         if ((status & E1000_STATUS_2P5_SKU) &&
2877                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2878                                 reg |= ((adapter->dmac * 5) >> 6);
2879                         else
2880                                 reg |= (adapter->dmac >> 5);
2881                 } else {
2882                         reg |= (adapter->dmac >> 5);
2883                 }
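                /*
                 * The two shifts above are just unit conversions
                 * (illustrative): (dmac * 5) >> 6 == dmac * 5/64 ==
                 * dmac / 12.8 for the 12.8 usec granularity, and
                 * dmac >> 5 == dmac / 32 for the 32 usec granularity.
                 */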
2884
2885                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2886
2887                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2888
2889                 /* Set the interval before transition */
2890                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2891                 if (hw->mac.type == e1000_i350)
2892                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
2893                 /*
2894                 ** On a 2.5Gb connection the TTLX unit is 0.4 usec,
2895                 ** so a 4 usec delay needs 4 / 0.4 = 10 (0xA).
2896                 */
2897                 if (hw->mac.type == e1000_i354) {
2898                         int status = E1000_READ_REG(hw, E1000_STATUS);
2899                         if ((status & E1000_STATUS_2P5_SKU) &&
2900                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2901                                 reg |= 0xA;
2902                         else
2903                                 reg |= 0x4;
2904                 } else {
2905                         reg |= 0x4;
2906                 }
2907
2908                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2909
2910                 /* Free space in the TX packet buffer needed to wake from DMA coalescing */
2911                 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2912                     (2 * adapter->max_frame_size)) >> 6);
2913
2914                 /* Make the low power state decision controlled by DMA coalescing */
2915                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2916                 reg &= ~E1000_PCIEMISC_LX_DECISION;
2917                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2918
2919         } else if (hw->mac.type == e1000_82580) {
2920                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2921                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2922                     reg & ~E1000_PCIEMISC_LX_DECISION);
2923                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
2924         }
2925 }
2926
2927
2928 /*********************************************************************
2929  *
2930  *  Set up a fresh starting state
2931  *
2932  **********************************************************************/
2933 static void
2934 igb_reset(struct adapter *adapter)
2935 {
2936         device_t        dev = adapter->dev;
2937         struct e1000_hw *hw = &adapter->hw;
2938         struct e1000_fc_info *fc = &hw->fc;
2939         struct ifnet    *ifp = adapter->ifp;
2940         u32             pba = 0;
2941         u16             hwm;
2942
2943         INIT_DEBUGOUT("igb_reset: begin");
2944
2945         /* Let the firmware know the OS is in control */
2946         igb_get_hw_control(adapter);
2947
2948         /*
2949          * Packet Buffer Allocation (PBA)
2950          * Writing PBA sets the receive portion of the buffer
2951          * the remainder is used for the transmit buffer.
2952          */
2953         switch (hw->mac.type) {
2954         case e1000_82575:
2955                 pba = E1000_PBA_32K;
2956                 break;
2957         case e1000_82576:
2958         case e1000_vfadapt:
2959                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2960                 pba &= E1000_RXPBS_SIZE_MASK_82576;
2961                 break;
2962         case e1000_82580:
2963         case e1000_i350:
2964         case e1000_i354:
2965         case e1000_vfadapt_i350:
2966                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2967                 pba = e1000_rxpbs_adjust_82580(pba);
2968                 break;
2969         case e1000_i210:
2970         case e1000_i211:
2971                 pba = E1000_PBA_34K;
2972                 break;
2973         default:
2973                 break;
2974         }
2975
2976         /* Special needs in case of Jumbo frames */
2977         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2978                 u32 tx_space, min_tx, min_rx;
2979                 pba = E1000_READ_REG(hw, E1000_PBA);
2980                 tx_space = pba >> 16;
2981                 pba &= 0xffff;
2982                 min_tx = (adapter->max_frame_size +
2983                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2984                 min_tx = roundup2(min_tx, 1024);
2985                 min_tx >>= 10;
2986                 min_rx = adapter->max_frame_size;
2987                 min_rx = roundup2(min_rx, 1024);
2988                 min_rx >>= 10;
2989                 if (tx_space < min_tx &&
2990                     ((min_tx - tx_space) < pba)) {
2991                         pba = pba - (min_tx - tx_space);
2992                         /*
2993                          * if short on rx space, rx wins
2994                          * and must trump tx adjustment
2995                          */
2996                         if (pba < min_rx)
2997                                 pba = min_rx;
2998                 }
2999                 E1000_WRITE_REG(hw, E1000_PBA, pba);
3000         }
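        /*
         * Worked example with hypothetical numbers: a max_frame_size of
         * 9216 gives min_tx = (9216 + 16 - 4) * 2 = 18456, rounded up to
         * 19456 and shifted down to 19KB; min_rx = 9216 is exactly 9KB.
         * If the TX share read back were only 16KB, pba would shrink by
         * 3KB to cover the TX deficit, unless that pushed RX below 9KB,
         * in which case RX wins.
         */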
3001
3002         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3003
3004         /*
3005          * These parameters control the automatic generation (Tx) and
3006          * response (Rx) to Ethernet PAUSE frames.
3007          * - High water mark should allow for at least two frames to be
3008          *   received after sending an XOFF.
3009          * - Low water mark works best when it is very near the high water mark.
3010          *   This allows the receiver to restart by sending XON when it has
3011          *   drained a bit.
3012          */
3013         hwm = min(((pba << 10) * 9 / 10),
3014             ((pba << 10) - 2 * adapter->max_frame_size));
3015
3016         if (hw->mac.type < e1000_82576) {
3017                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3018                 fc->low_water = fc->high_water - 8;
3019         } else {
3020                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3021                 fc->low_water = fc->high_water - 16;
3022         }
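        /*
         * Example with assumed values: pba = 34 (34KB, i210) and a
         * 1522-byte max frame give hwm = min(34816 * 9 / 10,
         * 34816 - 2 * 1522) = min(31334, 31772) = 31334; at 16-byte
         * granularity high_water = 31334 & 0xFFF0 = 31328 and
         * low_water = 31312.
         */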
3023
3024         fc->pause_time = IGB_FC_PAUSE_TIME;
3025         fc->send_xon = TRUE;
3026         if (adapter->fc)
3027                 fc->requested_mode = adapter->fc;
3028         else
3029                 fc->requested_mode = e1000_fc_default;
3030
3031         /* Issue a global reset */
3032         e1000_reset_hw(hw);
3033         E1000_WRITE_REG(hw, E1000_WUC, 0);
3034
3035         /* Reset for AutoMediaDetect */
3036         if (adapter->flags & IGB_MEDIA_RESET) {
3037                 e1000_setup_init_funcs(hw, TRUE);
3038                 e1000_get_bus_info(hw);
3039                 adapter->flags &= ~IGB_MEDIA_RESET;
3040         }
3041
3042         if (e1000_init_hw(hw) < 0)
3043                 device_printf(dev, "Hardware Initialization Failed\n");
3044
3045         /* Setup DMA Coalescing */
3046         igb_init_dmac(adapter, pba);
3047
3048         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3049         e1000_get_phy_info(hw);
3050         e1000_check_for_link(hw);
3051         return;
3052 }
3053
3054 /*********************************************************************
3055  *
3056  *  Setup networking device structure and register an interface.
3057  *
3058  **********************************************************************/
3059 static int
3060 igb_setup_interface(device_t dev, struct adapter *adapter)
3061 {
3062         struct ifnet   *ifp;
3063
3064         INIT_DEBUGOUT("igb_setup_interface: begin");
3065
3066         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3067         if (ifp == NULL) {
3068                 device_printf(dev, "cannot allocate ifnet structure\n");
3069                 return (-1);
3070         }
3071         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3072         ifp->if_init =  igb_init;
3073         ifp->if_softc = adapter;
3074         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3075         ifp->if_ioctl = igb_ioctl;
3076
3077         /* TSO parameters */
3078         ifp->if_hw_tsomax = IP_MAXPACKET;
3079         ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
3080         ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
3081
3082 #ifndef IGB_LEGACY_TX
3083         ifp->if_transmit = igb_mq_start;
3084         ifp->if_qflush = igb_qflush;
3085 #else
3086         ifp->if_start = igb_start;
3087         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3088         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3089         IFQ_SET_READY(&ifp->if_snd);
3090 #endif
3091
3092         ether_ifattach(ifp, adapter->hw.mac.addr);
3093
3094         ifp->if_capabilities = ifp->if_capenable = 0;
3095
3096         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3097 #if __FreeBSD_version >= 1000000
3098         ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
3099 #endif
3100         ifp->if_capabilities |= IFCAP_TSO;
3101         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3102         ifp->if_capenable = ifp->if_capabilities;
3103
3104         /* Advertise LRO capability, but leave it disabled by default */
3105         ifp->if_capabilities |= IFCAP_LRO;
3106
3107 #ifdef DEVICE_POLLING
3108         ifp->if_capabilities |= IFCAP_POLLING;
3109 #endif
3110
3111         /*
3112          * Tell the upper layer(s) we
3113          * support full VLAN capability.
3114          */
3115         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3116         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3117                              |  IFCAP_VLAN_HWTSO
3118                              |  IFCAP_VLAN_MTU;
3119         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3120                           |  IFCAP_VLAN_HWTSO
3121                           |  IFCAP_VLAN_MTU;
3122
3123         /*
3124         ** Don't enable this by default: if vlans are
3125         ** created on another pseudo device (e.g. lagg),
3126         ** vlan events are not passed through, breaking
3127         ** operation, though with HW FILTER off it works.
3128         ** If using vlans directly on the igb interface,
3129         ** enable this for full hardware tag filtering.
3130         */
3131         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
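        /*
        ** It can still be toggled administratively at runtime; with
        ** the stock ifconfig(8) capability names this would be e.g.
        ** "ifconfig igb0 vlanhwfilter" (illustrative usage).
        */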
3132
3133         /*
3134          * Specify the media types supported by this adapter and register
3135          * callbacks to update media and link information
3136          */
3137         ifmedia_init(&adapter->media, IFM_IMASK,
3138             igb_media_change, igb_media_status);
3139         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3140             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3141                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3142                             0, NULL);
3143                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3144         } else {
3145                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3146                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3147                             0, NULL);
3148                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3149                             0, NULL);
3150                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3151                             0, NULL);
3152                 if (adapter->hw.phy.type != e1000_phy_ife) {
3153                         ifmedia_add(&adapter->media,
3154                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3155                         ifmedia_add(&adapter->media,
3156                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3157                 }
3158         }
3159         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3160         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3161         return (0);
3162 }
3163
3164
3165 /*
3166  * Manage DMA'able memory.
3167  */
3168 static void
3169 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3170 {
3171         if (error)
3172                 return;
3173         *(bus_addr_t *) arg = segs[0].ds_addr;
3174 }
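
/*
 * A note on the callback contract (descriptive only): bus_dmamap_load()
 * invokes igb_dmamap_cb() with the segment array once the mapping is
 * set up, and since every tag here is created with nsegments = 1, only
 * segs[0] is meaningful. On error the callback leaves the destination
 * untouched, which is why igb_dma_malloc() below seeds dma_paddr with 0
 * and treats a still-zero address as a failure.
 */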
3175
3176 static int
3177 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3178         struct igb_dma_alloc *dma, int mapflags)
3179 {
3180         int error;
3181
3182         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3183                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3184                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3185                                 BUS_SPACE_MAXADDR,      /* highaddr */
3186                                 NULL, NULL,             /* filter, filterarg */
3187                                 size,                   /* maxsize */
3188                                 1,                      /* nsegments */
3189                                 size,                   /* maxsegsize */
3190                                 0,                      /* flags */
3191                                 NULL,                   /* lockfunc */
3192                                 NULL,                   /* lockarg */
3193                                 &dma->dma_tag);
3194         if (error) {
3195                 device_printf(adapter->dev,
3196                     "%s: bus_dma_tag_create failed: %d\n",
3197                     __func__, error);
3198                 goto fail_0;
3199         }
3200
3201         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3202             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3203         if (error) {
3204                 device_printf(adapter->dev,
3205                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3206                     __func__, (uintmax_t)size, error);
3207                 goto fail_2;
3208         }
3209
3210         dma->dma_paddr = 0;
3211         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3212             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3213         if (error || dma->dma_paddr == 0) {
3214                 device_printf(adapter->dev,
3215                     "%s: bus_dmamap_load failed: %d\n",
3216                     __func__, error);
3217                 goto fail_3;
3218         }
3219
3220         return (0);
3221
3222 fail_3:
3223         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3224 fail_2:
3225         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3226         bus_dma_tag_destroy(dma->dma_tag);
3227 fail_0:
3228         dma->dma_tag = NULL;
3229
3230         return (error);
3231 }
3232
3233 static void
3234 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3235 {
3236         if (dma->dma_tag == NULL)
3237                 return;
3238         if (dma->dma_paddr != 0) {
3239                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3240                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3241                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3242                 dma->dma_paddr = 0;
3243         }
3244         if (dma->dma_vaddr != NULL) {
3245                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3246                 dma->dma_vaddr = NULL;
3247         }
3248         bus_dma_tag_destroy(dma->dma_tag);
3249         dma->dma_tag = NULL;
3250 }
3251
3252
3253 /*********************************************************************
3254  *
3255  *  Allocate memory for the transmit and receive rings, and then
3256  *  the descriptors associated with each, called only once at attach.
3257  *
3258  **********************************************************************/
3259 static int
3260 igb_allocate_queues(struct adapter *adapter)
3261 {
3262         device_t dev = adapter->dev;
3263         struct igb_queue        *que = NULL;
3264         struct tx_ring          *txr = NULL;
3265         struct rx_ring          *rxr = NULL;
3266         int rsize, tsize, error = E1000_SUCCESS;
3267         int txconf = 0, rxconf = 0;
3268
3269         /* First allocate the top level queue structs */
3270         if (!(adapter->queues =
3271             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3272             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3273                 device_printf(dev, "Unable to allocate queue memory\n");
3274                 error = ENOMEM;
3275                 goto fail;
3276         }
3277
3278         /* Next allocate the TX ring struct memory */
3279         if (!(adapter->tx_rings =
3280             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3281             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3282                 device_printf(dev, "Unable to allocate TX ring memory\n");
3283                 error = ENOMEM;
3284                 goto tx_fail;
3285         }
3286
3287         /* Now allocate the RX */
3288         if (!(adapter->rx_rings =
3289             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3290             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3291                 device_printf(dev, "Unable to allocate RX ring memory\n");
3292                 error = ENOMEM;
3293                 goto rx_fail;
3294         }
3295
3296         tsize = roundup2(adapter->num_tx_desc *
3297             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3298         /*
3299          * Now set up the TX queues, txconf is needed to handle the
3300          * possibility that things fail midcourse and we need to
3301          * undo memory gracefully
3302          */ 
3303         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3304                 /* Set up some basics */
3305                 txr = &adapter->tx_rings[i];
3306                 txr->adapter = adapter;
3307                 txr->me = i;
3308                 txr->num_desc = adapter->num_tx_desc;
3309
3310                 /* Initialize the TX lock */
3311                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3312                     device_get_nameunit(dev), txr->me);
3313                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3314
3315                 if (igb_dma_malloc(adapter, tsize,
3316                         &txr->txdma, BUS_DMA_NOWAIT)) {
3317                         device_printf(dev,
3318                             "Unable to allocate TX Descriptor memory\n");
3319                         error = ENOMEM;
3320                         goto err_tx_desc;
3321                 }
3322                 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3323                 bzero((void *)txr->tx_base, tsize);
3324
3325                 /* Now allocate transmit buffers for the ring */
3326                 if (igb_allocate_transmit_buffers(txr)) {
3327                         device_printf(dev,
3328                             "Critical Failure setting up transmit buffers\n");
3329                         error = ENOMEM;
3330                         goto err_tx_desc;
3331                 }
3332 #ifndef IGB_LEGACY_TX
3333                 /* Allocate a buf ring */
3334                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3335                     M_WAITOK, &txr->tx_mtx);
3336 #endif
3337         }
3338
3339         /*
3340          * Next the RX queues...
3341          */ 
3342         rsize = roundup2(adapter->num_rx_desc *
3343             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3344         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3345                 rxr = &adapter->rx_rings[i];
3346                 rxr->adapter = adapter;
3347                 rxr->me = i;
3348
3349                 /* Initialize the RX lock */
3350                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3351                     device_get_nameunit(dev), rxr->me);
3352                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3353
3354                 if (igb_dma_malloc(adapter, rsize,
3355                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3356                         device_printf(dev,
3357                             "Unable to allocate RX Descriptor memory\n");
3358                         error = ENOMEM;
3359                         goto err_rx_desc;
3360                 }
3361                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3362                 bzero((void *)rxr->rx_base, rsize);
3363
3364                 /* Allocate receive buffers for the ring */
3365                 if (igb_allocate_receive_buffers(rxr)) {
3366                         device_printf(dev,
3367                             "Critical Failure setting up receive buffers\n");
3368                         error = ENOMEM;
3369                         goto err_rx_desc;
3370                 }
3371         }
3372
3373         /*
3374         ** Finally set up the queue holding structs
3375         */
3376         for (int i = 0; i < adapter->num_queues; i++) {
3377                 que = &adapter->queues[i];
3378                 que->adapter = adapter;
3379                 que->txr = &adapter->tx_rings[i];
3380                 que->rxr = &adapter->rx_rings[i];
3381         }
3382
3383         return (0);
3384
3385 err_rx_desc:
3386         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3387                 igb_dma_free(adapter, &rxr->rxdma);
3388 err_tx_desc:
3389         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
3390                 igb_dma_free(adapter, &txr->txdma);
3391 #ifndef IGB_LEGACY_TX
3392                 if (txr->br != NULL)
3393                         buf_ring_free(txr->br, M_DEVBUF);
3394 #endif
3395         }
3396         free(adapter->rx_rings, M_DEVBUF);
3397 rx_fail:
3396         free(adapter->tx_rings, M_DEVBUF);
3397 tx_fail:
3398         free(adapter->queues, M_DEVBUF);
3399 fail:
3400         return (error);
3401 }
3402
3403 /*********************************************************************
3404  *
3405  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3406  *  the information needed to transmit a packet on the wire. This is
3407  *  called only once at attach, setup is done every reset.
3408  *
3409  **********************************************************************/
3410 static int
3411 igb_allocate_transmit_buffers(struct tx_ring *txr)
3412 {
3413         struct adapter *adapter = txr->adapter;
3414         device_t dev = adapter->dev;
3415         struct igb_tx_buf *txbuf;
3416         int error, i;
3417
3418         /*
3419          * Setup DMA descriptor areas.
3420          */
3421         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3422                                1, 0,                    /* alignment, bounds */
3423                                BUS_SPACE_MAXADDR,       /* lowaddr */
3424                                BUS_SPACE_MAXADDR,       /* highaddr */
3425                                NULL, NULL,              /* filter, filterarg */
3426                                IGB_TSO_SIZE,            /* maxsize */
3427                                IGB_MAX_SCATTER,         /* nsegments */
3428                                PAGE_SIZE,               /* maxsegsize */
3429                                0,                       /* flags */
3430                                NULL,                    /* lockfunc */
3431                                NULL,                    /* lockfuncarg */
3432                                &txr->txtag))) {
3433                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3434                 goto fail;
3435         }
3436
3437         if (!(txr->tx_buffers =
3438             (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3439             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3440                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3441                 error = ENOMEM;
3442                 goto fail;
3443         }
3444
3445         /* Create the descriptor buffer dma maps */
3446         txbuf = txr->tx_buffers;
3447         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3448                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3449                 if (error != 0) {
3450                         device_printf(dev, "Unable to create TX DMA map\n");
3451                         goto fail;
3452                 }
3453         }
3454
3455         return (0);
3456 fail:
3457         /* Free everything; this handles the case where we failed midway */
3458         igb_free_transmit_structures(adapter);
3459         return (error);
3460 }
3461
3462 /*********************************************************************
3463  *
3464  *  Initialize a transmit ring.
3465  *
3466  **********************************************************************/
3467 static void
3468 igb_setup_transmit_ring(struct tx_ring *txr)
3469 {
3470         struct adapter *adapter = txr->adapter;
3471         struct igb_tx_buf *txbuf;
3472         int i;
3473 #ifdef DEV_NETMAP
3474         struct netmap_adapter *na = NA(adapter->ifp);
3475         struct netmap_slot *slot;
3476 #endif /* DEV_NETMAP */
3477
3478         /* Clear the old descriptor contents */
3479         IGB_TX_LOCK(txr);
3480 #ifdef DEV_NETMAP
3481         slot = netmap_reset(na, NR_TX, txr->me, 0);
3482 #endif /* DEV_NETMAP */
3483         bzero((void *)txr->tx_base,
3484               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3485         /* Reset indices */
3486         txr->next_avail_desc = 0;
3487         txr->next_to_clean = 0;
3488
3489         /* Free any existing tx buffers. */
3490         txbuf = txr->tx_buffers;
3491         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3492                 if (txbuf->m_head != NULL) {
3493                         bus_dmamap_sync(txr->txtag, txbuf->map,
3494                             BUS_DMASYNC_POSTWRITE);
3495                         bus_dmamap_unload(txr->txtag, txbuf->map);
3496                         m_freem(txbuf->m_head);
3497                         txbuf->m_head = NULL;
3498                 }
3499 #ifdef DEV_NETMAP
3500                 if (slot) {
3501                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3502                         /* no need to set the address */
3503                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3504                 }
3505 #endif /* DEV_NETMAP */
3506                 /* clear the watch index */
3507                 txbuf->eop = NULL;
3508         }
3509
3510         /* Set number of descriptors available */
3511         txr->tx_avail = adapter->num_tx_desc;
3512
3513         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3514             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3515         IGB_TX_UNLOCK(txr);
3516 }
3517
3518 /*********************************************************************
3519  *
3520  *  Initialize all transmit rings.
3521  *
3522  **********************************************************************/
3523 static void
3524 igb_setup_transmit_structures(struct adapter *adapter)
3525 {
3526         struct tx_ring *txr = adapter->tx_rings;
3527
3528         for (int i = 0; i < adapter->num_queues; i++, txr++)
3529                 igb_setup_transmit_ring(txr);
3530
3531         return;
3532 }
3533
3534 /*********************************************************************
3535  *
3536  *  Enable transmit unit.
3537  *
3538  **********************************************************************/
3539 static void
3540 igb_initialize_transmit_units(struct adapter *adapter)
3541 {
3542         struct tx_ring  *txr = adapter->tx_rings;
3543         struct e1000_hw *hw = &adapter->hw;
3544         u32             tctl, txdctl;
3545
3546         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3547         tctl = txdctl = 0;
3548
3549         /* Setup the Tx Descriptor Rings */
3550         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3551                 u64 bus_addr = txr->txdma.dma_paddr;
3552
3553                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3554                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3555                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3556                     (uint32_t)(bus_addr >> 32));
3557                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3558                     (uint32_t)bus_addr);
3559
3560                 /* Setup the HW Tx Head and Tail descriptor pointers */
3561                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3562                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3563
3564                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3565                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3566                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3567
3568                 txr->queue_status = IGB_QUEUE_IDLE;
3569
3570                 txdctl |= IGB_TX_PTHRESH;
3571                 txdctl |= IGB_TX_HTHRESH << 8;
3572                 txdctl |= IGB_TX_WTHRESH << 16;
3573                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3574                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3575         }
3576
3577         if (adapter->vf_ifp)
3578                 return;
3579
3580         e1000_config_collision_dist(hw);
3581
3582         /* Program the Transmit Control Register */
3583         tctl = E1000_READ_REG(hw, E1000_TCTL);
3584         tctl &= ~E1000_TCTL_CT;
3585         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3586                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3587
3588         /* This write will effectively turn on the transmit unit. */
3589         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3590 }
3591
3592 /*********************************************************************
3593  *
3594  *  Free all transmit rings.
3595  *
3596  **********************************************************************/
3597 static void
3598 igb_free_transmit_structures(struct adapter *adapter)
3599 {
3600         struct tx_ring *txr = adapter->tx_rings;
3601
3602         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3603                 IGB_TX_LOCK(txr);
3604                 igb_free_transmit_buffers(txr);
3605                 igb_dma_free(adapter, &txr->txdma);
3606                 IGB_TX_UNLOCK(txr);
3607                 IGB_TX_LOCK_DESTROY(txr);
3608         }
3609         free(adapter->tx_rings, M_DEVBUF);
3610 }
3611
3612 /*********************************************************************
3613  *
3614  *  Free transmit ring related data structures.
3615  *
3616  **********************************************************************/
3617 static void
3618 igb_free_transmit_buffers(struct tx_ring *txr)
3619 {
3620         struct adapter *adapter = txr->adapter;
3621         struct igb_tx_buf *tx_buffer;
3622         int             i;
3623
3624         INIT_DEBUGOUT("free_transmit_ring: begin");
3625
3626         if (txr->tx_buffers == NULL)
3627                 return;
3628
3629         tx_buffer = txr->tx_buffers;
3630         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3631                 if (tx_buffer->m_head != NULL) {
3632                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3633                             BUS_DMASYNC_POSTWRITE);
3634                         bus_dmamap_unload(txr->txtag,
3635                             tx_buffer->map);
3636                         m_freem(tx_buffer->m_head);
3637                         tx_buffer->m_head = NULL;
3638                         if (tx_buffer->map != NULL) {
3639                                 bus_dmamap_destroy(txr->txtag,
3640                                     tx_buffer->map);
3641                                 tx_buffer->map = NULL;
3642                         }
3643                 } else if (tx_buffer->map != NULL) {
3644                         bus_dmamap_unload(txr->txtag,
3645                             tx_buffer->map);
3646                         bus_dmamap_destroy(txr->txtag,
3647                             tx_buffer->map);
3648                         tx_buffer->map = NULL;
3649                 }
3650         }
3651 #ifndef IGB_LEGACY_TX
3652         if (txr->br != NULL) {
3653                 buf_ring_free(txr->br, M_DEVBUF);
3654                 txr->br = NULL;
3655         }
3656 #endif
3655         if (txr->tx_buffers != NULL) {
3656                 free(txr->tx_buffers, M_DEVBUF);
3657                 txr->tx_buffers = NULL;
3658         }
3659         if (txr->txtag != NULL) {
3660                 bus_dma_tag_destroy(txr->txtag);
3661                 txr->txtag = NULL;
3662         }
3663         return;
3664 }
3665
3666 /**********************************************************************
3667  *
3668  *  Setup work for hardware segmentation offload (TSO) on
3669  *  adapters using advanced tx descriptors
3670  *
3671  **********************************************************************/
3672 static int
3673 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3674     u32 *cmd_type_len, u32 *olinfo_status)
3675 {
3676         struct adapter *adapter = txr->adapter;
3677         struct e1000_adv_tx_context_desc *TXD;
3678         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3679         u32 mss_l4len_idx = 0, paylen;
3680         u16 vtag = 0, eh_type;
3681         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3682         struct ether_vlan_header *eh;
3683 #ifdef INET6
3684         struct ip6_hdr *ip6;
3685 #endif
3686 #ifdef INET
3687         struct ip *ip;
3688 #endif
3689         struct tcphdr *th;
3690
3691
3692         /*
3693          * Determine where frame payload starts.
3694          * Jump over vlan headers if already present
3695          */
3696         eh = mtod(mp, struct ether_vlan_header *);
3697         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3698                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3699                 eh_type = eh->evl_proto;
3700         } else {
3701                 ehdrlen = ETHER_HDR_LEN;
3702                 eh_type = eh->evl_encap_proto;
3703         }
3704
3705         switch (ntohs(eh_type)) {
3706 #ifdef INET6
3707         case ETHERTYPE_IPV6:
3708                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3709                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3710                 if (ip6->ip6_nxt != IPPROTO_TCP)
3711                         return (ENXIO);
3712                 ip_hlen = sizeof(struct ip6_hdr);
3714                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3715                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3716                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3717                 break;
3718 #endif
3719 #ifdef INET
3720         case ETHERTYPE_IP:
3721                 ip = (struct ip *)(mp->m_data + ehdrlen);
3722                 if (ip->ip_p != IPPROTO_TCP)
3723                         return (ENXIO);
3724                 ip->ip_sum = 0;
3725                 ip_hlen = ip->ip_hl << 2;
3726                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3727                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3728                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3729                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3730                 /* Tell transmit desc to also do IPv4 checksum. */
3731                 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3732                 break;
3733 #endif
3734         default:
3735                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3736                     __func__, ntohs(eh_type));
3737                 break;
3738         }
3739
3740         ctxd = txr->next_avail_desc;
3741         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3742
3743         tcp_hlen = th->th_off << 2;
3744
3745         /* This is used in the transmit desc in encap */
3746         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
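        /*
         * e.g. (illustrative): a full 65535-byte IPv4 TSO packet with a
         * 14-byte Ethernet header, 20-byte IP header and 20-byte TCP
         * header gives paylen = 65535 - 14 - 20 - 20 = 65481.
         */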
3747
3748         /* VLAN MACLEN IPLEN */
3749         if (mp->m_flags & M_VLANTAG) {
3750                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3751                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3752         }
3753
3754         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3755         vlan_macip_lens |= ip_hlen;
3756         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3757
3758         /* ADV DTYPE TUCMD */
3759         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3760         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3761         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3762
3763         /* MSS L4LEN IDX */
3764         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3765         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3766         /* 82575 needs the queue index added */
3767         if (adapter->hw.mac.type == e1000_82575)
3768                 mss_l4len_idx |= txr->me << 4;
3769         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3770
3771         TXD->seqnum_seed = htole32(0);
3772
3773         if (++ctxd == txr->num_desc)
3774                 ctxd = 0;
3775
3776         txr->tx_avail--;
3777         txr->next_avail_desc = ctxd;
3778         *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3779         *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3780         *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3781         ++txr->tso_tx;
3782         return (0);
3783 }
3784
3785 /*********************************************************************
3786  *
3787  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3788  *
3789  **********************************************************************/
3790
3791 static int
3792 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3793     u32 *cmd_type_len, u32 *olinfo_status)
3794 {
3795         struct e1000_adv_tx_context_desc *TXD;
3796         struct adapter *adapter = txr->adapter;
3797         struct ether_vlan_header *eh;
3798         struct ip *ip;
3799         struct ip6_hdr *ip6;
3800         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3801         int     ehdrlen, ip_hlen = 0;
3802         u16     etype;
3803         u8      ipproto = 0;
3804         int     offload = TRUE;
3805         int     ctxd = txr->next_avail_desc;
3806         u16     vtag = 0;
3807
3808         /* First check if TSO is to be used */
3809         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3810                 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3811
3812         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3813                 offload = FALSE;
3814
3815         /* Indicate the whole packet as payload when not doing TSO */
3816         *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3817
3818         /* Now ready a context descriptor */
3819         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3820
3821         /*
3822         ** In advanced descriptors the vlan tag must 
3823         ** be placed into the context descriptor. Hence
3824         ** we need to make one even if not doing offloads.
3825         */
3826         if (mp->m_flags & M_VLANTAG) {
3827                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3828                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3829         } else if (offload == FALSE) /* ... no offload to do */
3830                 return (0);
3831
3832         /*
3833          * Determine where frame payload starts.
3834          * Jump over vlan headers if already present,
3835          * helpful for QinQ too.
3836          */
3837         eh = mtod(mp, struct ether_vlan_header *);
3838         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3839                 etype = ntohs(eh->evl_proto);
3840                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3841         } else {
3842                 etype = ntohs(eh->evl_encap_proto);
3843                 ehdrlen = ETHER_HDR_LEN;
3844         }
3845
3846         /* Set the ether header length */
3847         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3848
3849         switch (etype) {
3850                 case ETHERTYPE_IP:
3851                         ip = (struct ip *)(mp->m_data + ehdrlen);
3852                         ip_hlen = ip->ip_hl << 2;
3853                         ipproto = ip->ip_p;
3854                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3855                         break;
3856                 case ETHERTYPE_IPV6:
3857                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3858                         ip_hlen = sizeof(struct ip6_hdr);
3859                         /* XXX-BZ this will go badly in case of ext hdrs. */
3860                         ipproto = ip6->ip6_nxt;
3861                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3862                         break;
3863                 default:
3864                         offload = FALSE;
3865                         break;
3866         }
3867
3868         vlan_macip_lens |= ip_hlen;
3869         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3870
3871         switch (ipproto) {
3872                 case IPPROTO_TCP:
3873 #if __FreeBSD_version >= 1000000
3874                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
3875 #else
3876                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3877 #endif
3878                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3879                         break;
3880                 case IPPROTO_UDP:
3881 #if __FreeBSD_version >= 1000000
3882                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
3883 #else
3884                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3885 #endif
3886                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3887                         break;
3888
3889 #if __FreeBSD_version >= 800000
3890                 case IPPROTO_SCTP:
3891 #if __FreeBSD_version >= 1000000
3892                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
3893 #else
3894                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3895 #endif
3896                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3897                         break;
3898 #endif
3899                 default:
3900                         offload = FALSE;
3901                         break;
3902         }
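        /*
         * e.g. (illustrative): an IPv4/TCP packet with CSUM_IP_TCP set
         * takes the ETHERTYPE_IP and IPPROTO_TCP arms above, so the
         * descriptor carries TUCMD_IPV4 | L4T_TCP (plus DEXT/CTXT) and
         * offload stays TRUE, which sets POPTS_TXSM just below.
         */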
3903
3904         if (offload) /* For the TX descriptor setup */
3905                 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3906
3907         /* 82575 needs the queue index added */
3908         if (adapter->hw.mac.type == e1000_82575)
3909                 mss_l4len_idx = txr->me << 4;
3910
3911         /* Now copy bits into descriptor */
3912         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3913         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3914         TXD->seqnum_seed = htole32(0);
3915         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3916
3917         /* We've consumed the first desc, adjust counters */
3918         if (++ctxd == txr->num_desc)
3919                 ctxd = 0;
3920         txr->next_avail_desc = ctxd;
3921         --txr->tx_avail;
3922
3923         return (0);
3924 }
3925
3926 /**********************************************************************
3927  *
3928  *  Examine each tx_buffer in the used queue. If the hardware is done
3929  *  processing the packet then free associated resources. The
3930  *  tx_buffer is put back on the free queue.
3931  *
3932  *  A TRUE return means there's work in the ring to clean; FALSE, it's empty.
3933  **********************************************************************/
3934 static bool
3935 igb_txeof(struct tx_ring *txr)
3936 {
3937         struct adapter          *adapter = txr->adapter;
3938         struct ifnet            *ifp = adapter->ifp;
3939         u32                     work, processed = 0;
3940         int                     limit = adapter->tx_process_limit;
3941         struct igb_tx_buf       *buf;
3942         union e1000_adv_tx_desc *txd;
3943
3944         mtx_assert(&txr->tx_mtx, MA_OWNED);
3945
3946 #ifdef DEV_NETMAP
3947         if (netmap_tx_irq(ifp, txr->me))
3948                 return (FALSE);
3949 #endif /* DEV_NETMAP */
3950
3951         if (txr->tx_avail == txr->num_desc) {
3952                 txr->queue_status = IGB_QUEUE_IDLE;
3953                 return (FALSE);
3954         }
3955
3956         /* Get work starting point */
3957         work = txr->next_to_clean;
3958         buf = &txr->tx_buffers[work];
3959         txd = &txr->tx_base[work];
3960         work -= txr->num_desc; /* The distance to ring end */
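        /*
         * e.g. (illustrative): with num_desc = 1024 and next_to_clean =
         * 1000, work becomes (u32)-24; it is incremented once per
         * descriptor cleaned and reaches 0 exactly at the ring end,
         * which is the "!work" wrap test below.
         */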
3961         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3962             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3963         do {
3964                 union e1000_adv_tx_desc *eop = buf->eop;
3965                 if (eop == NULL) /* No work */
3966                         break;
3967
3968                 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
3969                         break;  /* I/O not complete */
3970
3971                 if (buf->m_head) {
3972                         txr->bytes +=
3973                             buf->m_head->m_pkthdr.len;
3974                         bus_dmamap_sync(txr->txtag,
3975                             buf->map,
3976                             BUS_DMASYNC_POSTWRITE);
3977                         bus_dmamap_unload(txr->txtag,
3978                             buf->map);
3979                         m_freem(buf->m_head);
3980                         buf->m_head = NULL;
3981                 }
3982                 buf->eop = NULL;
3983                 ++txr->tx_avail;
3984
3985                 /* We clean the range if multi segment */
3986                 while (txd != eop) {
3987                         ++txd;
3988                         ++buf;
3989                         ++work;
3990                         /* wrap the ring? */
3991                         if (__predict_false(!work)) {
3992                                 work -= txr->num_desc;
3993                                 buf = txr->tx_buffers;
3994                                 txd = txr->tx_base;
3995                         }
3996                         if (buf->m_head) {
3997                                 txr->bytes +=
3998                                     buf->m_head->m_pkthdr.len;
3999                                 bus_dmamap_sync(txr->txtag,
4000                                     buf->map,
4001                                     BUS_DMASYNC_POSTWRITE);
4002                                 bus_dmamap_unload(txr->txtag,
4003                                     buf->map);
4004                                 m_freem(buf->m_head);
4005                                 buf->m_head = NULL;
4006                         }
4007                         ++txr->tx_avail;
4008                         buf->eop = NULL;
4009
4010                 }
4011                 ++txr->packets;
4012                 ++processed;
4013                 ++ifp->if_opackets;
4014                 txr->watchdog_time = ticks;
4015
4016                 /* Try the next packet */
4017                 ++txd;
4018                 ++buf;
4019                 ++work;
4020                 /* reset with a wrap */
4021                 if (__predict_false(!work)) {
4022                         work -= txr->num_desc;
4023                         buf = txr->tx_buffers;
4024                         txd = txr->tx_base;
4025                 }
4026                 prefetch(txd);
4027         } while (__predict_true(--limit));
4028
4029         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4030             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4031
4032         work += txr->num_desc;
4033         txr->next_to_clean = work;
4034
4035         /*
4036         ** Watchdog calculation, we know there's
4037         ** work outstanding or the first return
4038         ** would have been taken, so none processed
4039         ** for too long indicates a hang.
4040         */
4041         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4042                 txr->queue_status |= IGB_QUEUE_HUNG;
4043
4044         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4045                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;       
4046
4047         if (txr->tx_avail == txr->num_desc) {
4048                 txr->queue_status = IGB_QUEUE_IDLE;
4049                 return (FALSE);
4050         }
4051
4052         return (TRUE);
4053 }
4054
4055 /*********************************************************************
4056  *
4057  *  Refresh mbuf buffers for RX descriptor rings
4058  *   - keeps its own state, so discards due to resource
4059  *     exhaustion are unnecessary; if an mbuf cannot be
4060  *     obtained it just returns, keeping its placeholder,
4061  *     and can simply be called again later to retry.
4062  *
4063  **********************************************************************/
4064 static void
4065 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4066 {
4067         struct adapter          *adapter = rxr->adapter;
4068         bus_dma_segment_t       hseg[1];
4069         bus_dma_segment_t       pseg[1];
4070         struct igb_rx_buf       *rxbuf;
4071         struct mbuf             *mh, *mp;
4072         int                     i, j, nsegs, error;
4073         bool                    refreshed = FALSE;
4074
4075         i = j = rxr->next_to_refresh;
4076         /*
4077         ** Get one descriptor beyond
4078         ** our work mark to control
4079         ** the loop.
4080         */
4081         if (++j == adapter->num_rx_desc)
4082                 j = 0;
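        /*
        ** e.g. (illustrative): with 1024 descriptors and
        ** next_to_refresh == 1023, i starts at 1023 and j wraps to 0;
        ** refilling stops one slot short of 'limit', so the tail we
        ** write to RDT never catches the descriptor the hardware will
        ** hand back next.
        */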
4083
4084         while (j != limit) {
4085                 rxbuf = &rxr->rx_buffers[i];
4086                 /* No hdr mbuf used with header split off */
4087                 if (rxr->hdr_split == FALSE)
4088                         goto no_split;
4089                 if (rxbuf->m_head == NULL) {
4090                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4091                         if (mh == NULL)
4092                                 goto update;
4093                 } else
4094                         mh = rxbuf->m_head;
4095
4096                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4098                 mh->m_flags |= M_PKTHDR;
4099                 /* Get the memory mapping */
4100                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4101                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4102                 if (error != 0) {
4103                         printf("Refresh mbufs: hdr dmamap load"
4104                             " failure - %d\n", error);
4105                         m_free(mh);
4106                         rxbuf->m_head = NULL;
4107                         goto update;
4108                 }
4109                 rxbuf->m_head = mh;
4110                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4111                     BUS_DMASYNC_PREREAD);
4112                 rxr->rx_base[i].read.hdr_addr =
4113                     htole64(hseg[0].ds_addr);
4114 no_split:
4115                 if (rxbuf->m_pack == NULL) {
4116                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4117                             M_PKTHDR, adapter->rx_mbuf_sz);
4118                         if (mp == NULL)
4119                                 goto update;
4120                 } else
4121                         mp = rxbuf->m_pack;
4122
4123                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4124                 /* Get the memory mapping */
4125                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4126                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4127                 if (error != 0) {
4128                         printf("Refresh mbufs: payload dmamap load"
4129                             " failure - %d\n", error);
4130                         m_free(mp);
4131                         rxbuf->m_pack = NULL;
4132                         goto update;
4133                 }
4134                 rxbuf->m_pack = mp;
4135                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4136                     BUS_DMASYNC_PREREAD);
4137                 rxr->rx_base[i].read.pkt_addr =
4138                     htole64(pseg[0].ds_addr);
4139                 refreshed = TRUE; /* I feel wefreshed :) */
4140
4141                 i = j; /* our next is precalculated */
4142                 rxr->next_to_refresh = i;
4143                 if (++j == adapter->num_rx_desc)
4144                         j = 0;
4145         }
4146 update:
4147         if (refreshed) /* update tail */
4148                 E1000_WRITE_REG(&adapter->hw,
4149                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4150         return;
4151 }
4152
4153
4154 /*********************************************************************
4155  *
4156  *  Allocate memory for rx_buffer structures. Since we use one
4157  *  rx_buffer per received packet, the maximum number of rx_buffer's
4158  *  that we'll need is equal to the number of receive descriptors
4159  *  that we've allocated.
4160  *
4161  **********************************************************************/
4162 static int
4163 igb_allocate_receive_buffers(struct rx_ring *rxr)
4164 {
4165         struct  adapter         *adapter = rxr->adapter;
4166         device_t                dev = adapter->dev;
4167         struct igb_rx_buf       *rxbuf;
4168         int                     i, bsize, error;
4169
4170         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4171         if (!(rxr->rx_buffers =
4172             (struct igb_rx_buf *) malloc(bsize,
4173             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4174                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4175                 error = ENOMEM;
4176                 goto fail;
4177         }
4178
4179         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4180                                    1, 0,                /* alignment, bounds */
4181                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4182                                    BUS_SPACE_MAXADDR,   /* highaddr */
4183                                    NULL, NULL,          /* filter, filterarg */
4184                                    MSIZE,               /* maxsize */
4185                                    1,                   /* nsegments */
4186                                    MSIZE,               /* maxsegsize */
4187                                    0,                   /* flags */
4188                                    NULL,                /* lockfunc */
4189                                    NULL,                /* lockfuncarg */
4190                                    &rxr->htag))) {
4191                 device_printf(dev, "Unable to create RX DMA tag\n");
4192                 goto fail;
4193         }
4194
4195         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4196                                    1, 0,                /* alignment, bounds */
4197                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4198                                    BUS_SPACE_MAXADDR,   /* highaddr */
4199                                    NULL, NULL,          /* filter, filterarg */
4200                                    MJUM9BYTES,          /* maxsize */
4201                                    1,                   /* nsegments */
4202                                    MJUM9BYTES,          /* maxsegsize */
4203                                    0,                   /* flags */
4204                                    NULL,                /* lockfunc */
4205                                    NULL,                /* lockfuncarg */
4206                                    &rxr->ptag))) {
4207                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4208                 goto fail;
4209         }
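
        /*
         * Two tags are used because of header split: htag maps the
         * small header mbufs (at most MSIZE bytes), while ptag maps
         * payload clusters up to MJUM9BYTES.  Each buffer is a single
         * virtually-contiguous mbuf, hence nsegments = 1 in both tags.
         */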
4210
4211         for (i = 0; i < adapter->num_rx_desc; i++) {
4212                 rxbuf = &rxr->rx_buffers[i];
4213                 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4214                 if (error) {
4215                         device_printf(dev,
4216                             "Unable to create RX head DMA maps\n");
4217                         goto fail;
4218                 }
4219                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4220                 if (error) {
4221                         device_printf(dev,
4222                             "Unable to create RX packet DMA maps\n");
4223                         goto fail;
4224                 }
4225         }
4226
4227         return (0);
4228
4229 fail:
4230         /* Frees all, but can handle partial completion */
4231         igb_free_receive_structures(adapter);
4232         return (error);
4233 }
4234
4235
4236 static void
4237 igb_free_receive_ring(struct rx_ring *rxr)
4238 {
4239         struct  adapter         *adapter = rxr->adapter;
4240         struct igb_rx_buf       *rxbuf;
4242
4243         for (int i = 0; i < adapter->num_rx_desc; i++) {
4244                 rxbuf = &rxr->rx_buffers[i];
4245                 if (rxbuf->m_head != NULL) {
4246                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4247                             BUS_DMASYNC_POSTREAD);
4248                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4249                         rxbuf->m_head->m_flags |= M_PKTHDR;
4250                         m_freem(rxbuf->m_head);
4251                 }
4252                 if (rxbuf->m_pack != NULL) {
4253                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4254                             BUS_DMASYNC_POSTREAD);
4255                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4256                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4257                         m_freem(rxbuf->m_pack);
4258                 }
4259                 rxbuf->m_head = NULL;
4260                 rxbuf->m_pack = NULL;
4261         }
4262 }
4263
4264
4265 /*********************************************************************
4266  *
4267  *  Initialize a receive ring and its buffers.
4268  *
4269  **********************************************************************/
4270 static int
4271 igb_setup_receive_ring(struct rx_ring *rxr)
4272 {
4273         struct  adapter         *adapter;
4274         struct  ifnet           *ifp;
4275         device_t                dev;
4276         struct igb_rx_buf       *rxbuf;
4277         bus_dma_segment_t       pseg[1], hseg[1];
4278         struct lro_ctrl         *lro = &rxr->lro;
4279         int                     rsize, nsegs, error = 0;
4280 #ifdef DEV_NETMAP
4281         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4282         struct netmap_slot *slot;
4283 #endif /* DEV_NETMAP */
4284
4285         adapter = rxr->adapter;
4286         dev = adapter->dev;
4287         ifp = adapter->ifp;
4288
4289         /* Clear the ring contents */
4290         IGB_RX_LOCK(rxr);
4291 #ifdef DEV_NETMAP
4292         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4293 #endif /* DEV_NETMAP */
4294         rsize = roundup2(adapter->num_rx_desc *
4295             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4296         bzero((void *)rxr->rx_base, rsize);
4297
4298         /*
4299         ** Free current RX buffer structures and their mbufs
4300         */
4301         igb_free_receive_ring(rxr);
4302
4303         /* Configure for header split? */
4304         if (igb_header_split)
4305                 rxr->hdr_split = TRUE;
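        /*
         * igb_header_split is a driver-wide knob declared earlier in
         * this file (typically set via a loader tunable), so every
         * ring shares the same split policy.
         */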
4306
4307         /* Now replenish the ring mbufs */
4308         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4309                 struct mbuf     *mh, *mp;
4310
4311                 rxbuf = &rxr->rx_buffers[j];
4312 #ifdef DEV_NETMAP
4313                 if (slot) {
4314                         /* slot sj is mapped to the j-th NIC-ring entry */
4315                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4316                         uint64_t paddr;
4317                         void *addr;
4318
4319                         addr = PNMB(na, slot + sj, &paddr);
4320                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4321                         /* Update descriptor */
4322                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4323                         continue;
4324                 }
4325 #endif /* DEV_NETMAP */
4326                 if (rxr->hdr_split == FALSE)
4327                         goto skip_head;
4328
4329                 /* First the header */
4330                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4331                 if (rxbuf->m_head == NULL) {
4332                         error = ENOBUFS;
4333                         goto fail;
4334                 }
4335                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4336                 mh = rxbuf->m_head;
4337                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4338                 mh->m_flags |= M_PKTHDR;
4339                 /* Get the memory mapping */
4340                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4341                     rxbuf->hmap, rxbuf->m_head, hseg,
4342                     &nsegs, BUS_DMA_NOWAIT);
4343                 if (error != 0) /* Nothing elegant to do here */
4344                         goto fail;
4345                 bus_dmamap_sync(rxr->htag,
4346                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4347                 /* Update descriptor */
4348                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4349
4350 skip_head:
4351                 /* Now the payload cluster */
4352                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4353                     M_PKTHDR, adapter->rx_mbuf_sz);
4354                 if (rxbuf->m_pack == NULL) {
4355                         error = ENOBUFS;
4356                         goto fail;
4357                 }
4358                 mp = rxbuf->m_pack;
4359                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4360                 /* Get the memory mapping */
4361                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4362                     rxbuf->pmap, mp, pseg,
4363                     &nsegs, BUS_DMA_NOWAIT);
4364                 if (error != 0)
4365                         goto fail;
4366                 bus_dmamap_sync(rxr->ptag,
4367                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4368                 /* Update descriptor */
4369                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4370         }
4371
4372         /* Setup our descriptor indices */
4373         rxr->next_to_check = 0;
4374         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4375         rxr->lro_enabled = FALSE;
4376         rxr->rx_split_packets = 0;
4377         rxr->rx_bytes = 0;
4378
4379         rxr->fmp = NULL;
4380         rxr->lmp = NULL;
4381
4382         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4383             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4384
4385         /*
4386         ** Now set up the LRO interface.  We
4387         ** only do header split when LRO is
4388         ** also enabled, since it is often
4389         ** undesirable in other setups.
4390         */
4391         if (ifp->if_capenable & IFCAP_LRO) {
4392                 error = tcp_lro_init(lro);
4393                 if (error) {
4394                         device_printf(dev, "LRO Initialization failed!\n");
4395                         goto fail;
4396                 }
4397                 INIT_DEBUGOUT("RX LRO Initialized\n");
4398                 rxr->lro_enabled = TRUE;
4399                 lro->ifp = adapter->ifp;
4400         }
4401
4402         IGB_RX_UNLOCK(rxr);
4403         return (0);
4404
4405 fail:
4406         igb_free_receive_ring(rxr);
4407         IGB_RX_UNLOCK(rxr);
4408         return (error);
4409 }
4410
4411
4412 /*********************************************************************
4413  *
4414  *  Initialize all receive rings.
4415  *
4416  **********************************************************************/
4417 static int
4418 igb_setup_receive_structures(struct adapter *adapter)
4419 {
4420         struct rx_ring *rxr = adapter->rx_rings;
4421         int i;
4422
4423         for (i = 0; i < adapter->num_queues; i++, rxr++)
4424                 if (igb_setup_receive_ring(rxr))
4425                         goto fail;
4426
4427         return (0);
4428 fail:
4429         /*
4430          * Free the RX buffers allocated so far. We only handle
4431          * the rings that completed; the failing ring will have
4432          * cleaned up after itself. 'i' is the endpoint.
4433          */
4434         for (int j = 0; j < i; ++j) {
4435                 rxr = &adapter->rx_rings[j];
4436                 IGB_RX_LOCK(rxr);
4437                 igb_free_receive_ring(rxr);
4438                 IGB_RX_UNLOCK(rxr);
4439         }
4440
4441         return (ENOBUFS);
4442 }
4443
4444 /*********************************************************************
4445  *
4446  *  Enable receive unit.
4447  *
4448  **********************************************************************/
4449 static void
4450 igb_initialize_receive_units(struct adapter *adapter)
4451 {
4452         struct rx_ring  *rxr = adapter->rx_rings;
4453         struct ifnet    *ifp = adapter->ifp;
4454         struct e1000_hw *hw = &adapter->hw;
4455         u32             rctl, rxcsum, psize, srrctl = 0;
4456
4457         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4458
4459         /*
4460          * Make sure receives are disabled while setting
4461          * up the descriptor ring
4462          */
4463         rctl = E1000_READ_REG(hw, E1000_RCTL);
4464         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4465
4466         /*
4467         ** Set up for header split
4468         */
4469         if (igb_header_split) {
4470                 /* Use a standard mbuf for the header */
4471                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4472                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4473         } else
4474                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4475
4476         /*
4477         ** Set up for jumbo frames
4478         */
4479         if (ifp->if_mtu > ETHERMTU) {
4480                 rctl |= E1000_RCTL_LPE;
4481                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4482                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4483                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4484                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4485                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4486                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4487                 }
4488                 /* Set maximum packet len */
4489                 psize = adapter->max_frame_size;
4490                 /* are we on a vlan? */
4491                 if (adapter->ifp->if_vlantrunk != NULL)
4492                         psize += VLAN_TAG_SIZE;
4493                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4494         } else {
4495                 rctl &= ~E1000_RCTL_LPE;
4496                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4497                 rctl |= E1000_RCTL_SZ_2048;
4498         }
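
        /*
         * The SRRCTL BSIZEPKT field is programmed in 1 KB units, which
         * is what the >> E1000_SRRCTL_BSIZEPKT_SHIFT (a shift of 10)
         * expresses above: 2048 becomes 2, 4096 becomes 4, and so on.
         */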
4499
4500         /*
4501          * If TX flow control is disabled and there's >1 queue defined,
4502          * enable DROP.
4503          *
4504          * This drops frames rather than hanging the RX MAC for all queues.
4505          */
4506         if ((adapter->num_queues > 1) &&
4507             (adapter->fc == e1000_fc_none ||
4508              adapter->fc == e1000_fc_rx_pause)) {
4509                 srrctl |= E1000_SRRCTL_DROP_EN;
4510         }
4511
4512         /* Setup the Base and Length of the Rx Descriptor Rings */
4513         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4514                 u64 bus_addr = rxr->rxdma.dma_paddr;
4515                 u32 rxdctl;
4516
4517                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4518                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4519                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4520                     (uint32_t)(bus_addr >> 32));
4521                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4522                     (uint32_t)bus_addr);
4523                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4524                 /* Enable this Queue */
4525                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4526                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4527                 rxdctl &= 0xFFF00000;
4528                 rxdctl |= IGB_RX_PTHRESH;
4529                 rxdctl |= IGB_RX_HTHRESH << 8;
4530                 rxdctl |= IGB_RX_WTHRESH << 16;
4531                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4532         }
4533
4534         /*
4535         ** Setup for RX MultiQueue
4536         */
4537         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4538         if (adapter->num_queues > 1) {
4539                 u32 random[10], mrqc, shift = 0;
4540                 union igb_reta {
4541                         u32 dword;
4542                         u8  bytes[4];
4543                 } reta;
4544
4545                 arc4rand(&random, sizeof(random), 0);
4546                 if (adapter->hw.mac.type == e1000_82575)
4547                         shift = 6;
4548                 /* Populate the RSS redirection table (RETA) */
4549                 for (int i = 0; i < 128; i++) {
4550                         reta.bytes[i & 3] =
4551                             (i % adapter->num_queues) << shift;
4552                         if ((i & 3) == 3)
4553                                 E1000_WRITE_REG(hw,
4554                                     E1000_RETA(i >> 2), reta.dword);
4555                 }
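                /*
                 * Worked example: with num_queues = 2 the 128 RETA
                 * entries alternate 0,1,0,1,... so RSS hash values
                 * spread flows evenly across both queues.  On 82575
                 * the queue index belongs in bits 7:6 of each RETA
                 * byte, hence the shift of 6 above.
                 */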
4556                 /* Now fill in hash table */
4557                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4558                 for (int i = 0; i < 10; i++)
4559                         E1000_WRITE_REG_ARRAY(hw,
4560                             E1000_RSSRK(0), i, random[i]);
4561
4562                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4563                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4564                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4565                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4566                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4567                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4568                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4569                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4570
4571                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
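
                /*
                 * The MRQC field-select bits above choose which packet
                 * headers feed the RSS hash: IPv4/IPv6 addresses plus
                 * TCP and UDP ports, including the IPv6 extended forms.
                 */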
4572
4573                 /*
4574                 ** NOTE: Receive Full-Packet Checksum Offload
4575                 ** is mutually exclusive with Multiqueue. This
4576                 ** is distinct from the TCP/IP header checksums,
4577                 ** which still work.
4578                 */
4579                 rxcsum |= E1000_RXCSUM_PCSD;
4580 #if __FreeBSD_version >= 800000
4581                 /* For SCTP Offload */
4582                 if ((hw->mac.type != e1000_82575) &&
4583                     (ifp->if_capenable & IFCAP_RXCSUM))
4584                         rxcsum |= E1000_RXCSUM_CRCOFL;
4585 #endif
4586         } else {
4587                 /* Non RSS setup */
4588                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4589                         rxcsum |= E1000_RXCSUM_IPPCSE;
4590 #if __FreeBSD_version >= 800000
4591                         if (adapter->hw.mac.type != e1000_82575)
4592                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4593 #endif
4594                 } else
4595                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4596         }
4597         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4598
4599         /* Setup the Receive Control Register */
4600         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4601         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4602                    E1000_RCTL_RDMTS_HALF |
4603                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4604         /* Strip CRC bytes. */
4605         rctl |= E1000_RCTL_SECRC;
4606         /* Make sure VLAN Filters are off */
4607         rctl &= ~E1000_RCTL_VFE;
4608         /* Don't store bad packets */
4609         rctl &= ~E1000_RCTL_SBP;
4610
4611         /* Enable Receives */
4612         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4613
4614         /*
4615          * Setup the HW Rx Head and Tail Descriptor Pointers
4616          *   - needs to be after enable
4617          */
4618         for (int i = 0; i < adapter->num_queues; i++) {
4619                 rxr = &adapter->rx_rings[i];
4620                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4621 #ifdef DEV_NETMAP
4622                 /*
4623                  * An init() while a netmap client is active must
4624                  * preserve the rx buffers passed to userspace.
4625                  * In this driver it means we adjust RDT to
4626                  * something different from next_to_refresh
4627                  * (which is not used in netmap mode).
4628                  */
4629                 if (ifp->if_capenable & IFCAP_NETMAP) {
4630                         struct netmap_adapter *na = NA(adapter->ifp);
4631                         struct netmap_kring *kring = &na->rx_rings[i];
4632                         int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4633
4634                         if (t >= adapter->num_rx_desc)
4635                                 t -= adapter->num_rx_desc;
4636                         else if (t < 0)
4637                                 t += adapter->num_rx_desc;
4638                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4639                 } else
4640 #endif /* DEV_NETMAP */
4641                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4642         }
4643         return;
4644 }
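
/*
 * A minimal sketch, not compiled into the driver, of how the RDH/RDT
 * pair programmed above describes ring ownership.  The helper name
 * igb_hw_owned_desc is hypothetical; the assumed register semantics
 * are that RDH is the next descriptor the MAC will write and RDT the
 * last one software handed over.
 */
#if 0
static u32
igb_hw_owned_desc(struct adapter *adapter, int q)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 head = E1000_READ_REG(hw, E1000_RDH(q));
	u32 tail = E1000_READ_REG(hw, E1000_RDT(q));

	/* Distance from head to tail, modulo the ring size */
	return ((tail >= head) ? (tail - head) :
	    (adapter->num_rx_desc - head + tail));
}
#endif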
4645
4646 /*********************************************************************
4647  *
4648  *  Free receive rings.
4649  *
4650  **********************************************************************/
4651 static void
4652 igb_free_receive_structures(struct adapter *adapter)
4653 {
4654         struct rx_ring *rxr = adapter->rx_rings;
4655
4656         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4657                 struct lro_ctrl *lro = &rxr->lro;
4658                 igb_free_receive_buffers(rxr);
4659                 tcp_lro_free(lro);
4660                 igb_dma_free(adapter, &rxr->rxdma);
4661         }
4662
4663         free(adapter->rx_rings, M_DEVBUF);
4664 }
4665
4666 /*********************************************************************
4667  *
4668  *  Free receive ring data structures.
4669  *
4670  **********************************************************************/
4671 static void
4672 igb_free_receive_buffers(struct rx_ring *rxr)
4673 {
4674         struct adapter          *adapter = rxr->adapter;
4675         struct igb_rx_buf       *rxbuf;
4676         int i;
4677
4678         INIT_DEBUGOUT("free_receive_structures: begin");
4679
4680         /* Cleanup any existing buffers */
4681         if (rxr->rx_buffers != NULL) {
4682                 for (i = 0; i < adapter->num_rx_desc; i++) {
4683                         rxbuf = &rxr->rx_buffers[i];
4684                         if (rxbuf->m_head != NULL) {
4685                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4686                                     BUS_DMASYNC_POSTREAD);
4687                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4688                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4689                                 m_freem(rxbuf->m_head);
4690                         }
4691                         if (rxbuf->m_pack != NULL) {
4692                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4693                                     BUS_DMASYNC_POSTREAD);
4694                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4695                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4696                                 m_freem(rxbuf->m_pack);
4697                         }
4698                         rxbuf->m_head = NULL;
4699                         rxbuf->m_pack = NULL;
4700                         if (rxbuf->hmap != NULL) {
4701                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4702                                 rxbuf->hmap = NULL;
4703                         }
4704                         if (rxbuf->pmap != NULL) {
4705                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4706                                 rxbuf->pmap = NULL;
4707                         }
4708                 }
4709                 if (rxr->rx_buffers != NULL) {
4710                         free(rxr->rx_buffers, M_DEVBUF);
4711                         rxr->rx_buffers = NULL;
4712                 }
4713         }
4714
4715         if (rxr->htag != NULL) {
4716                 bus_dma_tag_destroy(rxr->htag);
4717                 rxr->htag = NULL;
4718         }
4719         if (rxr->ptag != NULL) {
4720                 bus_dma_tag_destroy(rxr->ptag);
4721                 rxr->ptag = NULL;
4722         }
4723 }
4724
4725 static __inline void
4726 igb_rx_discard(struct rx_ring *rxr, int i)
4727 {
4728         struct igb_rx_buf       *rbuf;
4729
4730         rbuf = &rxr->rx_buffers[i];
4731
4732         /* Partially received? Free the chain */
4733         if (rxr->fmp != NULL) {
4734                 rxr->fmp->m_flags |= M_PKTHDR;
4735                 m_freem(rxr->fmp);
4736                 rxr->fmp = NULL;
4737                 rxr->lmp = NULL;
4738         }
4739
4740         /*
4741         ** With advanced descriptors the writeback
4742         ** clobbers the buffer addrs, so it's easier
4743         ** to just free the existing mbufs and take
4744         ** the normal refresh path to get new buffers
4745         ** and mappings.
4746         */
4747         if (rbuf->m_head) {
4748                 m_free(rbuf->m_head);
4749                 rbuf->m_head = NULL;
4750                 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4751         }
4752
4753         if (rbuf->m_pack) {
4754                 m_free(rbuf->m_pack);
4755                 rbuf->m_pack = NULL;
4756                 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4757         }
4758
4759         return;
4760 }
4761
4762 static __inline void
4763 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4764 {
4765
4766         /*
4767          * At the moment LRO is only done for IPv4/TCP packets whose TCP
4768          * checksum has been verified by hardware, and which carry no VLAN
4769          * tag in the Ethernet header.
4770          */
4771         if (rxr->lro_enabled &&
4772             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4773             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4774             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4775             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4776             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4777             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4778                 /*
4779                  * Send to the stack if:
4780                  *  - LRO not enabled, or
4781                  *  - no LRO resources, or
4782                  *  - LRO enqueue fails
4783                  */
4784                 if (rxr->lro.lro_cnt != 0)
4785                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4786                                 return;
4787         }
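        /*
         * Drop the RX lock around if_input(): the stack may do a lot
         * of work (or even re-enter the driver) and must not run with
         * the ring lock held.
         */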
4788         IGB_RX_UNLOCK(rxr);
4789         (*ifp->if_input)(ifp, m);
4790         IGB_RX_LOCK(rxr);
4791 }
4792
4793 /*********************************************************************
4794  *
4795  *  This routine executes in interrupt context. It replenishes
4796  *  the mbufs in the descriptor ring and passes data that has
4797  *  been DMA'd into host memory up to the upper layers.
4798  *
4799  *  We loop at most count times if count is > 0, or until done if
4800  *  count < 0.
4801  *
4802  *  Return TRUE if more to clean, FALSE otherwise
4803  *********************************************************************/
4804 static bool
4805 igb_rxeof(struct igb_queue *que, int count, int *done)
4806 {
4807         struct adapter          *adapter = que->adapter;
4808         struct rx_ring          *rxr = que->rxr;
4809         struct ifnet            *ifp = adapter->ifp;
4810         struct lro_ctrl         *lro = &rxr->lro;
4811         struct lro_entry        *queued;
4812         int                     i, processed = 0, rxdone = 0;
4813         u32                     ptype, staterr = 0;
4814         union e1000_adv_rx_desc *cur;
4815
4816         IGB_RX_LOCK(rxr);
4817         /* Sync the ring. */
4818         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4819             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4820
4821 #ifdef DEV_NETMAP
4822         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4823                 IGB_RX_UNLOCK(rxr);
4824                 return (FALSE);
4825         }
4826 #endif /* DEV_NETMAP */
4827
4828         /* Main clean loop */
4829         for (i = rxr->next_to_check; count != 0;) {
4830                 struct mbuf             *sendmp, *mh, *mp;
4831                 struct igb_rx_buf       *rxbuf;
4832                 u16                     hlen, plen, hdr, vtag;
4833                 bool                    eop = FALSE;
4834  
4835                 cur = &rxr->rx_base[i];
4836                 staterr = le32toh(cur->wb.upper.status_error);
4837                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4838                         break;
4839                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4840                         break;
4841                 count--;
4842                 sendmp = mh = mp = NULL;
4843                 cur->wb.upper.status_error = 0;
4844                 rxbuf = &rxr->rx_buffers[i];
4845                 plen = le16toh(cur->wb.upper.length);
4846                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4847                 if (((adapter->hw.mac.type == e1000_i350) ||
4848                     (adapter->hw.mac.type == e1000_i354)) &&
4849                     (staterr & E1000_RXDEXT_STATERR_LB))
4850                         vtag = be16toh(cur->wb.upper.vlan);
4851                 else
4852                         vtag = le16toh(cur->wb.upper.vlan);
4853                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4854                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4855
4856                 /*
4857                  * Free the frame (all segments) if we're at EOP and
4858                  * it's an error.
4859                  *
4860                  * The datasheet states that EOP + status is only valid for
4861                  * the final segment in a multi-segment frame.
4862                  */
4863                 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4864                         adapter->dropped_pkts++;
4865                         ++rxr->rx_discarded;
4866                         igb_rx_discard(rxr, i);
4867                         goto next_desc;
4868                 }
4869
4870                 /*
4871                 ** The way the hardware is configured to
4872                 ** split, it will ONLY use the header buffer
4873                 ** when header split is enabled, otherwise we
4874                 ** get normal behavior, i.e., both header and
4875                 ** payload are DMA'd into the payload buffer.
4876                 **
4877                 ** The fmp test is to catch the case where a
4878                 ** packet spans multiple descriptors, in that
4879                 ** case only the first header is valid.
4880                 */
4881                 if (rxr->hdr_split && rxr->fmp == NULL) {
4882                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4883                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4884                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4885                         if (hlen > IGB_HDR_BUF)
4886                                 hlen = IGB_HDR_BUF;
4887                         mh = rxr->rx_buffers[i].m_head;
4888                         mh->m_len = hlen;
4889                         /* clear buf pointer for refresh */
4890                         rxbuf->m_head = NULL;
4891                         /*
4892                         ** Get the payload length; this
4893                         ** could be zero if it's a small
4894                         ** packet.
4895                         */
4896                         if (plen > 0) {
4897                                 mp = rxr->rx_buffers[i].m_pack;
4898                                 mp->m_len = plen;
4899                                 mh->m_next = mp;
4900                                 /* clear buf pointer */
4901                                 rxbuf->m_pack = NULL;
4902                                 rxr->rx_split_packets++;
4903                         }
4904                 } else {
4905                         /*
4906                         ** Either no header split, or a
4907                         ** secondary piece of a fragmented
4908                         ** split packet.
4909                         */
4910                         mh = rxr->rx_buffers[i].m_pack;
4911                         mh->m_len = plen;
4912                         /* clear buf info for refresh */
4913                         rxbuf->m_pack = NULL;
4914                 }
4915                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4916
4917                 ++processed; /* So we know when to refresh */
4918
4919                 /* Initial frame - setup */
4920                 if (rxr->fmp == NULL) {
4921                         mh->m_pkthdr.len = mh->m_len;
4922                         /* Save the head of the chain */
4923                         rxr->fmp = mh;
4924                         rxr->lmp = mh;
4925                         if (mp != NULL) {
4926                                 /* Add payload if split */
4927                                 mh->m_pkthdr.len += mp->m_len;
4928                                 rxr->lmp = mh->m_next;
4929                         }
4930                 } else {
4931                         /* Chain mbuf's together */
4932                         rxr->lmp->m_next = mh;
4933                         rxr->lmp = rxr->lmp->m_next;
4934                         rxr->fmp->m_pkthdr.len += mh->m_len;
4935                 }
4936
4937                 if (eop) {
4938                         rxr->fmp->m_pkthdr.rcvif = ifp;
4939                         ifp->if_ipackets++;
4940                         rxr->rx_packets++;
4941                         /* capture data for AIM */
4942                         rxr->packets++;
4943                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4944                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4945
4946                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4947                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4948
4949                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4950                             (staterr & E1000_RXD_STAT_VP) != 0) {
4951                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4952                                 rxr->fmp->m_flags |= M_VLANTAG;
4953                         }
4954
4955                         /*
4956                          * In case of multiqueue, we have RXCSUM.PCSD bit set
4957                          * and never cleared. This means we have RSS hash
4958                          * available to be used.
4959                          */
4960                         if (adapter->num_queues > 1) {
4961                                 rxr->fmp->m_pkthdr.flowid = 
4962                                     le32toh(cur->wb.lower.hi_dword.rss);
4963                                 /*
4964                                  * Full RSS support is not available in
4965                                  * FreeBSD 10, so set the hash type to
4966                                  * OPAQUE.
4967                                  */
4968                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
4969                         } else {
4970 #ifndef IGB_LEGACY_TX
4971                                 rxr->fmp->m_pkthdr.flowid = que->msix;
4972                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
4973 #endif
4974                         }
4975                         sendmp = rxr->fmp;
4976                         /* Make sure to set M_PKTHDR. */
4977                         sendmp->m_flags |= M_PKTHDR;
4978                         rxr->fmp = NULL;
4979                         rxr->lmp = NULL;
4980                 }
4981
4982 next_desc:
4983                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4984                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4985
4986                 /* Advance our pointers to the next descriptor. */
4987                 if (++i == adapter->num_rx_desc)
4988                         i = 0;
4989                 /*
4990                 ** Send to the stack or LRO
4991                 */
4992                 if (sendmp != NULL) {
4993                         rxr->next_to_check = i;
4994                         igb_rx_input(rxr, ifp, sendmp, ptype);
4995                         i = rxr->next_to_check;
4996                         rxdone++;
4997                 }
4998
4999                 /* Every 8 descriptors we go to refresh mbufs */
5000                 if (processed == 8) {
5001                         igb_refresh_mbufs(rxr, i);
5002                         processed = 0;
5003                 }
5004         }
5005
5006         /* Catch any remainders */
5007         if (igb_rx_unrefreshed(rxr))
5008                 igb_refresh_mbufs(rxr, i);
5009
5010         rxr->next_to_check = i;
5011
5012         /*
5013          * Flush any outstanding LRO work
5014          */
5015         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5016                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5017                 tcp_lro_flush(lro, queued);
5018         }
5019
5020         if (done != NULL)
5021                 *done += rxdone;
5022
5023         IGB_RX_UNLOCK(rxr);
5024         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5025 }
5026
5027 /*********************************************************************
5028  *
5029  *  Verify that the hardware indicated that the checksum is valid.
5030  *  Inform the stack of the checksum status so that it
5031  *  doesn't spend time re-verifying the checksum.
5032  *
5033  *********************************************************************/
5034 static void
5035 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5036 {
5037         u16 status = (u16)staterr;
5038         u8  errors = (u8) (staterr >> 24);
5039         int sctp;
5040
5041         /* Ignore Checksum bit is set */
5042         if (status & E1000_RXD_STAT_IXSM) {
5043                 mp->m_pkthdr.csum_flags = 0;
5044                 return;
5045         }
5046
5047         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5048             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5049                 sctp = 1;
5050         else
5051                 sctp = 0;
5052         if (status & E1000_RXD_STAT_IPCS) {
5053                 /* Did it pass? */
5054                 if (!(errors & E1000_RXD_ERR_IPE)) {
5055                         /* IP Checksum Good */
5056                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5057                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5058                 } else
5059                         mp->m_pkthdr.csum_flags = 0;
5060         }
5061
5062         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5063                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
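                /*
                 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data of
                 * 0xffff (set below) tells the stack that the full
                 * TCP/UDP checksum, pseudo-header included, has been
                 * verified, so no software checksumming is needed.
                 */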
5064 #if __FreeBSD_version >= 800000
5065                 if (sctp) /* reassign */
5066                         type = CSUM_SCTP_VALID;
5067 #endif
5068                 /* Did it pass? */
5069                 if (!(errors & E1000_RXD_ERR_TCPE)) {
5070                         mp->m_pkthdr.csum_flags |= type;
5071                         if (sctp == 0)
5072                                 mp->m_pkthdr.csum_data = htons(0xffff);
5073                 }
5074         }
5075         return;
5076 }
5077
5078 /*
5079  * This routine is run via a vlan
5080  * config EVENT
5081  */
5082 static void
5083 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5084 {
5085         struct adapter  *adapter = ifp->if_softc;
5086         u32             index, bit;
5087
5088         if (ifp->if_softc != arg)   /* Not our event */
5089                 return;
5090
5091         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5092                 return;
5093
5094         IGB_CORE_LOCK(adapter);
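        /*
         * The VFTA is an array of 32-bit registers, one bit per VLAN
         * ID: e.g. vtag 1000 lands in register index 31, bit 8.
         */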
5095         index = (vtag >> 5) & 0x7F;
5096         bit = vtag & 0x1F;
5097         adapter->shadow_vfta[index] |= (1 << bit);
5098         ++adapter->num_vlans;
5099         /* Change hw filter setting */
5100         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5101                 igb_setup_vlan_hw_support(adapter);
5102         IGB_CORE_UNLOCK(adapter);
5103 }
5104
5105 /*
5106  * This routine is run via a vlan
5107  * unconfig EVENT
5108  */
5109 static void
5110 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5111 {
5112         struct adapter  *adapter = ifp->if_softc;
5113         u32             index, bit;
5114
5115         if (ifp->if_softc != arg)
5116                 return;
5117
5118         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5119                 return;
5120
5121         IGB_CORE_LOCK(adapter);
5122         index = (vtag >> 5) & 0x7F;
5123         bit = vtag & 0x1F;
5124         adapter->shadow_vfta[index] &= ~(1 << bit);
5125         --adapter->num_vlans;
5126         /* Change hw filter setting */
5127         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5128                 igb_setup_vlan_hw_support(adapter);
5129         IGB_CORE_UNLOCK(adapter);
5130 }
5131
5132 static void
5133 igb_setup_vlan_hw_support(struct adapter *adapter)
5134 {
5135         struct e1000_hw *hw = &adapter->hw;
5136         struct ifnet    *ifp = adapter->ifp;
5137         u32             reg;
5138
5139         if (adapter->vf_ifp) {
5140                 e1000_rlpml_set_vf(hw,
5141                     adapter->max_frame_size + VLAN_TAG_SIZE);
5142                 return;
5143         }
5144
5145         reg = E1000_READ_REG(hw, E1000_CTRL);
5146         reg |= E1000_CTRL_VME;
5147         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5148
5149         /* Enable the Filter Table */
5150         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5151                 reg = E1000_READ_REG(hw, E1000_RCTL);
5152                 reg &= ~E1000_RCTL_CFIEN;
5153                 reg |= E1000_RCTL_VFE;
5154                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5155         }
5156
5157         /* Update the frame size */
5158         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5159             adapter->max_frame_size + VLAN_TAG_SIZE);
5160
5161         /* Don't bother with table if no vlans */
5162         if ((adapter->num_vlans == 0) ||
5163             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5164                 return;
5165         /*
5166         ** A soft reset zeroes out the VFTA, so
5167         ** we need to repopulate it now.
5168         */
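        /*
         * Note: the VF case returned early at the top of this
         * function, so the e1000_vfta_set_vf() branch in the loop
         * below is never taken here.
         */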
5169         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5170                 if (adapter->shadow_vfta[i] != 0) {
5171                         if (adapter->vf_ifp)
5172                                 e1000_vfta_set_vf(hw,
5173                                     adapter->shadow_vfta[i], TRUE);
5174                         else
5175                                 e1000_write_vfta(hw,
5176                                     i, adapter->shadow_vfta[i]);
5177                 }
5178 }
5179
5180 static void
5181 igb_enable_intr(struct adapter *adapter)
5182 {
5183         /* With RSS set up what to auto clear */
5184         if (adapter->msix_mem) {
5185                 u32 mask = (adapter->que_mask | adapter->link_mask);
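                /*
                 * EIMS enables the per-queue MSI-X vectors, EIAC marks
                 * them auto-clear, and EIAM controls their automatic
                 * masking; link-state changes still arrive through the
                 * legacy IMS/LSC path.
                 */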
5186                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5187                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5188                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5189                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5190                     E1000_IMS_LSC);
5191         } else {
5192                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5193                     IMS_ENABLE_MASK);
5194         }
5195         E1000_WRITE_FLUSH(&adapter->hw);
5196
5197         return;
5198 }
5199
5200 static void
5201 igb_disable_intr(struct adapter *adapter)
5202 {
5203         if (adapter->msix_mem) {
5204                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5205                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5206         } 
5207         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5208         E1000_WRITE_FLUSH(&adapter->hw);
5209         return;
5210 }
5211
5212 /*
5213  * Bit of a misnomer: what this really means is
5214  * to enable OS management of the system, i.e.
5215  * to disable special hardware management features.
5216  */
5217 static void
5218 igb_init_manageability(struct adapter *adapter)
5219 {
5220         if (adapter->has_manage) {
5221                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5222                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5223
5224                 /* disable hardware interception of ARP */
5225                 manc &= ~(E1000_MANC_ARP_EN);
5226
5227                 /* enable receiving management packets to the host */
5228                 manc |= E1000_MANC_EN_MNG2HOST;
5229                 manc2h |= 1 << 5;  /* Mng Port 623 */
5230                 manc2h |= 1 << 6;  /* Mng Port 664 */
5231                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5232                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5233         }
5234 }
5235
5236 /*
5237  * Give control back to hardware management
5238  * controller if there is one.
5239  */
5240 static void
5241 igb_release_manageability(struct adapter *adapter)
5242 {
5243         if (adapter->has_manage) {
5244                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5245
5246                 /* re-enable hardware interception of ARP */
5247                 manc |= E1000_MANC_ARP_EN;
5248                 manc &= ~E1000_MANC_EN_MNG2HOST;
5249
5250                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5251         }
5252 }
5253
5254 /*
5255  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5256  * For ASF and Pass Through versions of f/w this means that
5257  * the driver is loaded. 
5258  *
5259  */
5260 static void
5261 igb_get_hw_control(struct adapter *adapter)
5262 {
5263         u32 ctrl_ext;
5264
5265         if (adapter->vf_ifp)
5266                 return;
5267
5268         /* Let firmware know the driver has taken over */
5269         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5270         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5271             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5272 }
5273
5274 /*
5275  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5276  * For ASF and Pass Through versions of f/w this means that the
5277  * driver is no longer loaded.
5278  *
5279  */
5280 static void
5281 igb_release_hw_control(struct adapter *adapter)
5282 {
5283         u32 ctrl_ext;
5284
5285         if (adapter->vf_ifp)
5286                 return;
5287
5288         /* Let firmware take over control of h/w */
5289         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5290         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5291             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5292 }
5293
5294 static int
5295 igb_is_valid_ether_addr(uint8_t *addr)
5296 {
5297         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5298
5299         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5300                 return (FALSE);
5301         }
5302
5303         return (TRUE);
5304 }
5305
5306
5307 /*
5308  * Enable PCI Wake On Lan capability
5309  */
5310 static void
5311 igb_enable_wakeup(device_t dev)
5312 {
5313         u16     cap, status;
5314         u8      id;
5315
5316         /* First find the capabilities pointer */
5317         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5318         /* Read the PM Capabilities */
5319         id = pci_read_config(dev, cap, 1);
5320         if (id != PCIY_PMG)     /* Something wrong */
5321                 return;
5322         /* OK, we have the power capabilities, so
5323            now get the status register */
5324         cap += PCIR_POWER_STATUS;
5325         status = pci_read_config(dev, cap, 2);
5326         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5327         pci_write_config(dev, cap, status, 2);
5328         return;
5329 }
5330
5331 static void
5332 igb_led_func(void *arg, int onoff)
5333 {
5334         struct adapter  *adapter = arg;
5335
5336         IGB_CORE_LOCK(adapter);
5337         if (onoff) {
5338                 e1000_setup_led(&adapter->hw);
5339                 e1000_led_on(&adapter->hw);
5340         } else {
5341                 e1000_led_off(&adapter->hw);
5342                 e1000_cleanup_led(&adapter->hw);
5343         }
5344         IGB_CORE_UNLOCK(adapter);
5345 }
5346
5347 /**********************************************************************
5348  *
5349  *  Update the board statistics counters.
5350  *
5351  **********************************************************************/
5352 static void
5353 igb_update_stats_counters(struct adapter *adapter)
5354 {
5355         struct ifnet            *ifp;
5356         struct e1000_hw         *hw = &adapter->hw;
5357         struct e1000_hw_stats   *stats;
5358
5359         /* 
5360         ** The virtual function adapter has only a
5361         ** small controlled set of stats; do only
5362         ** those and return.
5363         */
5364         if (adapter->vf_ifp) {
5365                 igb_update_vf_stats_counters(adapter);
5366                 return;
5367         }
5368
5369         stats = (struct e1000_hw_stats  *)adapter->stats;
5370
5371         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5372            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5373                 stats->symerrs +=
5374                     E1000_READ_REG(hw,E1000_SYMERRS);
5375                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5376         }
5377
5378         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5379         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5380         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5381         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5382
5383         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5384         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5385         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5386         stats->dc += E1000_READ_REG(hw, E1000_DC);
5387         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5388         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5389         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5390         /*
5391         ** For watchdog management we need to know if we have been
5392         ** paused during the last interval, so capture that here.
5393         */ 
5394         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5395         stats->xoffrxc += adapter->pause_frames;
5396         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5397         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5398         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5399         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5400         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5401         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5402         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5403         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5404         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5405         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5406         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5407         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5408
5409         /* For the 64-bit byte counters the low dword must be read first. */
5410         /* Both registers clear on the read of the high dword */
5411
5412         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5413             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5414         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5415             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5416
5417         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5418         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5419         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5420         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5421         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5422
5423         stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5424         stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5425         stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5426
5427         stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5428             ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5429         stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5430             ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5431
5432         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5433         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5434         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5435         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5436         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5437         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5438         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5439         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5440         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5441         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5442
5443         /* Interrupt Counts */
5444
5445         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5446         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5447         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5448         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5449         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5450         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5451         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5452         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5453         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5454
5455         /* Host to Card Statistics */
5456
5457         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5458         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5459         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5460         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5461         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5462         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5463         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5464         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5465             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5466         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5467             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5468         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5469         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5470         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5471
5472         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5473         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5474         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5475         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5476         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5477         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5478
5479         ifp = adapter->ifp;
5480         ifp->if_collisions = stats->colc;
5481
5482         /* Rx Errors */
5483         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5484             stats->crcerrs + stats->algnerrc +
5485             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5486
5487         /* Tx Errors */
5488         ifp->if_oerrors = stats->ecol +
5489             stats->latecol + adapter->watchdog_events;
5490
5491         /* Driver specific counters */
5492         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5493         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5494         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5495         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5496         adapter->packet_buf_alloc_tx =
5497             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5498         adapter->packet_buf_alloc_rx =
5499             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5500 }
5501
5502
5503 /**********************************************************************
5504  *
5505  *  Initialize the VF board statistics counters.
5506  *
5507  **********************************************************************/
5508 static void
5509 igb_vf_init_stats(struct adapter *adapter)
5510 {
5511         struct e1000_hw *hw = &adapter->hw;
5512         struct e1000_vf_stats   *stats;
5513
5514         stats = (struct e1000_vf_stats  *)adapter->stats;
5515         if (stats == NULL)
5516                 return;
5517         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5518         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5519         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5520         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5521         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5522 }
5523  
5524 /**********************************************************************
5525  *
5526  *  Update the VF board statistics counters.
5527  *
5528  **********************************************************************/
5529 static void
5530 igb_update_vf_stats_counters(struct adapter *adapter)
5531 {
5532         struct e1000_hw *hw = &adapter->hw;
5533         struct e1000_vf_stats   *stats;
5534
5535         if (adapter->link_speed == 0)
5536                 return;
5537
5538         stats = (struct e1000_vf_stats  *)adapter->stats;
5539
5540         UPDATE_VF_REG(E1000_VFGPRC,
5541             stats->last_gprc, stats->gprc);
5542         UPDATE_VF_REG(E1000_VFGORC,
5543             stats->last_gorc, stats->gorc);
5544         UPDATE_VF_REG(E1000_VFGPTC,
5545             stats->last_gptc, stats->gptc);
5546         UPDATE_VF_REG(E1000_VFGOTC,
5547             stats->last_gotc, stats->gotc);
5548         UPDATE_VF_REG(E1000_VFMPRC,
5549             stats->last_mprc, stats->mprc);
5550 }
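
/*
 * Illustrative sketch of the snapshot/delta pattern above. The real
 * UPDATE_VF_REG macro is defined elsewhere in this driver; this is only
 * a hedged approximation, assuming a 32-bit hardware counter that wraps
 * and a 64-bit software accumulator:
 *
 *      u32 cur = E1000_READ_REG(hw, reg);
 *      if (cur < last)                 // hardware counter wrapped
 *              count += (u64)1 << 32;  // carry into the high word
 *      count = (count & 0xFFFFFFFF00000000ULL) | cur;
 *      last = cur;
 */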
5551
5552 /* Export a single 32-bit register via a read-only sysctl. */
5553 static int
5554 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5555 {
5556         struct adapter *adapter;
5557         u_int val;
5558
5559         adapter = oidp->oid_arg1;
5560         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5561         return (sysctl_handle_int(oidp, &val, 0, req));
5562 }
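
/*
 * Registration sketch for the handler above; it mirrors the calls made
 * in igb_add_hw_stats() below, where arg1 is the adapter and arg2 the
 * register offset to read:
 *
 *      SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
 *          CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
 *          igb_sysctl_reg_handler, "IU", "Transmit Descriptor Head");
 */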
5563
5564 /*
5565 **  Tuneable interrupt rate handler
5566 */
5567 static int
5568 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5569 {
5570         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5571         int                     error;
5572         u32                     reg, usec, rate;
5573                         
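        /*
         * The interval field sits in bits 14:2 of EITR (mask 0x7FFC) and
         * is treated here as microseconds; e.g. an interval of 125 reads
         * back as 1000000 / 125 = 8000 interrupts per second.
         */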
5574         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5575         usec = ((reg & 0x7FFC) >> 2);
5576         if (usec > 0)
5577                 rate = 1000000 / usec;
5578         else
5579                 rate = 0;
5580         error = sysctl_handle_int(oidp, &rate, 0, req);
5581         if (error || !req->newptr)
5582                 return (error);
5583         return (0);     /* a written value is accepted but not applied */
5584 }
5585
5586 /*
5587  * Add sysctl variables, one per statistic, to the system.
5588  */
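/*
 * The nodes created here land under the device's sysctl tree, e.g. for
 * unit 0:
 *
 *      dev.igb.0.dropped
 *      dev.igb.0.queue0.interrupt_rate
 *      dev.igb.0.mac_stats.good_pkts_recvd
 *      dev.igb.0.interrupts.asserts
 *      dev.igb.0.host.rx_pkt
 */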
5589 static void
5590 igb_add_hw_stats(struct adapter *adapter)
5591 {
5592         device_t dev = adapter->dev;
5593
5594         struct tx_ring *txr = adapter->tx_rings;
5595         struct rx_ring *rxr = adapter->rx_rings;
5596
5597         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5598         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5599         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5600         struct e1000_hw_stats *stats = adapter->stats;
5601
5602         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5603         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5604
5605 #define QUEUE_NAME_LEN 32
5606         char namebuf[QUEUE_NAME_LEN];
5607
5608         /* Driver Statistics */
5609         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5610                         CTLFLAG_RD, &adapter->dropped_pkts,
5611                         "Driver dropped packets");
5612         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 
5613                         CTLFLAG_RD, &adapter->link_irq,
5614                         "Link MSIX IRQ Handled");
5615         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5616                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5617                         "Defragmenting mbuf chain failed");
5618         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5619                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5620                         "Driver tx dma failure in xmit");
5621         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5622                         CTLFLAG_RD, &adapter->rx_overruns,
5623                         "RX overruns");
5624         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5625                         CTLFLAG_RD, &adapter->watchdog_events,
5626                         "Watchdog timeouts");
5627
5628         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5629                         CTLFLAG_RD, &adapter->device_control,
5630                         "Device Control Register");
5631         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5632                         CTLFLAG_RD, &adapter->rx_control,
5633                         "Receiver Control Register");
5634         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5635                         CTLFLAG_RD, &adapter->int_mask,
5636                         "Interrupt Mask");
5637         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5638                         CTLFLAG_RD, &adapter->eint_mask,
5639                         "Extended Interrupt Mask");
5640         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5641                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5642                         "Transmit Buffer Packet Allocation");
5643         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5644                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5645                         "Receive Buffer Packet Allocation");
5646         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5647                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5648                         "Flow Control High Watermark");
5649         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5650                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5651                         "Flow Control Low Watermark");
5652
5653         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5654                 struct lro_ctrl *lro = &rxr->lro;
5655
5656                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5657                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5658                                             CTLFLAG_RD, NULL, "Queue Name");
5659                 queue_list = SYSCTL_CHILDREN(queue_node);
5660
5661                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5662                                 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5663                                 sizeof(adapter->queues[i]),
5664                                 igb_sysctl_interrupt_rate_handler,
5665                                 "IU", "Interrupt Rate");
5666
5667                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5668                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5669                                 igb_sysctl_reg_handler, "IU",
5670                                 "Transmit Descriptor Head");
5671                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5672                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5673                                 igb_sysctl_reg_handler, "IU",
5674                                 "Transmit Descriptor Tail");
5675                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5676                                 CTLFLAG_RD, &txr->no_desc_avail,
5677                                 "Queue No Descriptor Available");
5678                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5679                                 CTLFLAG_RD, &txr->total_packets,
5680                                 "Queue Packets Transmitted");
5681
5682                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5683                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5684                                 igb_sysctl_reg_handler, "IU",
5685                                 "Receive Descriptor Head");
5686                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5687                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5688                                 igb_sysctl_reg_handler, "IU",
5689                                 "Receive Descriptor Tail");
5690                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5691                                 CTLFLAG_RD, &rxr->rx_packets,
5692                                 "Queue Packets Received");
5693                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5694                                 CTLFLAG_RD, &rxr->rx_bytes,
5695                                 "Queue Bytes Received");
5696                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5697                                 CTLFLAG_RD, &lro->lro_queued, 0,
5698                                 "LRO Queued");
5699                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5700                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5701                                 "LRO Flushed");
5702         }
5703
5704         /* MAC stats get their own sub node */
5705
5706         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5707                                     CTLFLAG_RD, NULL, "MAC Statistics");
5708         stat_list = SYSCTL_CHILDREN(stat_node);
5709
5710         /*
5711         ** The VF adapter has a very limited set of stats
5712         ** since it's not managing the metal, so to speak.
5713         */
5714         if (adapter->vf_ifp) {
                /* adapter->stats points to a struct e1000_vf_stats here */
                struct e1000_vf_stats *vfstats =
                    (struct e1000_vf_stats *)adapter->stats;
5715                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5716                                 CTLFLAG_RD, &vfstats->gprc,
5717                                 "Good Packets Received");
5718                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5719                                 CTLFLAG_RD, &vfstats->gptc,
5720                                 "Good Packets Transmitted");
5721                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5722                                 CTLFLAG_RD, &vfstats->gorc,
5723                                 "Good Octets Received");
5724                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5725                                 CTLFLAG_RD, &vfstats->gotc,
5726                                 "Good Octets Transmitted");
5727                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5728                                 CTLFLAG_RD, &vfstats->mprc,
5729                                 "Multicast Packets Received");
5730                 return;
5731         }
5732
5733         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5734                         CTLFLAG_RD, &stats->ecol,
5735                         "Excessive collisions");
5736         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5737                         CTLFLAG_RD, &stats->scc,
5738                         "Single collisions");
5739         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5740                         CTLFLAG_RD, &stats->mcc,
5741                         "Multiple collisions");
5742         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5743                         CTLFLAG_RD, &stats->latecol,
5744                         "Late collisions");
5745         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5746                         CTLFLAG_RD, &stats->colc,
5747                         "Collision Count");
5748         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5749                         CTLFLAG_RD, &stats->symerrs,
5750                         "Symbol Errors");
5751         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5752                         CTLFLAG_RD, &stats->sec,
5753                         "Sequence Errors");
5754         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5755                         CTLFLAG_RD, &stats->dc,
5756                         "Defer Count");
5757         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5758                         CTLFLAG_RD, &stats->mpc,
5759                         "Missed Packets");
5760         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
5761                         CTLFLAG_RD, &stats->rlec,
5762                         "Receive Length Errors");
5763         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5764                         CTLFLAG_RD, &stats->rnbc,
5765                         "Receive No Buffers");
5766         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5767                         CTLFLAG_RD, &stats->ruc,
5768                         "Receive Undersize");
5769         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5770                         CTLFLAG_RD, &stats->rfc,
5771                         "Fragmented Packets Received");
5772         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5773                         CTLFLAG_RD, &stats->roc,
5774                         "Oversized Packets Received");
5775         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5776                         CTLFLAG_RD, &stats->rjc,
5777                         "Received Jabber");
5778         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5779                         CTLFLAG_RD, &stats->rxerrc,
5780                         "Receive Errors");
5781         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5782                         CTLFLAG_RD, &stats->crcerrs,
5783                         "CRC errors");
5784         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5785                         CTLFLAG_RD, &stats->algnerrc,
5786                         "Alignment Errors");
5787         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
5788                         CTLFLAG_RD, &stats->tncrs,
5789                         "Transmit with No CRS");
5790         /* On 82575 these are collision counts */
5791         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5792                         CTLFLAG_RD, &stats->cexterr,
5793                         "Collision/Carrier extension errors");
5794         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5795                         CTLFLAG_RD, &stats->xonrxc,
5796                         "XON Received");
5797         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5798                         CTLFLAG_RD, &stats->xontxc,
5799                         "XON Transmitted");
5800         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5801                         CTLFLAG_RD, &stats->xoffrxc,
5802                         "XOFF Received");
5803         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5804                         CTLFLAG_RD, &stats->xofftxc,
5805                         "XOFF Transmitted");
5806         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
5807                         CTLFLAG_RD, &stats->fcruc,
5808                         "Unsupported Flow Control Received");
5809         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
5810                         CTLFLAG_RD, &stats->mgprc,
5811                         "Management Packets Received");
5812         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
5813                         CTLFLAG_RD, &stats->mgpdc,
5814                         "Management Packets Dropped");
5815         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
5816                         CTLFLAG_RD, &stats->mgptc,
5817                         "Management Packets Transmitted");
5818         /* Packet Reception Stats */
5819         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5820                         CTLFLAG_RD, &stats->tpr,
5821                         "Total Packets Received");
5822         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5823                         CTLFLAG_RD, &stats->gprc,
5824                         "Good Packets Received");
5825         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5826                         CTLFLAG_RD, &stats->bprc,
5827                         "Broadcast Packets Received");
5828         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5829                         CTLFLAG_RD, &stats->mprc,
5830                         "Multicast Packets Received");
5831         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5832                         CTLFLAG_RD, &stats->prc64,
5833                         "64 byte frames received");
5834         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5835                         CTLFLAG_RD, &stats->prc127,
5836                         "65-127 byte frames received");
5837         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5838                         CTLFLAG_RD, &stats->prc255,
5839                         "128-255 byte frames received");
5840         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5841                         CTLFLAG_RD, &stats->prc511,
5842                         "256-511 byte frames received");
5843         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5844                         CTLFLAG_RD, &stats->prc1023,
5845                         "512-1023 byte frames received");
5846         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5847                         CTLFLAG_RD, &stats->prc1522,
5848                         "1024-1522 byte frames received");
5849         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5850                         CTLFLAG_RD, &stats->gorc, 
5851                         "Good Octets Received");
5852         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", 
5853                         CTLFLAG_RD, &stats->tor, 
5854                         "Total Octets Received");
5855
5856         /* Packet Transmission Stats */
5857         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5858                         CTLFLAG_RD, &stats->gotc, 
5859                         "Good Octets Transmitted"); 
5860         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", 
5861                         CTLFLAG_RD, &stats->tot, 
5862                         "Total Octets Transmitted");
5863         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5864                         CTLFLAG_RD, &stats->tpt,
5865                         "Total Packets Transmitted");
5866         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5867                         CTLFLAG_RD, &stats->gptc,
5868                         "Good Packets Transmitted");
5869         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5870                         CTLFLAG_RD, &stats->bptc,
5871                         "Broadcast Packets Transmitted");
5872         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5873                         CTLFLAG_RD, &stats->mptc,
5874                         "Multicast Packets Transmitted");
5875         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5876                         CTLFLAG_RD, &stats->ptc64,
5877                         "64 byte frames transmitted");
5878         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5879                         CTLFLAG_RD, &stats->ptc127,
5880                         "65-127 byte frames transmitted");
5881         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5882                         CTLFLAG_RD, &stats->ptc255,
5883                         "128-255 byte frames transmitted");
5884         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5885                         CTLFLAG_RD, &stats->ptc511,
5886                         "256-511 byte frames transmitted");
5887         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5888                         CTLFLAG_RD, &stats->ptc1023,
5889                         "512-1023 byte frames transmitted");
5890         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5891                         CTLFLAG_RD, &stats->ptc1522,
5892                         "1024-1522 byte frames transmitted");
5893         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5894                         CTLFLAG_RD, &stats->tsctc,
5895                         "TSO Contexts Transmitted");
5896         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5897                         CTLFLAG_RD, &stats->tsctfc,
5898                         "TSO Contexts Failed");
5899
5900
5901         /* Interrupt Stats */
5902
5903         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5904                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5905         int_list = SYSCTL_CHILDREN(int_node);
5906
5907         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5908                         CTLFLAG_RD, &stats->iac,
5909                         "Interrupt Assertion Count");
5910
5911         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5912                         CTLFLAG_RD, &stats->icrxptc,
5913                         "Interrupt Cause Rx Pkt Timer Expire Count");
5914
5915         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5916                         CTLFLAG_RD, &stats->icrxatc,
5917                         "Interrupt Cause Rx Abs Timer Expire Count");
5918
5919         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5920                         CTLFLAG_RD, &stats->ictxptc,
5921                         "Interrupt Cause Tx Pkt Timer Expire Count");
5922
5923         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5924                         CTLFLAG_RD, &stats->ictxatc,
5925                         "Interrupt Cause Tx Abs Timer Expire Count");
5926
5927         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5928                         CTLFLAG_RD, &stats->ictxqec,
5929                         "Interrupt Cause Tx Queue Empty Count");
5930
5931         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5932                         CTLFLAG_RD, &stats->ictxqmtc,
5933                         "Interrupt Cause Tx Queue Min Thresh Count");
5934
5935         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5936                         CTLFLAG_RD, &stats->icrxdmtc,
5937                         "Interrupt Cause Rx Desc Min Thresh Count");
5938
5939         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5940                         CTLFLAG_RD, &stats->icrxoc,
5941                         "Interrupt Cause Receiver Overrun Count");
5942
5943         /* Host to Card Stats */
5944
5945         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5946                                     CTLFLAG_RD, NULL, 
5947                                     "Host to Card Statistics");
5948
5949         host_list = SYSCTL_CHILDREN(host_node);
5950
5951         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5952                         CTLFLAG_RD, &stats->cbtmpc,
5953                         "Circuit Breaker Tx Packet Count");
5954
5955         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5956                         CTLFLAG_RD, &stats->htdpmc,
5957                         "Host Transmit Discarded Packets");
5958
5959         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5960                         CTLFLAG_RD, &stats->rpthc,
5961                         "Rx Packets To Host");
5962
5963         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5964                         CTLFLAG_RD, &stats->cbrmpc,
5965                         "Circuit Breaker Rx Packet Count");
5966
5967         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5968                         CTLFLAG_RD, &stats->cbrdpc,
5969                         "Circuit Breaker Rx Dropped Count");
5970
5971         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5972                         CTLFLAG_RD, &stats->hgptc,
5973                         "Host Good Packets Tx Count");
5974
5975         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5976                         CTLFLAG_RD, &stats->htcbdpc,
5977                         "Host Tx Circuit Breaker Dropped Count");
5978
5979         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5980                         CTLFLAG_RD, &stats->hgorc,
5981                         "Host Good Octets Received Count");
5982
5983         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5984                         CTLFLAG_RD, &stats->hgotc,
5985                         "Host Good Octets Transmit Count");
5986
5987         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5988                         CTLFLAG_RD, &stats->lenerrs,
5989                         "Length Errors");
5990
5991         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5992                         CTLFLAG_RD, &stats->scvpc,
5993                         "SerDes/SGMII Code Violation Pkt Count");
5994
5995         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5996                         CTLFLAG_RD, &stats->hrmpc,
5997                         "Header Redirection Missed Packet Count");
5998 }
5999
6000
6001 /**********************************************************************
6002  *
6003  *  This routine provides a way to dump out the adapter EEPROM,
6004  *  often a useful debug/service tool. Only the first 32 words are
6005  *  dumped; the data that matters lives in that range.
6006  *
6007  **********************************************************************/
6008 static int
6009 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6010 {
6011         struct adapter *adapter;
6012         int error;
6013         int result;
6014
6015         result = -1;
6016         error = sysctl_handle_int(oidp, &result, 0, req);
6017
6018         if (error || !req->newptr)
6019                 return (error);
6020
6021         /*
6022          * This value will cause a hex dump of the
6023          * first 32 16-bit words of the EEPROM to
6024          * the screen.
6025          */
6026         if (result == 1) {
6027                 adapter = (struct adapter *)arg1;
6028                 igb_print_nvm_info(adapter);
6029         }
6030
6031         return (error);
6032 }
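
/*
 * Usage sketch; the OID name here is an assumption (check where the
 * handler is registered at attach time):
 *
 *      # sysctl dev.igb.0.nvm=1
 *
 * Writing 1 dumps the EEPROM via igb_print_nvm_info(); any other
 * written value is accepted but ignored.
 */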
6033
6034 static void
6035 igb_print_nvm_info(struct adapter *adapter)
6036 {
6037         u16     eeprom_data;
6038         int     i, j, row = 0;
6039
6040         /* It's a bit crude, but it gets the job done */
6041         printf("\nInterface EEPROM Dump:\n");
6042         printf("Offset\n0x0000  ");
6043         for (i = 0, j = 0; i < 32; i++, j++) {
6044                 if (j == 8) { /* Make the offset block */
6045                         j = 0; ++row;
6046                         printf("\n0x00%x0  ", row);
6047                 }
6048                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6049                 printf("%04x ", eeprom_data);
6050         }
6051         printf("\n");
6052 }
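
/*
 * The resulting dump is four rows of eight 16-bit words (values below
 * are illustrative only):
 *
 *      Interface EEPROM Dump:
 *      Offset
 *      0x0000  1234 5678 9abc def0 1234 5678 9abc def0
 *      0x0010  ...
 *      0x0020  ...
 *      0x0030  ...
 */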
6053
6054 static void
6055 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6056         const char *description, int *limit, int value)
6057 {
6058         *limit = value;
6059         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6060             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6061             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6062 }
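
/*
 * Typical call from attach; the sysctl name, limit field and default
 * shown here are illustrative only:
 *
 *      igb_set_sysctl_value(adapter, "rx_processing_limit",
 *          "max number of rx packets to process",
 *          &adapter->rx_process_limit, 100);
 */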
6063
6064 /*
6065 ** Set flow control using sysctl:
6066 ** Flow control values:
6067 **      0 - off
6068 **      1 - rx pause
6069 **      2 - tx pause
6070 **      3 - full
6071 */
6072 static int
6073 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6074 {
6075         int             error;
6076         struct adapter  *adapter = (struct adapter *) arg1;
6077         int             input = adapter->fc; /* default to the current setting */
6078
6079         error = sysctl_handle_int(oidp, &input, 0, req);
6080
6081         if ((error) || (req->newptr == NULL))
6082                 return (error);
6083
6084         switch (input) {
6085                 case e1000_fc_rx_pause:
6086                 case e1000_fc_tx_pause:
6087                 case e1000_fc_full:
6088                 case e1000_fc_none:
6089                         adapter->hw.fc.requested_mode = input;
6090                         adapter->fc = input;
6091                         break;
6092                 default:
6093                         /* Do nothing */
6094                         return (error);
6095         }
6096
6097         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6098         e1000_force_mac_fc(&adapter->hw);
6099         /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6100         return (error);
6101 }
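
/*
 * Example use (the "fc" OID name is assumed; unit 0 for illustration):
 *
 *      # sysctl dev.igb.0.fc=3         # request full flow control
 *      # sysctl dev.igb.0.fc=0         # disable pause entirely
 */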
6102
6103 /*
6104 ** Manage DMA Coalesce:
6105 ** Control values:
6106 **      0/1 - off/on
6107 **      Legal timer values are:
6108 **      250, 500, and 1000-10000 in increments of 1000
6109 */
6110 static int
6111 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6112 {
6113         struct adapter *adapter = (struct adapter *) arg1;
6114         int             error;
6115
6116         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6117
6118         if ((error) || (req->newptr == NULL))
6119                 return (error);
6120
6121         switch (adapter->dmac) {
6122                 case 0:
6123                         /* Disabling */
6124                         break;
6125                 case 1: /* Just enable and use default */
6126                         adapter->dmac = 1000;
6127                         break;
6128                 case 250:
6129                 case 500:
6130                 case 1000:
6131                 case 2000:
6132                 case 3000:
6133                 case 4000:
6134                 case 5000:
6135                 case 6000:
6136                 case 7000:
6137                 case 8000:
6138                 case 9000:
6139                 case 10000:
6140                         /* Legal values - allow */
6141                         break;
6142                 default:
6143                         /* Do nothing, illegal value */
6144                         adapter->dmac = 0;
6145                         return (EINVAL);
6146         }
6147         /* Reinit the interface */
6148         igb_init(adapter);
6149         return (error);
6150 }
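
/*
 * Example use (the "dmac" OID name is assumed):
 *
 *      # sysctl dev.igb.0.dmac=1       # enable, falls back to 1000
 *      # sysctl dev.igb.0.dmac=250     # shortest legal timer
 *      # sysctl dev.igb.0.dmac=0       # disable DMA coalescing
 */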
6151
6152 /*
6153 ** Manage Energy Efficient Ethernet:
6154 ** Control values:
6155 **     0 - EEE enabled, 1 - EEE disabled
6156 */
6157 static int
6158 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6159 {
6160         struct adapter  *adapter = (struct adapter *) arg1;
6161         int             error, value;
6162
6163         value = adapter->hw.dev_spec._82575.eee_disable;
6164         error = sysctl_handle_int(oidp, &value, 0, req);
6165         if (error || req->newptr == NULL)
6166                 return (error);
6167         IGB_CORE_LOCK(adapter);
6168         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6169         igb_init_locked(adapter);
6170         IGB_CORE_UNLOCK(adapter);
6171         return (0);
6172 }
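
/*
 * Example use (the "eee_disabled" OID name is assumed; a write takes the
 * core lock and reinitializes the interface):
 *
 *      # sysctl dev.igb.0.eee_disabled=1       # turn EEE off
 */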