/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include "if_igb.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "2.5.3-k";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII,  0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
        {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII,  0, 0, 0},
        /* required last entry */
        {0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static int      igb_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      igb_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");
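
/*
** Example (illustrative values, not recommendations): being loader
** tunables (CTLFLAG_RDTUN), these are set in /boot/loader.conf before
** the driver attaches, e.g.:
**
**      hw.igb.rxd=2048
**      hw.igb.txd=2048
**
** Sysctls created CTLFLAG_RW, such as hw.igb.enable_aim below, can
** additionally be changed at runtime with sysctl(8).
*/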

/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate is
** varied over time based on the traffic
** seen on that interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be DMA'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** another plus is that small packets often fit
** entirely into the header mbuf and thus need no
** cluster. It's a very workload-dependent feature.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int igb_tx_process_limit = -1;
TUNABLE_INT("hw.igb.tx_process_limit", &igb_tx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &igb_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on an
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[256];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == 0)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTLs */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctls for limiting the amount of work done in the taskqueues */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        igb_set_sysctl_value(adapter, "tx_processing_limit",
            "max number of tx packets to process",
            &adapter->tx_process_limit, igb_tx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. They
         * must not exceed the hardware maximum and must be a multiple
         * of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;
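
        /*
         * Worked example (assuming, as in this driver's headers, that
         * IGB_DBA_ALIGN is 128 and a legacy descriptor is 16 bytes):
         * a ring of n descriptors occupies 16*n bytes, so n must be a
         * multiple of 8.  hw.igb.txd=1024 would be accepted, while
         * hw.igb.txd=1020 would fall back to IGB_DEFAULT_TXD.
         */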

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
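        /*
         * (Illustrative: with ETHERMTU 1500, ETHER_HDR_LEN 14 and a
         * 4-byte FCS this works out to 1518 bytes, the classic maximum
         * Ethernet frame size.)
         */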

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats =
                    (struct e1000_vf_stats *)malloc(sizeof \
                    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats =
                    (struct e1000_hw_stats *)malloc(sizeof \
                    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                        if (adapter->hw.mac.type == e1000_i354)
                                e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                        else
                                e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
                }
        }

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */
                return(error);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "VLAN in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack; it always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;

        /* Which queue to use */
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IGB_TX_TRYLOCK(txr)) {
                igb_mq_start_locked(ifp, txr);
                IGB_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (0);
}
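
/*
** (Design note: the trylock-or-defer pattern above keeps the stack's
** transmit path non-blocking; the frame is always enqueued on the
** buf_ring first, and if another thread already holds the TX lock,
** the per-queue taskqueue later drains the ring via
** igb_deferred_mq_start().)
*/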

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}
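
/*
** (Design note: the drbr_peek/drbr_advance/drbr_putback protocol above
** leaves the mbuf on the buf_ring until igb_xmit() has definitely
** consumed or freed it, so a failed transmit never needs to re-enqueue
** the packet and risk reordering.)
*/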

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                        igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
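        /*
         * (Illustrative: with max_frame_size capped at 9234 bytes, the
         * largest MTU accepted above is 9234 - ETHER_HDR_LEN(14) -
         * ETHER_CRC_LEN(4) = 9216 bytes, i.e. 9K jumbo frames.)
         */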
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
#if __FreeBSD_version >= 1000000
                /* HW cannot turn these on/off separately */
                if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
                        ifp->if_capenable ^= IFCAP_RXCSUM;
                        ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
                        reinit = 1;
                }
                if (mask & IFCAP_TXCSUM) {
                        ifp->if_capenable ^= IFCAP_TXCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TXCSUM_IPV6) {
                        ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
                        reinit = 1;
                }
#else
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
#endif
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO6) {
                        ifp->if_capenable ^= IFCAP_TSO6;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
#if __FreeBSD_version >= 1000000
                ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_IP_SCTP;
#else
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
#endif
        }

#if __FreeBSD_version >= 1000000
        if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
                ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
                if (adapter->hw.mac.type != e1000_82575)
                        ifp->if_hwassist |= CSUM_IP6_SCTP;
        }
#endif
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;

        /* Clear bad data from Rx FIFOs */
        e1000_rx_fifo_flush_82575(&adapter->hw);

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                if (adapter->hw.mac.type == e1000_i354)
                        e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
                else
                        e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
        }
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
        struct igb_queue *que = context;
        struct adapter *adapter = que->adapter;
        struct tx_ring *txr = que->txr;
        struct ifnet    *ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                bool    more;

                more = igb_rxeof(que, adapter->rx_process_limit, NULL);

                IGB_TX_LOCK(txr);
                igb_txeof(txr);
#ifndef IGB_LEGACY_TX
                /* Process the stack queue only if not depleted */
                if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                    !drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
                /* Do we need another? */
                if (more) {
                        taskqueue_enqueue(que->tq, &que->que_task);
                        return;
                }
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                return;
#endif
        /* Reenable this interrupt */
        if (que->eims)
                E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
        else
                igb_enable_intr(adapter);
}
1421
1422 /* Deal with link in a sleepable context */
1423 static void
1424 igb_handle_link(void *context, int pending)
1425 {
1426         struct adapter *adapter = context;
1427
1428         IGB_CORE_LOCK(adapter);
1429         igb_handle_link_locked(adapter);
1430         IGB_CORE_UNLOCK(adapter);
1431 }
1432
1433 static void
1434 igb_handle_link_locked(struct adapter *adapter)
1435 {
1436         struct tx_ring  *txr = adapter->tx_rings;
1437         struct ifnet *ifp = adapter->ifp;
1438
1439         IGB_CORE_LOCK_ASSERT(adapter);
1440         adapter->hw.mac.get_link_status = 1;
1441         igb_update_link_status(adapter);
1442         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1443                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1444                         IGB_TX_LOCK(txr);
1445 #ifndef IGB_LEGACY_TX
1446                         /* Process the stack queue only if not depleted */
1447                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1448                             !drbr_empty(ifp, txr->br))
1449                                 igb_mq_start_locked(ifp, txr);
1450 #else
1451                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1452                                 igb_start_locked(txr, ifp);
1453 #endif
1454                         IGB_TX_UNLOCK(txr);
1455                 }
1456         }
1457 }
1458
1459 /*********************************************************************
1460  *
1461  *  MSI/Legacy Deferred
1462  *  Interrupt Service routine  
1463  *
1464  *********************************************************************/
1465 static int
1466 igb_irq_fast(void *arg)
1467 {
1468         struct adapter          *adapter = arg;
1469         struct igb_queue        *que = adapter->queues;
1470         u32                     reg_icr;
1471
1472
1473         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1474
1475         /* Hot eject?  */
1476         if (reg_icr == 0xffffffff)
1477                 return FILTER_STRAY;
1478
1479         /* Definitely not our interrupt.  */
1480         if (reg_icr == 0x0)
1481                 return FILTER_STRAY;
1482
1483         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1484                 return FILTER_STRAY;
1485
1486         /*
1487          * Mask interrupts until the taskqueue is finished running.  This is
1488          * cheap, just assume that it is needed.  This also works around the
1489          * MSI message reordering errata on certain systems.
1490          */
1491         igb_disable_intr(adapter);
1492         taskqueue_enqueue(que->tq, &que->que_task);
1493
1494         /* Link status change */
1495         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1496                 taskqueue_enqueue(que->tq, &adapter->link_task);
1497
1498         if (reg_icr & E1000_ICR_RXO)
1499                 adapter->rx_overruns++;
1500         return FILTER_HANDLED;
1501 }
1502
1503 #ifdef DEVICE_POLLING
1504 #if __FreeBSD_version >= 800000
1505 #define POLL_RETURN_COUNT(a) (a)
1506 static int
1507 #else
1508 #define POLL_RETURN_COUNT(a)
1509 static void
1510 #endif
1511 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1512 {
1513         struct adapter          *adapter = ifp->if_softc;
1514         struct igb_queue        *que;
1515         struct tx_ring          *txr;
1516         u32                     reg_icr, rx_done = 0;
1517         u32                     loop = IGB_MAX_LOOP;
1518         bool                    more;
1519
1520         IGB_CORE_LOCK(adapter);
1521         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1522                 IGB_CORE_UNLOCK(adapter);
1523                 return POLL_RETURN_COUNT(rx_done);
1524         }
1525
1526         if (cmd == POLL_AND_CHECK_STATUS) {
1527                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1528                 /* Link status change */
1529                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1530                         igb_handle_link_locked(adapter);
1531
1532                 if (reg_icr & E1000_ICR_RXO)
1533                         adapter->rx_overruns++;
1534         }
1535         IGB_CORE_UNLOCK(adapter);
1536
1537         for (int i = 0; i < adapter->num_queues; i++) {
1538                 que = &adapter->queues[i];
1539                 txr = que->txr;
1540
1541                 igb_rxeof(que, count, &rx_done);
1542
1543                 IGB_TX_LOCK(txr);
1544                 do {
1545                         more = igb_txeof(txr);
1546                 } while (loop-- && more);
1547 #ifndef IGB_LEGACY_TX
1548                 if (!drbr_empty(ifp, txr->br))
1549                         igb_mq_start_locked(ifp, txr);
1550 #else
1551                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1552                         igb_start_locked(txr, ifp);
1553 #endif
1554                 IGB_TX_UNLOCK(txr);
1555         }
1556
1557         return POLL_RETURN_COUNT(rx_done);
1558 }
1559 #endif /* DEVICE_POLLING */
1560
1561 /*********************************************************************
1562  *
1563  *  MSIX Que Interrupt Service routine
1564  *
1565  **********************************************************************/
1566 static void
1567 igb_msix_que(void *arg)
1568 {
1569         struct igb_queue *que = arg;
1570         struct adapter *adapter = que->adapter;
1571         struct ifnet   *ifp = adapter->ifp;
1572         struct tx_ring *txr = que->txr;
1573         struct rx_ring *rxr = que->rxr;
1574         u32             newitr = 0;
1575         bool            more_rx;
1576
1577         /* Ignore spurious interrupts */
1578         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1579                 return;
1580
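             /* Mask this queue's vector; it is re-enabled once servicing completes */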
1581         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1582         ++que->irqs;
1583
1584         IGB_TX_LOCK(txr);
1585         igb_txeof(txr);
1586 #ifndef IGB_LEGACY_TX
1587         /* Process the stack queue only if not depleted */
1588         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1589             !drbr_empty(ifp, txr->br))
1590                 igb_mq_start_locked(ifp, txr);
1591 #else
1592         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1593                 igb_start_locked(txr, ifp);
1594 #endif
1595         IGB_TX_UNLOCK(txr);
1596
1597         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1598
1599         if (adapter->enable_aim == FALSE)
1600                 goto no_calc;
1601         /*
1602         ** Do Adaptive Interrupt Moderation:
1603         **  - Write out last calculated setting
1604         **  - Calculate based on average size over
1605         **    the last interval.
1606         */
1607         if (que->eitr_setting)
1608                 E1000_WRITE_REG(&adapter->hw,
1609                     E1000_EITR(que->msix), que->eitr_setting);
1610  
1611         que->eitr_setting = 0;
1612
1613         /* Idle, do nothing */
1614         if ((txr->bytes == 0) && (rxr->bytes == 0))
1615                 goto no_calc;
1616                                 
1617         /* Use half the default ITR when below gigabit speed */
1618         if (adapter->link_speed != 1000)
1619                 newitr = IGB_DEFAULT_ITR / 2;
1620         else {
1621                 if ((txr->bytes) && (txr->packets))
1622                         newitr = txr->bytes/txr->packets;
1623                 if ((rxr->bytes) && (rxr->packets))
1624                         newitr = max(newitr,
1625                             (rxr->bytes / rxr->packets));
1626                 newitr += 24; /* account for hardware frame, crc */
1627                 /* set an upper boundary */
1628                 newitr = min(newitr, 3000);
1629                 /* Be nice to the mid range */
1630                 if ((newitr > 300) && (newitr < 1200))
1631                         newitr = (newitr / 3);
1632                 else
1633                         newitr = (newitr / 2);
1634         }
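             /*
             ** Worked example: ~1500-byte average frames at 1Gb give
             ** newitr = 1500 + 24 = 1524, under the 3000 cap, halved
             ** to 762 and masked below to 760.
             */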
1635         newitr &= 0x7FFC;  /* Mask invalid bits */
1636         if (adapter->hw.mac.type == e1000_82575)
1637                 newitr |= newitr << 16;
1638         else
1639                 newitr |= E1000_EITR_CNT_IGNR;
1640                  
1641         /* save for next interrupt */
1642         que->eitr_setting = newitr;
1643
1644         /* Reset state */
1645         txr->bytes = 0;
1646         txr->packets = 0;
1647         rxr->bytes = 0;
1648         rxr->packets = 0;
1649
1650 no_calc:
1651         /* Schedule a clean task if needed */
1652         if (more_rx)
1653                 taskqueue_enqueue(que->tq, &que->que_task);
1654         else
1655                 /* Reenable this interrupt */
1656                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1657         return;
1658 }
1659
1660
1661 /*********************************************************************
1662  *
1663  *  MSIX Link Interrupt Service routine
1664  *
1665  **********************************************************************/
1666
1667 static void
1668 igb_msix_link(void *arg)
1669 {
1670         struct adapter  *adapter = arg;
1671         u32             icr;
1672
1673         ++adapter->link_irq;
1674         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1675         if (!(icr & E1000_ICR_LSC))
1676                 goto spurious;
1677         igb_handle_link(adapter, 0);
1678
1679 spurious:
1680         /* Rearm */
1681         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1682         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1683         return;
1684 }
1685
1686
1687 /*********************************************************************
1688  *
1689  *  Media Ioctl callback
1690  *
1691  *  This routine is called whenever the user queries the status of
1692  *  the interface using ifconfig.
1693  *
1694  **********************************************************************/
1695 static void
1696 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1697 {
1698         struct adapter *adapter = ifp->if_softc;
1699
1700         INIT_DEBUGOUT("igb_media_status: begin");
1701
1702         IGB_CORE_LOCK(adapter);
1703         igb_update_link_status(adapter);
1704
1705         ifmr->ifm_status = IFM_AVALID;
1706         ifmr->ifm_active = IFM_ETHER;
1707
1708         if (!adapter->link_active) {
1709                 IGB_CORE_UNLOCK(adapter);
1710                 return;
1711         }
1712
1713         ifmr->ifm_status |= IFM_ACTIVE;
1714
1715         switch (adapter->link_speed) {
1716         case 10:
1717                 ifmr->ifm_active |= IFM_10_T;
1718                 break;
1719         case 100:
1720                 /*
1721                 ** Support for 100Mb SFP - these are Fiber 
1722                 ** but the media type appears as serdes
1723                 */
1724                 if (adapter->hw.phy.media_type ==
1725                     e1000_media_type_internal_serdes)
1726                         ifmr->ifm_active |= IFM_100_FX;
1727                 else
1728                         ifmr->ifm_active |= IFM_100_TX;
1729                 break;
1730         case 1000:
1731                 ifmr->ifm_active |= IFM_1000_T;
1732                 break;
1733         case 2500:
1734                 ifmr->ifm_active |= IFM_2500_SX;
1735                 break;
1736         }
1737
1738         if (adapter->link_duplex == FULL_DUPLEX)
1739                 ifmr->ifm_active |= IFM_FDX;
1740         else
1741                 ifmr->ifm_active |= IFM_HDX;
1742
1743         IGB_CORE_UNLOCK(adapter);
1744 }
1745
1746 /*********************************************************************
1747  *
1748  *  Media Ioctl callback
1749  *
1750  *  This routine is called when the user changes speed/duplex using
1751  *  media/mediaopt option with ifconfig.
1752  *
1753  **********************************************************************/
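     /* e.g.: ifconfig igb0 media 100baseTX mediaopt full-duplex */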
1754 static int
1755 igb_media_change(struct ifnet *ifp)
1756 {
1757         struct adapter *adapter = ifp->if_softc;
1758         struct ifmedia  *ifm = &adapter->media;
1759
1760         INIT_DEBUGOUT("igb_media_change: begin");
1761
1762         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1763                 return (EINVAL);
1764
1765         IGB_CORE_LOCK(adapter);
1766         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1767         case IFM_AUTO:
1768                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1769                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1770                 break;
1771         case IFM_1000_LX:
1772         case IFM_1000_SX:
1773         case IFM_1000_T:
1774                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1775                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1776                 break;
1777         case IFM_100_TX:
1778                 adapter->hw.mac.autoneg = FALSE;
1779                 adapter->hw.phy.autoneg_advertised = 0;
1780                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1781                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1782                 else
1783                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1784                 break;
1785         case IFM_10_T:
1786                 adapter->hw.mac.autoneg = FALSE;
1787                 adapter->hw.phy.autoneg_advertised = 0;
1788                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1789                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1790                 else
1791                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1792                 break;
1793         default:
1794                 device_printf(adapter->dev, "Unsupported media type\n");
1795         }
1796
1797         igb_init_locked(adapter);
1798         IGB_CORE_UNLOCK(adapter);
1799
1800         return (0);
1801 }
1802
1803
1804 /*********************************************************************
1805  *
1806  *  This routine maps the mbufs to Advanced TX descriptors.
1807  *  
1808  **********************************************************************/
1809 static int
1810 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1811 {
1812         struct adapter  *adapter = txr->adapter;
1813         u32             olinfo_status = 0, cmd_type_len;
1814         int             i, j, error, nsegs;
1815         int             first;
1816         bool            remap = TRUE;
1817         struct mbuf     *m_head;
1818         bus_dma_segment_t segs[IGB_MAX_SCATTER];
1819         bus_dmamap_t    map;
1820         struct igb_tx_buf *txbuf;
1821         union e1000_adv_tx_desc *txd = NULL;
1822
1823         m_head = *m_headp;
1824
1825         /* Basic descriptor defines */
1826         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1827             E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1828
1829         if (m_head->m_flags & M_VLANTAG)
1830                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1831
1832         /*
1833          * Important to capture the first descriptor
1834          * used because it will contain the index of
1835          * the one we tell the hardware to report back
1836          */
1837         first = txr->next_avail_desc;
1838         txbuf = &txr->tx_buffers[first];
1839         map = txbuf->map;
1840
1841         /*
1842          * Map the packet for DMA.
1843          */
1844 retry:
1845         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1846             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1847
1848         if (__predict_false(error)) {
1849                 struct mbuf *m;
1850
1851                 switch (error) {
1852                 case EFBIG:
1853                         /* Try it again? - one try */
1854                         if (remap == TRUE) {
1855                                 remap = FALSE;
1856                                 m = m_collapse(*m_headp, M_NOWAIT,
1857                                     IGB_MAX_SCATTER);
1858                                 if (m == NULL) {
1859                                         adapter->mbuf_defrag_failed++;
1860                                         m_freem(*m_headp);
1861                                         *m_headp = NULL;
1862                                         return (ENOBUFS);
1863                                 }
1864                                 *m_headp = m;
1865                                 goto retry;
1866                         } else
1867                                 return (error);
1868                 default:
1869                         txr->no_tx_dma_setup++;
1870                         m_freem(*m_headp);
1871                         *m_headp = NULL;
1872                         return (error);
1873                 }
1874         }
1875
1876         /* Make certain there are enough descriptors */
1877         if (txr->tx_avail < (nsegs + 2)) {
1878                 txr->no_desc_avail++;
1879                 bus_dmamap_unload(txr->txtag, map);
1880                 return (ENOBUFS);
1881         }
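             /*
             ** The +2 above reserves room for the offload context
             ** descriptor set up below, plus one slot of slack.
             */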
1882         m_head = *m_headp;
1883
1884         /*
1885         ** Set up the appropriate offload context
1886         ** this will consume the first descriptor
1887         */
1888         error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1889         if (__predict_false(error)) {
1890                 m_freem(*m_headp);
1891                 *m_headp = NULL;
1892                 return (error);
1893         }
1894
1895         /* 82575 needs the queue index added */
1896         if (adapter->hw.mac.type == e1000_82575)
1897                 olinfo_status |= txr->me << 4;
1898
1899         i = txr->next_avail_desc;
1900         for (j = 0; j < nsegs; j++) {
1901                 bus_size_t seglen;
1902                 bus_addr_t segaddr;
1903
1904                 txbuf = &txr->tx_buffers[i];
1905                 txd = &txr->tx_base[i];
1906                 seglen = segs[j].ds_len;
1907                 segaddr = htole64(segs[j].ds_addr);
1908
1909                 txd->read.buffer_addr = segaddr;
1910                 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1911                     cmd_type_len | seglen);
1912                 txd->read.olinfo_status = htole32(olinfo_status);
1913
1914                 if (++i == txr->num_desc)
1915                         i = 0;
1916         }
1917
1918         txd->read.cmd_type_len |=
1919             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1920         txr->tx_avail -= nsegs;
1921         txr->next_avail_desc = i;
1922
1923         txbuf->m_head = m_head;
1924         /*
1925         ** Here we swap the map so the last descriptor,
1926         ** which gets the completion interrupt has the
1927         ** real map, and the first descriptor gets the
1928         ** unused map from this descriptor.
1929         */
1930         txr->tx_buffers[first].map = txbuf->map;
1931         txbuf->map = map;
1932         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1933
1934         /* Set the EOP descriptor that will be marked done */
1935         txbuf = &txr->tx_buffers[first];
1936         txbuf->eop = txd;
1937
1938         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1939             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1940         /*
1941          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1942          * hardware that this frame is available to transmit.
1943          */
1944         ++txr->total_packets;
1945         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1946
1947         return (0);
1948 }
1949 static void
1950 igb_set_promisc(struct adapter *adapter)
1951 {
1952         struct ifnet    *ifp = adapter->ifp;
1953         struct e1000_hw *hw = &adapter->hw;
1954         u32             reg;
1955
1956         if (adapter->vf_ifp) {
1957                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1958                 return;
1959         }
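             /*
             ** RCTL.UPE is unicast promiscuous and RCTL.MPE multicast
             ** promiscuous: IFF_PROMISC sets both, IFF_ALLMULTI only MPE.
             */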
1960
1961         reg = E1000_READ_REG(hw, E1000_RCTL);
1962         if (ifp->if_flags & IFF_PROMISC) {
1963                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1964                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1965         } else if (ifp->if_flags & IFF_ALLMULTI) {
1966                 reg |= E1000_RCTL_MPE;
1967                 reg &= ~E1000_RCTL_UPE;
1968                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1969         }
1970 }
1971
1972 static void
1973 igb_disable_promisc(struct adapter *adapter)
1974 {
1975         struct e1000_hw *hw = &adapter->hw;
1976         struct ifnet    *ifp = adapter->ifp;
1977         u32             reg;
1978         int             mcnt = 0;
1979
1980         if (adapter->vf_ifp) {
1981                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1982                 return;
1983         }
1984         reg = E1000_READ_REG(hw, E1000_RCTL);
1985         reg &=  (~E1000_RCTL_UPE);
1986         if (ifp->if_flags & IFF_ALLMULTI)
1987                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1988         else {
1989                 struct  ifmultiaddr *ifma;
1990 #if __FreeBSD_version < 800000
1991                 IF_ADDR_LOCK(ifp);
1992 #else   
1993                 if_maddr_rlock(ifp);
1994 #endif
1995                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1996                         if (ifma->ifma_addr->sa_family != AF_LINK)
1997                                 continue;
1998                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1999                                 break;
2000                         mcnt++;
2001                 }
2002 #if __FreeBSD_version < 800000
2003                 IF_ADDR_UNLOCK(ifp);
2004 #else
2005                 if_maddr_runlock(ifp);
2006 #endif
2007         }
2008         /* Keep MPE set if we're at the multicast group limit */
2009         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2010                 reg &=  (~E1000_RCTL_MPE);
2011         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2012 }
2013
2014
2015 /*********************************************************************
2016  *  Multicast Update
2017  *
2018  *  This routine is called whenever the multicast address list is updated.
2019  *
2020  **********************************************************************/
2021
2022 static void
2023 igb_set_multi(struct adapter *adapter)
2024 {
2025         struct ifnet    *ifp = adapter->ifp;
2026         struct ifmultiaddr *ifma;
2027         u32 reg_rctl = 0;
2028         u8  *mta;
2029
2030         int mcnt = 0;
2031
2032         IOCTL_DEBUGOUT("igb_set_multi: begin");
2033
2034         mta = adapter->mta;
2035         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2036             MAX_NUM_MULTICAST_ADDRESSES);
2037
2038 #if __FreeBSD_version < 800000
2039         IF_ADDR_LOCK(ifp);
2040 #else
2041         if_maddr_rlock(ifp);
2042 #endif
2043         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2044                 if (ifma->ifma_addr->sa_family != AF_LINK)
2045                         continue;
2046
2047                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2048                         break;
2049
2050                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2051                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2052                 mcnt++;
2053         }
2054 #if __FreeBSD_version < 800000
2055         IF_ADDR_UNLOCK(ifp);
2056 #else
2057         if_maddr_runlock(ifp);
2058 #endif
2059
2060         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2061                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2062                 reg_rctl |= E1000_RCTL_MPE;
2063                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2064         } else
2065                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2066 }
2067
2068
2069 /*********************************************************************
2070  *  Timer routine:
2071  *      This routine checks for link status,
2072  *      updates statistics, and does the watchdog.
2073  *
2074  **********************************************************************/
2075
2076 static void
2077 igb_local_timer(void *arg)
2078 {
2079         struct adapter          *adapter = arg;
2080         device_t                dev = adapter->dev;
2081         struct ifnet            *ifp = adapter->ifp;
2082         struct tx_ring          *txr = adapter->tx_rings;
2083         struct igb_queue        *que = adapter->queues;
2084         int                     hung = 0, busy = 0;
2085
2086
2087         IGB_CORE_LOCK_ASSERT(adapter);
2088
2089         igb_update_link_status(adapter);
2090         igb_update_stats_counters(adapter);
2091
2092         /*
2093         ** Check the TX queues status
2094         **      - central locked handling of OACTIVE
2095         **      - watchdog only if all queues show hung
2096         */
2097         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2098                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2099                     (adapter->pause_frames == 0))
2100                         ++hung;
2101                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2102                         ++busy;
2103                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2104                         taskqueue_enqueue(que->tq, &que->que_task);
2105         }
2106         if (hung == adapter->num_queues)
2107                 goto timeout;
2108         if (busy == adapter->num_queues)
2109                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2110         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2111             (busy < adapter->num_queues))
2112                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2113
2114         adapter->pause_frames = 0;
2115         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2116 #ifndef DEVICE_POLLING
2117         /* Schedule all queue interrupts - deadlock protection */
2118         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2119 #endif
2120         return;
2121
2122 timeout:
2123         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2124         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2125             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2126             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2127         device_printf(dev,"TX(%d) desc avail = %d,"
2128             "Next TX to Clean = %d\n",
2129             txr->me, txr->tx_avail, txr->next_to_clean);
2130         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2131         adapter->watchdog_events++;
2132         igb_init_locked(adapter);
2133 }
2134
2135 static void
2136 igb_update_link_status(struct adapter *adapter)
2137 {
2138         struct e1000_hw         *hw = &adapter->hw;
2139         struct e1000_fc_info    *fc = &hw->fc;
2140         struct ifnet            *ifp = adapter->ifp;
2141         device_t                dev = adapter->dev;
2142         struct tx_ring          *txr = adapter->tx_rings;
2143         u32                     link_check, thstat, ctrl;
2144         char                    *flowctl = NULL;
2145
2146         link_check = thstat = ctrl = 0;
2147
2148         /* Get the cached link value or read for real */
2149         switch (hw->phy.media_type) {
2150         case e1000_media_type_copper:
2151                 if (hw->mac.get_link_status) {
2152                         /* Do the work to read phy */
2153                         e1000_check_for_link(hw);
2154                         link_check = !hw->mac.get_link_status;
2155                 } else
2156                         link_check = TRUE;
2157                 break;
2158         case e1000_media_type_fiber:
2159                 e1000_check_for_link(hw);
2160                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2161                                  E1000_STATUS_LU);
2162                 break;
2163         case e1000_media_type_internal_serdes:
2164                 e1000_check_for_link(hw);
2165                 link_check = adapter->hw.mac.serdes_has_link;
2166                 break;
2167         /* VF device is type_unknown */
2168         case e1000_media_type_unknown:
2169                 e1000_check_for_link(hw);
2170                 link_check = !hw->mac.get_link_status;
2171                 /* Fall thru */
2172         default:
2173                 break;
2174         }
2175
2176         /* Check for thermal downshift or shutdown */
2177         if (hw->mac.type == e1000_i350) {
2178                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2179                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2180         }
2181
2182         /* Get the flow control for display */
2183         switch (fc->current_mode) {
2184         case e1000_fc_rx_pause:
2185                 flowctl = "RX";
2186                 break;  
2187         case e1000_fc_tx_pause:
2188                 flowctl = "TX";
2189                 break;  
2190         case e1000_fc_full:
2191                 flowctl = "Full";
2192                 break;  
2193         case e1000_fc_none:
2194         default:
2195                 flowctl = "None";
2196                 break;  
2197         }
2198
2199         /* Now we check if a transition has happened */
2200         if (link_check && (adapter->link_active == 0)) {
2201                 e1000_get_speed_and_duplex(&adapter->hw, 
2202                     &adapter->link_speed, &adapter->link_duplex);
2203                 if (bootverbose)
2204                         device_printf(dev, "Link is up %d Mbps %s,"
2205                             " Flow Control: %s\n",
2206                             adapter->link_speed,
2207                             ((adapter->link_duplex == FULL_DUPLEX) ?
2208                             "Full Duplex" : "Half Duplex"), flowctl);
2209                 adapter->link_active = 1;
2210                 ifp->if_baudrate = adapter->link_speed * 1000000;
2211                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2212                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2213                         device_printf(dev, "Link: thermal downshift\n");
2214                 /* Delay Link Up for Phy update */
2215                 if (((hw->mac.type == e1000_i210) ||
2216                     (hw->mac.type == e1000_i211)) &&
2217                     (hw->phy.id == I210_I_PHY_ID))
2218                         msec_delay(I210_LINK_DELAY);
2219                 /* Reset if the media type changed. */
2220                 if (hw->dev_spec._82575.media_changed) {
2221                         hw->dev_spec._82575.media_changed = false;
2222                         adapter->flags |= IGB_MEDIA_RESET;
2223                         igb_reset(adapter);
2224                 }       
2225                 /* This can sleep */
2226                 if_link_state_change(ifp, LINK_STATE_UP);
2227         } else if (!link_check && (adapter->link_active == 1)) {
2228                 ifp->if_baudrate = adapter->link_speed = 0;
2229                 adapter->link_duplex = 0;
2230                 if (bootverbose)
2231                         device_printf(dev, "Link is Down\n");
2232                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2233                     (thstat & E1000_THSTAT_PWR_DOWN))
2234                         device_printf(dev, "Link: thermal shutdown\n");
2235                 adapter->link_active = 0;
2236                 /* This can sleep */
2237                 if_link_state_change(ifp, LINK_STATE_DOWN);
2238                 /* Reset queue state */
2239                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2240                         txr->queue_status = IGB_QUEUE_IDLE;
2241         }
2242 }
2243
2244 /*********************************************************************
2245  *
2246  *  This routine disables all traffic on the adapter by issuing a
2247  *  global reset on the MAC and deallocates TX/RX buffers.
2248  *
2249  **********************************************************************/
2250
2251 static void
2252 igb_stop(void *arg)
2253 {
2254         struct adapter  *adapter = arg;
2255         struct ifnet    *ifp = adapter->ifp;
2256         struct tx_ring *txr = adapter->tx_rings;
2257
2258         IGB_CORE_LOCK_ASSERT(adapter);
2259
2260         INIT_DEBUGOUT("igb_stop: begin");
2261
2262         igb_disable_intr(adapter);
2263
2264         callout_stop(&adapter->timer);
2265
2266         /* Tell the stack that the interface is no longer active */
2267         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2268         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2269
2270         /* Disarm watchdog timer. */
2271         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2272                 IGB_TX_LOCK(txr);
2273                 txr->queue_status = IGB_QUEUE_IDLE;
2274                 IGB_TX_UNLOCK(txr);
2275         }
2276
2277         e1000_reset_hw(&adapter->hw);
2278         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2279
2280         e1000_led_off(&adapter->hw);
2281         e1000_cleanup_led(&adapter->hw);
2282 }
2283
2284
2285 /*********************************************************************
2286  *
2287  *  Determine hardware revision.
2288  *
2289  **********************************************************************/
2290 static void
2291 igb_identify_hardware(struct adapter *adapter)
2292 {
2293         device_t dev = adapter->dev;
2294
2295         /* Make sure our PCI config space has the necessary stuff set */
2296         pci_enable_busmaster(dev);
2297         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2298
2299         /* Save off the information about this board */
2300         adapter->hw.vendor_id = pci_get_vendor(dev);
2301         adapter->hw.device_id = pci_get_device(dev);
2302         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2303         adapter->hw.subsystem_vendor_id =
2304             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2305         adapter->hw.subsystem_device_id =
2306             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2307
2308         /* Set MAC type early for PCI setup */
2309         e1000_set_mac_type(&adapter->hw);
2310
2311         /* Are we a VF device? */
2312         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2313             (adapter->hw.mac.type == e1000_vfadapt_i350))
2314                 adapter->vf_ifp = 1;
2315         else
2316                 adapter->vf_ifp = 0;
2317 }
2318
2319 static int
2320 igb_allocate_pci_resources(struct adapter *adapter)
2321 {
2322         device_t        dev = adapter->dev;
2323         int             rid;
2324
2325         rid = PCIR_BAR(0);
2326         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2327             &rid, RF_ACTIVE);
2328         if (adapter->pci_mem == NULL) {
2329                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2330                 return (ENXIO);
2331         }
2332         adapter->osdep.mem_bus_space_tag =
2333             rman_get_bustag(adapter->pci_mem);
2334         adapter->osdep.mem_bus_space_handle =
2335             rman_get_bushandle(adapter->pci_mem);
2336         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2337
2338         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2339
2340         /* This will setup either MSI/X or MSI */
2341         adapter->msix = igb_setup_msix(adapter);
2342         adapter->hw.back = &adapter->osdep;
2343
2344         return (0);
2345 }
2346
2347 /*********************************************************************
2348  *
2349  *  Setup the Legacy or MSI Interrupt handler
2350  *
2351  **********************************************************************/
2352 static int
2353 igb_allocate_legacy(struct adapter *adapter)
2354 {
2355         device_t                dev = adapter->dev;
2356         struct igb_queue        *que = adapter->queues;
2357 #ifndef IGB_LEGACY_TX
2358         struct tx_ring          *txr = adapter->tx_rings;
2359 #endif
2360         int                     error, rid = 0;
2361
2362         /* Turn off all interrupts */
2363         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2364
2365         /* MSI RID is 1 */
2366         if (adapter->msix == 1)
2367                 rid = 1;
2368
2369         /* We allocate a single interrupt resource */
2370         adapter->res = bus_alloc_resource_any(dev,
2371             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2372         if (adapter->res == NULL) {
2373                 device_printf(dev, "Unable to allocate bus resource: "
2374                     "interrupt\n");
2375                 return (ENXIO);
2376         }
2377
2378 #ifndef IGB_LEGACY_TX
2379         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2380 #endif
2381
2382         /*
2383          * Try allocating a fast interrupt and the associated deferred
2384          * processing contexts.
2385          */
2386         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2387         /* Make tasklet for deferred link handling */
2388         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2389         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2390             taskqueue_thread_enqueue, &que->tq);
2391         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2392             device_get_nameunit(adapter->dev));
2393         if ((error = bus_setup_intr(dev, adapter->res,
2394             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2395             adapter, &adapter->tag)) != 0) {
2396                 device_printf(dev, "Failed to register fast interrupt "
2397                             "handler: %d\n", error);
2398                 taskqueue_free(que->tq);
2399                 que->tq = NULL;
2400                 return (error);
2401         }
2402
2403         return (0);
2404 }
2405
2406
2407 /*********************************************************************
2408  *
2409  *  Setup the MSIX Queue Interrupt handlers: 
2410  *
2411  **********************************************************************/
2412 static int
2413 igb_allocate_msix(struct adapter *adapter)
2414 {
2415         device_t                dev = adapter->dev;
2416         struct igb_queue        *que = adapter->queues;
2417         int                     error, rid, vector = 0;
2418
2419         /* Be sure to start with all interrupts disabled */
2420         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2421         E1000_WRITE_FLUSH(&adapter->hw);
2422
2423         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2424                 rid = vector + 1;
2425                 que->res = bus_alloc_resource_any(dev,
2426                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2427                 if (que->res == NULL) {
2428                         device_printf(dev,
2429                             "Unable to allocate bus resource: "
2430                             "MSIX Queue Interrupt\n");
2431                         return (ENXIO);
2432                 }
2433                 error = bus_setup_intr(dev, que->res,
2434                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2435                     igb_msix_que, que, &que->tag);
2436                 if (error) {
2437                         que->res = NULL;
2438                         device_printf(dev, "Failed to register Queue handler");
2439                         return (error);
2440                 }
2441 #if __FreeBSD_version >= 800504
2442                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2443 #endif
2444                 que->msix = vector;
2445                 if (adapter->hw.mac.type == e1000_82575)
2446                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2447                 else
2448                         que->eims = 1 << vector;
2449                 /*
2450                 ** Bind the msix vector, and thus the
2451                 ** rings, to the corresponding cpu.
2452                 */
2453                 if (adapter->num_queues > 1) {
2454                         if (igb_last_bind_cpu < 0)
2455                                 igb_last_bind_cpu = CPU_FIRST();
2456                         bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2457                         device_printf(dev,
2458                                 "Bound queue %d to cpu %d\n",
2459                                 i, igb_last_bind_cpu);
2460                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2461                 }
2462 #ifndef IGB_LEGACY_TX
2463                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2464                     que->txr);
2465 #endif
2466                 /* Make tasklet for deferred handling */
2467                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2468                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2469                     taskqueue_thread_enqueue, &que->tq);
2470                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2471                     device_get_nameunit(adapter->dev));
2472         }
2473
2474         /* And Link */
2475         rid = vector + 1;
2476         adapter->res = bus_alloc_resource_any(dev,
2477             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2478         if (adapter->res == NULL) {
2479                 device_printf(dev,
2480                     "Unable to allocate bus resource: "
2481                     "MSIX Link Interrupt\n");
2482                 return (ENXIO);
2483         }
2484         if ((error = bus_setup_intr(dev, adapter->res,
2485             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2486             igb_msix_link, adapter, &adapter->tag)) != 0) {
2487                 device_printf(dev, "Failed to register Link handler");
2488                 return (error);
2489         }
2490 #if __FreeBSD_version >= 800504
2491         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2492 #endif
2493         adapter->linkvec = vector;
2494
2495         return (0);
2496 }
2497
2498
2499 static void
2500 igb_configure_queues(struct adapter *adapter)
2501 {
2502         struct  e1000_hw        *hw = &adapter->hw;
2503         struct  igb_queue       *que;
2504         u32                     tmp, ivar = 0, newitr = 0;
2505
2506         /* First turn on RSS capability */
2507         if (adapter->hw.mac.type != e1000_82575)
2508                 E1000_WRITE_REG(hw, E1000_GPIE,
2509                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2510                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2511
2512         /* Turn on MSIX */
2513         switch (adapter->hw.mac.type) {
2514         case e1000_82580:
2515         case e1000_i350:
2516         case e1000_i354:
2517         case e1000_i210:
2518         case e1000_i211:
2519         case e1000_vfadapt:
2520         case e1000_vfadapt_i350:
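                     /*
                     ** Each 32-bit IVAR holds four 8-bit entries: index
                     ** i >> 1 selects the register, bytes 0/1 carry RX/TX
                     ** for the even queue and bytes 2/3 for the odd one.
                     */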
2521                 /* RX entries */
2522                 for (int i = 0; i < adapter->num_queues; i++) {
2523                         u32 index = i >> 1;
2524                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2525                         que = &adapter->queues[i];
2526                         if (i & 1) {
2527                                 ivar &= 0xFF00FFFF;
2528                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2529                         } else {
2530                                 ivar &= 0xFFFFFF00;
2531                                 ivar |= que->msix | E1000_IVAR_VALID;
2532                         }
2533                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2534                 }
2535                 /* TX entries */
2536                 for (int i = 0; i < adapter->num_queues; i++) {
2537                         u32 index = i >> 1;
2538                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2539                         que = &adapter->queues[i];
2540                         if (i & 1) {
2541                                 ivar &= 0x00FFFFFF;
2542                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2543                         } else {
2544                                 ivar &= 0xFFFF00FF;
2545                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2546                         }
2547                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2548                         adapter->que_mask |= que->eims;
2549                 }
2550
2551                 /* And for the link interrupt */
2552                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2553                 adapter->link_mask = 1 << adapter->linkvec;
2554                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2555                 break;
2556         case e1000_82576:
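                     /*
                     ** 82576 IVARs wrap at index 8 (i & 0x7): queues 0-7
                     ** use the low half of each register, queues 8-15 the
                     ** high half.
                     */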
2557                 /* RX entries */
2558                 for (int i = 0; i < adapter->num_queues; i++) {
2559                         u32 index = i & 0x7; /* Each IVAR has two entries */
2560                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2561                         que = &adapter->queues[i];
2562                         if (i < 8) {
2563                                 ivar &= 0xFFFFFF00;
2564                                 ivar |= que->msix | E1000_IVAR_VALID;
2565                         } else {
2566                                 ivar &= 0xFF00FFFF;
2567                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2568                         }
2569                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2570                         adapter->que_mask |= que->eims;
2571                 }
2572                 /* TX entries */
2573                 for (int i = 0; i < adapter->num_queues; i++) {
2574                         u32 index = i & 0x7; /* Each IVAR has two entries */
2575                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2576                         que = &adapter->queues[i];
2577                         if (i < 8) {
2578                                 ivar &= 0xFFFF00FF;
2579                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2580                         } else {
2581                                 ivar &= 0x00FFFFFF;
2582                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2583                         }
2584                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2585                         adapter->que_mask |= que->eims;
2586                 }
2587
2588                 /* And for the link interrupt */
2589                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2590                 adapter->link_mask = 1 << adapter->linkvec;
2591                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2592                 break;
2593
2594         case e1000_82575:
2595                 /* Enable MSI-X support */
2596                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2597                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2598                 /* Auto-Mask interrupts upon ICR read. */
2599                 tmp |= E1000_CTRL_EXT_EIAME;
2600                 tmp |= E1000_CTRL_EXT_IRCA;
2601                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2602
2603                 /* Queues */
2604                 for (int i = 0; i < adapter->num_queues; i++) {
2605                         que = &adapter->queues[i];
2606                         tmp = E1000_EICR_RX_QUEUE0 << i;
2607                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2608                         que->eims = tmp;
2609                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2610                             i, que->eims);
2611                         adapter->que_mask |= que->eims;
2612                 }
2613
2614                 /* Link */
2615                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2616                     E1000_EIMS_OTHER);
2617                 adapter->link_mask |= E1000_EIMS_OTHER;
2618         default:
2619                 break;
2620         }
2621
2622         /* Set the starting interrupt rate */
2623         if (igb_max_interrupt_rate > 0)
2624                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
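             /* e.g. igb_max_interrupt_rate = 8000 yields 4000000/8000 = 500 */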
2625
2626         if (hw->mac.type == e1000_82575)
2627                 newitr |= newitr << 16;
2628         else
2629                 newitr |= E1000_EITR_CNT_IGNR;
2630
2631         for (int i = 0; i < adapter->num_queues; i++) {
2632                 que = &adapter->queues[i];
2633                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2634         }
2635
2636         return;
2637 }
2638
2639
2640 static void
2641 igb_free_pci_resources(struct adapter *adapter)
2642 {
2643         struct          igb_queue *que = adapter->queues;
2644         device_t        dev = adapter->dev;
2645         int             rid;
2646
2647         /*
2648         ** There is a slight possibility of a failure mode
2649         ** in attach that will result in entering this function
2650         ** before interrupt resources have been initialized, and
2651         ** in that case we do not want to execute the loops below.
2652         ** We can detect this reliably by the state of the adapter
2653         ** res pointer.
2654         */
2655         if (adapter->res == NULL)
2656                 goto mem;
2657
2658         /*
2659          * First release all the interrupt resources:
2660          */
2661         for (int i = 0; i < adapter->num_queues; i++, que++) {
2662                 rid = que->msix + 1;
2663                 if (que->tag != NULL) {
2664                         bus_teardown_intr(dev, que->res, que->tag);
2665                         que->tag = NULL;
2666                 }
2667                 if (que->res != NULL)
2668                         bus_release_resource(dev,
2669                             SYS_RES_IRQ, rid, que->res);
2670         }
2671
2672         /* Clean the Legacy or Link interrupt last */
2673         if (adapter->linkvec) /* we are doing MSIX */
2674                 rid = adapter->linkvec + 1;
2675         else
2676                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2677
2678         que = adapter->queues;
2679         if (adapter->tag != NULL) {
2680                 taskqueue_drain(que->tq, &adapter->link_task);
2681                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2682                 adapter->tag = NULL;
2683         }
2684         if (adapter->res != NULL)
2685                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2686
2687         for (int i = 0; i < adapter->num_queues; i++, que++) {
2688                 if (que->tq != NULL) {
2689 #ifndef IGB_LEGACY_TX
2690                         taskqueue_drain(que->tq, &que->txr->txq_task);
2691 #endif
2692                         taskqueue_drain(que->tq, &que->que_task);
2693                         taskqueue_free(que->tq);
2694                 }
2695         }
2696 mem:
2697         if (adapter->msix)
2698                 pci_release_msi(dev);
2699
2700         if (adapter->msix_mem != NULL)
2701                 bus_release_resource(dev, SYS_RES_MEMORY,
2702                     adapter->memrid, adapter->msix_mem);
2703
2704         if (adapter->pci_mem != NULL)
2705                 bus_release_resource(dev, SYS_RES_MEMORY,
2706                     PCIR_BAR(0), adapter->pci_mem);
2707
2708 }
2709
2710 /*
2711  * Setup Either MSI/X or MSI
2712  */
2713 static int
2714 igb_setup_msix(struct adapter *adapter)
2715 {
2716         device_t        dev = adapter->dev;
2717         int             bar, want, queues, msgs, maxqueues;
2718
2719         /* tuneable override */
2720         if (igb_enable_msix == 0)
2721                 goto msi;
2722
2723         /* First try MSI/X */
2724         msgs = pci_msix_count(dev); 
2725         if (msgs == 0)
2726                 goto msi;
2727         /*
2728         ** Some newer devices, as with ixgbe, may
2729         ** use a different BAR, so we need to keep
2730         ** track of which is used.
2731         */
2732         adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2733         bar = pci_read_config(dev, adapter->memrid, 4);
2734         if (bar == 0) /* use next bar */
2735                 adapter->memrid += 4;
2736         adapter->msix_mem = bus_alloc_resource_any(dev,
2737             SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2738         if (adapter->msix_mem == NULL) {
2739                 /* May not be enabled */
2740                 device_printf(adapter->dev,
2741                     "Unable to map MSIX table \n");
2742                 goto msi;
2743         }
2744
2745         /* Figure out a reasonable auto config value */
2746         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2747
2748         /* Manual override */
2749         if (igb_num_queues != 0)
2750                 queues = igb_num_queues;
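             /* Typically set via the loader tunable hw.igb.num_queues */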
2751
2752         /* Sanity check based on HW */
2753         switch (adapter->hw.mac.type) {
2754                 case e1000_82575:
2755                         maxqueues = 4;
2756                         break;
2757                 case e1000_82576:
2758                 case e1000_82580:
2759                 case e1000_i350:
2760                 case e1000_i354:
2761                         maxqueues = 8;
2762                         break;
2763                 case e1000_i210:
2764                         maxqueues = 4;
2765                         break;
2766                 case e1000_i211:
2767                         maxqueues = 2;
2768                         break;
2769                 default:  /* VF interfaces */
2770                         maxqueues = 1;
2771                         break;
2772         }
2773         if (queues > maxqueues)
2774                 queues = maxqueues;
2775
2776         /* Manual override */
2777         if (igb_num_queues != 0)
2778                 queues = igb_num_queues;
2779
2780         /*
2781         ** One vector (RX/TX pair) per queue
2782         ** plus an additional for Link interrupt
2783         */
2784         want = queues + 1;
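             /* e.g. 4 queues want 5 vectors: one per RX/TX pair plus link */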
2785         if (msgs >= want)
2786                 msgs = want;
2787         else {
2788                 device_printf(adapter->dev,
2789                     "MSIX Configuration Problem, "
2790                     "%d vectors configured, but %d queues wanted!\n",
2791                     msgs, want);
2792                 goto msi;
2793         }
2794         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2795                 device_printf(adapter->dev,
2796                     "Using MSIX interrupts with %d vectors\n", msgs);
2797                 adapter->num_queues = queues;
2798                 return (msgs);
2799         }
2800         /*
2801         ** If MSIX alloc failed or provided us with
2802         ** less than needed, free and fall through to MSI
2803         */
2804         pci_release_msi(dev);
2805
2806 msi:
2807         if (adapter->msix_mem != NULL) {
2808                 bus_release_resource(dev, SYS_RES_MEMORY,
2809                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2810                 adapter->msix_mem = NULL;
2811         }
2812         msgs = 1;
2813         if (pci_alloc_msi(dev, &msgs) == 0) {
2814                 device_printf(adapter->dev," Using an MSI interrupt\n");
2815                 return (msgs);
2816         }
2817         device_printf(adapter->dev," Using a Legacy interrupt\n");
2818         return (0);
2819 }
2820
2821 /*********************************************************************
2822  *
2823  *  Initialize the DMA Coalescing feature
2824  *
2825  **********************************************************************/
2826 static void
2827 igb_init_dmac(struct adapter *adapter, u32 pba)
2828 {
2829         device_t        dev = adapter->dev;
2830         struct e1000_hw *hw = &adapter->hw;
2831         u32             dmac, reg = ~E1000_DMACR_DMAC_EN;
2832         u16             hwm;
2833
2834         if (hw->mac.type == e1000_i211)
2835                 return;
2836
2837         if (hw->mac.type > e1000_82580) {
2838
2839                 if (adapter->dmac == 0) { /* Disabling it */
2840                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2841                         return;
2842                 } else
2843                         device_printf(dev, "DMA Coalescing enabled\n");
2844
2845                 /* Set starting threshold */
2846                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2847
2848                 hwm = 64 * pba - adapter->max_frame_size / 16;
2849                 if (hwm < 64 * (pba - 6))
2850                         hwm = 64 * (pba - 6);
2851                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2852                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2853                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2854                     & E1000_FCRTC_RTH_COAL_MASK);
2855                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2856
2857
2858                 dmac = pba - adapter->max_frame_size / 512;
2859                 if (dmac < pba - 10)
2860                         dmac = pba - 10;
2861                 reg = E1000_READ_REG(hw, E1000_DMACR);
2862                 reg &= ~E1000_DMACR_DMACTHR_MASK;
2863                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2864                     & E1000_DMACR_DMACTHR_MASK);
2865
2866                 /* transition to L0s or L1 if available */
2867                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2868
2869                 /* Check whether this is a 2.5Gb backplane connection
2870                 * before configuring the watchdog timer: the timer
2871                 * value is the msec setting expressed in 12.8 usec
2872                 * intervals for a 2.5Gb connection, and in 32 usec
2873                 * intervals otherwise.
2874                 */
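                     /*
                     ** Note: (dmac * 5) >> 6 is 5*dmac/64, i.e. roughly
                     ** dmac/12.8, while dmac >> 5 is dmac/32, matching
                     ** the interval units described above.
                     */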
2875                 if (hw->mac.type == e1000_i354) {
2876                         int status = E1000_READ_REG(hw, E1000_STATUS);
2877                         if ((status & E1000_STATUS_2P5_SKU) &&
2878                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2879                                 reg |= ((adapter->dmac * 5) >> 6);
2880                         else
2881                                 reg |= (adapter->dmac >> 5);
2882                 } else {
2883                         reg |= (adapter->dmac >> 5);
2884                 }
2885
2886                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2887
2888                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2889
2890                 /* Set the interval before transition */
2891                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2892                 if (hw->mac.type == e1000_i350)
2893                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
2894                 /*
2895                 ** In a 2.5Gb connection the TTLX unit is 0.4 usec,
2896                 ** so 0xA (10 * 0.4 usec) keeps the delay at 4 usec
2897                 */
2898                 if (hw->mac.type == e1000_i354) {
2899                         int status = E1000_READ_REG(hw, E1000_STATUS);
2900                         if ((status & E1000_STATUS_2P5_SKU) &&
2901                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2902                                 reg |= 0xA;
2903                         else
2904                                 reg |= 0x4;
2905                 } else {
2906                         reg |= 0x4;
2907                 }
2908
2909                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2910
2911                 /* free space in tx packet buffer to wake from DMA coal */
2912                 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2913                     (2 * adapter->max_frame_size)) >> 6);
2914
2915                 /* make low power state decision controlled by DMA coal */
2916                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2917                 reg &= ~E1000_PCIEMISC_LX_DECISION;
2918                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2919
2920         } else if (hw->mac.type == e1000_82580) {
2921                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2922                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2923                     reg & ~E1000_PCIEMISC_LX_DECISION);
2924                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
2925         }
2926 }
2927
2928
2929 /*********************************************************************
2930  *
2931  *  Set up a fresh starting state
2932  *
2933  **********************************************************************/
2934 static void
2935 igb_reset(struct adapter *adapter)
2936 {
2937         device_t        dev = adapter->dev;
2938         struct e1000_hw *hw = &adapter->hw;
2939         struct e1000_fc_info *fc = &hw->fc;
2940         struct ifnet    *ifp = adapter->ifp;
2941         u32             pba = 0;
2942         u16             hwm;
2943
2944         INIT_DEBUGOUT("igb_reset: begin");
2945
2946         /* Let the firmware know the OS is in control */
2947         igb_get_hw_control(adapter);
2948
2949         /*
2950          * Packet Buffer Allocation (PBA)
2951          * Writing PBA sets the receive portion of the buffer
2952          * the remainder is used for the transmit buffer.
2953          */
2954         switch (hw->mac.type) {
2955         case e1000_82575:
2956                 pba = E1000_PBA_32K;
2957                 break;
2958         case e1000_82576:
2959         case e1000_vfadapt:
2960                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2961                 pba &= E1000_RXPBS_SIZE_MASK_82576;
2962                 break;
2963         case e1000_82580:
2964         case e1000_i350:
2965         case e1000_i354:
2966         case e1000_vfadapt_i350:
2967                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2968                 pba = e1000_rxpbs_adjust_82580(pba);
2969                 break;
2970         case e1000_i210:
2971         case e1000_i211:
2972                 pba = E1000_PBA_34K;
                     break;
2973         default:
2974                 break;
2975         }
2976
2977         /* Special needs in case of Jumbo frames */
2978         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2979                 u32 tx_space, min_tx, min_rx;
2980                 pba = E1000_READ_REG(hw, E1000_PBA);
2981                 tx_space = pba >> 16;
2982                 pba &= 0xffff;
2983                 min_tx = (adapter->max_frame_size +
2984                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2985                 min_tx = roundup2(min_tx, 1024);
2986                 min_tx >>= 10;
2987                 min_rx = adapter->max_frame_size;
2988                 min_rx = roundup2(min_rx, 1024);
2989                 min_rx >>= 10;
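                     /*
                     ** Both minima were rounded up to 1KB and shifted
                     ** right by 10, so they are expressed in KB, the
                     ** same units the PBA register uses.
                     */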
2990                 if (tx_space < min_tx &&
2991                     ((min_tx - tx_space) < pba)) {
2992                         pba = pba - (min_tx - tx_space);
2993                         /*
2994                          * if short on rx space, rx wins
2995                          * and must trump tx adjustment
2996                          */
2997                         if (pba < min_rx)
2998                                 pba = min_rx;
2999                 }
3000                 E1000_WRITE_REG(hw, E1000_PBA, pba);
3001         }
3002
3003         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3004
3005         /*
3006          * These parameters control the automatic generation (Tx) and
3007          * response (Rx) to Ethernet PAUSE frames.
3008          * - High water mark should allow for at least two frames to be
3009          *   received after sending an XOFF.
3010          * - Low water mark works best when it is very near the high water mark.
3011          *   This allows the receiver to restart by sending XON when it has
3012          *   drained a bit.
3013          */
3014         hwm = min(((pba << 10) * 9 / 10),
3015             ((pba << 10) - 2 * adapter->max_frame_size));
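             /*
              * e.g. with pba = 34 (KB) and a 1518-byte max frame:
              * min(34816 * 9 / 10, 34816 - 2 * 1518)
              *     = min(31334, 31780) = 31334 bytes.
              */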
3016
3017         if (hw->mac.type < e1000_82576) {
3018                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3019                 fc->low_water = fc->high_water - 8;
3020         } else {
3021                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3022                 fc->low_water = fc->high_water - 16;
3023         }
3024
3025         fc->pause_time = IGB_FC_PAUSE_TIME;
3026         fc->send_xon = TRUE;
3027         if (adapter->fc)
3028                 fc->requested_mode = adapter->fc;
3029         else
3030                 fc->requested_mode = e1000_fc_default;
3031
3032         /* Issue a global reset */
3033         e1000_reset_hw(hw);
3034         E1000_WRITE_REG(hw, E1000_WUC, 0);
3035
3036         /* Reset for AutoMediaDetect */
3037         if (adapter->flags & IGB_MEDIA_RESET) {
3038                 e1000_setup_init_funcs(hw, TRUE);
3039                 e1000_get_bus_info(hw);
3040                 adapter->flags &= ~IGB_MEDIA_RESET;
3041         }
3042
3043         if (e1000_init_hw(hw) < 0)
3044                 device_printf(dev, "Hardware Initialization Failed\n");
3045
3046         /* Setup DMA Coalescing */
3047         igb_init_dmac(adapter, pba);
3048
3049         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3050         e1000_get_phy_info(hw);
3051         e1000_check_for_link(hw);
3052         return;
3053 }
3054
3055 /*********************************************************************
3056  *
3057  *  Setup networking device structure and register an interface.
3058  *
3059  **********************************************************************/
3060 static int
3061 igb_setup_interface(device_t dev, struct adapter *adapter)
3062 {
3063         struct ifnet   *ifp;
3064
3065         INIT_DEBUGOUT("igb_setup_interface: begin");
3066
3067         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3068         if (ifp == NULL) {
3069                 device_printf(dev, "can not allocate ifnet structure\n");
3070                 return (-1);
3071         }
3072         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3073         ifp->if_init =  igb_init;
3074         ifp->if_softc = adapter;
3075         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3076         ifp->if_ioctl = igb_ioctl;
3077
3078         /* TSO parameters */
3079         ifp->if_hw_tsomax = IP_MAXPACKET;
3080         ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
3081         ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
3082
3083 #ifndef IGB_LEGACY_TX
3084         ifp->if_transmit = igb_mq_start;
3085         ifp->if_qflush = igb_qflush;
3086 #else
3087         ifp->if_start = igb_start;
3088         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3089         ifp->if_snd.ifq_drv_maxlen = 0;
3090         IFQ_SET_READY(&ifp->if_snd);
3091 #endif
3092
3093         ether_ifattach(ifp, adapter->hw.mac.addr);
3094
3095         ifp->if_capabilities = ifp->if_capenable = 0;
3096
3097         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3098 #if __FreeBSD_version >= 1000000
3099         ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
3100 #endif
3101         ifp->if_capabilities |= IFCAP_TSO;
3102         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3103         ifp->if_capenable = ifp->if_capabilities;
3104
3105         /* Advertise LRO; it stays off by default since capenable was set above */
3106         ifp->if_capabilities |= IFCAP_LRO;
3107
3108 #ifdef DEVICE_POLLING
3109         ifp->if_capabilities |= IFCAP_POLLING;
3110 #endif
3111
3112         /*
3113          * Tell the upper layer(s) we
3114          * support full VLAN capability.
3115          */
3116         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3117         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3118                              |  IFCAP_VLAN_HWTSO
3119                              |  IFCAP_VLAN_MTU;
3120         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3121                           |  IFCAP_VLAN_HWTSO
3122                           |  IFCAP_VLAN_MTU;
3123
3124         /*
3125         ** Don't turn this on by default: if vlans are
3126         ** created on another pseudo device (e.g. lagg),
3127         ** vlan events are not passed through, breaking
3128         ** operation, whereas with HW FILTER off it works.
3129         ** If you use vlans directly on the igb driver you
3130         ** can enable this and get full hardware tag filtering.
3131         */
3132         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3133
3134         /*
3135          * Specify the media types supported by this adapter and register
3136          * callbacks to update media and link information
3137          */
3138         ifmedia_init(&adapter->media, IFM_IMASK,
3139             igb_media_change, igb_media_status);
3140         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3141             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3142                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3143                             0, NULL);
3144                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3145         } else {
3146                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3147                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3148                             0, NULL);
3149                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3150                             0, NULL);
3151                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3152                             0, NULL);
3153                 if (adapter->hw.phy.type != e1000_phy_ife) {
3154                         ifmedia_add(&adapter->media,
3155                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3156                         ifmedia_add(&adapter->media,
3157                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3158                 }
3159         }
3160         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3161         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3162         return (0);
3163 }
3164
3165
3166 /*
3167  * Manage DMA'able memory.
3168  */
3169 static void
3170 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3171 {
3172         if (error)
3173                 return;
3174         *(bus_addr_t *) arg = segs[0].ds_addr;
3175 }
3176
3177 static int
3178 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3179         struct igb_dma_alloc *dma, int mapflags)
3180 {
3181         int error;
3182
3183         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3184                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3185                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3186                                 BUS_SPACE_MAXADDR,      /* highaddr */
3187                                 NULL, NULL,             /* filter, filterarg */
3188                                 size,                   /* maxsize */
3189                                 1,                      /* nsegments */
3190                                 size,                   /* maxsegsize */
3191                                 0,                      /* flags */
3192                                 NULL,                   /* lockfunc */
3193                                 NULL,                   /* lockarg */
3194                                 &dma->dma_tag);
3195         if (error) {
3196                 device_printf(adapter->dev,
3197                     "%s: bus_dma_tag_create failed: %d\n",
3198                     __func__, error);
3199                 goto fail_0;
3200         }
3201
3202         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3203             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3204         if (error) {
3205                 device_printf(adapter->dev,
3206                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3207                     __func__, (uintmax_t)size, error);
3208                 goto fail_2;
3209         }
3210
3211         dma->dma_paddr = 0;
3212         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3213             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3214         if (error || dma->dma_paddr == 0) {
3215                 device_printf(adapter->dev,
3216                     "%s: bus_dmamap_load failed: %d\n",
3217                     __func__, error);
3218                 goto fail_3;
3219         }
3220
3221         return (0);
3222
3223 fail_3:
3224         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3225 fail_2:
3226         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3227         bus_dma_tag_destroy(dma->dma_tag);
3228 fail_0:
3229         dma->dma_tag = NULL;
3230
3231         return (error);
3232 }
3233
3234 static void
3235 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3236 {
3237         if (dma->dma_tag == NULL)
3238                 return;
3239         if (dma->dma_paddr != 0) {
3240                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3241                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3242                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3243                 dma->dma_paddr = 0;
3244         }
3245         if (dma->dma_vaddr != NULL) {
3246                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3247                 dma->dma_vaddr = NULL;
3248         }
3249         bus_dma_tag_destroy(dma->dma_tag);
3250         dma->dma_tag = NULL;
3251 }
3252
3253
3254 /*********************************************************************
3255  *
3256  *  Allocate memory for the transmit and receive rings, and then
3257  *  the descriptors associated with each, called only once at attach.
3258  *
3259  **********************************************************************/
3260 static int
3261 igb_allocate_queues(struct adapter *adapter)
3262 {
3263         device_t dev = adapter->dev;
3264         struct igb_queue        *que = NULL;
3265         struct tx_ring          *txr = NULL;
3266         struct rx_ring          *rxr = NULL;
3267         int rsize, tsize, error = E1000_SUCCESS;
3268         int txconf = 0, rxconf = 0;
3269
3270         /* First allocate the top level queue structs */
3271         if (!(adapter->queues =
3272             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3273             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3274                 device_printf(dev, "Unable to allocate queue memory\n");
3275                 error = ENOMEM;
3276                 goto fail;
3277         }
3278
3279         /* Next allocate the TX ring struct memory */
3280         if (!(adapter->tx_rings =
3281             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3282             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3283                 device_printf(dev, "Unable to allocate TX ring memory\n");
3284                 error = ENOMEM;
3285                 goto tx_fail;
3286         }
3287
3288         /* Now allocate the RX */
3289         if (!(adapter->rx_rings =
3290             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3291             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3292                 device_printf(dev, "Unable to allocate RX ring memory\n");
3293                 error = ENOMEM;
3294                 goto rx_fail;
3295         }
3296
3297         tsize = roundup2(adapter->num_tx_desc *
3298             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
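             /*
              * roundup2() rounds up to the next multiple of its
              * power-of-two second argument, keeping each descriptor
              * ring aligned on an IGB_DBA_ALIGN boundary for DMA.
              */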
3299         /*
3300          * Now set up the TX queues, txconf is needed to handle the
3301          * possibility that things fail midcourse and we need to
3302          * undo memory gracefully
3303          */ 
3304         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3305                 /* Set up some basics */
3306                 txr = &adapter->tx_rings[i];
3307                 txr->adapter = adapter;
3308                 txr->me = i;
3309                 txr->num_desc = adapter->num_tx_desc;
3310
3311                 /* Initialize the TX lock */
3312                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3313                     device_get_nameunit(dev), txr->me);
3314                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3315
3316                 if (igb_dma_malloc(adapter, tsize,
3317                         &txr->txdma, BUS_DMA_NOWAIT)) {
3318                         device_printf(dev,
3319                             "Unable to allocate TX Descriptor memory\n");
3320                         error = ENOMEM;
3321                         goto err_tx_desc;
3322                 }
3323                 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3324                 bzero((void *)txr->tx_base, tsize);
3325
3326                 /* Now allocate transmit buffers for the ring */
3327                 if (igb_allocate_transmit_buffers(txr)) {
3328                         device_printf(dev,
3329                             "Critical Failure setting up transmit buffers\n");
3330                         error = ENOMEM;
3331                         goto err_tx_desc;
3332                 }
3333 #ifndef IGB_LEGACY_TX
3334                 /* Allocate a buf ring */
3335                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3336                     M_WAITOK, &txr->tx_mtx);
3337 #endif
3338         }
3339
3340         /*
3341          * Next the RX queues...
3342          */ 
3343         rsize = roundup2(adapter->num_rx_desc *
3344             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3345         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3346                 rxr = &adapter->rx_rings[i];
3347                 rxr->adapter = adapter;
3348                 rxr->me = i;
3349
3350                 /* Initialize the RX lock */
3351                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3352                     device_get_nameunit(dev), rxr->me);
3353                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3354
3355                 if (igb_dma_malloc(adapter, rsize,
3356                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3357                         device_printf(dev,
3358                             "Unable to allocate RX Descriptor memory\n");
3359                         error = ENOMEM;
3360                         goto err_rx_desc;
3361                 }
3362                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3363                 bzero((void *)rxr->rx_base, rsize);
3364
3365                 /* Allocate receive buffers for the ring*/
3366                 if (igb_allocate_receive_buffers(rxr)) {
3367                         device_printf(dev,
3368                             "Critical Failure setting up receive buffers\n");
3369                         error = ENOMEM;
3370                         goto err_rx_desc;
3371                 }
3372         }
3373
3374         /*
3375         ** Finally set up the queue holding structs
3376         */
3377         for (int i = 0; i < adapter->num_queues; i++) {
3378                 que = &adapter->queues[i];
3379                 que->adapter = adapter;
3380                 que->txr = &adapter->tx_rings[i];
3381                 que->rxr = &adapter->rx_rings[i];
3382         }
3383
3384         return (0);
3385
3386 err_rx_desc:
3387         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3388                 igb_dma_free(adapter, &rxr->rxdma);
3389 err_tx_desc:
3390         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3391                 igb_dma_free(adapter, &txr->txdma);
3392         free(adapter->rx_rings, M_DEVBUF);
3393 rx_fail:
3394 #ifndef IGB_LEGACY_TX
             if (txr != NULL && txr->br != NULL)
3395                 buf_ring_free(txr->br, M_DEVBUF);
3396 #endif
3397         free(adapter->tx_rings, M_DEVBUF);
3398 tx_fail:
3399         free(adapter->queues, M_DEVBUF);
3400 fail:
3401         return (error);
3402 }
3403
3404 /*********************************************************************
3405  *
3406  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3407  *  the information needed to transmit a packet on the wire. This is
3408  *  called only once at attach; setup is done on every reset.
3409  *
3410  **********************************************************************/
3411 static int
3412 igb_allocate_transmit_buffers(struct tx_ring *txr)
3413 {
3414         struct adapter *adapter = txr->adapter;
3415         device_t dev = adapter->dev;
3416         struct igb_tx_buf *txbuf;
3417         int error, i;
3418
3419         /*
3420          * Setup DMA descriptor areas.
3421          */
3422         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3423                                1, 0,                    /* alignment, bounds */
3424                                BUS_SPACE_MAXADDR,       /* lowaddr */
3425                                BUS_SPACE_MAXADDR,       /* highaddr */
3426                                NULL, NULL,              /* filter, filterarg */
3427                                IGB_TSO_SIZE,            /* maxsize */
3428                                IGB_MAX_SCATTER,         /* nsegments */
3429                                PAGE_SIZE,               /* maxsegsize */
3430                                0,                       /* flags */
3431                                NULL,                    /* lockfunc */
3432                                NULL,                    /* lockfuncarg */
3433                                &txr->txtag))) {
3434                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3435                 goto fail;
3436         }
3437
3438         if (!(txr->tx_buffers =
3439             (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3440             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3441                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3442                 error = ENOMEM;
3443                 goto fail;
3444         }
3445
3446         /* Create the descriptor buffer dma maps */
3447         txbuf = txr->tx_buffers;
3448         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3449                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3450                 if (error != 0) {
3451                         device_printf(dev, "Unable to create TX DMA map\n");
3452                         goto fail;
3453                 }
3454         }
3455
3456         return 0;
3457 fail:
3458         /* Free everything; this handles the case where we failed partway through */
3459         igb_free_transmit_structures(adapter);
3460         return (error);
3461 }
3462
3463 /*********************************************************************
3464  *
3465  *  Initialize a transmit ring.
3466  *
3467  **********************************************************************/
3468 static void
3469 igb_setup_transmit_ring(struct tx_ring *txr)
3470 {
3471         struct adapter *adapter = txr->adapter;
3472         struct igb_tx_buf *txbuf;
3473         int i;
3474 #ifdef DEV_NETMAP
3475         struct netmap_adapter *na = NA(adapter->ifp);
3476         struct netmap_slot *slot;
3477 #endif /* DEV_NETMAP */
3478
3479         /* Clear the old descriptor contents */
3480         IGB_TX_LOCK(txr);
3481 #ifdef DEV_NETMAP
3482         slot = netmap_reset(na, NR_TX, txr->me, 0);
3483 #endif /* DEV_NETMAP */
3484         bzero((void *)txr->tx_base,
3485               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3486         /* Reset indices */
3487         txr->next_avail_desc = 0;
3488         txr->next_to_clean = 0;
3489
3490         /* Free any existing tx buffers. */
3491         txbuf = txr->tx_buffers;
3492         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3493                 if (txbuf->m_head != NULL) {
3494                         bus_dmamap_sync(txr->txtag, txbuf->map,
3495                             BUS_DMASYNC_POSTWRITE);
3496                         bus_dmamap_unload(txr->txtag, txbuf->map);
3497                         m_freem(txbuf->m_head);
3498                         txbuf->m_head = NULL;
3499                 }
3500 #ifdef DEV_NETMAP
3501                 if (slot) {
3502                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3503                         /* no need to set the address */
3504                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3505                 }
3506 #endif /* DEV_NETMAP */
3507                 /* clear the watch index */
3508                 txbuf->eop = NULL;
3509         }
3510
3511         /* Set number of descriptors available */
3512         txr->tx_avail = adapter->num_tx_desc;
3513
3514         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3515             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3516         IGB_TX_UNLOCK(txr);
3517 }
3518
3519 /*********************************************************************
3520  *
3521  *  Initialize all transmit rings.
3522  *
3523  **********************************************************************/
3524 static void
3525 igb_setup_transmit_structures(struct adapter *adapter)
3526 {
3527         struct tx_ring *txr = adapter->tx_rings;
3528
3529         for (int i = 0; i < adapter->num_queues; i++, txr++)
3530                 igb_setup_transmit_ring(txr);
3531
3532         return;
3533 }
3534
3535 /*********************************************************************
3536  *
3537  *  Enable transmit unit.
3538  *
3539  **********************************************************************/
3540 static void
3541 igb_initialize_transmit_units(struct adapter *adapter)
3542 {
3543         struct tx_ring  *txr = adapter->tx_rings;
3544         struct e1000_hw *hw = &adapter->hw;
3545         u32             tctl, txdctl;
3546
3547         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3548         tctl = txdctl = 0;
3549
3550         /* Setup the Tx Descriptor Rings */
3551         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3552                 u64 bus_addr = txr->txdma.dma_paddr;
3553
3554                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3555                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3556                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3557                     (uint32_t)(bus_addr >> 32));
3558                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3559                     (uint32_t)bus_addr);
3560
3561                 /* Setup the HW Tx Head and Tail descriptor pointers */
3562                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3563                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3564
3565                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3566                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3567                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3568
3569                 txr->queue_status = IGB_QUEUE_IDLE;
3570
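                     /*
                     ** The prefetch, host and write-back thresholds live
                     ** in separate byte-aligned fields of TXDCTL, hence
                     ** the shifts by 8 and 16 below.
                     */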
3571                 txdctl |= IGB_TX_PTHRESH;
3572                 txdctl |= IGB_TX_HTHRESH << 8;
3573                 txdctl |= IGB_TX_WTHRESH << 16;
3574                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3575                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3576         }
3577
3578         if (adapter->vf_ifp)
3579                 return;
3580
3581         e1000_config_collision_dist(hw);
3582
3583         /* Program the Transmit Control Register */
3584         tctl = E1000_READ_REG(hw, E1000_TCTL);
3585         tctl &= ~E1000_TCTL_CT;
3586         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3587                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3588
3589         /* This write will effectively turn on the transmit unit. */
3590         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3591 }
3592
3593 /*********************************************************************
3594  *
3595  *  Free all transmit rings.
3596  *
3597  **********************************************************************/
3598 static void
3599 igb_free_transmit_structures(struct adapter *adapter)
3600 {
3601         struct tx_ring *txr = adapter->tx_rings;
3602
3603         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3604                 IGB_TX_LOCK(txr);
3605                 igb_free_transmit_buffers(txr);
3606                 igb_dma_free(adapter, &txr->txdma);
3607                 IGB_TX_UNLOCK(txr);
3608                 IGB_TX_LOCK_DESTROY(txr);
3609         }
3610         free(adapter->tx_rings, M_DEVBUF);
3611 }
3612
3613 /*********************************************************************
3614  *
3615  *  Free transmit ring related data structures.
3616  *
3617  **********************************************************************/
3618 static void
3619 igb_free_transmit_buffers(struct tx_ring *txr)
3620 {
3621         struct adapter *adapter = txr->adapter;
3622         struct igb_tx_buf *tx_buffer;
3623         int             i;
3624
3625         INIT_DEBUGOUT("free_transmit_ring: begin");
3626
3627         if (txr->tx_buffers == NULL)
3628                 return;
3629
3630         tx_buffer = txr->tx_buffers;
3631         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3632                 if (tx_buffer->m_head != NULL) {
3633                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3634                             BUS_DMASYNC_POSTWRITE);
3635                         bus_dmamap_unload(txr->txtag,
3636                             tx_buffer->map);
3637                         m_freem(tx_buffer->m_head);
3638                         tx_buffer->m_head = NULL;
3639                         if (tx_buffer->map != NULL) {
3640                                 bus_dmamap_destroy(txr->txtag,
3641                                     tx_buffer->map);
3642                                 tx_buffer->map = NULL;
3643                         }
3644                 } else if (tx_buffer->map != NULL) {
3645                         bus_dmamap_unload(txr->txtag,
3646                             tx_buffer->map);
3647                         bus_dmamap_destroy(txr->txtag,
3648                             tx_buffer->map);
3649                         tx_buffer->map = NULL;
3650                 }
3651         }
3652 #ifndef IGB_LEGACY_TX
3653         if (txr->br != NULL)
3654                 buf_ring_free(txr->br, M_DEVBUF);
3655 #endif
3656         if (txr->tx_buffers != NULL) {
3657                 free(txr->tx_buffers, M_DEVBUF);
3658                 txr->tx_buffers = NULL;
3659         }
3660         if (txr->txtag != NULL) {
3661                 bus_dma_tag_destroy(txr->txtag);
3662                 txr->txtag = NULL;
3663         }
3664         return;
3665 }
3666
3667 /**********************************************************************
3668  *
3669  *  Setup work for hardware segmentation offload (TSO) on
3670  *  adapters using advanced tx descriptors
3671  *
3672  **********************************************************************/
3673 static int
3674 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3675     u32 *cmd_type_len, u32 *olinfo_status)
3676 {
3677         struct adapter *adapter = txr->adapter;
3678         struct e1000_adv_tx_context_desc *TXD;
3679         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3680         u32 mss_l4len_idx = 0, paylen;
3681         u16 vtag = 0, eh_type;
3682         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3683         struct ether_vlan_header *eh;
3684 #ifdef INET6
3685         struct ip6_hdr *ip6;
3686 #endif
3687 #ifdef INET
3688         struct ip *ip;
3689 #endif
3690         struct tcphdr *th;
3691
3692
3693         /*
3694          * Determine where frame payload starts.
3695          * Jump over vlan headers if already present
3696          */
3697         eh = mtod(mp, struct ether_vlan_header *);
3698         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3699                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3700                 eh_type = eh->evl_proto;
3701         } else {
3702                 ehdrlen = ETHER_HDR_LEN;
3703                 eh_type = eh->evl_encap_proto;
3704         }
3705
3706         switch (ntohs(eh_type)) {
3707 #ifdef INET6
3708         case ETHERTYPE_IPV6:
3709                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3710                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3711                 if (ip6->ip6_nxt != IPPROTO_TCP)
3712                         return (ENXIO);
3713                 ip_hlen = sizeof(struct ip6_hdr);
3714                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3715                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3716                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3717                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3718                 break;
3719 #endif
3720 #ifdef INET
3721         case ETHERTYPE_IP:
3722                 ip = (struct ip *)(mp->m_data + ehdrlen);
3723                 if (ip->ip_p != IPPROTO_TCP)
3724                         return (ENXIO);
3725                 ip->ip_sum = 0;
3726                 ip_hlen = ip->ip_hl << 2;
3727                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3728                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3729                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3730                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3731                 /* Tell transmit desc to also do IPv4 checksum. */
3732                 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3733                 break;
3734 #endif
3735         default:
3736                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3737                     __func__, ntohs(eh_type));
3738                 break;
3739         }
3740
3741         ctxd = txr->next_avail_desc;
3742         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3743
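             /* th_off is in 32-bit words; << 2 converts it to bytes */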
3744         tcp_hlen = th->th_off << 2;
3745
3746         /* This is used in the transmit desc in encap */
3747         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3748
3749         /* VLAN MACLEN IPLEN */
3750         if (mp->m_flags & M_VLANTAG) {
3751                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3752                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3753         }
3754
3755         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3756         vlan_macip_lens |= ip_hlen;
3757         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3758
3759         /* ADV DTYPE TUCMD */
3760         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3761         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3762         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3763
3764         /* MSS L4LEN IDX */
3765         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3766         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3767         /* 82575 needs the queue index added */
3768         if (adapter->hw.mac.type == e1000_82575)
3769                 mss_l4len_idx |= txr->me << 4;
3770         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3771
3772         TXD->seqnum_seed = htole32(0);
3773
3774         if (++ctxd == txr->num_desc)
3775                 ctxd = 0;
3776
3777         txr->tx_avail--;
3778         txr->next_avail_desc = ctxd;
3779         *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3780         *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3781         *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3782         ++txr->tso_tx;
3783         return (0);
3784 }
3785
3786 /*********************************************************************
3787  *
3788  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3789  *
3790  **********************************************************************/
3791
3792 static int
3793 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3794     u32 *cmd_type_len, u32 *olinfo_status)
3795 {
3796         struct e1000_adv_tx_context_desc *TXD;
3797         struct adapter *adapter = txr->adapter;
3798         struct ether_vlan_header *eh;
3799         struct ip *ip;
3800         struct ip6_hdr *ip6;
3801         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3802         int     ehdrlen, ip_hlen = 0;
3803         u16     etype;
3804         u8      ipproto = 0;
3805         int     offload = TRUE;
3806         int     ctxd = txr->next_avail_desc;
3807         u16     vtag = 0;
3808
3809         /* First check if TSO is to be used */
3810         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3811                 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3812
3813         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3814                 offload = FALSE;
3815
3816         /* Indicate the whole packet as payload when not doing TSO */
3817         *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3818
3819         /* Now ready a context descriptor */
3820         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3821
3822         /*
3823         ** In advanced descriptors the vlan tag must 
3824         ** be placed into the context descriptor. Hence
3825         ** we need to make one even if not doing offloads.
3826         */
3827         if (mp->m_flags & M_VLANTAG) {
3828                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3829                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3830         } else if (offload == FALSE) /* ... no offload to do */
3831                 return (0);
3832
3833         /*
3834          * Determine where frame payload starts.
3835          * Jump over vlan headers if already present,
3836          * helpful for QinQ too.
3837          */
3838         eh = mtod(mp, struct ether_vlan_header *);
3839         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3840                 etype = ntohs(eh->evl_proto);
3841                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3842         } else {
3843                 etype = ntohs(eh->evl_encap_proto);
3844                 ehdrlen = ETHER_HDR_LEN;
3845         }
3846
3847         /* Set the ether header length */
3848         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3849
3850         switch (etype) {
3851                 case ETHERTYPE_IP:
3852                         ip = (struct ip *)(mp->m_data + ehdrlen);
3853                         ip_hlen = ip->ip_hl << 2;
3854                         ipproto = ip->ip_p;
3855                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3856                         break;
3857                 case ETHERTYPE_IPV6:
3858                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3859                         ip_hlen = sizeof(struct ip6_hdr);
3860                         /* XXX-BZ this will go badly in case of ext hdrs. */
3861                         ipproto = ip6->ip6_nxt;
3862                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3863                         break;
3864                 default:
3865                         offload = FALSE;
3866                         break;
3867         }
3868
3869         vlan_macip_lens |= ip_hlen;
3870         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3871
3872         switch (ipproto) {
3873                 case IPPROTO_TCP:
3874 #if __FreeBSD_version >= 1000000
3875                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
3876 #else
3877                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3878 #endif
3879                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3880                         break;
3881                 case IPPROTO_UDP:
3882 #if __FreeBSD_version >= 1000000
3883                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
3884 #else
3885                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3886 #endif
3887                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3888                         break;
3889
3890 #if __FreeBSD_version >= 800000
3891                 case IPPROTO_SCTP:
3892 #if __FreeBSD_version >= 1000000
3893                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
3894 #else
3895                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3896 #endif
3897                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3898                         break;
3899 #endif
3900                 default:
3901                         offload = FALSE;
3902                         break;
3903         }
3904
3905         if (offload) /* For the TX descriptor setup */
3906                 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3907
3908         /* 82575 needs the queue index added */
3909         if (adapter->hw.mac.type == e1000_82575)
3910                 mss_l4len_idx = txr->me << 4;
3911
3912         /* Now copy bits into descriptor */
3913         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3914         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3915         TXD->seqnum_seed = htole32(0);
3916         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3917
3918         /* We've consumed the first desc, adjust counters */
3919         if (++ctxd == txr->num_desc)
3920                 ctxd = 0;
3921         txr->next_avail_desc = ctxd;
3922         --txr->tx_avail;
3923
3924         return (0);
3925 }
3926
3927 /**********************************************************************
3928  *
3929  *  Examine each tx_buffer in the used queue. If the hardware is done
3930  *  processing the packet then free associated resources. The
3931  *  tx_buffer is put back on the free queue.
3932  *
3933  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3934  **********************************************************************/
3935 static bool
3936 igb_txeof(struct tx_ring *txr)
3937 {
3938         struct adapter          *adapter = txr->adapter;
3939         struct ifnet            *ifp = adapter->ifp;
3940         u32                     work, processed = 0;
3941         int                     limit = adapter->tx_process_limit;
3942         struct igb_tx_buf       *buf;
3943         union e1000_adv_tx_desc *txd;
3944
3945         mtx_assert(&txr->tx_mtx, MA_OWNED);
3946
3947 #ifdef DEV_NETMAP
3948         if (netmap_tx_irq(ifp, txr->me))
3949                 return (FALSE);
3950 #endif /* DEV_NETMAP */
3951
3952         if (txr->tx_avail == txr->num_desc) {
3953                 txr->queue_status = IGB_QUEUE_IDLE;
3954                 return (FALSE);
3955         }
3956
3957         /* Get work starting point */
3958         work = txr->next_to_clean;
3959         buf = &txr->tx_buffers[work];
3960         txd = &txr->tx_base[work];
3961         work -= txr->num_desc; /* The distance to ring end */
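             /*
             ** "work" now holds the negative distance to the ring end
             ** (as a u32), so it increments back to zero exactly at
             ** the wrap point and the "!work" tests below detect the
             ** wrap without a modulo.
             */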
3962         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3963             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3964         do {
3965                 union e1000_adv_tx_desc *eop = buf->eop;
3966                 if (eop == NULL) /* No work */
3967                         break;
3968
3969                 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
3970                         break;  /* I/O not complete */
3971
3972                 if (buf->m_head) {
3973                         txr->bytes +=
3974                             buf->m_head->m_pkthdr.len;
3975                         bus_dmamap_sync(txr->txtag,
3976                             buf->map,
3977                             BUS_DMASYNC_POSTWRITE);
3978                         bus_dmamap_unload(txr->txtag,
3979                             buf->map);
3980                         m_freem(buf->m_head);
3981                         buf->m_head = NULL;
3982                 }
3983                 buf->eop = NULL;
3984                 ++txr->tx_avail;
3985
3986                 /* We clean the range if multi segment */
3987                 while (txd != eop) {
3988                         ++txd;
3989                         ++buf;
3990                         ++work;
3991                         /* wrap the ring? */
3992                         if (__predict_false(!work)) {
3993                                 work -= txr->num_desc;
3994                                 buf = txr->tx_buffers;
3995                                 txd = txr->tx_base;
3996                         }
3997                         if (buf->m_head) {
3998                                 txr->bytes +=
3999                                     buf->m_head->m_pkthdr.len;
4000                                 bus_dmamap_sync(txr->txtag,
4001                                     buf->map,
4002                                     BUS_DMASYNC_POSTWRITE);
4003                                 bus_dmamap_unload(txr->txtag,
4004                                     buf->map);
4005                                 m_freem(buf->m_head);
4006                                 buf->m_head = NULL;
4007                         }
4008                         ++txr->tx_avail;
4009                         buf->eop = NULL;
4010
4011                 }
4012                 ++txr->packets;
4013                 ++processed;
4014                 ++ifp->if_opackets;
4015                 txr->watchdog_time = ticks;
4016
4017                 /* Try the next packet */
4018                 ++txd;
4019                 ++buf;
4020                 ++work;
4021                 /* reset with a wrap */
4022                 if (__predict_false(!work)) {
4023                         work -= txr->num_desc;
4024                         buf = txr->tx_buffers;
4025                         txd = txr->tx_base;
4026                 }
4027                 prefetch(txd);
4028         } while (__predict_true(--limit));
4029
4030         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4031             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4032
4033         work += txr->num_desc;
4034         txr->next_to_clean = work;
4035
4036         /*
4037         ** Watchdog calculation: we know there is work
4038         ** outstanding, or the first return above would have
4039         ** been taken, so nothing processed for too long
4040         ** indicates a hang.
4041         */
4042         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4043                 txr->queue_status |= IGB_QUEUE_HUNG;
4044
4045         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4046                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;       
4047
4048         if (txr->tx_avail == txr->num_desc) {
4049                 txr->queue_status = IGB_QUEUE_IDLE;
4050                 return (FALSE);
4051         }
4052
4053         return (TRUE);
4054 }
4055
4056 /*********************************************************************
4057  *
4058  *  Refresh mbuf buffers for RX descriptor rings
4059  *   - now keeps its own state, so discards due to resource
4060  *     exhaustion are unnecessary: if an mbuf cannot be obtained
4061  *     the routine just returns, keeping its placeholder, and can
4062  *     simply be called again later to retry.
4063  *
4064  **********************************************************************/
4065 static void
4066 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4067 {
4068         struct adapter          *adapter = rxr->adapter;
4069         bus_dma_segment_t       hseg[1];
4070         bus_dma_segment_t       pseg[1];
4071         struct igb_rx_buf       *rxbuf;
4072         struct mbuf             *mh, *mp;
4073         int                     i, j, nsegs, error;
4074         bool                    refreshed = FALSE;
4075
4076         i = j = rxr->next_to_refresh;
4077         /*
4078         ** Get one descriptor beyond
4079         ** our work mark to control
4080         ** the loop.
4081         */
4082         if (++j == adapter->num_rx_desc)
4083                 j = 0;
4084
4085         while (j != limit) {
4086                 rxbuf = &rxr->rx_buffers[i];
4087                 /* No header mbuf is used when header split is off */
4088                 if (rxr->hdr_split == FALSE)
4089                         goto no_split;
4090                 if (rxbuf->m_head == NULL) {
4091                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4092                         if (mh == NULL)
4093                                 goto update;
4094                 } else
4095                         mh = rxbuf->m_head;
4096
4097                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4099                 mh->m_flags |= M_PKTHDR;
4100                 /* Get the memory mapping */
4101                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4102                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4103                 if (error != 0) {
4104                         printf("Refresh mbufs: hdr dmamap load"
4105                             " failure - %d\n", error);
4106                         m_free(mh);
4107                         rxbuf->m_head = NULL;
4108                         goto update;
4109                 }
4110                 rxbuf->m_head = mh;
4111                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4112                     BUS_DMASYNC_PREREAD);
4113                 rxr->rx_base[i].read.hdr_addr =
4114                     htole64(hseg[0].ds_addr);
4115 no_split:
4116                 if (rxbuf->m_pack == NULL) {
4117                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4118                             M_PKTHDR, adapter->rx_mbuf_sz);
4119                         if (mp == NULL)
4120                                 goto update;
4121                 } else
4122                         mp = rxbuf->m_pack;
4123
4124                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4125                 /* Get the memory mapping */
4126                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4127                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4128                 if (error != 0) {
4129                         printf("Refresh mbufs: payload dmamap load"
4130                             " failure - %d\n", error);
4131                         m_free(mp);
4132                         rxbuf->m_pack = NULL;
4133                         goto update;
4134                 }
4135                 rxbuf->m_pack = mp;
4136                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4137                     BUS_DMASYNC_PREREAD);
4138                 rxr->rx_base[i].read.pkt_addr =
4139                     htole64(pseg[0].ds_addr);
4140                 refreshed = TRUE; /* I feel wefreshed :) */
4141
4142                 i = j; /* our next is precalculated */
4143                 rxr->next_to_refresh = i;
4144                 if (++j == adapter->num_rx_desc)
4145                         j = 0;
4146         }
4147 update:
4148         if (refreshed) /* update tail */
4149                 E1000_WRITE_REG(&adapter->hw,
4150                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4151         return;
4152 }
4153
4154
4155 /*********************************************************************
4156  *
4157  *  Allocate memory for rx_buffer structures. Since we use one
4158  *  rx_buffer per received packet, the maximum number of rx_buffer's
4159  *  that we'll need is equal to the number of receive descriptors
4160  *  that we've allocated.
4161  *
4162  **********************************************************************/
4163 static int
4164 igb_allocate_receive_buffers(struct rx_ring *rxr)
4165 {
4166         struct  adapter         *adapter = rxr->adapter;
4167         device_t                dev = adapter->dev;
4168         struct igb_rx_buf       *rxbuf;
4169         int                     i, bsize, error;
4170
4171         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4172         if (!(rxr->rx_buffers =
4173             (struct igb_rx_buf *) malloc(bsize,
4174             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4175                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4176                 error = ENOMEM;
4177                 goto fail;
4178         }
4179
4180         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4181                                    1, 0,                /* alignment, bounds */
4182                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4183                                    BUS_SPACE_MAXADDR,   /* highaddr */
4184                                    NULL, NULL,          /* filter, filterarg */
4185                                    MSIZE,               /* maxsize */
4186                                    1,                   /* nsegments */
4187                                    MSIZE,               /* maxsegsize */
4188                                    0,                   /* flags */
4189                                    NULL,                /* lockfunc */
4190                                    NULL,                /* lockfuncarg */
4191                                    &rxr->htag))) {
4192                 device_printf(dev, "Unable to create RX DMA tag\n");
4193                 goto fail;
4194         }
4195
4196         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4197                                    1, 0,                /* alignment, bounds */
4198                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4199                                    BUS_SPACE_MAXADDR,   /* highaddr */
4200                                    NULL, NULL,          /* filter, filterarg */
4201                                    MJUM9BYTES,          /* maxsize */
4202                                    1,                   /* nsegments */
4203                                    MJUM9BYTES,          /* maxsegsize */
4204                                    0,                   /* flags */
4205                                    NULL,                /* lockfunc */
4206                                    NULL,                /* lockfuncarg */
4207                                    &rxr->ptag))) {
4208                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4209                 goto fail;
4210         }
4211
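             /* Create a header and a payload DMA map for each descriptor */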
4212         for (i = 0; i < adapter->num_rx_desc; i++) {
4213                 rxbuf = &rxr->rx_buffers[i];
4214                 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4215                 if (error) {
4216                         device_printf(dev,
4217                             "Unable to create RX head DMA maps\n");
4218                         goto fail;
4219                 }
4220                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4221                 if (error) {
4222                         device_printf(dev,
4223                             "Unable to create RX packet DMA maps\n");
4224                         goto fail;
4225                 }
4226         }
4227
4228         return (0);
4229
4230 fail:
4231         /* Frees all, but can handle partial completion */
4232         igb_free_receive_structures(adapter);
4233         return (error);
4234 }
4235
4236
4237 static void
4238 igb_free_receive_ring(struct rx_ring *rxr)
4239 {
4240         struct  adapter         *adapter = rxr->adapter;
4241         struct igb_rx_buf       *rxbuf;
4242
4244         for (int i = 0; i < adapter->num_rx_desc; i++) {
4245                 rxbuf = &rxr->rx_buffers[i];
4246                 if (rxbuf->m_head != NULL) {
4247                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4248                             BUS_DMASYNC_POSTREAD);
4249                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4250                         rxbuf->m_head->m_flags |= M_PKTHDR;
4251                         m_freem(rxbuf->m_head);
4252                 }
4253                 if (rxbuf->m_pack != NULL) {
4254                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4255                             BUS_DMASYNC_POSTREAD);
4256                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4257                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4258                         m_freem(rxbuf->m_pack);
4259                 }
4260                 rxbuf->m_head = NULL;
4261                 rxbuf->m_pack = NULL;
4262         }
4263 }
4264
4265
4266 /*********************************************************************
4267  *
4268  *  Initialize a receive ring and its buffers.
4269  *
4270  **********************************************************************/
4271 static int
4272 igb_setup_receive_ring(struct rx_ring *rxr)
4273 {
4274         struct  adapter         *adapter;
4275         struct  ifnet           *ifp;
4276         device_t                dev;
4277         struct igb_rx_buf       *rxbuf;
4278         bus_dma_segment_t       pseg[1], hseg[1];
4279         struct lro_ctrl         *lro = &rxr->lro;
4280         int                     rsize, nsegs, error = 0;
4281 #ifdef DEV_NETMAP
4282         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4283         struct netmap_slot *slot;
4284 #endif /* DEV_NETMAP */
4285
4286         adapter = rxr->adapter;
4287         dev = adapter->dev;
4288         ifp = adapter->ifp;
4289
4290         /* Clear the ring contents */
4291         IGB_RX_LOCK(rxr);
4292 #ifdef DEV_NETMAP
4293         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4294 #endif /* DEV_NETMAP */
4295         rsize = roundup2(adapter->num_rx_desc *
4296             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4297         bzero((void *)rxr->rx_base, rsize);
4298
4299         /*
4300         ** Free current RX buffer structures and their mbufs
4301         */
4302         igb_free_receive_ring(rxr);
4303
4304         /* Configure for header split? */
4305         if (igb_header_split)
4306                 rxr->hdr_split = TRUE;
4307
4308         /* Now replenish the ring mbufs */
4309         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4310                 struct mbuf     *mh, *mp;
4311
4312                 rxbuf = &rxr->rx_buffers[j];
4313 #ifdef DEV_NETMAP
4314                 if (slot) {
4315                         /* slot sj is mapped to the j-th NIC-ring entry */
4316                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4317                         uint64_t paddr;
4318                         void *addr;
4319
4320                         addr = PNMB(na, slot + sj, &paddr);
4321                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4322                         /* Update descriptor */
4323                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4324                         continue;
4325                 }
4326 #endif /* DEV_NETMAP */
4327                 if (rxr->hdr_split == FALSE)
4328                         goto skip_head;
4329
4330                 /* First the header */
4331                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4332                 if (rxbuf->m_head == NULL) {
4333                         error = ENOBUFS;
4334                         goto fail;
4335                 }
4336                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4337                 mh = rxbuf->m_head;
4338                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4339                 mh->m_flags |= M_PKTHDR;
4340                 /* Get the memory mapping */
4341                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4342                     rxbuf->hmap, rxbuf->m_head, hseg,
4343                     &nsegs, BUS_DMA_NOWAIT);
4344                 if (error != 0) /* Nothing elegant to do here */
4345                         goto fail;
4346                 bus_dmamap_sync(rxr->htag,
4347                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4348                 /* Update descriptor */
4349                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4350
4351 skip_head:
4352                 /* Now the payload cluster */
4353                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4354                     M_PKTHDR, adapter->rx_mbuf_sz);
4355                 if (rxbuf->m_pack == NULL) {
4356                         error = ENOBUFS;
4357                         goto fail;
4358                 }
4359                 mp = rxbuf->m_pack;
4360                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4361                 /* Get the memory mapping */
4362                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4363                     rxbuf->pmap, mp, pseg,
4364                     &nsegs, BUS_DMA_NOWAIT);
4365                 if (error != 0)
4366                         goto fail;
4367                 bus_dmamap_sync(rxr->ptag,
4368                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4369                 /* Update descriptor */
4370                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4371         }
4372
4373         /* Setup our descriptor indices */
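             /* Ring starts fully populated: refresh trails check by one slot */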
4374         rxr->next_to_check = 0;
4375         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4376         rxr->lro_enabled = FALSE;
4377         rxr->rx_split_packets = 0;
4378         rxr->rx_bytes = 0;
4379
4380         rxr->fmp = NULL;
4381         rxr->lmp = NULL;
4382
4383         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4384             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4385
4386         /*
4387         ** Now set up the LRO interface. We also
4388         ** only do header split when LRO is
4389         ** enabled, since it is otherwise
4390         ** undesirable in similar setups.
4391         */
4392         if (ifp->if_capenable & IFCAP_LRO) {
4393                 error = tcp_lro_init(lro);
4394                 if (error) {
4395                         device_printf(dev, "LRO Initialization failed!\n");
4396                         goto fail;
4397                 }
4398                 INIT_DEBUGOUT("RX LRO Initialized\n");
4399                 rxr->lro_enabled = TRUE;
4400                 lro->ifp = adapter->ifp;
4401         }
4402
4403         IGB_RX_UNLOCK(rxr);
4404         return (0);
4405
4406 fail:
4407         igb_free_receive_ring(rxr);
4408         IGB_RX_UNLOCK(rxr);
4409         return (error);
4410 }
4411
4412
4413 /*********************************************************************
4414  *
4415  *  Initialize all receive rings.
4416  *
4417  **********************************************************************/
4418 static int
4419 igb_setup_receive_structures(struct adapter *adapter)
4420 {
4421         struct rx_ring *rxr = adapter->rx_rings;
4422         int i;
4423
4424         for (i = 0; i < adapter->num_queues; i++, rxr++)
4425                 if (igb_setup_receive_ring(rxr))
4426                         goto fail;
4427
4428         return (0);
4429 fail:
4430         /*
4431          * Free the RX buffers allocated so far; we only handle
4432          * the rings that completed, since the failing case will
4433          * have cleaned up for itself. 'i' is the endpoint.
4434          */
4435         for (int j = 0; j < i; ++j) {
4436                 rxr = &adapter->rx_rings[j];
4437                 IGB_RX_LOCK(rxr);
4438                 igb_free_receive_ring(rxr);
4439                 IGB_RX_UNLOCK(rxr);
4440         }
4441
4442         return (ENOBUFS);
4443 }
4444
4445 /*********************************************************************
4446  *
4447  *  Enable receive unit.
4448  *
4449  **********************************************************************/
4450 static void
4451 igb_initialize_receive_units(struct adapter *adapter)
4452 {
4453         struct rx_ring  *rxr = adapter->rx_rings;
4454         struct ifnet    *ifp = adapter->ifp;
4455         struct e1000_hw *hw = &adapter->hw;
4456         u32             rctl, rxcsum, psize, srrctl = 0;
4457
4458         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4459
4460         /*
4461          * Make sure receives are disabled while setting
4462          * up the descriptor ring
4463          */
4464         rctl = E1000_READ_REG(hw, E1000_RCTL);
4465         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4466
4467         /*
4468         ** Set up for header split
4469         */
4470         if (igb_header_split) {
4471                 /* Use a standard mbuf for the header */
4472                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4473                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4474         } else
4475                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4476
4477         /*
4478         ** Set up for jumbo frames
4479         */
4480         if (ifp->if_mtu > ETHERMTU) {
4481                 rctl |= E1000_RCTL_LPE;
4482                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4483                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4484                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4485                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4486                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4487                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4488                 }
4489                 /* Set maximum packet len */
4490                 psize = adapter->max_frame_size;
4491                 /* are we on a vlan? */
4492                 if (adapter->ifp->if_vlantrunk != NULL)
4493                         psize += VLAN_TAG_SIZE;
4494                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4495         } else {
4496                 rctl &= ~E1000_RCTL_LPE;
4497                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4498                 rctl |= E1000_RCTL_SZ_2048;
4499         }
4500
4501         /*
4502          * If TX flow control is disabled and there's >1 queue defined,
4503          * enable DROP.
4504          *
4505          * This drops frames rather than hanging the RX MAC for all queues.
4506          */
4507         if ((adapter->num_queues > 1) &&
4508             (adapter->fc == e1000_fc_none ||
4509              adapter->fc == e1000_fc_rx_pause)) {
4510                 srrctl |= E1000_SRRCTL_DROP_EN;
4511         }
4512
4513         /* Setup the Base and Length of the Rx Descriptor Rings */
4514         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4515                 u64 bus_addr = rxr->rxdma.dma_paddr;
4516                 u32 rxdctl;
4517
4518                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4519                     adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc));
4520                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4521                     (uint32_t)(bus_addr >> 32));
4522                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4523                     (uint32_t)bus_addr);
4524                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4525                 /* Enable this Queue */
4526                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4527                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
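                     /* Keep the enable/reserved bits, then program the
                      * prefetch, host, and write-back thresholds */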
4528                 rxdctl &= 0xFFF00000;
4529                 rxdctl |= IGB_RX_PTHRESH;
4530                 rxdctl |= IGB_RX_HTHRESH << 8;
4531                 rxdctl |= IGB_RX_WTHRESH << 16;
4532                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4533         }
4534
4535         /*
4536         ** Setup for RX MultiQueue
4537         */
4538         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4539         if (adapter->num_queues > 1) {
4540                 u32 random[10], mrqc, shift = 0;
4541                 union igb_reta {
4542                         u32 dword;
4543                         u8  bytes[4];
4544                 } reta;
4545
4546                 arc4rand(&random, sizeof(random), 0);
4547                 if (adapter->hw.mac.type == e1000_82575)
4548                         shift = 6;
4549                 /* Populate the redirection table, 4 entries per register */
4550                 for (int i = 0; i < 128; i++) {
4551                         reta.bytes[i & 3] =
4552                             (i % adapter->num_queues) << shift;
4553                         if ((i & 3) == 3)
4554                                 E1000_WRITE_REG(hw,
4555                                     E1000_RETA(i >> 2), reta.dword);
4556                 }
4557                 /* Enable RSS and fill in the random hash key */
4558                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4559                 for (int i = 0; i < 10; i++)
4560                         E1000_WRITE_REG_ARRAY(hw,
4561                             E1000_RSSRK(0), i, random[i]);
4562
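                     /* Select which packet fields feed the RSS hash */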
4563                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4564                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4565                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4566                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4567                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4568                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4569                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4570                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4571
4572                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4573
4574                 /*
4575                 ** NOTE: Receive full-packet checksum offload
4576                 ** is mutually exclusive with multiqueue;
4577                 ** this is not the same as the TCP/IP
4578                 ** checksums, which still work.
4579                 */
4580                 rxcsum |= E1000_RXCSUM_PCSD;
4581 #if __FreeBSD_version >= 800000
4582                 /* For SCTP Offload */
4583                 if ((hw->mac.type != e1000_82575) &&
4584                     (ifp->if_capenable & IFCAP_RXCSUM))
4585                         rxcsum |= E1000_RXCSUM_CRCOFL;
4586 #endif
4587         } else {
4588                 /* Non RSS setup */
4589                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4590                         rxcsum |= E1000_RXCSUM_IPPCSE;
4591 #if __FreeBSD_version >= 800000
4592                         if (adapter->hw.mac.type != e1000_82575)
4593                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4594 #endif
4595                 } else
4596                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4597         }
4598         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4599
4600         /* Setup the Receive Control Register */
4601         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4602         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4603                    E1000_RCTL_RDMTS_HALF |
4604                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4605         /* Strip CRC bytes. */
4606         rctl |= E1000_RCTL_SECRC;
4607         /* Make sure VLAN Filters are off */
4608         rctl &= ~E1000_RCTL_VFE;
4609         /* Don't store bad packets */
4610         rctl &= ~E1000_RCTL_SBP;
4611
4612         /* Enable Receives */
4613         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4614
4615         /*
4616          * Setup the HW Rx Head and Tail Descriptor Pointers
4617          *   - needs to be after enable
4618          */
4619         for (int i = 0; i < adapter->num_queues; i++) {
4620                 rxr = &adapter->rx_rings[i];
4621                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4622 #ifdef DEV_NETMAP
4623                 /*
4624                  * An init() while a netmap client is active must
4625                  * preserve the RX buffers passed to userspace.
4626                  * In this driver that means we adjust RDT to
4627                  * something different from next_to_refresh
4628                  * (which is not used in netmap mode).
4629                  */
4630                 if (ifp->if_capenable & IFCAP_NETMAP) {
4631                         struct netmap_adapter *na = NA(adapter->ifp);
4632                         struct netmap_kring *kring = &na->rx_rings[i];
4633                         int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4634
4635                         if (t >= adapter->num_rx_desc)
4636                                 t -= adapter->num_rx_desc;
4637                         else if (t < 0)
4638                                 t += adapter->num_rx_desc;
4639                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4640                 } else
4641 #endif /* DEV_NETMAP */
4642                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4643         }
4644         return;
4645 }
4646
4647 /*********************************************************************
4648  *
4649  *  Free receive rings.
4650  *
4651  **********************************************************************/
4652 static void
4653 igb_free_receive_structures(struct adapter *adapter)
4654 {
4655         struct rx_ring *rxr = adapter->rx_rings;
4656
4657         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4658                 struct lro_ctrl *lro = &rxr->lro;
4659                 igb_free_receive_buffers(rxr);
4660                 tcp_lro_free(lro);
4661                 igb_dma_free(adapter, &rxr->rxdma);
4662         }
4663
4664         free(adapter->rx_rings, M_DEVBUF);
4665 }
4666
4667 /*********************************************************************
4668  *
4669  *  Free receive ring data structures.
4670  *
4671  **********************************************************************/
4672 static void
4673 igb_free_receive_buffers(struct rx_ring *rxr)
4674 {
4675         struct adapter          *adapter = rxr->adapter;
4676         struct igb_rx_buf       *rxbuf;
4677         int i;
4678
4679         INIT_DEBUGOUT("free_receive_structures: begin");
4680
4681         /* Cleanup any existing buffers */
4682         if (rxr->rx_buffers != NULL) {
4683                 for (i = 0; i < adapter->num_rx_desc; i++) {
4684                         rxbuf = &rxr->rx_buffers[i];
4685                         if (rxbuf->m_head != NULL) {
4686                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4687                                     BUS_DMASYNC_POSTREAD);
4688                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4689                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4690                                 m_freem(rxbuf->m_head);
4691                         }
4692                         if (rxbuf->m_pack != NULL) {
4693                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4694                                     BUS_DMASYNC_POSTREAD);
4695                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4696                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4697                                 m_freem(rxbuf->m_pack);
4698                         }
4699                         rxbuf->m_head = NULL;
4700                         rxbuf->m_pack = NULL;
4701                         if (rxbuf->hmap != NULL) {
4702                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4703                                 rxbuf->hmap = NULL;
4704                         }
4705                         if (rxbuf->pmap != NULL) {
4706                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4707                                 rxbuf->pmap = NULL;
4708                         }
4709                 }
4710                 if (rxr->rx_buffers != NULL) {
4711                         free(rxr->rx_buffers, M_DEVBUF);
4712                         rxr->rx_buffers = NULL;
4713                 }
4714         }
4715
4716         if (rxr->htag != NULL) {
4717                 bus_dma_tag_destroy(rxr->htag);
4718                 rxr->htag = NULL;
4719         }
4720         if (rxr->ptag != NULL) {
4721                 bus_dma_tag_destroy(rxr->ptag);
4722                 rxr->ptag = NULL;
4723         }
4724 }
4725
4726 static __inline void
4727 igb_rx_discard(struct rx_ring *rxr, int i)
4728 {
4729         struct igb_rx_buf       *rbuf;
4730
4731         rbuf = &rxr->rx_buffers[i];
4732
4733         /* Partially received? Free the chain */
4734         if (rxr->fmp != NULL) {
4735                 rxr->fmp->m_flags |= M_PKTHDR;
4736                 m_freem(rxr->fmp);
4737                 rxr->fmp = NULL;
4738                 rxr->lmp = NULL;
4739         }
4740
4741         /*
4742         ** With advanced descriptors the writeback
4743         ** clobbers the buffer addresses, so it's easier
4744         ** to just free the existing mbufs and take
4745         ** the normal refresh path to get new buffers
4746         ** and mappings.
4747         */
4748         if (rbuf->m_head) {
4749                 m_free(rbuf->m_head);
4750                 rbuf->m_head = NULL;
4751                 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4752         }
4753
4754         if (rbuf->m_pack) {
4755                 m_free(rbuf->m_pack);
4756                 rbuf->m_pack = NULL;
4757                 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4758         }
4759
4760         return;
4761 }
4762
4763 static __inline void
4764 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4765 {
4766
4767         /*
4768          * At the moment LRO is only done for IPv4/TCP packets whose
4769          * TCP checksum has been computed by hardware and which carry
4770          * no VLAN tag in the Ethernet header.
4771          */
4772         if (rxr->lro_enabled &&
4773             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4774             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4775             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4776             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4777             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4778             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4779                 /*
4780                  * Send to the stack if:
4781                  *  - LRO is not enabled, or
4782                  *  - there are no LRO resources, or
4783                  *  - the LRO enqueue fails.
4784                  */
4785                 if (rxr->lro.lro_cnt != 0)
4786                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4787                                 return;
4788         }
4789         IGB_RX_UNLOCK(rxr);
4790         (*ifp->if_input)(ifp, m);
4791         IGB_RX_LOCK(rxr);
4792 }
4793
4794 /*********************************************************************
4795  *
4796  *  This routine executes in interrupt context. It replenishes
4797  *  the mbufs in the descriptor and sends data which has been
4798  *  dma'ed into host memory to upper layer.
4799  *
4800  *  We loop at most count times if count is > 0, or until done if
4801  *  count < 0.
4802  *
4803  *  Return TRUE if more to clean, FALSE otherwise
4804  *********************************************************************/
4805 static bool
4806 igb_rxeof(struct igb_queue *que, int count, int *done)
4807 {
4808         struct adapter          *adapter = que->adapter;
4809         struct rx_ring          *rxr = que->rxr;
4810         struct ifnet            *ifp = adapter->ifp;
4811         struct lro_ctrl         *lro = &rxr->lro;
4812         struct lro_entry        *queued;
4813         int                     i, processed = 0, rxdone = 0;
4814         u32                     ptype, staterr = 0;
4815         union e1000_adv_rx_desc *cur;
4816
4817         IGB_RX_LOCK(rxr);
4818         /* Sync the ring. */
4819         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4820             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4821
4822 #ifdef DEV_NETMAP
4823         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4824                 IGB_RX_UNLOCK(rxr);
4825                 return (FALSE);
4826         }
4827 #endif /* DEV_NETMAP */
4828
4829         /* Main clean loop */
4830         for (i = rxr->next_to_check; count != 0;) {
4831                 struct mbuf             *sendmp, *mh, *mp;
4832                 struct igb_rx_buf       *rxbuf;
4833                 u16                     hlen, plen, hdr, vtag;
4834                 bool                    eop = FALSE;
4835  
4836                 cur = &rxr->rx_base[i];
4837                 staterr = le32toh(cur->wb.upper.status_error);
4838                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4839                         break;
4840                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4841                         break;
4842                 count--;
4843                 sendmp = mh = mp = NULL;
4844                 cur->wb.upper.status_error = 0;
4845                 rxbuf = &rxr->rx_buffers[i];
4846                 plen = le16toh(cur->wb.upper.length);
4847                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4848                 if (((adapter->hw.mac.type == e1000_i350) ||
4849                     (adapter->hw.mac.type == e1000_i354)) &&
4850                     (staterr & E1000_RXDEXT_STATERR_LB))
4851                         vtag = be16toh(cur->wb.upper.vlan);
4852                 else
4853                         vtag = le16toh(cur->wb.upper.vlan);
4854                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4855                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4856
4857                 /*
4858                  * Free the frame (all segments) if we're at EOP and
4859                  * it's an error.
4860                  *
4861                  * The datasheet states that EOP + status is only valid for
4862                  * the final segment in a multi-segment frame.
4863                  */
4864                 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4865                         adapter->dropped_pkts++;
4866                         ++rxr->rx_discarded;
4867                         igb_rx_discard(rxr, i);
4868                         goto next_desc;
4869                 }
4870
4871                 /*
4872                 ** The way the hardware is configured to
4873                 ** split, it will ONLY use the header buffer
4874                 ** when header split is enabled; otherwise we
4875                 ** get normal behavior, i.e., both header and
4876                 ** payload are DMA'd into the payload buffer.
4877                 **
4878                 ** The fmp test is to catch the case where a
4879                 ** packet spans multiple descriptors; in that
4880                 ** case only the first header is valid.
4881                 */
4882                 if (rxr->hdr_split && rxr->fmp == NULL) {
4883                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4884                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4885                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4886                         if (hlen > IGB_HDR_BUF)
4887                                 hlen = IGB_HDR_BUF;
4888                         mh = rxr->rx_buffers[i].m_head;
4889                         mh->m_len = hlen;
4890                         /* clear buf pointer for refresh */
4891                         rxbuf->m_head = NULL;
4892                         /*
4893                         ** Get the payload length; this
4894                         ** could be zero if it's a small
4895                         ** packet.
4896                         */
4897                         if (plen > 0) {
4898                                 mp = rxr->rx_buffers[i].m_pack;
4899                                 mp->m_len = plen;
4900                                 mh->m_next = mp;
4901                                 /* clear buf pointer */
4902                                 rxbuf->m_pack = NULL;
4903                                 rxr->rx_split_packets++;
4904                         }
4905                 } else {
4906                         /*
4907                         ** Either no header split, or a
4908                         ** secondary piece of a fragmented
4909                         ** split packet.
4910                         */
4911                         mh = rxr->rx_buffers[i].m_pack;
4912                         mh->m_len = plen;
4913                         /* clear buf info for refresh */
4914                         rxbuf->m_pack = NULL;
4915                 }
4916                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4917
4918                 ++processed; /* So we know when to refresh */
4919
4920                 /* Initial frame - setup */
4921                 if (rxr->fmp == NULL) {
4922                         mh->m_pkthdr.len = mh->m_len;
4923                         /* Save the head of the chain */
4924                         rxr->fmp = mh;
4925                         rxr->lmp = mh;
4926                         if (mp != NULL) {
4927                                 /* Add payload if split */
4928                                 mh->m_pkthdr.len += mp->m_len;
4929                                 rxr->lmp = mh->m_next;
4930                         }
4931                 } else {
4932                         /* Chain mbufs together */
4933                         rxr->lmp->m_next = mh;
4934                         rxr->lmp = rxr->lmp->m_next;
4935                         rxr->fmp->m_pkthdr.len += mh->m_len;
4936                 }
4937
4938                 if (eop) {
4939                         rxr->fmp->m_pkthdr.rcvif = ifp;
4940                         ifp->if_ipackets++;
4941                         rxr->rx_packets++;
4942                         /* capture data for AIM */
4943                         rxr->packets++;
4944                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4945                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4946
4947                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4948                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4949
4950                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4951                             (staterr & E1000_RXD_STAT_VP) != 0) {
4952                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4953                                 rxr->fmp->m_flags |= M_VLANTAG;
4954                         }
4955
4956                         /*
4957                          * In the multiqueue case the RXCSUM.PCSD bit is
4958                          * set and never cleared, which means an RSS
4959                          * hash is available for use.
4960                          */
4961                         if (adapter->num_queues > 1) {
4962                                 rxr->fmp->m_pkthdr.flowid = 
4963                                     le32toh(cur->wb.lower.hi_dword.rss);
4964                                 /*
4965                                  * Full RSS support is not available in
4966                                  * FreeBSD 10, so set the hash type to
4967                                  * OPAQUE.
4968                                  */
4969                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
4970                         } else {
4971 #ifndef IGB_LEGACY_TX
4972                                 rxr->fmp->m_pkthdr.flowid = que->msix;
4973                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
4974 #endif
4975                         }
4976                         sendmp = rxr->fmp;
4977                         /* Make sure to set M_PKTHDR. */
4978                         sendmp->m_flags |= M_PKTHDR;
4979                         rxr->fmp = NULL;
4980                         rxr->lmp = NULL;
4981                 }
4982
4983 next_desc:
4984                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4985                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4986
4987                 /* Advance our pointers to the next descriptor. */
4988                 if (++i == adapter->num_rx_desc)
4989                         i = 0;
4990                 /*
4991                 ** Send to the stack or LRO
4992                 */
4993                 if (sendmp != NULL) {
4994                         rxr->next_to_check = i;
4995                         igb_rx_input(rxr, ifp, sendmp, ptype);
4996                         i = rxr->next_to_check;
4997                         rxdone++;
4998                 }
4999
5000                 /* Every 8 descriptors, refresh the mbufs */
5001                 if (processed == 8) {
5002                         igb_refresh_mbufs(rxr, i);
5003                         processed = 0;
5004                 }
5005         }
5006
5007         /* Catch any remainders */
5008         if (igb_rx_unrefreshed(rxr))
5009                 igb_refresh_mbufs(rxr, i);
5010
5011         rxr->next_to_check = i;
5012
5013         /*
5014          * Flush any outstanding LRO work
5015          */
5016         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5017                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5018                 tcp_lro_flush(lro, queued);
5019         }
5020
5021         if (done != NULL)
5022                 *done += rxdone;
5023
5024         IGB_RX_UNLOCK(rxr);
5025         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5026 }
5027
5028 /*********************************************************************
5029  *
5030  *  Verify that the hardware indicated the checksum is valid.
5031  *  Inform the stack of the checksum status so that it does not
5032  *  spend time re-verifying the checksum.
5033  *
5034  *********************************************************************/
5035 static void
5036 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5037 {
5038         u16 status = (u16)staterr;
5039         u8  errors = (u8) (staterr >> 24);
5040         int sctp;
5041
5042         /* Ignore Checksum bit is set */
5043         if (status & E1000_RXD_STAT_IXSM) {
5044                 mp->m_pkthdr.csum_flags = 0;
5045                 return;
5046         }
5047
5048         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5049             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5050                 sctp = 1;
5051         else
5052                 sctp = 0;
5053         if (status & E1000_RXD_STAT_IPCS) {
5054                 /* Did it pass? */
5055                 if (!(errors & E1000_RXD_ERR_IPE)) {
5056                         /* IP Checksum Good */
5057                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5058                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5059                 } else
5060                         mp->m_pkthdr.csum_flags = 0;
5061         }
5062
5063         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5064                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5065 #if __FreeBSD_version >= 800000
5066                 if (sctp) /* reassign */
5067                         type = CSUM_SCTP_VALID;
5068 #endif
5069                 /* Did it pass? */
5070                 if (!(errors & E1000_RXD_ERR_TCPE)) {
5071                         mp->m_pkthdr.csum_flags |= type;
5072                         if (sctp == 0)
5073                                 mp->m_pkthdr.csum_data = htons(0xffff);
5074                 }
5075         }
5076         return;
5077 }
5078
5079 /*
5080  * This routine is run via a vlan
5081  * config EVENT
5082  */
5083 static void
5084 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5085 {
5086         struct adapter  *adapter = ifp->if_softc;
5087         u32             index, bit;
5088
5089         if (ifp->if_softc != arg)   /* Not our event */
5090                 return;
5091
5092         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5093                 return;
5094
5095         IGB_CORE_LOCK(adapter);
5096         index = (vtag >> 5) & 0x7F;
5097         bit = vtag & 0x1F;
5098         adapter->shadow_vfta[index] |= (1 << bit);
5099         ++adapter->num_vlans;
5100         /* Change hw filter setting */
5101         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5102                 igb_setup_vlan_hw_support(adapter);
5103         IGB_CORE_UNLOCK(adapter);
5104 }
5105
5106 /*
5107  * This routine is run via a vlan
5108  * unconfig EVENT
5109  */
5110 static void
5111 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5112 {
5113         struct adapter  *adapter = ifp->if_softc;
5114         u32             index, bit;
5115
5116         if (ifp->if_softc != arg)
5117                 return;
5118
5119         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5120                 return;
5121
5122         IGB_CORE_LOCK(adapter);
5123         index = (vtag >> 5) & 0x7F;
5124         bit = vtag & 0x1F;
5125         adapter->shadow_vfta[index] &= ~(1 << bit);
5126         --adapter->num_vlans;
5127         /* Change hw filter setting */
5128         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5129                 igb_setup_vlan_hw_support(adapter);
5130         IGB_CORE_UNLOCK(adapter);
5131 }
5132
5133 static void
5134 igb_setup_vlan_hw_support(struct adapter *adapter)
5135 {
5136         struct e1000_hw *hw = &adapter->hw;
5137         struct ifnet    *ifp = adapter->ifp;
5138         u32             reg;
5139
5140         if (adapter->vf_ifp) {
5141                 e1000_rlpml_set_vf(hw,
5142                     adapter->max_frame_size + VLAN_TAG_SIZE);
5143                 return;
5144         }
5145
5146         reg = E1000_READ_REG(hw, E1000_CTRL);
5147         reg |= E1000_CTRL_VME;
5148         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5149
5150         /* Enable the Filter Table */
5151         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5152                 reg = E1000_READ_REG(hw, E1000_RCTL);
5153                 reg &= ~E1000_RCTL_CFIEN;
5154                 reg |= E1000_RCTL_VFE;
5155                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5156         }
5157
5158         /* Update the frame size */
5159         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5160             adapter->max_frame_size + VLAN_TAG_SIZE);
5161
5162         /* Don't bother with table if no vlans */
5163         if ((adapter->num_vlans == 0) ||
5164             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5165                 return;
5166         /*
5167         ** A soft reset zeroes out the VFTA, so
5168         ** we need to repopulate it now.
5169         */
5170         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5171                 if (adapter->shadow_vfta[i] != 0) {
5172                         if (adapter->vf_ifp)
5173                                 e1000_vfta_set_vf(hw,
5174                                     adapter->shadow_vfta[i], TRUE);
5175                         else
5176                                 e1000_write_vfta(hw,
5177                                     i, adapter->shadow_vfta[i]);
5178                 }
5179 }
5180
5181 static void
5182 igb_enable_intr(struct adapter *adapter)
5183 {
5184         /* With RSS, set up which interrupts to auto-clear */
5185         if (adapter->msix_mem) {
5186                 u32 mask = (adapter->que_mask | adapter->link_mask);
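                     /* EIAC auto-clears and EIAM auto-masks the queue
                      * interrupts; link state changes still arrive via IMS */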
5187                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5188                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5189                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5190                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5191                     E1000_IMS_LSC);
5192         } else {
5193                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5194                     IMS_ENABLE_MASK);
5195         }
5196         E1000_WRITE_FLUSH(&adapter->hw);
5197
5198         return;
5199 }
5200
5201 static void
5202 igb_disable_intr(struct adapter *adapter)
5203 {
5204         if (adapter->msix_mem) {
5205                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5206                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5207         } 
5208         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5209         E1000_WRITE_FLUSH(&adapter->hw);
5210         return;
5211 }
5212
5213 /*
5214  * Bit of a misnomer: what this really means is
5215  * to enable OS management of the system, i.e.,
5216  * to disable special hardware management features.
5217  */
5218 static void
5219 igb_init_manageability(struct adapter *adapter)
5220 {
5221         if (adapter->has_manage) {
5222                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5223                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5224
5225                 /* disable hardware interception of ARP */
5226                 manc &= ~(E1000_MANC_ARP_EN);
5227
5228                 /* enable receiving management packets to the host */
5229                 manc |= E1000_MANC_EN_MNG2HOST;
5230                 manc2h |= 1 << 5;  /* Mng Port 623 */
5231                 manc2h |= 1 << 6;  /* Mng Port 664 */
5232                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5233                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5234         }
5235 }
5236
5237 /*
5238  * Give control back to hardware management
5239  * controller if there is one.
5240  */
5241 static void
5242 igb_release_manageability(struct adapter *adapter)
5243 {
5244         if (adapter->has_manage) {
5245                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5246
5247                 /* re-enable hardware interception of ARP */
5248                 manc |= E1000_MANC_ARP_EN;
5249                 manc &= ~E1000_MANC_EN_MNG2HOST;
5250
5251                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5252         }
5253 }
5254
5255 /*
5256  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5257  * For ASF and Pass Through versions of f/w this means that
5258  * the driver is loaded. 
5259  *
5260  */
5261 static void
5262 igb_get_hw_control(struct adapter *adapter)
5263 {
5264         u32 ctrl_ext;
5265
5266         if (adapter->vf_ifp)
5267                 return;
5268
5269         /* Let firmware know the driver has taken over */
5270         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5271         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5272             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5273 }
5274
5275 /*
5276  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5277  * For ASF and Pass Through versions of f/w this means that the
5278  * driver is no longer loaded.
5279  *
5280  */
5281 static void
5282 igb_release_hw_control(struct adapter *adapter)
5283 {
5284         u32 ctrl_ext;
5285
5286         if (adapter->vf_ifp)
5287                 return;
5288
5289         /* Let firmware take over control of h/w */
5290         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5291         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5292             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5293 }
5294
5295 static int
5296 igb_is_valid_ether_addr(uint8_t *addr)
5297 {
5298         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5299
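             /* Reject multicast (low bit of first octet set) and all-zero addresses */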
5300         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5301                 return (FALSE);
5302         }
5303
5304         return (TRUE);
5305 }
5306
5307
5308 /*
5309  * Enable PCI Wake On Lan capability
5310  */
5311 static void
5312 igb_enable_wakeup(device_t dev)
5313 {
5314         u16     cap, status;
5315         u8      id;
5316
5317         /* First find the capabilities pointer */
5318         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5319         /* Read the PM Capabilities */
5320         id = pci_read_config(dev, cap, 1);
5321         if (id != PCIY_PMG)     /* Something wrong */
5322                 return;
5323         /* OK, we have the power capabilities, so
5324            now get the status register */
5325         cap += PCIR_POWER_STATUS;
5326         status = pci_read_config(dev, cap, 2);
5327         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5328         pci_write_config(dev, cap, status, 2);
5329         return;
5330 }
5331
5332 static void
5333 igb_led_func(void *arg, int onoff)
5334 {
5335         struct adapter  *adapter = arg;
5336
5337         IGB_CORE_LOCK(adapter);
5338         if (onoff) {
5339                 e1000_setup_led(&adapter->hw);
5340                 e1000_led_on(&adapter->hw);
5341         } else {
5342                 e1000_led_off(&adapter->hw);
5343                 e1000_cleanup_led(&adapter->hw);
5344         }
5345         IGB_CORE_UNLOCK(adapter);
5346 }
5347
5348 /**********************************************************************
5349  *
5350  *  Update the board statistics counters.
5351  *
5352  **********************************************************************/
5353 static void
5354 igb_update_stats_counters(struct adapter *adapter)
5355 {
5356         struct ifnet            *ifp;
5357         struct e1000_hw         *hw = &adapter->hw;
5358         struct e1000_hw_stats   *stats;
5359
5360         /*
5361         ** The virtual function adapter has only a
5362         ** small, controlled set of stats, so do only
5363         ** those and return.
5364         */
5365         if (adapter->vf_ifp) {
5366                 igb_update_vf_stats_counters(adapter);
5367                 return;
5368         }
5369
5370         stats = (struct e1000_hw_stats  *)adapter->stats;
5371
5372         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5373            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5374                 stats->symerrs +=
5375                     E1000_READ_REG(hw,E1000_SYMERRS);
5376                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5377         }
5378
5379         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5380         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5381         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5382         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5383
5384         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5385         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5386         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5387         stats->dc += E1000_READ_REG(hw, E1000_DC);
5388         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5389         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5390         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5391         /*
5392         ** For watchdog management we need to know if we have been
5393         ** paused during the last interval, so capture that here.
5394         */ 
5395         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5396         stats->xoffrxc += adapter->pause_frames;
5397         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5398         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5399         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5400         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5401         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5402         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5403         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5404         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5405         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5406         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5407         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5408         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5409
5410         /* For the 64-bit byte counters the low dword must be read first; */
5411         /* both registers clear on the read of the high dword. */
5412
5413         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5414             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5415         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5416             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5417
5418         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5419         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5420         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5421         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5422         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5423
5424         stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5425         stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5426         stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5427
5428         stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5429             ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5430         stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5431             ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5432
5433         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5434         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5435         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5436         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5437         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5438         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5439         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5440         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5441         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5442         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5443
5444         /* Interrupt Counts */
5445
5446         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5447         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5448         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5449         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5450         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5451         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5452         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5453         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5454         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5455
5456         /* Host to Card Statistics */
5457
5458         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5459         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5460         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5461         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5462         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5463         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5464         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5465         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5466             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5467         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5468             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5469         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5470         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5471         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5472
5473         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5474         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5475         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5476         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5477         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5478         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5479
5480         ifp = adapter->ifp;
5481         ifp->if_collisions = stats->colc;
5482
5483         /* Rx Errors */
5484         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5485             stats->crcerrs + stats->algnerrc +
5486             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5487
5488         /* Tx Errors */
5489         ifp->if_oerrors = stats->ecol +
5490             stats->latecol + adapter->watchdog_events;
5491
5492         /* Driver specific counters */
5493         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5494         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5495         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5496         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5497         adapter->packet_buf_alloc_tx =
5498             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5499         adapter->packet_buf_alloc_rx =
5500             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5501 }
5502
5503
5504 /**********************************************************************
5505  *
5506  *  Initialize the VF board statistics counters.
5507  *
5508  **********************************************************************/
5509 static void
5510 igb_vf_init_stats(struct adapter *adapter)
5511 {
5512         struct e1000_hw *hw = &adapter->hw;
5513         struct e1000_vf_stats   *stats;
5514
5515         stats = (struct e1000_vf_stats  *)adapter->stats;
5516         if (stats == NULL)
5517                 return;
5518         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5519         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5520         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5521         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5522         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5523 }
5524  
5525 /**********************************************************************
5526  *
5527  *  Update the VF board statistics counters.
5528  *
5529  **********************************************************************/
5530 static void
5531 igb_update_vf_stats_counters(struct adapter *adapter)
5532 {
5533         struct e1000_hw *hw = &adapter->hw;
5534         struct e1000_vf_stats   *stats;
5535
5536         if (adapter->link_speed == 0)
5537                 return;
5538
5539         stats = (struct e1000_vf_stats  *)adapter->stats;
5540
5541         UPDATE_VF_REG(E1000_VFGPRC,
5542             stats->last_gprc, stats->gprc);
5543         UPDATE_VF_REG(E1000_VFGORC,
5544             stats->last_gorc, stats->gorc);
5545         UPDATE_VF_REG(E1000_VFGPTC,
5546             stats->last_gptc, stats->gptc);
5547         UPDATE_VF_REG(E1000_VFGOTC,
5548             stats->last_gotc, stats->gotc);
5549         UPDATE_VF_REG(E1000_VFMPRC,
5550             stats->last_mprc, stats->mprc);
5551 }
5552
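/*
 * UPDATE_VF_REG is defined in if_igb.h; this is an editorial sketch
 * of the technique it applies, not its exact text.  VF statistics
 * registers are 32-bit, free-running, and never cleared, so each
 * sample computes a mod-2^32 delta against the previous sample
 * (which absorbs a single wraparound) and folds it into a 64-bit
 * software accumulator.
 */
#define	UPDATE_VF_REG_SKETCH(reg, last, cur)			\
do {								\
	u32 sample = E1000_READ_REG(hw, reg);			\
	(cur) += (u32)(sample - (last)); /* mod-2^32 delta */	\
	(last) = sample;					\
} while (0)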
5553 /* Export a single 32-bit register via a read-only sysctl. */
5554 static int
5555 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5556 {
5557         struct adapter *adapter;
5558         u_int val;
5559
5560         adapter = oidp->oid_arg1;
5561         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5562         return (sysctl_handle_int(oidp, &val, 0, req));
5563 }
5564
5565 /*
5566 **  Tuneable interrupt rate handler
5567 */
5568 static int
5569 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5570 {
5571         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5572         int                     error;
5573         u32                     reg, usec, rate;
5574                         
5575         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5576         usec = ((reg & 0x7FFC) >> 2);
5577         if (usec > 0)
5578                 rate = 1000000 / usec;
5579         else
5580                 rate = 0;
5581         error = sysctl_handle_int(oidp, &rate, 0, req);
5582         if (error || !req->newptr)
5583                 return (error);
5584         return (0);
5585 }
5586
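/*
 * Worked example of the EITR math above, assuming (as the handler
 * does) that bits 14:2 of EITR hold the throttle interval in
 * microseconds: a register value of 0x01F4 yields
 * usec = (0x01F4 & 0x7FFC) >> 2 = 125, so the reported rate is
 * 1000000 / 125 = 8000 interrupts/sec.  Note that the handler is
 * effectively read-only: a new value is accepted by
 * sysctl_handle_int() but is never written back to EITR.
 */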
5587 /*
5588  * Add sysctl variables, one per statistic, to the system.
5589  */
5590 static void
5591 igb_add_hw_stats(struct adapter *adapter)
5592 {
5593         device_t dev = adapter->dev;
5594
5595         struct tx_ring *txr = adapter->tx_rings;
5596         struct rx_ring *rxr = adapter->rx_rings;
5597
5598         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5599         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5600         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5601         struct e1000_hw_stats *stats = adapter->stats;
5602
5603         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5604         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5605
5606 #define QUEUE_NAME_LEN 32
5607         char namebuf[QUEUE_NAME_LEN];
5608
5609         /* Driver Statistics */
5610         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5611                         CTLFLAG_RD, &adapter->dropped_pkts,
5612                         "Driver dropped packets");
5613         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 
5614                         CTLFLAG_RD, &adapter->link_irq,
5615                         "Link MSIX IRQ Handled");
5616         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5617                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5618                         "Defragmenting mbuf chain failed");
5619         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5620                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5621                         "Driver tx dma failure in xmit");
5622         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5623                         CTLFLAG_RD, &adapter->rx_overruns,
5624                         "RX overruns");
5625         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5626                         CTLFLAG_RD, &adapter->watchdog_events,
5627                         "Watchdog timeouts");
5628
5629         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5630                         CTLFLAG_RD, &adapter->device_control,
5631                         "Device Control Register");
5632         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5633                         CTLFLAG_RD, &adapter->rx_control,
5634                         "Receiver Control Register");
5635         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5636                         CTLFLAG_RD, &adapter->int_mask,
5637                         "Interrupt Mask");
5638         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5639                         CTLFLAG_RD, &adapter->eint_mask,
5640                         "Extended Interrupt Mask");
5641         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5642                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5643                         "Transmit Buffer Packet Allocation");
5644         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5645                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5646                         "Receive Buffer Packet Allocation");
5647         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5648                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5649                         "Flow Control High Watermark");
5650         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5651                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5652                         "Flow Control Low Watermark");
5653
5654         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5655                 struct lro_ctrl *lro = &rxr->lro;
5656
5657                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5658                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5659                                             CTLFLAG_RD, NULL, "Queue Name");
5660                 queue_list = SYSCTL_CHILDREN(queue_node);
5661
5662                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5663                                 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5664                                 sizeof(&adapter->queues[i]), /* arg2 unused by handler */
5665                                 igb_sysctl_interrupt_rate_handler,
5666                                 "IU", "Interrupt Rate");
5667
5668                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5669                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5670                                 igb_sysctl_reg_handler, "IU",
5671                                 "Transmit Descriptor Head");
5672                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5673                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5674                                 igb_sysctl_reg_handler, "IU",
5675                                 "Transmit Descriptor Tail");
5676                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5677                                 CTLFLAG_RD, &txr->no_desc_avail,
5678                                 "Queue No Descriptor Available");
5679                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5680                                 CTLFLAG_RD, &txr->total_packets,
5681                                 "Queue Packets Transmitted");
5682
5683                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5684                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5685                                 igb_sysctl_reg_handler, "IU",
5686                                 "Receive Descriptor Head");
5687                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5688                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5689                                 igb_sysctl_reg_handler, "IU",
5690                                 "Receive Descriptor Tail");
5691                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5692                                 CTLFLAG_RD, &rxr->rx_packets,
5693                                 "Queue Packets Received");
5694                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5695                                 CTLFLAG_RD, &rxr->rx_bytes,
5696                                 "Queue Bytes Received");
5697                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5698                                 CTLFLAG_RD, &lro->lro_queued, 0,
5699                                 "LRO Queued");
5700                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5701                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5702                                 "LRO Flushed");
5703         }
5704
5705         /* MAC stats get their own sub node */
5706
5707         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5708                                     CTLFLAG_RD, NULL, "MAC Statistics");
5709         stat_list = SYSCTL_CHILDREN(stat_node);
5710
5711         /*
5712         ** The VF adapter has a very limited set of stats
5713         ** since it's not managing the bare metal, so to speak.
5714         */
5715         if (adapter->vf_ifp) {
5716                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5717                                 CTLFLAG_RD, &stats->gprc,
5718                                 "Good Packets Received");
5719                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5720                                 CTLFLAG_RD, &stats->gptc,
5721                                 "Good Packets Transmitted");
5722                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5723                                 CTLFLAG_RD, &stats->gorc,
5724                                 "Good Octets Received");
5725                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5726                                 CTLFLAG_RD, &stats->gotc,
5727                                 "Good Octets Transmitted");
5728                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5729                                 CTLFLAG_RD, &stats->mprc,
5730                                 "Multicast Packets Received");
5731                 return;
5732         }
5733
5734         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5735                         CTLFLAG_RD, &stats->ecol,
5736                         "Excessive collisions");
5737         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5738                         CTLFLAG_RD, &stats->scc,
5739                         "Single collisions");
5740         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5741                         CTLFLAG_RD, &stats->mcc,
5742                         "Multiple collisions");
5743         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5744                         CTLFLAG_RD, &stats->latecol,
5745                         "Late collisions");
5746         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5747                         CTLFLAG_RD, &stats->colc,
5748                         "Collision Count");
5749         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5750                         CTLFLAG_RD, &stats->symerrs,
5751                         "Symbol Errors");
5752         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5753                         CTLFLAG_RD, &stats->sec,
5754                         "Sequence Errors");
5755         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5756                         CTLFLAG_RD, &stats->dc,
5757                         "Defer Count");
5758         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5759                         CTLFLAG_RD, &stats->mpc,
5760                         "Missed Packets");
5761         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
5762                         CTLFLAG_RD, &stats->rlec,
5763                         "Receive Length Errors");
5764         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5765                         CTLFLAG_RD, &stats->rnbc,
5766                         "Receive No Buffers");
5767         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5768                         CTLFLAG_RD, &stats->ruc,
5769                         "Receive Undersize");
5770         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5771                         CTLFLAG_RD, &stats->rfc,
5772                         "Fragmented Packets Received");
5773         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5774                         CTLFLAG_RD, &stats->roc,
5775                         "Oversized Packets Received");
5776         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5777                         CTLFLAG_RD, &stats->rjc,
5778                         "Received Jabber");
5779         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5780                         CTLFLAG_RD, &stats->rxerrc,
5781                         "Receive Errors");
5782         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5783                         CTLFLAG_RD, &stats->crcerrs,
5784                         "CRC errors");
5785         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5786                         CTLFLAG_RD, &stats->algnerrc,
5787                         "Alignment Errors");
5788         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
5789                         CTLFLAG_RD, &stats->tncrs,
5790                         "Transmit with No CRS");
5791         /* On 82575 these are collision counts */
5792         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5793                         CTLFLAG_RD, &stats->cexterr,
5794                         "Collision/Carrier extension errors");
5795         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5796                         CTLFLAG_RD, &stats->xonrxc,
5797                         "XON Received");
5798         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5799                         CTLFLAG_RD, &stats->xontxc,
5800                         "XON Transmitted");
5801         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5802                         CTLFLAG_RD, &stats->xoffrxc,
5803                         "XOFF Received");
5804         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5805                         CTLFLAG_RD, &stats->xofftxc,
5806                         "XOFF Transmitted");
5807         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
5808                         CTLFLAG_RD, &stats->fcruc,
5809                         "Unsupported Flow Control Received");
5810         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
5811                         CTLFLAG_RD, &stats->mgprc,
5812                         "Management Packets Received");
5813         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
5814                         CTLFLAG_RD, &stats->mgpdc,
5815                         "Management Packets Dropped");
5816         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
5817                         CTLFLAG_RD, &stats->mgptc,
5818                         "Management Packets Transmitted");
5819         /* Packet Reception Stats */
5820         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5821                         CTLFLAG_RD, &stats->tpr,
5822                         "Total Packets Received");
5823         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5824                         CTLFLAG_RD, &stats->gprc,
5825                         "Good Packets Received");
5826         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5827                         CTLFLAG_RD, &stats->bprc,
5828                         "Broadcast Packets Received");
5829         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5830                         CTLFLAG_RD, &stats->mprc,
5831                         "Multicast Packets Received");
5832         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5833                         CTLFLAG_RD, &stats->prc64,
5834                         "64 byte frames received");
5835         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5836                         CTLFLAG_RD, &stats->prc127,
5837                         "65-127 byte frames received");
5838         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5839                         CTLFLAG_RD, &stats->prc255,
5840                         "128-255 byte frames received");
5841         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5842                         CTLFLAG_RD, &stats->prc511,
5843                         "256-511 byte frames received");
5844         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5845                         CTLFLAG_RD, &stats->prc1023,
5846                         "512-1023 byte frames received");
5847         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5848                         CTLFLAG_RD, &stats->prc1522,
5849                         "1024-1522 byte frames received");
5850         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5851                         CTLFLAG_RD, &stats->gorc, 
5852                         "Good Octets Received");
5853         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", 
5854                         CTLFLAG_RD, &stats->tor, 
5855                         "Total Octets Received");
5856
5857         /* Packet Transmission Stats */
5858         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5859                         CTLFLAG_RD, &stats->gotc, 
5860                         "Good Octets Transmitted"); 
5861         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", 
5862                         CTLFLAG_RD, &stats->tot, 
5863                         "Total Octets Transmitted");
5864         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5865                         CTLFLAG_RD, &stats->tpt,
5866                         "Total Packets Transmitted");
5867         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5868                         CTLFLAG_RD, &stats->gptc,
5869                         "Good Packets Transmitted");
5870         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5871                         CTLFLAG_RD, &stats->bptc,
5872                         "Broadcast Packets Transmitted");
5873         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5874                         CTLFLAG_RD, &stats->mptc,
5875                         "Multicast Packets Transmitted");
5876         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5877                         CTLFLAG_RD, &stats->ptc64,
5878                         "64 byte frames transmitted");
5879         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5880                         CTLFLAG_RD, &stats->ptc127,
5881                         "65-127 byte frames transmitted");
5882         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5883                         CTLFLAG_RD, &stats->ptc255,
5884                         "128-255 byte frames transmitted");
5885         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5886                         CTLFLAG_RD, &stats->ptc511,
5887                         "256-511 byte frames transmitted");
5888         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5889                         CTLFLAG_RD, &stats->ptc1023,
5890                         "512-1023 byte frames transmitted");
5891         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5892                         CTLFLAG_RD, &stats->ptc1522,
5893                         "1024-1522 byte frames transmitted");
5894         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5895                         CTLFLAG_RD, &stats->tsctc,
5896                         "TSO Contexts Transmitted");
5897         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5898                         CTLFLAG_RD, &stats->tsctfc,
5899                         "TSO Contexts Failed");
5900
5901
5902         /* Interrupt Stats */
5903
5904         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5905                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5906         int_list = SYSCTL_CHILDREN(int_node);
5907
5908         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5909                         CTLFLAG_RD, &stats->iac,
5910                         "Interrupt Assertion Count");
5911
5912         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5913                         CTLFLAG_RD, &stats->icrxptc,
5914                         "Interrupt Cause Rx Pkt Timer Expire Count");
5915
5916         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5917                         CTLFLAG_RD, &stats->icrxatc,
5918                         "Interrupt Cause Rx Abs Timer Expire Count");
5919
5920         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5921                         CTLFLAG_RD, &stats->ictxptc,
5922                         "Interrupt Cause Tx Pkt Timer Expire Count");
5923
5924         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5925                         CTLFLAG_RD, &stats->ictxatc,
5926                         "Interrupt Cause Tx Abs Timer Expire Count");
5927
5928         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5929                         CTLFLAG_RD, &stats->ictxqec,
5930                         "Interrupt Cause Tx Queue Empty Count");
5931
5932         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5933                         CTLFLAG_RD, &stats->ictxqmtc,
5934                         "Interrupt Cause Tx Queue Min Thresh Count");
5935
5936         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5937                         CTLFLAG_RD, &stats->icrxdmtc,
5938                         "Interrupt Cause Rx Desc Min Thresh Count");
5939
5940         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5941                         CTLFLAG_RD, &stats->icrxoc,
5942                         "Interrupt Cause Receiver Overrun Count");
5943
5944         /* Host to Card Stats */
5945
5946         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5947                                     CTLFLAG_RD, NULL, 
5948                                     "Host to Card Statistics");
5949
5950         host_list = SYSCTL_CHILDREN(host_node);
5951
5952         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5953                         CTLFLAG_RD, &stats->cbtmpc,
5954                         "Circuit Breaker Tx Packet Count");
5955
5956         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5957                         CTLFLAG_RD, &stats->htdpmc,
5958                         "Host Transmit Discarded Packets");
5959
5960         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5961                         CTLFLAG_RD, &stats->rpthc,
5962                         "Rx Packets To Host");
5963
5964         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5965                         CTLFLAG_RD, &stats->cbrmpc,
5966                         "Circuit Breaker Rx Packet Count");
5967
5968         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5969                         CTLFLAG_RD, &stats->cbrdpc,
5970                         "Circuit Breaker Rx Dropped Count");
5971
5972         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5973                         CTLFLAG_RD, &stats->hgptc,
5974                         "Host Good Packets Tx Count");
5975
5976         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5977                         CTLFLAG_RD, &stats->htcbdpc,
5978                         "Host Tx Circuit Breaker Dropped Count");
5979
5980         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5981                         CTLFLAG_RD, &stats->hgorc,
5982                         "Host Good Octets Received Count");
5983
5984         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5985                         CTLFLAG_RD, &stats->hgotc,
5986                         "Host Good Octets Transmit Count");
5987
5988         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5989                         CTLFLAG_RD, &stats->lenerrs,
5990                         "Length Errors");
5991
5992         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5993                         CTLFLAG_RD, &stats->scvpc,
5994                         "SerDes/SGMII Code Violation Pkt Count");
5995
5996         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5997                         CTLFLAG_RD, &stats->hrmpc,
5998                         "Header Redirection Missed Packet Count");
5999 }
6000
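/*
 * Userland illustration (not driver code): every node added above
 * hangs off the device's sysctl tree, dev.igb.<unit>.  For example,
 * the MAC counter registered as "good_pkts_recvd" under "mac_stats"
 * can be read with sysctlbyname(3); the unit number 0 below is an
 * assumption, adjust for your device.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t gprc;
	size_t len = sizeof(gprc);

	if (sysctlbyname("dev.igb.0.mac_stats.good_pkts_recvd",
	    &gprc, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("good packets received: %ju\n", (uintmax_t)gprc);
	return (0);
}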
6001
6002 /**********************************************************************
6003  *
6004  *  This routine provides a way to dump out the adapter eeprom,
6005  *  often a useful debug/service tool. Only the first 32 words are
6006  *  dumped; everything that matters lives in that range.
6007  *
6008  **********************************************************************/
6009 static int
6010 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6011 {
6012         struct adapter *adapter;
6013         int error;
6014         int result;
6015
6016         result = -1;
6017         error = sysctl_handle_int(oidp, &result, 0, req);
6018
6019         if (error || !req->newptr)
6020                 return (error);
6021
6022         /*
6023          * This value will cause a hex dump of the
6024          * first 32 16-bit words of the EEPROM to
6025          * the screen.
6026          */
6027         if (result == 1) {
6028                 adapter = (struct adapter *)arg1;
6029                 igb_print_nvm_info(adapter);
6030         }
6031
6032         return (error);
6033 }
6034
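/*
 * Usage note: this is a write-to-trigger sysctl.  Reads simply
 * report -1; writing 1 dumps the first 32 EEPROM words to the
 * console via igb_print_nvm_info() below.  With the stock
 * registration the trigger looks like the following (the OID name
 * "nvm" and unit 0 are assumptions, verify against the attach code):
 *
 *	sysctl dev.igb.0.nvm=1
 */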
6035 static void
6036 igb_print_nvm_info(struct adapter *adapter)
6037 {
6038         u16     eeprom_data;
6039         int     i, j, row = 0;
6040
6041         /* It's a bit crude, but it gets the job done */
6042         printf("\nInterface EEPROM Dump:\n");
6043         printf("Offset\n0x0000  ");
6044         for (i = 0, j = 0; i < 32; i++, j++) {
6045                 if (j == 8) { /* Make the offset block */
6046                         j = 0; ++row;
6047                         printf("\n0x00%x0  ", row);
6048                 }
6049                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6050                 printf("%04x ", eeprom_data);
6051         }
6052         printf("\n");
6053 }
6054
6055 static void
6056 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6057         const char *description, int *limit, int value)
6058 {
6059         *limit = value;
6060         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6061             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6062             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6063 }
6064
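/*
 * Example use of the helper above, along the lines of the driver's
 * own call sites in attach (the values shown here are illustrative):
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, 100);
 *
 * This both seeds *limit and exposes it as a read/write sysctl.
 */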
6065 /*
6066 ** Set flow control using sysctl:
6067 ** Flow control values:
6068 **      0 - off
6069 **      1 - rx pause
6070 **      2 - tx pause
6071 **      3 - full
6072 */
6073 static int
6074 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6075 {
6076         int             error, input;
6077         struct adapter  *adapter = (struct adapter *) arg1;
6078
6079         input = adapter->fc; /* report the current mode, not a static shared across adapters */
6080         error = sysctl_handle_int(oidp, &input, 0, req);
6081
6082         if ((error) || (req->newptr == NULL))
6083                 return (error);
6084
6085         switch (input) {
6086                 case e1000_fc_rx_pause:
6087                 case e1000_fc_tx_pause:
6088                 case e1000_fc_full:
6089                 case e1000_fc_none:
6090                         adapter->hw.fc.requested_mode = input;
6091                         adapter->fc = input;
6092                         break;
6093                 default:
6094                         /* Do nothing */
6095                         return (error);
6096         }
6097
6098         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6099         e1000_force_mac_fc(&adapter->hw);
6100         /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6101         return (error);
6102 }
6103
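/*
 * For reference, the sysctl values in the comment above line up with
 * the e1000_fc_mode enumeration from the shared code (shown here as
 * an illustration; see e1000_hw.h for the authoritative definition):
 *
 *	e1000_fc_none     = 0	off
 *	e1000_fc_rx_pause = 1	rx pause
 *	e1000_fc_tx_pause = 2	tx pause
 *	e1000_fc_full     = 3	full
 */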
6104 /*
6105 ** Manage DMA Coalesce:
6106 ** Control values:
6107 **      0/1 - off/on
6108 **      Legal timer values are:
6109 **      250, 500, and 1000-10000 in increments of 1000
6110 */
6111 static int
6112 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6113 {
6114         struct adapter *adapter = (struct adapter *) arg1;
6115         int             error, value;
6116
6117         value = adapter->dmac;
6118         error = sysctl_handle_int(oidp, &value, 0, req);
6119         if ((error) || (req->newptr == NULL))
6120                 return (error);
6121
6122         switch (value) {
6123                 case 0:
6124                         /* Disabling */
6125                         break;
6126                 case 1: /* Just enable and use the default */
6127                         value = 1000;
6128                         break;
6129                 case 250:
6130                 case 500:
6131                 case 1000:
6132                 case 2000:
6133                 case 3000:
6134                 case 4000:
6135                 case 5000:
6136                 case 6000:
6137                 case 7000:
6138                 case 8000:
6139                 case 9000:
6140                 case 10000:
6141                         /* Legal values - allow */
6142                         break;
6143                 default:
6144                         /* Illegal value: leave the current setting alone */
6145                         return (EINVAL);
6146         }
6147         adapter->dmac = value;
6148         /* Reinit the interface */
6149         igb_init(adapter);
6150         return (error);
6151 }
6152
6153 /*
6154 ** Manage Energy Efficient Ethernet:
6155 ** Control values:
6156 **     0 - EEE enabled, 1 - EEE disabled
6157 */
6158 static int
6159 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6160 {
6161         struct adapter  *adapter = (struct adapter *) arg1;
6162         int             error, value;
6163
6164         value = adapter->hw.dev_spec._82575.eee_disable;
6165         error = sysctl_handle_int(oidp, &value, 0, req);
6166         if (error || req->newptr == NULL)
6167                 return (error);
6168         IGB_CORE_LOCK(adapter);
6169         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6170         igb_init_locked(adapter);
6171         IGB_CORE_UNLOCK(adapter);
6172         return (0);
6173 }