/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifndef IGB_LEGACY_TX
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>
#ifdef  RSS
#include <netinet/in_rss.h>
#endif

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.4.0";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by the probe routine to select the devices to attach to.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SFP,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_VF,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_IT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I211_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I354_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static uint64_t igb_get_counter(if_t, ift_counter);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static int      igb_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      igb_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");
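
/*
 * Illustrative example: hw.igb.rxd and hw.igb.txd are boot-time tunables
 * (CTLFLAG_RDTUN), so they would normally be set from /boot/loader.conf,
 * e.g. (values are examples only):
 *
 *   hw.igb.rxd=2048
 *   hw.igb.txd=2048
 */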

/*
** AIM: Adaptive Interrupt Moderation.
** The interrupt rate is varied over time based on
** the traffic seen on each interrupt vector.
*/
static int igb_enable_aim = TRUE;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");
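
/*
 * Illustrative example: because enable_aim is CTLFLAG_RWTUN it can also
 * be toggled on a running system, e.g.:
 *
 *   # sysctl hw.igb.enable_aim=0
 */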

/*
 * MSI-X should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be DMA'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf and so need no cluster at all.  It is
** a very workload-dependent feature.
*/
static int igb_header_split = FALSE;
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and the maximum number of
** supported MSI-X messages if left at 0.
*/
static int igb_num_queues = 0;
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");
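
/*
 * Illustrative example: to force four queues instead of autoconfiguring,
 * one could set, in /boot/loader.conf:
 *
 *   hw.igb.num_queues=4
 */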

/*
** Global variable to store the last CPU used when binding
** queues to CPUs in igb_allocate_msix.  Starts at CPU_FIRST
** and increments each time a queue is bound to a CPU.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");
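
/*
 * Illustrative example: to remove the per-pass RX cleanup cap entirely,
 * one could set, in /boot/loader.conf:
 *
 *   hw.igb.rx_process_limit=-1
 */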

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines whether the driver should be loaded on an
 *  adapter based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  They
         * must not exceed the hardware maximum, and the ring size in
         * bytes must be a multiple of IGB_DBA_ALIGN.
         */
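        /*
         * Worked example (assuming the usual 16-byte descriptor size and
         * an IGB_DBA_ALIGN of 128 from the driver headers): a descriptor
         * count must be a multiple of 8, so 1024 passes the check while
         * 1023 would be rejected and replaced with the default.
         */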
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats = (struct e1000_vf_stats *)malloc(
                    sizeof(struct e1000_vf_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats = (struct e1000_hw_stats *)malloc(
                    sizeof(struct e1000_hw_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                        if (adapter->hw.mac.type == e1000_i354)
                                e1000_set_eee_i354(&adapter->hw);
                        else
                                e1000_set_eee_i350(&adapter->hw);
                }
        }

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it is a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "VLAN in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}

#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack.  It always uses tx[0], and spins for the lock.
 * It should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;
#ifdef  RSS
        uint32_t                bucket_id;
#endif

        /*
         * Which queue to use?
         *
         * When doing RSS, map the packet to the same outbound queue
         * as the one its incoming flow would be mapped to.
         *
         * If everything is set up correctly, it should be the
         * same bucket as the one the current CPU belongs to.
         */
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef  RSS
                if (rss_hash2bucket(m->m_pkthdr.flowid,
                    M_HASHTYPE_GET(m), &bucket_id) == 0) {
                        /* XXX TODO: spit out something if bucket_id > num_queues? */
                        i = bucket_id % adapter->num_queues;
                } else {
#endif
                        i = m->m_pkthdr.flowid % adapter->num_queues;
#ifdef  RSS
                }
#endif
        } else {
                i = curcpu % adapter->num_queues;
        }
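
        /*
         * Illustrative example: with four queues, a packet whose flowid
         * hashes to 13 is enqueued on TX ring 13 % 4 = 1.
         */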
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IGB_TX_TRYLOCK(txr)) {
                igb_mq_start_locked(ifp, txr);
                IGB_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (0);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
                if (next->m_flags & M_MCAST)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
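                /*
                 * With a 14-byte Ethernet header and 4-byte CRC, this
                 * 9234-byte frame limit allows an MTU of up to 9216.
                 */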
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
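                /* FALLTHROUGH */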
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO6) {
                        ifp->if_capenable ^= IFCAP_TSO6;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  the hardware and software to a consistent state.
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address; the user can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
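
        /*
         * For example, with the default 1500-byte MTU the maximum frame
         * size is 1518 bytes, so standard 2K (MCLBYTES) clusters are
         * used; a 9000-byte MTU would select 9K (MJUM9BYTES) clusters.
         */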

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

1373         /* Don't lose promiscuous settings */
1374         igb_set_promisc(adapter);
1375
1376         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1377         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1378
1379         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1380         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1381
1382         if (adapter->msix > 1) /* Set up queue routing */
1383                 igb_configure_queues(adapter);
1384
1385         /* this clears any pending interrupts */
1386         E1000_READ_REG(&adapter->hw, E1000_ICR);
1387 #ifdef DEVICE_POLLING
1388         /*
1389          * Only enable interrupts if we are not polling, make sure
1390          * they are off otherwise.
1391          */
1392         if (ifp->if_capenable & IFCAP_POLLING)
1393                 igb_disable_intr(adapter);
1394         else
1395 #endif /* DEVICE_POLLING */
1396         {
1397                 igb_enable_intr(adapter);
1398                 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1399         }
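             /*
             ** The ICS write above posts a link-state-change cause, so
             ** link state is re-evaluated right after initialization.
             */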
1400
1401         /* Set Energy Efficient Ethernet */
1402         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1403                 if (adapter->hw.mac.type == e1000_i354)
1404                         e1000_set_eee_i354(&adapter->hw);
1405                 else
1406                         e1000_set_eee_i350(&adapter->hw);
1407         }
1408 }
1409
1410 static void
1411 igb_init(void *arg)
1412 {
1413         struct adapter *adapter = arg;
1414
1415         IGB_CORE_LOCK(adapter);
1416         igb_init_locked(adapter);
1417         IGB_CORE_UNLOCK(adapter);
1418 }
1419
1420
1421 static void
1422 igb_handle_que(void *context, int pending)
1423 {
1424         struct igb_queue *que = context;
1425         struct adapter *adapter = que->adapter;
1426         struct tx_ring *txr = que->txr;
1427         struct ifnet    *ifp = adapter->ifp;
1428
1429         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1430                 bool    more;
1431
1432                 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1433
1434                 IGB_TX_LOCK(txr);
1435                 igb_txeof(txr);
1436 #ifndef IGB_LEGACY_TX
1437                 /* Process the stack queue only if not depleted */
1438                 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1439                     !drbr_empty(ifp, txr->br))
1440                         igb_mq_start_locked(ifp, txr);
1441 #else
1442                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1443                         igb_start_locked(txr, ifp);
1444 #endif
1445                 IGB_TX_UNLOCK(txr);
1446                 /* Do we need another? */
1447                 if (more) {
1448                         taskqueue_enqueue(que->tq, &que->que_task);
1449                         return;
1450                 }
1451         }
1452
1453 #ifdef DEVICE_POLLING
1454         if (ifp->if_capenable & IFCAP_POLLING)
1455                 return;
1456 #endif
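             /*
             ** que->eims is non-zero only when MSIX vectors were set up
             ** (see igb_allocate_msix()); with legacy/MSI there is a
             ** single interrupt mask, so re-enable everything.
             */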
1457         /* Reenable this interrupt */
1458         if (que->eims)
1459                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1460         else
1461                 igb_enable_intr(adapter);
1462 }
1463
1464 /* Deal with link in a sleepable context */
1465 static void
1466 igb_handle_link(void *context, int pending)
1467 {
1468         struct adapter *adapter = context;
1469
1470         IGB_CORE_LOCK(adapter);
1471         igb_handle_link_locked(adapter);
1472         IGB_CORE_UNLOCK(adapter);
1473 }
1474
1475 static void
1476 igb_handle_link_locked(struct adapter *adapter)
1477 {
1478         struct tx_ring  *txr = adapter->tx_rings;
1479         struct ifnet *ifp = adapter->ifp;
1480
1481         IGB_CORE_LOCK_ASSERT(adapter);
1482         adapter->hw.mac.get_link_status = 1;
1483         igb_update_link_status(adapter);
1484         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1485                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1486                         IGB_TX_LOCK(txr);
1487 #ifndef IGB_LEGACY_TX
1488                         /* Process the stack queue only if not depleted */
1489                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1490                             !drbr_empty(ifp, txr->br))
1491                                 igb_mq_start_locked(ifp, txr);
1492 #else
1493                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1494                                 igb_start_locked(txr, ifp);
1495 #endif
1496                         IGB_TX_UNLOCK(txr);
1497                 }
1498         }
1499 }
1500
1501 /*********************************************************************
1502  *
1503  *  MSI/Legacy Deferred
1504  *  Interrupt Service routine  
1505  *
1506  *********************************************************************/
1507 static int
1508 igb_irq_fast(void *arg)
1509 {
1510         struct adapter          *adapter = arg;
1511         struct igb_queue        *que = adapter->queues;
1512         u32                     reg_icr;
1513
1514
1515         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1516
1517         /* Hot eject? Reads return all 1's when the device is gone */
1518         if (reg_icr == 0xffffffff)
1519                 return FILTER_STRAY;
1520
1521         /* Definitely not our interrupt.  */
1522         if (reg_icr == 0x0)
1523                 return FILTER_STRAY;
1524
1525         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1526                 return FILTER_STRAY;
1527
1528         /*
1529          * Mask interrupts until the taskqueue is finished running.  This is
1530          * cheap, just assume that it is needed.  This also works around the
1531          * MSI message reordering errata on certain systems.
1532          */
1533         igb_disable_intr(adapter);
1534         taskqueue_enqueue(que->tq, &que->que_task);
1535
1536         /* Link status change */
1537         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1538                 taskqueue_enqueue(que->tq, &adapter->link_task);
1539
1540         if (reg_icr & E1000_ICR_RXO)
1541                 adapter->rx_overruns++;
1542         return FILTER_HANDLED;
1543 }
1544
1545 #ifdef DEVICE_POLLING
1546 #if __FreeBSD_version >= 800000
1547 #define POLL_RETURN_COUNT(a) (a)
1548 static int
1549 #else
1550 #define POLL_RETURN_COUNT(a)
1551 static void
1552 #endif
1553 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1554 {
1555         struct adapter          *adapter = ifp->if_softc;
1556         struct igb_queue        *que;
1557         struct tx_ring          *txr;
1558         u32                     reg_icr, rx_done = 0;
1559         u32                     loop = IGB_MAX_LOOP;
1560         bool                    more;
1561
1562         IGB_CORE_LOCK(adapter);
1563         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1564                 IGB_CORE_UNLOCK(adapter);
1565                 return POLL_RETURN_COUNT(rx_done);
1566         }
1567
1568         if (cmd == POLL_AND_CHECK_STATUS) {
1569                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1570                 /* Link status change */
1571                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1572                         igb_handle_link_locked(adapter);
1573
1574                 if (reg_icr & E1000_ICR_RXO)
1575                         adapter->rx_overruns++;
1576         }
1577         IGB_CORE_UNLOCK(adapter);
1578
1579         for (int i = 0; i < adapter->num_queues; i++) {
1580                 que = &adapter->queues[i];
1581                 txr = que->txr;
1582
1583                 igb_rxeof(que, count, &rx_done);
1584
1585                 IGB_TX_LOCK(txr);
1586                 do {
1587                         more = igb_txeof(txr);
1588                 } while (loop-- && more);
1589 #ifndef IGB_LEGACY_TX
1590                 if (!drbr_empty(ifp, txr->br))
1591                         igb_mq_start_locked(ifp, txr);
1592 #else
1593                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1594                         igb_start_locked(txr, ifp);
1595 #endif
1596                 IGB_TX_UNLOCK(txr);
1597         }
1598
1599         return POLL_RETURN_COUNT(rx_done);
1600 }
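     /*
     ** Note (illustrative): igb_poll() is registered with the polling
     ** framework via ether_poll_register(igb_poll, ifp) when
     ** IFCAP_POLLING is switched on from the ioctl path.
     */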
1601 #endif /* DEVICE_POLLING */
1602
1603 /*********************************************************************
1604  *
1605  *  MSIX Que Interrupt Service routine
1606  *
1607  **********************************************************************/
1608 static void
1609 igb_msix_que(void *arg)
1610 {
1611         struct igb_queue *que = arg;
1612         struct adapter *adapter = que->adapter;
1613         struct ifnet   *ifp = adapter->ifp;
1614         struct tx_ring *txr = que->txr;
1615         struct rx_ring *rxr = que->rxr;
1616         u32             newitr = 0;
1617         bool            more_rx;
1618
1619         /* Ignore spurious interrupts */
1620         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1621                 return;
1622
1623         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1624         ++que->irqs;
1625
1626         IGB_TX_LOCK(txr);
1627         igb_txeof(txr);
1628 #ifndef IGB_LEGACY_TX
1629         /* Process the stack queue only if not depleted */
1630         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1631             !drbr_empty(ifp, txr->br))
1632                 igb_mq_start_locked(ifp, txr);
1633 #else
1634         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1635                 igb_start_locked(txr, ifp);
1636 #endif
1637         IGB_TX_UNLOCK(txr);
1638
1639         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1640
1641         if (adapter->enable_aim == FALSE)
1642                 goto no_calc;
1643         /*
1644         ** Do Adaptive Interrupt Moderation:
1645         **  - Write out last calculated setting
1646         **  - Calculate based on average size over
1647         **    the last interval.
1648         */
1649         if (que->eitr_setting)
1650                 E1000_WRITE_REG(&adapter->hw,
1651                     E1000_EITR(que->msix), que->eitr_setting);
1652  
1653         que->eitr_setting = 0;
1654
1655         /* Idle, do nothing */
1656         if ((txr->bytes == 0) && (rxr->bytes == 0))
1657                 goto no_calc;
1658                                 
1659         /* Use half the default if link is sub-gigabit */
1660         if (adapter->link_speed != 1000)
1661                 newitr = IGB_DEFAULT_ITR / 2;
1662         else {
1663                 if ((txr->bytes) && (txr->packets))
1664                         newitr = txr->bytes/txr->packets;
1665                 if ((rxr->bytes) && (rxr->packets))
1666                         newitr = max(newitr,
1667                             (rxr->bytes / rxr->packets));
1668                 newitr += 24; /* account for hardware frame, crc */
1669                 /* set an upper boundary */
1670                 newitr = min(newitr, 3000);
1671                 /* Be nice to the mid range */
1672                 if ((newitr > 300) && (newitr < 1200))
1673                         newitr = (newitr / 3);
1674                 else
1675                         newitr = (newitr / 2);
1676         }
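             /*
             ** Worked example: with 1500-byte average frames,
             ** newitr = 1500 + 24 = 1524; that is above the 300-1200
             ** mid range, so it is halved to 762 and then masked
             ** below to 760.
             */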
1677         newitr &= 0x7FFC;  /* Mask invalid bits */
1678         if (adapter->hw.mac.type == e1000_82575)
1679                 newitr |= newitr << 16;
1680         else
1681                 newitr |= E1000_EITR_CNT_IGNR;
1682                  
1683         /* save for next interrupt */
1684         que->eitr_setting = newitr;
1685
1686         /* Reset state */
1687         txr->bytes = 0;
1688         txr->packets = 0;
1689         rxr->bytes = 0;
1690         rxr->packets = 0;
1691
1692 no_calc:
1693         /* Schedule a clean task if needed */
1694         if (more_rx)
1695                 taskqueue_enqueue(que->tq, &que->que_task);
1696         else
1697                 /* Reenable this interrupt */
1698                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1699         return;
1700 }
1701
1702
1703 /*********************************************************************
1704  *
1705  *  MSIX Link Interrupt Service routine
1706  *
1707  **********************************************************************/
1708
1709 static void
1710 igb_msix_link(void *arg)
1711 {
1712         struct adapter  *adapter = arg;
1713         u32             icr;
1714
1715         ++adapter->link_irq;
1716         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1717         if (!(icr & E1000_ICR_LSC))
1718                 goto spurious;
1719         igb_handle_link(adapter, 0);
1720
1721 spurious:
1722         /* Rearm */
1723         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1724         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1725         return;
1726 }
1727
1728
1729 /*********************************************************************
1730  *
1731  *  Media Ioctl callback
1732  *
1733  *  This routine is called whenever the user queries the status of
1734  *  the interface using ifconfig.
1735  *
1736  **********************************************************************/
1737 static void
1738 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1739 {
1740         struct adapter *adapter = ifp->if_softc;
1741
1742         INIT_DEBUGOUT("igb_media_status: begin");
1743
1744         IGB_CORE_LOCK(adapter);
1745         igb_update_link_status(adapter);
1746
1747         ifmr->ifm_status = IFM_AVALID;
1748         ifmr->ifm_active = IFM_ETHER;
1749
1750         if (!adapter->link_active) {
1751                 IGB_CORE_UNLOCK(adapter);
1752                 return;
1753         }
1754
1755         ifmr->ifm_status |= IFM_ACTIVE;
1756
1757         switch (adapter->link_speed) {
1758         case 10:
1759                 ifmr->ifm_active |= IFM_10_T;
1760                 break;
1761         case 100:
1762                 /*
1763                 ** Support for 100Mb SFP - these are fiber,
1764                 ** but the media type reports as serdes
1765                 */
1766                 if (adapter->hw.phy.media_type ==
1767                     e1000_media_type_internal_serdes)
1768                         ifmr->ifm_active |= IFM_100_FX;
1769                 else
1770                         ifmr->ifm_active |= IFM_100_TX;
1771                 break;
1772         case 1000:
1773                 ifmr->ifm_active |= IFM_1000_T;
1774                 break;
1775         case 2500:
1776                 ifmr->ifm_active |= IFM_2500_SX;
1777                 break;
1778         }
1779
1780         if (adapter->link_duplex == FULL_DUPLEX)
1781                 ifmr->ifm_active |= IFM_FDX;
1782         else
1783                 ifmr->ifm_active |= IFM_HDX;
1784
1785         IGB_CORE_UNLOCK(adapter);
1786 }
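     /*
     ** Example: the flags set above are what ifconfig reports, e.g.
     ** "media: Ethernet autoselect (1000baseT <full-duplex>)".
     */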
1787
1788 /*********************************************************************
1789  *
1790  *  Media Ioctl callback
1791  *
1792  *  This routine is called when the user changes speed/duplex using
1793  *  the media/mediaopt options of ifconfig.
1794  *
1795  **********************************************************************/
1796 static int
1797 igb_media_change(struct ifnet *ifp)
1798 {
1799         struct adapter *adapter = ifp->if_softc;
1800         struct ifmedia  *ifm = &adapter->media;
1801
1802         INIT_DEBUGOUT("igb_media_change: begin");
1803
1804         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1805                 return (EINVAL);
1806
1807         IGB_CORE_LOCK(adapter);
1808         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1809         case IFM_AUTO:
1810                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1811                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1812                 break;
1813         case IFM_1000_LX:
1814         case IFM_1000_SX:
1815         case IFM_1000_T:
1816                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1817                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1818                 break;
1819         case IFM_100_TX:
1820                 adapter->hw.mac.autoneg = FALSE;
1821                 adapter->hw.phy.autoneg_advertised = 0;
1822                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1823                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1824                 else
1825                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1826                 break;
1827         case IFM_10_T:
1828                 adapter->hw.mac.autoneg = FALSE;
1829                 adapter->hw.phy.autoneg_advertised = 0;
1830                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1831                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1832                 else
1833                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1834                 break;
1835         default:
1836                 device_printf(adapter->dev, "Unsupported media type\n");
1837         }
1838
1839         igb_init_locked(adapter);
1840         IGB_CORE_UNLOCK(adapter);
1841
1842         return (0);
1843 }
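     /*
     ** Example: "ifconfig igb0 media 100baseTX mediaopt full-duplex"
     ** lands in the IFM_100_TX case above and forces 100Mb full
     ** duplex with autonegotiation disabled.
     */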
1844
1845
1846 /*********************************************************************
1847  *
1848  *  This routine maps the mbufs to Advanced TX descriptors.
1849  *  
1850  **********************************************************************/
1851 static int
1852 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1853 {
1854         struct adapter  *adapter = txr->adapter;
1855         u32             olinfo_status = 0, cmd_type_len;
1856         int             i, j, error, nsegs;
1857         int             first;
1858         bool            remap = TRUE;
1859         struct mbuf     *m_head;
1860         bus_dma_segment_t segs[IGB_MAX_SCATTER];
1861         bus_dmamap_t    map;
1862         struct igb_tx_buf *txbuf;
1863         union e1000_adv_tx_desc *txd = NULL;
1864
1865         m_head = *m_headp;
1866
1867         /* Basic descriptor defines */
1868         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1869             E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1870
1871         if (m_head->m_flags & M_VLANTAG)
1872                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1873
1874         /*
1875          * It is important to capture the first descriptor
1876          * used, because its buffer will record the EOP
1877          * descriptor we tell the hardware to report back
1878          */
1879         first = txr->next_avail_desc;
1880         txbuf = &txr->tx_buffers[first];
1881         map = txbuf->map;
1882
1883         /*
1884          * Map the packet for DMA.
1885          */
1886 retry:
1887         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1888             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1889
1890         if (__predict_false(error)) {
1891                 struct mbuf *m;
1892
1893                 switch (error) {
1894                 case EFBIG:
1895                         /* Try it again? - one try */
1896                         if (remap == TRUE) {
1897                                 remap = FALSE;
1898                                 m = m_defrag(*m_headp, M_NOWAIT);
1899                                 if (m == NULL) {
1900                                         adapter->mbuf_defrag_failed++;
1901                                         m_freem(*m_headp);
1902                                         *m_headp = NULL;
1903                                         return (ENOBUFS);
1904                                 }
1905                                 *m_headp = m;
1906                                 goto retry;
1907                         } else
1908                                 return (error);
1909                 case ENOMEM:
1910                         txr->no_tx_dma_setup++;
1911                         return (error);
1912                 default:
1913                         txr->no_tx_dma_setup++;
1914                         m_freem(*m_headp);
1915                         *m_headp = NULL;
1916                         return (error);
1917                 }
1918         }
1919
1920         /* Make certain there are enough descriptors */
1921         if (nsegs > txr->tx_avail - 2) {
1922                 txr->no_desc_avail++;
1923                 bus_dmamap_unload(txr->txtag, map);
1924                 return (ENOBUFS);
1925         }
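             /*
             ** The "- 2" keeps room for the offload context descriptor
             ** consumed by igb_tx_ctx_setup() below, plus one spare
             ** slot (an inference from the surrounding code).
             */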
1926         m_head = *m_headp;
1927
1928         /*
1929         ** Set up the appropriate offload context
1930         ** this will consume the first descriptor
1931         */
1932         error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1933         if (__predict_false(error)) {
1934                 m_freem(*m_headp);
1935                 *m_headp = NULL;
1936                 return (error);
1937         }
1938
1939         /* 82575 needs the queue index added */
1940         if (adapter->hw.mac.type == e1000_82575)
1941                 olinfo_status |= txr->me << 4;
1942
1943         i = txr->next_avail_desc;
1944         for (j = 0; j < nsegs; j++) {
1945                 bus_size_t seglen;
1946                 bus_addr_t segaddr;
1947
1948                 txbuf = &txr->tx_buffers[i];
1949                 txd = &txr->tx_base[i];
1950                 seglen = segs[j].ds_len;
1951                 segaddr = htole64(segs[j].ds_addr);
1952
1953                 txd->read.buffer_addr = segaddr;
1954                 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1955                     cmd_type_len | seglen);
1956                 txd->read.olinfo_status = htole32(olinfo_status);
1957
1958                 if (++i == txr->num_desc)
1959                         i = 0;
1960         }
1961
1962         txd->read.cmd_type_len |=
1963             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1964         txr->tx_avail -= nsegs;
1965         txr->next_avail_desc = i;
1966
1967         txbuf->m_head = m_head;
1968         /*
1969         ** Here we swap the map so the last descriptor,
1970         ** which gets the completion interrupt, has the
1971         ** real map, and the first descriptor gets the
1972         ** unused map from this descriptor.
1973         */
1974         txr->tx_buffers[first].map = txbuf->map;
1975         txbuf->map = map;
1976         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1977
1978         /* Set the EOP descriptor that will be marked done */
1979         txbuf = &txr->tx_buffers[first];
1980         txbuf->eop = txd;
1981
1982         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1983             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1984         /*
1985          * Advance the Transmit Descriptor Tail (TDT); this tells the
1986          * hardware that this frame is available to transmit.
1987          */
1988         ++txr->total_packets;
1989         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1990
1991         return (0);
1992 }
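
     /*
     ** Usage sketch (illustrative, not a verbatim call site): callers
     ** hold IGB_TX_LOCK(txr) and pass the mbuf by reference, since
     ** igb_xmit() may defrag or free it:
     **
     **      struct mbuf *next = drbr_peek(ifp, txr->br);
     **      if ((err = igb_xmit(txr, &next)) != 0) {
     **              if (next == NULL)
     **                      drbr_advance(ifp, txr->br);
     **              else
     **                      drbr_putback(ifp, txr->br, next);
     **              break;
     **      }
     **      drbr_advance(ifp, txr->br);
     */
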
1993 static void
1994 igb_set_promisc(struct adapter *adapter)
1995 {
1996         struct ifnet    *ifp = adapter->ifp;
1997         struct e1000_hw *hw = &adapter->hw;
1998         u32             reg;
1999
2000         if (adapter->vf_ifp) {
2001                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2002                 return;
2003         }
2004
2005         reg = E1000_READ_REG(hw, E1000_RCTL);
2006         if (ifp->if_flags & IFF_PROMISC) {
2007                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2008                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2009         } else if (ifp->if_flags & IFF_ALLMULTI) {
2010                 reg |= E1000_RCTL_MPE;
2011                 reg &= ~E1000_RCTL_UPE;
2012                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2013         }
2014 }
2015
2016 static void
2017 igb_disable_promisc(struct adapter *adapter)
2018 {
2019         struct e1000_hw *hw = &adapter->hw;
2020         struct ifnet    *ifp = adapter->ifp;
2021         u32             reg;
2022         int             mcnt = 0;
2023
2024         if (adapter->vf_ifp) {
2025                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2026                 return;
2027         }
2028         reg = E1000_READ_REG(hw, E1000_RCTL);
2029         reg &= ~E1000_RCTL_UPE;
2030         if (ifp->if_flags & IFF_ALLMULTI)
2031                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2032         else {
2033                 struct  ifmultiaddr *ifma;
2034 #if __FreeBSD_version < 800000
2035                 IF_ADDR_LOCK(ifp);
2036 #else   
2037                 if_maddr_rlock(ifp);
2038 #endif
2039                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2040                         if (ifma->ifma_addr->sa_family != AF_LINK)
2041                                 continue;
2042                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2043                                 break;
2044                         mcnt++;
2045                 }
2046 #if __FreeBSD_version < 800000
2047                 IF_ADDR_UNLOCK(ifp);
2048 #else
2049                 if_maddr_runlock(ifp);
2050 #endif
2051         }
2052         /* Don't disable multicast promiscuous if we are at the group limit */
2053         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2054                 reg &= ~E1000_RCTL_MPE;
2055         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2056 }
2057
2058
2059 /*********************************************************************
2060  *  Multicast Update
2061  *
2062  *  This routine is called whenever multicast address list is updated.
2063  *
2064  **********************************************************************/
2065
2066 static void
2067 igb_set_multi(struct adapter *adapter)
2068 {
2069         struct ifnet    *ifp = adapter->ifp;
2070         struct ifmultiaddr *ifma;
2071         u32 reg_rctl = 0;
2072         u8  *mta;
2073
2074         int mcnt = 0;
2075
2076         IOCTL_DEBUGOUT("igb_set_multi: begin");
2077
2078         mta = adapter->mta;
2079         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2080             MAX_NUM_MULTICAST_ADDRESSES);
2081
2082 #if __FreeBSD_version < 800000
2083         IF_ADDR_LOCK(ifp);
2084 #else
2085         if_maddr_rlock(ifp);
2086 #endif
2087         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2088                 if (ifma->ifma_addr->sa_family != AF_LINK)
2089                         continue;
2090
2091                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2092                         break;
2093
2094                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2095                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2096                 mcnt++;
2097         }
2098 #if __FreeBSD_version < 800000
2099         IF_ADDR_UNLOCK(ifp);
2100 #else
2101         if_maddr_runlock(ifp);
2102 #endif
2103
2104         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2105                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2106                 reg_rctl |= E1000_RCTL_MPE;
2107                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2108         } else
2109                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2110 }
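     /*
     ** Note: once the group count reaches MAX_NUM_MULTICAST_ADDRESSES
     ** (128 on these MACs, per the shared e1000 code), the device is
     ** switched to multicast-promiscuous rather than exact filtering.
     */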
2111
2112
2113 /*********************************************************************
2114  *  Timer routine:
2115  *      This routine checks for link status,
2116  *      updates statistics, and does the watchdog.
2117  *
2118  **********************************************************************/
2119
2120 static void
2121 igb_local_timer(void *arg)
2122 {
2123         struct adapter          *adapter = arg;
2124         device_t                dev = adapter->dev;
2125         struct ifnet            *ifp = adapter->ifp;
2126         struct tx_ring          *txr = adapter->tx_rings;
2127         struct igb_queue        *que = adapter->queues;
2128         int                     hung = 0, busy = 0;
2129
2130
2131         IGB_CORE_LOCK_ASSERT(adapter);
2132
2133         igb_update_link_status(adapter);
2134         igb_update_stats_counters(adapter);
2135
2136         /*
2137         ** Check the status of the TX queues:
2138         **      - central locked handling of OACTIVE
2139         **      - watchdog only if all queues show hung
2140         */
2141         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2142                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2143                     (adapter->pause_frames == 0))
2144                         ++hung;
2145                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2146                         ++busy;
2147                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2148                         taskqueue_enqueue(que->tq, &que->que_task);
2149         }
2150         if (hung == adapter->num_queues)
2151                 goto timeout;
2152         if (busy == adapter->num_queues)
2153                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2154         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2155             (busy < adapter->num_queues))
2156                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2157
2158         adapter->pause_frames = 0;
2159         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2160 #ifndef DEVICE_POLLING
2161         /* Schedule all queue interrupts - deadlock protection */
2162         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2163 #endif
2164         return;
2165
2166 timeout:
2167         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2168         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2169             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2170             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2171         device_printf(dev, "TX(%d) desc avail = %d, "
2172             "Next TX to Clean = %d\n",
2173             txr->me, txr->tx_avail, txr->next_to_clean);
2174         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2175         adapter->watchdog_events++;
2176         igb_init_locked(adapter);
2177 }
2178
2179 static void
2180 igb_update_link_status(struct adapter *adapter)
2181 {
2182         struct e1000_hw         *hw = &adapter->hw;
2183         struct e1000_fc_info    *fc = &hw->fc;
2184         struct ifnet            *ifp = adapter->ifp;
2185         device_t                dev = adapter->dev;
2186         struct tx_ring          *txr = adapter->tx_rings;
2187         u32                     link_check, thstat, ctrl;
2188         char                    *flowctl = NULL;
2189
2190         link_check = thstat = ctrl = 0;
2191
2192         /* Get the cached link value or read for real */
2193         switch (hw->phy.media_type) {
2194         case e1000_media_type_copper:
2195                 if (hw->mac.get_link_status) {
2196                         /* Do the work to read phy */
2197                         e1000_check_for_link(hw);
2198                         link_check = !hw->mac.get_link_status;
2199                 } else
2200                         link_check = TRUE;
2201                 break;
2202         case e1000_media_type_fiber:
2203                 e1000_check_for_link(hw);
2204                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2205                                  E1000_STATUS_LU);
2206                 break;
2207         case e1000_media_type_internal_serdes:
2208                 e1000_check_for_link(hw);
2209                 link_check = adapter->hw.mac.serdes_has_link;
2210                 break;
2211         /* VF device is type_unknown */
2212         case e1000_media_type_unknown:
2213                 e1000_check_for_link(hw);
2214                 link_check = !hw->mac.get_link_status;
2215                 /* Fall thru */
2216         default:
2217                 break;
2218         }
2219
2220         /* Check for thermal downshift or shutdown */
2221         if (hw->mac.type == e1000_i350) {
2222                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2223                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2224         }
2225
2226         /* Get the flow control for display */
2227         switch (fc->current_mode) {
2228         case e1000_fc_rx_pause:
2229                 flowctl = "RX";
2230                 break;  
2231         case e1000_fc_tx_pause:
2232                 flowctl = "TX";
2233                 break;  
2234         case e1000_fc_full:
2235                 flowctl = "Full";
2236                 break;  
2237         case e1000_fc_none:
2238         default:
2239                 flowctl = "None";
2240                 break;  
2241         }
2242
2243         /* Now we check if a transition has happened */
2244         if (link_check && (adapter->link_active == 0)) {
2245                 e1000_get_speed_and_duplex(&adapter->hw, 
2246                     &adapter->link_speed, &adapter->link_duplex);
2247                 if (bootverbose)
2248                         device_printf(dev, "Link is up %d Mbps %s,"
2249                             " Flow Control: %s\n",
2250                             adapter->link_speed,
2251                             ((adapter->link_duplex == FULL_DUPLEX) ?
2252                             "Full Duplex" : "Half Duplex"), flowctl);
2253                 adapter->link_active = 1;
2254                 ifp->if_baudrate = adapter->link_speed * 1000000;
2255                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2256                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2257                         device_printf(dev, "Link: thermal downshift\n");
2258                 /* Delay Link Up for Phy update */
2259                 if (((hw->mac.type == e1000_i210) ||
2260                     (hw->mac.type == e1000_i211)) &&
2261                     (hw->phy.id == I210_I_PHY_ID))
2262                         msec_delay(I210_LINK_DELAY);
2263                 /* Reset if the media type changed. */
2264                 if (hw->dev_spec._82575.media_changed) {
2265                         hw->dev_spec._82575.media_changed = false;
2266                         adapter->flags |= IGB_MEDIA_RESET;
2267                         igb_reset(adapter);
2268                 }       
2269                 /* This can sleep */
2270                 if_link_state_change(ifp, LINK_STATE_UP);
2271         } else if (!link_check && (adapter->link_active == 1)) {
2272                 ifp->if_baudrate = adapter->link_speed = 0;
2273                 adapter->link_duplex = 0;
2274                 if (bootverbose)
2275                         device_printf(dev, "Link is Down\n");
2276                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2277                     (thstat & E1000_THSTAT_PWR_DOWN))
2278                         device_printf(dev, "Link: thermal shutdown\n");
2279                 adapter->link_active = 0;
2280                 /* This can sleep */
2281                 if_link_state_change(ifp, LINK_STATE_DOWN);
2282                 /* Reset queue state */
2283                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2284                         txr->queue_status = IGB_QUEUE_IDLE;
2285         }
2286 }
2287
2288 /*********************************************************************
2289  *
2290  *  This routine disables all traffic on the adapter by issuing a
2291  *  global reset on the MAC and deallocates TX/RX buffers.
2292  *
2293  **********************************************************************/
2294
2295 static void
2296 igb_stop(void *arg)
2297 {
2298         struct adapter  *adapter = arg;
2299         struct ifnet    *ifp = adapter->ifp;
2300         struct tx_ring *txr = adapter->tx_rings;
2301
2302         IGB_CORE_LOCK_ASSERT(adapter);
2303
2304         INIT_DEBUGOUT("igb_stop: begin");
2305
2306         igb_disable_intr(adapter);
2307
2308         callout_stop(&adapter->timer);
2309
2310         /* Tell the stack that the interface is no longer active */
2311         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2312         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2313
2314         /* Disarm watchdog timer. */
2315         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2316                 IGB_TX_LOCK(txr);
2317                 txr->queue_status = IGB_QUEUE_IDLE;
2318                 IGB_TX_UNLOCK(txr);
2319         }
2320
2321         e1000_reset_hw(&adapter->hw);
2322         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2323
2324         e1000_led_off(&adapter->hw);
2325         e1000_cleanup_led(&adapter->hw);
2326 }
2327
2328
2329 /*********************************************************************
2330  *
2331  *  Determine hardware revision.
2332  *
2333  **********************************************************************/
2334 static void
2335 igb_identify_hardware(struct adapter *adapter)
2336 {
2337         device_t dev = adapter->dev;
2338
2339         /* Make sure our PCI config space has the necessary stuff set */
2340         pci_enable_busmaster(dev);
2341         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2342
2343         /* Save off the information about this board */
2344         adapter->hw.vendor_id = pci_get_vendor(dev);
2345         adapter->hw.device_id = pci_get_device(dev);
2346         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2347         adapter->hw.subsystem_vendor_id =
2348             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2349         adapter->hw.subsystem_device_id =
2350             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2351
2352         /* Set MAC type early for PCI setup */
2353         e1000_set_mac_type(&adapter->hw);
2354
2355         /* Are we a VF device? */
2356         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2357             (adapter->hw.mac.type == e1000_vfadapt_i350))
2358                 adapter->vf_ifp = 1;
2359         else
2360                 adapter->vf_ifp = 0;
2361 }
2362
2363 static int
2364 igb_allocate_pci_resources(struct adapter *adapter)
2365 {
2366         device_t        dev = adapter->dev;
2367         int             rid;
2368
2369         rid = PCIR_BAR(0);
2370         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2371             &rid, RF_ACTIVE);
2372         if (adapter->pci_mem == NULL) {
2373                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2374                 return (ENXIO);
2375         }
2376         adapter->osdep.mem_bus_space_tag =
2377             rman_get_bustag(adapter->pci_mem);
2378         adapter->osdep.mem_bus_space_handle =
2379             rman_get_bushandle(adapter->pci_mem);
2380         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
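             /*
             ** Note: hw_addr deliberately stores the address of the bus
             ** space handle, not a mapped virtual address; the osdep
             ** register access macros rely on this driver convention.
             */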
2381
2382         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2383
2384         /* This will setup either MSI/X or MSI */
2385         adapter->msix = igb_setup_msix(adapter);
2386         adapter->hw.back = &adapter->osdep;
2387
2388         return (0);
2389 }
2390
2391 /*********************************************************************
2392  *
2393  *  Setup the Legacy or MSI Interrupt handler
2394  *
2395  **********************************************************************/
2396 static int
2397 igb_allocate_legacy(struct adapter *adapter)
2398 {
2399         device_t                dev = adapter->dev;
2400         struct igb_queue        *que = adapter->queues;
2401 #ifndef IGB_LEGACY_TX
2402         struct tx_ring          *txr = adapter->tx_rings;
2403 #endif
2404         int                     error, rid = 0;
2405
2406         /* Turn off all interrupts */
2407         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2408
2409         /* MSI RID is 1 */
2410         if (adapter->msix == 1)
2411                 rid = 1;
2412
2413         /* We allocate a single interrupt resource */
2414         adapter->res = bus_alloc_resource_any(dev,
2415             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2416         if (adapter->res == NULL) {
2417                 device_printf(dev, "Unable to allocate bus resource: "
2418                     "interrupt\n");
2419                 return (ENXIO);
2420         }
2421
2422 #ifndef IGB_LEGACY_TX
2423         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2424 #endif
2425
2426         /*
2427          * Try allocating a fast interrupt and the associated deferred
2428          * processing contexts.
2429          */
2430         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2431         /* Make tasklet for deferred link handling */
2432         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2433         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2434             taskqueue_thread_enqueue, &que->tq);
2435         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2436             device_get_nameunit(adapter->dev));
2437         if ((error = bus_setup_intr(dev, adapter->res,
2438             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2439             adapter, &adapter->tag)) != 0) {
2440                 device_printf(dev, "Failed to register fast interrupt "
2441                             "handler: %d\n", error);
2442                 taskqueue_free(que->tq);
2443                 que->tq = NULL;
2444                 return (error);
2445         }
2446
2447         return (0);
2448 }
2449
2450
2451 /*********************************************************************
2452  *
2453  *  Setup the MSIX Queue Interrupt handlers: 
2454  *
2455  **********************************************************************/
2456 static int
2457 igb_allocate_msix(struct adapter *adapter)
2458 {
2459         device_t                dev = adapter->dev;
2460         struct igb_queue        *que = adapter->queues;
2461         int                     error, rid, vector = 0;
2462         int                     cpu_id = 0;
2463
2464         /* Be sure to start with all interrupts disabled */
2465         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2466         E1000_WRITE_FLUSH(&adapter->hw);
2467
2468 #ifdef  RSS
2469         /*
2470          * If we're doing RSS, the number of queues needs to
2471          * match the number of RSS buckets that are configured.
2472          *
2473          * + If there's more queues than RSS buckets, we'll end
2474          *   up with queues that get no traffic.
2475          *
2476          * + If there's more RSS buckets than queues, we'll end
2477          *   up having multiple RSS buckets map to the same queue,
2478          *   so there'll be some contention.
2479          */
2480         if (adapter->num_queues != rss_getnumbuckets()) {
2481                 device_printf(dev,
2482                     "%s: number of queues (%d) != number of RSS buckets (%d)"
2483                     "; performance will be impacted.\n",
2484                     __func__,
2485                     adapter->num_queues,
2486                     rss_getnumbuckets());
2487         }
2488 #endif
2489
2490         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2491                 rid = vector + 1;
2492                 que->res = bus_alloc_resource_any(dev,
2493                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2494                 if (que->res == NULL) {
2495                         device_printf(dev,
2496                             "Unable to allocate bus resource: "
2497                             "MSIX Queue Interrupt\n");
2498                         return (ENXIO);
2499                 }
2500                 error = bus_setup_intr(dev, que->res,
2501                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2502                     igb_msix_que, que, &que->tag);
2503                 if (error) {
2504                         que->res = NULL;
2505                         device_printf(dev, "Failed to register Queue handler\n");
2506                         return (error);
2507                 }
2508 #if __FreeBSD_version >= 800504
2509                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2510 #endif
2511                 que->msix = vector;
2512                 if (adapter->hw.mac.type == e1000_82575)
2513                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2514                 else
2515                         que->eims = 1 << vector;
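                     /*
                     ** Example: vector 1 on an 82576 gets eims = 0x2,
                     ** while on an 82575 each queue uses dedicated
                     ** TX/RX cause bits, extended later in
                     ** igb_configure_queues().
                     */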
2516
2517 #ifdef  RSS
2518                 /*
2519                  * The queue ID is used as the RSS layer bucket ID.
2520                  * We look up the queue ID -> RSS CPU ID and select
2521                  * that.
2522                  */
2523                 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2524 #else
2525                 /*
2526                  * Bind the msix vector, and thus the
2527                  * rings to the corresponding cpu.
2528                  *
2529                  * This just happens to match the default RSS round-robin
2530                  * bucket -> queue -> CPU allocation.
2531                  */
2532                 if (adapter->num_queues > 1) {
2533                         if (igb_last_bind_cpu < 0)
2534                                 igb_last_bind_cpu = CPU_FIRST();
2535                         cpu_id = igb_last_bind_cpu;
2536                 }
2537 #endif
2538
2539                 if (adapter->num_queues > 1) {
2540                         bus_bind_intr(dev, que->res, cpu_id);
2541 #ifdef  RSS
2542                         device_printf(dev,
2543                                 "Bound queue %d to RSS bucket %d\n",
2544                                 i, cpu_id);
2545 #else
2546                         device_printf(dev,
2547                                 "Bound queue %d to cpu %d\n",
2548                                 i, cpu_id);
2549 #endif
2550                 }
2551
2552 #ifndef IGB_LEGACY_TX
2553                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2554                     que->txr);
2555 #endif
2556                 /* Make tasklet for deferred handling */
2557                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2558                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2559                     taskqueue_thread_enqueue, &que->tq);
2560                 if (adapter->num_queues > 1) {
2561                         /*
2562                          * Only pin the taskqueue thread to a CPU if
2563                          * RSS is in use.
2564                          *
2565                          * This again just happens to match the default RSS
2566                          * round-robin bucket -> queue -> CPU allocation.
2567                          */
2568 #ifdef  RSS
2569                         taskqueue_start_threads_pinned(&que->tq, 1, PI_NET,
2570                             cpu_id,
2571                             "%s que (bucket %d)",
2572                             device_get_nameunit(adapter->dev),
2573                             cpu_id);
2574 #else
2575                         taskqueue_start_threads(&que->tq, 1, PI_NET,
2576                             "%s que (qid %d)",
2577                             device_get_nameunit(adapter->dev),
2578                             cpu_id);
2579 #endif
2580                 } else {
2581                         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2582                             device_get_nameunit(adapter->dev));
2583                 }
2584
2585                 /* Finally update the last bound CPU id */
2586                 if (adapter->num_queues > 1)
2587                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2588         }
2589
2590         /* And Link */
2591         rid = vector + 1;
2592         adapter->res = bus_alloc_resource_any(dev,
2593             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2594         if (adapter->res == NULL) {
2595                 device_printf(dev,
2596                     "Unable to allocate bus resource: "
2597                     "MSIX Link Interrupt\n");
2598                 return (ENXIO);
2599         }
2600         if ((error = bus_setup_intr(dev, adapter->res,
2601             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2602             igb_msix_link, adapter, &adapter->tag)) != 0) {
2603                 device_printf(dev, "Failed to register Link handler\n");
2604                 return (error);
2605         }
2606 #if __FreeBSD_version >= 800504
2607         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2608 #endif
2609         adapter->linkvec = vector;
2610
2611         return (0);
2612 }
2613
2614
2615 static void
2616 igb_configure_queues(struct adapter *adapter)
2617 {
2618         struct  e1000_hw        *hw = &adapter->hw;
2619         struct  igb_queue       *que;
2620         u32                     tmp, ivar = 0, newitr = 0;
2621
2622         /* First turn on RSS capability */
2623         if (adapter->hw.mac.type != e1000_82575)
2624                 E1000_WRITE_REG(hw, E1000_GPIE,
2625                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2626                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2627
2628         /* Turn on MSIX */
2629         switch (adapter->hw.mac.type) {
2630         case e1000_82580:
2631         case e1000_i350:
2632         case e1000_i354:
2633         case e1000_i210:
2634         case e1000_i211:
2635         case e1000_vfadapt:
2636         case e1000_vfadapt_i350:
2637                 /* RX entries */
2638                 for (int i = 0; i < adapter->num_queues; i++) {
2639                         u32 index = i >> 1;
2640                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2641                         que = &adapter->queues[i];
2642                         if (i & 1) {
2643                                 ivar &= 0xFF00FFFF;
2644                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2645                         } else {
2646                                 ivar &= 0xFFFFFF00;
2647                                 ivar |= que->msix | E1000_IVAR_VALID;
2648                         }
2649                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2650                 }
2651                 /* TX entries */
2652                 for (int i = 0; i < adapter->num_queues; i++) {
2653                         u32 index = i >> 1;
2654                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2655                         que = &adapter->queues[i];
2656                         if (i & 1) {
2657                                 ivar &= 0x00FFFFFF;
2658                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2659                         } else {
2660                                 ivar &= 0xFFFF00FF;
2661                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2662                         }
2663                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2664                         adapter->que_mask |= que->eims;
2665                 }
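                     /*
                     ** Worked example (two queues): queue 0 maps to
                     ** IVAR0 byte 0 (RX) and byte 1 (TX); queue 1 maps
                     ** to byte 2 (RX) and byte 3 (TX) of the same
                     ** 32-bit IVAR register.
                     */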
2666
2667                 /* And for the link interrupt */
2668                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2669                 adapter->link_mask = 1 << adapter->linkvec;
2670                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2671                 break;
2672         case e1000_82576:
2673                 /* RX entries */
2674                 for (int i = 0; i < adapter->num_queues; i++) {
2675                         u32 index = i & 0x7; /* Each IVAR has two entries */
2676                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2677                         que = &adapter->queues[i];
2678                         if (i < 8) {
2679                                 ivar &= 0xFFFFFF00;
2680                                 ivar |= que->msix | E1000_IVAR_VALID;
2681                         } else {
2682                                 ivar &= 0xFF00FFFF;
2683                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2684                         }
2685                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2686                         adapter->que_mask |= que->eims;
2687                 }
2688                 /* TX entries */
2689                 for (int i = 0; i < adapter->num_queues; i++) {
2690                         u32 index = i & 0x7; /* Each IVAR has two entries */
2691                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2692                         que = &adapter->queues[i];
2693                         if (i < 8) {
2694                                 ivar &= 0xFFFF00FF;
2695                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2696                         } else {
2697                                 ivar &= 0x00FFFFFF;
2698                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2699                         }
2700                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2701                         adapter->que_mask |= que->eims;
2702                 }
2703
2704                 /* And for the link interrupt */
2705                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2706                 adapter->link_mask = 1 << adapter->linkvec;
2707                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2708                 break;
2709
2710         case e1000_82575:
2711                 /* Enable MSI-X support */
2712                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2713                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2714                 /* Auto-Mask interrupts upon ICR read. */
2715                 tmp |= E1000_CTRL_EXT_EIAME;
2716                 tmp |= E1000_CTRL_EXT_IRCA;
2717                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2718
2719                 /* Queues */
2720                 for (int i = 0; i < adapter->num_queues; i++) {
2721                         que = &adapter->queues[i];
2722                         tmp = E1000_EICR_RX_QUEUE0 << i;
2723                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2724                         que->eims = tmp;
2725                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2726                             i, que->eims);
2727                         adapter->que_mask |= que->eims;
2728                 }
2729
2730                 /* Link */
2731                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2732                     E1000_EIMS_OTHER);
2733                 adapter->link_mask |= E1000_EIMS_OTHER;
                     break;
2734         default:
2735                 break;
2736         }
2737
2738         /* Set the starting interrupt rate */
2739         if (igb_max_interrupt_rate > 0)
2740                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
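             /*
             ** Example: with the default igb_max_interrupt_rate of
             ** 8000 (a tunable), newitr = 4000000 / 8000 = 500; the
             ** divisor implies the interval field counts in 0.25 usec
             ** units, so 500 is roughly 125 usec between interrupts.
             */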
2741
2742         if (hw->mac.type == e1000_82575)
2743                 newitr |= newitr << 16;
2744         else
2745                 newitr |= E1000_EITR_CNT_IGNR;
2746
2747         for (int i = 0; i < adapter->num_queues; i++) {
2748                 que = &adapter->queues[i];
2749                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2750         }
2751
2752         return;
2753 }
2754
2755
2756 static void
2757 igb_free_pci_resources(struct adapter *adapter)
2758 {
2759         struct          igb_queue *que = adapter->queues;
2760         device_t        dev = adapter->dev;
2761         int             rid;
2762
2763         /*
2764         ** There is a slight possibility of a failure mode
2765         ** in attach that will result in entering this function
2766         ** before interrupt resources have been initialized, and
2767         ** in that case we do not want to execute the loops below.
2768         ** We can detect this reliably by the state of the adapter
2769         ** res pointer.
2770         */
2771         if (adapter->res == NULL)
2772                 goto mem;
2773
2774         /*
2775          * First release all the interrupt resources:
2776          */
2777         for (int i = 0; i < adapter->num_queues; i++, que++) {
2778                 rid = que->msix + 1;
2779                 if (que->tag != NULL) {
2780                         bus_teardown_intr(dev, que->res, que->tag);
2781                         que->tag = NULL;
2782                 }
2783                 if (que->res != NULL)
2784                         bus_release_resource(dev,
2785                             SYS_RES_IRQ, rid, que->res);
2786         }
2787
2788         /* Clean the Legacy or Link interrupt last */
2789         if (adapter->linkvec) /* we are doing MSIX */
2790                 rid = adapter->linkvec + 1;
2791         else
2792                 rid = (adapter->msix != 0) ? 1 : 0;
2793
2794         que = adapter->queues;
2795         if (adapter->tag != NULL) {
2796                 taskqueue_drain(que->tq, &adapter->link_task);
2797                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2798                 adapter->tag = NULL;
2799         }
2800         if (adapter->res != NULL)
2801                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2802
2803         for (int i = 0; i < adapter->num_queues; i++, que++) {
2804                 if (que->tq != NULL) {
2805 #ifndef IGB_LEGACY_TX
2806                         taskqueue_drain(que->tq, &que->txr->txq_task);
2807 #endif
2808                         taskqueue_drain(que->tq, &que->que_task);
2809                         taskqueue_free(que->tq);
2810                 }
2811         }
2812 mem:
2813         if (adapter->msix)
2814                 pci_release_msi(dev);
2815
2816         if (adapter->msix_mem != NULL)
2817                 bus_release_resource(dev, SYS_RES_MEMORY,
2818                     adapter->memrid, adapter->msix_mem);
2819
2820         if (adapter->pci_mem != NULL)
2821                 bus_release_resource(dev, SYS_RES_MEMORY,
2822                     PCIR_BAR(0), adapter->pci_mem);
2823
2824 }
2825
2826 /*
2827  * Setup Either MSI/X or MSI
2828  */
2829 static int
2830 igb_setup_msix(struct adapter *adapter)
2831 {
2832         device_t        dev = adapter->dev;
2833         int             bar, want, queues, msgs, maxqueues;
2834
2835         /* tuneable override */
2836         if (igb_enable_msix == 0)
2837                 goto msi;
2838
2839         /* First try MSI/X */
2840         msgs = pci_msix_count(dev); 
2841         if (msgs == 0)
2842                 goto msi;
2843         /*
2844         ** Some newer devices, as with ixgbe, may use
2845         ** a different BAR, so we need to keep track
2846         ** of which one is used.
2847         */
2848         adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2849         bar = pci_read_config(dev, adapter->memrid, 4);
2850         if (bar == 0) /* use next bar */
2851                 adapter->memrid += 4;
2852         adapter->msix_mem = bus_alloc_resource_any(dev,
2853             SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2854         if (adapter->msix_mem == NULL) {
2855                 /* May not be enabled */
2856                 device_printf(adapter->dev,
2857                     "Unable to map MSIX table\n");
2858                 goto msi;
2859         }
2860
2861         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2862
2863         /* Override via tuneable */
2864         if (igb_num_queues != 0)
2865                 queues = igb_num_queues;
2866
2867 #ifdef  RSS
2868         /* If we're doing RSS, clamp at the number of RSS buckets */
2869         if (queues > rss_getnumbuckets())
2870                 queues = rss_getnumbuckets();
2871 #endif
2872
2873
2874         /* Sanity check based on HW */
2875         switch (adapter->hw.mac.type) {
2876                 case e1000_82575:
2877                         maxqueues = 4;
2878                         break;
2879                 case e1000_82576:
2880                 case e1000_82580:
2881                 case e1000_i350:
2882                 case e1000_i354:
2883                         maxqueues = 8;
2884                         break;
2885                 case e1000_i210:
2886                         maxqueues = 4;
2887                         break;
2888                 case e1000_i211:
2889                         maxqueues = 2;
2890                         break;
2891                 default:  /* VF interfaces */
2892                         maxqueues = 1;
2893                         break;
2894         }
2895
2896         /* Final clamp on the actual hardware capability */
2897         if (queues > maxqueues)
2898                 queues = maxqueues;
2899
2900         /*
2901         ** One vector (RX/TX pair) per queue
2902         ** plus an additional for Link interrupt
2903         */
2904         want = queues + 1;
2905         if (msgs >= want)
2906                 msgs = want;
2907         else {
2908                 device_printf(adapter->dev,
2909                     "MSIX Configuration Problem, "
2910                     "%d vectors configured, but %d vectors wanted!\n",
2911                     msgs, want);
2912                 goto msi;
2913         }
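        /*
         * Example (a sketch, assuming the device exposes enough
         * vectors): a 4-CPU system yields queues = 4 and want = 5,
         * i.e. one vector per RX/TX queue pair plus one for the
         * link interrupt.
         */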
2914         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2915                 device_printf(adapter->dev,
2916                     "Using MSIX interrupts with %d vectors\n", msgs);
2917                 adapter->num_queues = queues;
2918                 return (msgs);
2919         }
2920         /*
2921         ** If MSIX alloc failed or provided us with
2922         ** less than needed, free and fall through to MSI
2923         */
2924         pci_release_msi(dev);
2925
2926 msi:
2927         if (adapter->msix_mem != NULL) {
2928                 bus_release_resource(dev, SYS_RES_MEMORY,
2929                     adapter->memrid, adapter->msix_mem);
2930                 adapter->msix_mem = NULL;
2931         }
2932         msgs = 1;
2933         if (pci_alloc_msi(dev, &msgs) == 0) {
2934                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2935                 return (msgs);
2936         }
2937         device_printf(adapter->dev, "Using a Legacy interrupt\n");
2938         return (0);
2939 }
2940
2941 /*********************************************************************
2942  *
2943  *  Initialize the DMA Coalescing feature
2944  *
2945  **********************************************************************/
2946 static void
2947 igb_init_dmac(struct adapter *adapter, u32 pba)
2948 {
2949         device_t        dev = adapter->dev;
2950         struct e1000_hw *hw = &adapter->hw;
2951         u32             dmac, reg = ~E1000_DMACR_DMAC_EN;
2952         u16             hwm;
2953
2954         if (hw->mac.type == e1000_i211)
2955                 return;
2956
2957         if (hw->mac.type > e1000_82580) {
2958
2959                 if (adapter->dmac == 0) { /* Disabling it */
2960                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2961                         return;
2962                 } else
2963                         device_printf(dev, "DMA Coalescing enabled\n");
2964
2965                 /* Set starting threshold */
2966                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2967
2968                 hwm = 64 * pba - adapter->max_frame_size / 16;
2969                 if (hwm < 64 * (pba - 6))
2970                         hwm = 64 * (pba - 6);
2971                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2972                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2973                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2974                     & E1000_FCRTC_RTH_COAL_MASK);
2975                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2976
2977
2978                 dmac = pba - adapter->max_frame_size / 512;
2979                 if (dmac < pba - 10)
2980                         dmac = pba - 10;
2981                 reg = E1000_READ_REG(hw, E1000_DMACR);
2982                 reg &= ~E1000_DMACR_DMACTHR_MASK;
2983                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2984                     & E1000_DMACR_DMACTHR_MASK);
2985
2986                 /* Transition to L0s or L1 if available. */
2987                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2988
2989                 /* Check for a 2.5Gb backplane connection before
2990                 * configuring the watchdog timer: on a 2.5Gb link
2991                 * the timer counts msec values in 12.8usec
2992                 * intervals, while on other connections it counts
2993                 * msec values in 32usec intervals.
2994                 */
2995                 if (hw->mac.type == e1000_i354) {
2996                         int status = E1000_READ_REG(hw, E1000_STATUS);
2997                         if ((status & E1000_STATUS_2P5_SKU) &&
2998                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2999                                 reg |= ((adapter->dmac * 5) >> 6);
3000                         else
3001                                 reg |= (adapter->dmac >> 5);
3002                 } else {
3003                         reg |= (adapter->dmac >> 5);
3004                 }
3005
3006                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
3007
3008 #ifdef I210_OBFF_SUPPORT
3009                 /*
3010                  * Set the OBFF Rx threshold to DMA Coalescing Rx
3011                  * threshold - 2KB and enable the feature in the
3012                  * hardware for I210.
3013                  */
3014                 if (hw->mac.type == e1000_i210) {
3015                         int obff = dmac - 2;
3016                         reg = E1000_READ_REG(hw, E1000_DOBFFCTL);
3017                         reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK;
3018                         reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK)
3019                             | E1000_DOBFFCTL_EXIT_ACT_MASK;
3020                         E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg);
3021                 }
3022 #endif
3023                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3024
3025                 /* Set the interval before transition */
3026                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
3027                 if (hw->mac.type == e1000_i350)
3028                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
3029                 /*
3030                 ** On a 2.5Gb connection the TTLX unit is 0.4 usec, so a
3031                 ** 4 usec delay is 4 / 0.4 = 10 = 0xA; the delay is still 4 usec.
3032                 */
3033                 if (hw->mac.type == e1000_i354) {
3034                         int status = E1000_READ_REG(hw, E1000_STATUS);
3035                         if ((status & E1000_STATUS_2P5_SKU) &&
3036                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
3037                                 reg |= 0xA;
3038                         else
3039                                 reg |= 0x4;
3040                 } else {
3041                         reg |= 0x4;
3042                 }
3043
3044                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3045
3046                 /* free space in tx packet buffer to wake from DMA coal */
3047                 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
3048                     (2 * adapter->max_frame_size)) >> 6);
3049
3050                 /* make low power state decision controlled by DMA coal */
3051                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3052                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3053                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3054
3055         } else if (hw->mac.type == e1000_82580) {
3056                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3057                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3058                     reg & ~E1000_PCIEMISC_LX_DECISION);
3059                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3060         }
3061 }
3062
3063
3064 /*********************************************************************
3065  *
3066  *  Set up a fresh starting state
3067  *
3068  **********************************************************************/
3069 static void
3070 igb_reset(struct adapter *adapter)
3071 {
3072         device_t        dev = adapter->dev;
3073         struct e1000_hw *hw = &adapter->hw;
3074         struct e1000_fc_info *fc = &hw->fc;
3075         struct ifnet    *ifp = adapter->ifp;
3076         u32             pba = 0;
3077         u16             hwm;
3078
3079         INIT_DEBUGOUT("igb_reset: begin");
3080
3081         /* Let the firmware know the OS is in control */
3082         igb_get_hw_control(adapter);
3083
3084         /*
3085          * Packet Buffer Allocation (PBA)
3086          * Writing PBA sets the receive portion of the buffer;
3087          * the remainder is used for the transmit buffer.
3088          */
3089         switch (hw->mac.type) {
3090         case e1000_82575:
3091                 pba = E1000_PBA_32K;
3092                 break;
3093         case e1000_82576:
3094         case e1000_vfadapt:
3095                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3096                 pba &= E1000_RXPBS_SIZE_MASK_82576;
3097                 break;
3098         case e1000_82580:
3099         case e1000_i350:
3100         case e1000_i354:
3101         case e1000_vfadapt_i350:
3102                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3103                 pba = e1000_rxpbs_adjust_82580(pba);
3104                 break;
3105         case e1000_i210:
3106         case e1000_i211:
3107                 pba = E1000_PBA_34K;
                break;
3108         default:
3109                 break;
3110         }
3111
3112         /* Special needs in case of Jumbo frames */
3113         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3114                 u32 tx_space, min_tx, min_rx;
3115                 pba = E1000_READ_REG(hw, E1000_PBA);
3116                 tx_space = pba >> 16;
3117                 pba &= 0xffff;
3118                 min_tx = (adapter->max_frame_size +
3119                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3120                 min_tx = roundup2(min_tx, 1024);
3121                 min_tx >>= 10;
3122                 min_rx = adapter->max_frame_size;
3123                 min_rx = roundup2(min_rx, 1024);
3124                 min_rx >>= 10;
3125                 if (tx_space < min_tx &&
3126                     ((min_tx - tx_space) < pba)) {
3127                         pba = pba - (min_tx - tx_space);
3128                         /*
3129                          * if short on rx space, rx wins
3130                          * and must trump tx adjustment
3131                          */
3132                         if (pba < min_rx)
3133                                 pba = min_rx;
3134                 }
3135                 E1000_WRITE_REG(hw, E1000_PBA, pba);
3136         }
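        /*
         * Worked example (a sketch, assuming a 9000-byte MTU, so
         * max_frame_size is about 9018 bytes): min_tx = (9018 + 16
         * - 4) * 2 = 18060, rounded up to 18432 and shifted down to
         * 18KB; min_rx rounds 9018 up to 9216, i.e. 9KB. If TX space
         * is short the deficit comes out of the RX PBA, but never
         * below the RX minimum.
         */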
3137
3138         INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
3139
3140         /*
3141          * These parameters control the automatic generation (Tx) and
3142          * response (Rx) to Ethernet PAUSE frames.
3143          * - High water mark should allow for at least two frames to be
3144          *   received after sending an XOFF.
3145          * - Low water mark works best when it is very near the high water mark.
3146          *   This allows the receiver to restart by sending XON when it has
3147          *   drained a bit.
3148          */
3149         hwm = min(((pba << 10) * 9 / 10),
3150             ((pba << 10) - 2 * adapter->max_frame_size));
3151
3152         if (hw->mac.type < e1000_82576) {
3153                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3154                 fc->low_water = fc->high_water - 8;
3155         } else {
3156                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3157                 fc->low_water = fc->high_water - 16;
3158         }
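        /*
         * Worked example (a sketch, assuming pba = 32, i.e. 32KB,
         * and a 1522-byte max frame): hwm = min(32768 * 9 / 10,
         * 32768 - 2 * 1522) = min(29491, 29724) = 29491; an 82576
         * or later masks this to 16-byte granularity, giving
         * high_water = 29488 and low_water = 29472.
         */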
3159
3160         fc->pause_time = IGB_FC_PAUSE_TIME;
3161         fc->send_xon = TRUE;
3162         if (adapter->fc)
3163                 fc->requested_mode = adapter->fc;
3164         else
3165                 fc->requested_mode = e1000_fc_default;
3166
3167         /* Issue a global reset */
3168         e1000_reset_hw(hw);
3169         E1000_WRITE_REG(hw, E1000_WUC, 0);
3170
3171         /* Reset for AutoMediaDetect */
3172         if (adapter->flags & IGB_MEDIA_RESET) {
3173                 e1000_setup_init_funcs(hw, TRUE);
3174                 e1000_get_bus_info(hw);
3175                 adapter->flags &= ~IGB_MEDIA_RESET;
3176         }
3177
3178         if (e1000_init_hw(hw) < 0)
3179                 device_printf(dev, "Hardware Initialization Failed\n");
3180
3181         /* Setup DMA Coalescing */
3182         igb_init_dmac(adapter, pba);
3183
3184         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3185         e1000_get_phy_info(hw);
3186         e1000_check_for_link(hw);
3187         return;
3188 }
3189
3190 /*********************************************************************
3191  *
3192  *  Setup networking device structure and register an interface.
3193  *
3194  **********************************************************************/
3195 static int
3196 igb_setup_interface(device_t dev, struct adapter *adapter)
3197 {
3198         struct ifnet   *ifp;
3199
3200         INIT_DEBUGOUT("igb_setup_interface: begin");
3201
3202         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3203         if (ifp == NULL) {
3204                 device_printf(dev, "can not allocate ifnet structure\n");
3205                 return (-1);
3206         }
3207         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3208         ifp->if_init =  igb_init;
3209         ifp->if_softc = adapter;
3210         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3211         ifp->if_ioctl = igb_ioctl;
3212         ifp->if_get_counter = igb_get_counter;
3213 #ifndef IGB_LEGACY_TX
3214         ifp->if_transmit = igb_mq_start;
3215         ifp->if_qflush = igb_qflush;
3216 #else
3217         ifp->if_start = igb_start;
3218         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3219         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3220         IFQ_SET_READY(&ifp->if_snd);
3221 #endif
3222
3223         ether_ifattach(ifp, adapter->hw.mac.addr);
3224
3225         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3226         ifp->if_capabilities |= IFCAP_TSO;
3227         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3228         ifp->if_capenable = ifp->if_capabilities;
3229
3230         /* Advertise LRO but don't enable it by default; it is added
3231         ** after if_capenable is latched above, so the administrator
3232         ** must turn it on explicitly. */
3233         ifp->if_capabilities |= IFCAP_LRO;
3234
3235 #ifdef DEVICE_POLLING
3236         ifp->if_capabilities |= IFCAP_POLLING;
3237 #endif
3238
3239         /*
3240          * Tell the upper layer(s) we
3241          * support full VLAN capability.
3242          */
3243         ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3244         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3245                              |  IFCAP_VLAN_HWTSO
3246                              |  IFCAP_VLAN_MTU;
3247         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3248                           |  IFCAP_VLAN_HWTSO
3249                           |  IFCAP_VLAN_MTU;
3250
3251         /*
3252         ** Don't turn this on by default: if vlans are
3253         ** created on another pseudo device (e.g. lagg)
3254         ** then vlan events are not passed through, breaking
3255         ** operation, whereas with HW FILTER off it works. If
3256         ** you use vlans directly on the igb driver you can
3257         ** enable this and get full hardware tag filtering.
3258         */
3259         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3260
3261         /*
3262          * Specify the media types supported by this adapter and register
3263          * callbacks to update media and link information
3264          */
3265         ifmedia_init(&adapter->media, IFM_IMASK,
3266             igb_media_change, igb_media_status);
3267         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3268             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3269                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3270                             0, NULL);
3271                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3272         } else {
3273                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3274                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3275                             0, NULL);
3276                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3277                             0, NULL);
3278                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3279                             0, NULL);
3280                 if (adapter->hw.phy.type != e1000_phy_ife) {
3281                         ifmedia_add(&adapter->media,
3282                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3283                         ifmedia_add(&adapter->media,
3284                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3285                 }
3286         }
3287         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3288         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3289         return (0);
3290 }
3291
3292
3293 /*
3294  * Manage DMA'able memory.
3295  */
3296 static void
3297 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3298 {
3299         if (error)
3300                 return;
3301         *(bus_addr_t *) arg = segs[0].ds_addr;
3302 }
3303
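/*
 * igb_dma_malloc() below creates a one-segment tag with the requested
 * size and IGB_DBA_ALIGN alignment, allocates the memory, and loads
 * the map; igb_dmamap_cb() then records the lone segment's bus
 * address in dma_paddr for the caller.
 */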
3304 static int
3305 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3306         struct igb_dma_alloc *dma, int mapflags)
3307 {
3308         int error;
3309
3310         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3311                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3312                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3313                                 BUS_SPACE_MAXADDR,      /* highaddr */
3314                                 NULL, NULL,             /* filter, filterarg */
3315                                 size,                   /* maxsize */
3316                                 1,                      /* nsegments */
3317                                 size,                   /* maxsegsize */
3318                                 0,                      /* flags */
3319                                 NULL,                   /* lockfunc */
3320                                 NULL,                   /* lockarg */
3321                                 &dma->dma_tag);
3322         if (error) {
3323                 device_printf(adapter->dev,
3324                     "%s: bus_dma_tag_create failed: %d\n",
3325                     __func__, error);
3326                 goto fail_0;
3327         }
3328
3329         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3330             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3331         if (error) {
3332                 device_printf(adapter->dev,
3333                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3334                     __func__, (uintmax_t)size, error);
3335                 goto fail_2;
3336         }
3337
3338         dma->dma_paddr = 0;
3339         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3340             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3341         if (error || dma->dma_paddr == 0) {
3342                 device_printf(adapter->dev,
3343                     "%s: bus_dmamap_load failed: %d\n",
3344                     __func__, error);
3345                 goto fail_3;
3346         }
3347
3348         return (0);
3349
3350 fail_3:
3351         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3352 fail_2:
3353         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3354         bus_dma_tag_destroy(dma->dma_tag);
3355 fail_0:
3356         dma->dma_tag = NULL;
3357
3358         return (error);
3359 }
3360
3361 static void
3362 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3363 {
3364         if (dma->dma_tag == NULL)
3365                 return;
3366         if (dma->dma_paddr != 0) {
3367                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3368                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3369                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3370                 dma->dma_paddr = 0;
3371         }
3372         if (dma->dma_vaddr != NULL) {
3373                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3374                 dma->dma_vaddr = NULL;
3375         }
3376         bus_dma_tag_destroy(dma->dma_tag);
3377         dma->dma_tag = NULL;
3378 }
3379
3380
3381 /*********************************************************************
3382  *
3383  *  Allocate memory for the transmit and receive rings, and then
3384  *  the descriptors associated with each, called only once at attach.
3385  *
3386  **********************************************************************/
3387 static int
3388 igb_allocate_queues(struct adapter *adapter)
3389 {
3390         device_t dev = adapter->dev;
3391         struct igb_queue        *que = NULL;
3392         struct tx_ring          *txr = NULL;
3393         struct rx_ring          *rxr = NULL;
3394         int rsize, tsize, error = E1000_SUCCESS;
3395         int txconf = 0, rxconf = 0;
3396
3397         /* First allocate the top level queue structs */
3398         if (!(adapter->queues =
3399             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3400             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3401                 device_printf(dev, "Unable to allocate queue memory\n");
3402                 error = ENOMEM;
3403                 goto fail;
3404         }
3405
3406         /* Next allocate the TX ring struct memory */
3407         if (!(adapter->tx_rings =
3408             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3409             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3410                 device_printf(dev, "Unable to allocate TX ring memory\n");
3411                 error = ENOMEM;
3412                 goto tx_fail;
3413         }
3414
3415         /* Now allocate the RX */
3416         if (!(adapter->rx_rings =
3417             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3418             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3419                 device_printf(dev, "Unable to allocate RX ring memory\n");
3420                 error = ENOMEM;
3421                 goto rx_fail;
3422         }
3423
3424         tsize = roundup2(adapter->num_tx_desc *
3425             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3426         /*
3427          * Now set up the TX queues, txconf is needed to handle the
3428          * possibility that things fail midcourse and we need to
3429          * undo memory gracefully
3430          */ 
3431         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3432                 /* Set up some basics */
3433                 txr = &adapter->tx_rings[i];
3434                 txr->adapter = adapter;
3435                 txr->me = i;
3436                 txr->num_desc = adapter->num_tx_desc;
3437
3438                 /* Initialize the TX lock */
3439                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3440                     device_get_nameunit(dev), txr->me);
3441                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3442
3443                 if (igb_dma_malloc(adapter, tsize,
3444                         &txr->txdma, BUS_DMA_NOWAIT)) {
3445                         device_printf(dev,
3446                             "Unable to allocate TX Descriptor memory\n");
3447                         error = ENOMEM;
3448                         goto err_tx_desc;
3449                 }
3450                 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3451                 bzero((void *)txr->tx_base, tsize);
3452
3453                 /* Now allocate transmit buffers for the ring */
3454                 if (igb_allocate_transmit_buffers(txr)) {
3455                         device_printf(dev,
3456                             "Critical Failure setting up transmit buffers\n");
3457                         error = ENOMEM;
3458                         goto err_tx_desc;
3459                 }
3460 #ifndef IGB_LEGACY_TX
3461                 /* Allocate a buf ring */
3462                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3463                     M_WAITOK, &txr->tx_mtx);
3464 #endif
3465         }
3466
3467         /*
3468          * Next the RX queues...
3469          */ 
3470         rsize = roundup2(adapter->num_rx_desc *
3471             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3472         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3473                 rxr = &adapter->rx_rings[i];
3474                 rxr->adapter = adapter;
3475                 rxr->me = i;
3476
3477                 /* Initialize the RX lock */
3478                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3479                     device_get_nameunit(dev), rxr->me);
3480                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3481
3482                 if (igb_dma_malloc(adapter, rsize,
3483                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3484                         device_printf(dev,
3485                             "Unable to allocate RxDescriptor memory\n");
3486                         error = ENOMEM;
3487                         goto err_rx_desc;
3488                 }
3489                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3490                 bzero((void *)rxr->rx_base, rsize);
3491
3492                 /* Allocate receive buffers for the ring*/
3493                 if (igb_allocate_receive_buffers(rxr)) {
3494                         device_printf(dev,
3495                             "Critical Failure setting up receive buffers\n");
3496                         error = ENOMEM;
3497                         goto err_rx_desc;
3498                 }
3499         }
3500
3501         /*
3502         ** Finally set up the queue holding structs
3503         */
3504         for (int i = 0; i < adapter->num_queues; i++) {
3505                 que = &adapter->queues[i];
3506                 que->adapter = adapter;
3507                 que->txr = &adapter->tx_rings[i];
3508                 que->rxr = &adapter->rx_rings[i];
3509         }
3510
3511         return (0);
3512
3513 err_rx_desc:
3514         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3515                 igb_dma_free(adapter, &rxr->rxdma);
3516 err_tx_desc:
3517         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
3518                 igb_dma_free(adapter, &txr->txdma);
#ifndef IGB_LEGACY_TX
                /* Each fully configured TX ring owns a buf ring */
                if (txr->br != NULL)
                        buf_ring_free(txr->br, M_DEVBUF);
#endif
        }
3519         free(adapter->rx_rings, M_DEVBUF);
3520 rx_fail:
3524         free(adapter->tx_rings, M_DEVBUF);
3525 tx_fail:
3526         free(adapter->queues, M_DEVBUF);
3527 fail:
3528         return (error);
3529 }
3530
3531 /*********************************************************************
3532  *
3533  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3534  *  the information needed to transmit a packet on the wire. This is
3535  *  called only once at attach, setup is done every reset.
3536  *
3537  **********************************************************************/
3538 static int
3539 igb_allocate_transmit_buffers(struct tx_ring *txr)
3540 {
3541         struct adapter *adapter = txr->adapter;
3542         device_t dev = adapter->dev;
3543         struct igb_tx_buf *txbuf;
3544         int error, i;
3545
3546         /*
3547          * Setup DMA descriptor areas.
3548          */
3549         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3550                                1, 0,                    /* alignment, bounds */
3551                                BUS_SPACE_MAXADDR,       /* lowaddr */
3552                                BUS_SPACE_MAXADDR,       /* highaddr */
3553                                NULL, NULL,              /* filter, filterarg */
3554                                IGB_TSO_SIZE,            /* maxsize */
3555                                IGB_MAX_SCATTER,         /* nsegments */
3556                                PAGE_SIZE,               /* maxsegsize */
3557                                0,                       /* flags */
3558                                NULL,                    /* lockfunc */
3559                                NULL,                    /* lockfuncarg */
3560                                &txr->txtag))) {
3561                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3562                 goto fail;
3563         }
3564
3565         if (!(txr->tx_buffers =
3566             (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3567             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3568                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3569                 error = ENOMEM;
3570                 goto fail;
3571         }
3572
3573         /* Create the descriptor buffer dma maps */
3574         txbuf = txr->tx_buffers;
3575         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3576                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3577                 if (error != 0) {
3578                         device_printf(dev, "Unable to create TX DMA map\n");
3579                         goto fail;
3580                 }
3581         }
3582
3583         return 0;
3584 fail:
3585         /* We free all, it handles case where we are in the middle */
3586         igb_free_transmit_structures(adapter);
3587         return (error);
3588 }
3589
3590 /*********************************************************************
3591  *
3592  *  Initialize a transmit ring.
3593  *
3594  **********************************************************************/
3595 static void
3596 igb_setup_transmit_ring(struct tx_ring *txr)
3597 {
3598         struct adapter *adapter = txr->adapter;
3599         struct igb_tx_buf *txbuf;
3600         int i;
3601 #ifdef DEV_NETMAP
3602         struct netmap_adapter *na = NA(adapter->ifp);
3603         struct netmap_slot *slot;
3604 #endif /* DEV_NETMAP */
3605
3606         /* Clear the old descriptor contents */
3607         IGB_TX_LOCK(txr);
3608 #ifdef DEV_NETMAP
3609         slot = netmap_reset(na, NR_TX, txr->me, 0);
3610 #endif /* DEV_NETMAP */
3611         bzero((void *)txr->tx_base,
3612               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3613         /* Reset indices */
3614         txr->next_avail_desc = 0;
3615         txr->next_to_clean = 0;
3616
3617         /* Free any existing tx buffers. */
3618         txbuf = txr->tx_buffers;
3619         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3620                 if (txbuf->m_head != NULL) {
3621                         bus_dmamap_sync(txr->txtag, txbuf->map,
3622                             BUS_DMASYNC_POSTWRITE);
3623                         bus_dmamap_unload(txr->txtag, txbuf->map);
3624                         m_freem(txbuf->m_head);
3625                         txbuf->m_head = NULL;
3626                 }
3627 #ifdef DEV_NETMAP
3628                 if (slot) {
3629                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3630                         /* no need to set the address */
3631                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3632                 }
3633 #endif /* DEV_NETMAP */
3634                 /* clear the watch index */
3635                 txbuf->eop = NULL;
3636         }
3637
3638         /* Set number of descriptors available */
3639         txr->tx_avail = adapter->num_tx_desc;
3640
3641         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3642             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3643         IGB_TX_UNLOCK(txr);
3644 }
3645
3646 /*********************************************************************
3647  *
3648  *  Initialize all transmit rings.
3649  *
3650  **********************************************************************/
3651 static void
3652 igb_setup_transmit_structures(struct adapter *adapter)
3653 {
3654         struct tx_ring *txr = adapter->tx_rings;
3655
3656         for (int i = 0; i < adapter->num_queues; i++, txr++)
3657                 igb_setup_transmit_ring(txr);
3658
3659         return;
3660 }
3661
3662 /*********************************************************************
3663  *
3664  *  Enable transmit unit.
3665  *
3666  **********************************************************************/
3667 static void
3668 igb_initialize_transmit_units(struct adapter *adapter)
3669 {
3670         struct tx_ring  *txr = adapter->tx_rings;
3671         struct e1000_hw *hw = &adapter->hw;
3672         u32             tctl, txdctl;
3673
3674         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3675         tctl = txdctl = 0;
3676
3677         /* Setup the Tx Descriptor Rings */
3678         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3679                 u64 bus_addr = txr->txdma.dma_paddr;
3680
3681                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3682                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3683                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3684                     (uint32_t)(bus_addr >> 32));
3685                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3686                     (uint32_t)bus_addr);
3687
3688                 /* Setup the HW Tx Head and Tail descriptor pointers */
3689                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3690                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3691
3692                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3693                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3694                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3695
3696                 txr->queue_status = IGB_QUEUE_IDLE;
3697
3698                 txdctl |= IGB_TX_PTHRESH;
3699                 txdctl |= IGB_TX_HTHRESH << 8;
3700                 txdctl |= IGB_TX_WTHRESH << 16;
3701                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3702                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3703         }
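        /*
         * The txdctl value written above follows the register layout:
         * PTHRESH in bits 5:0, HTHRESH in bits 13:8, WTHRESH in bits
         * 21:16, plus the queue-enable bit.
         */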
3704
3705         if (adapter->vf_ifp)
3706                 return;
3707
3708         e1000_config_collision_dist(hw);
3709
3710         /* Program the Transmit Control Register */
3711         tctl = E1000_READ_REG(hw, E1000_TCTL);
3712         tctl &= ~E1000_TCTL_CT;
3713         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3714                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3715
3716         /* This write will effectively turn on the transmit unit. */
3717         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3718 }
3719
3720 /*********************************************************************
3721  *
3722  *  Free all transmit rings.
3723  *
3724  **********************************************************************/
3725 static void
3726 igb_free_transmit_structures(struct adapter *adapter)
3727 {
3728         struct tx_ring *txr = adapter->tx_rings;
3729
3730         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3731                 IGB_TX_LOCK(txr);
3732                 igb_free_transmit_buffers(txr);
3733                 igb_dma_free(adapter, &txr->txdma);
3734                 IGB_TX_UNLOCK(txr);
3735                 IGB_TX_LOCK_DESTROY(txr);
3736         }
3737         free(adapter->tx_rings, M_DEVBUF);
3738 }
3739
3740 /*********************************************************************
3741  *
3742  *  Free transmit ring related data structures.
3743  *
3744  **********************************************************************/
3745 static void
3746 igb_free_transmit_buffers(struct tx_ring *txr)
3747 {
3748         struct adapter *adapter = txr->adapter;
3749         struct igb_tx_buf *tx_buffer;
3750         int             i;
3751
3752         INIT_DEBUGOUT("free_transmit_ring: begin");
3753
3754         if (txr->tx_buffers == NULL)
3755                 return;
3756
3757         tx_buffer = txr->tx_buffers;
3758         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3759                 if (tx_buffer->m_head != NULL) {
3760                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3761                             BUS_DMASYNC_POSTWRITE);
3762                         bus_dmamap_unload(txr->txtag,
3763                             tx_buffer->map);
3764                         m_freem(tx_buffer->m_head);
3765                         tx_buffer->m_head = NULL;
3766                         if (tx_buffer->map != NULL) {
3767                                 bus_dmamap_destroy(txr->txtag,
3768                                     tx_buffer->map);
3769                                 tx_buffer->map = NULL;
3770                         }
3771                 } else if (tx_buffer->map != NULL) {
3772                         bus_dmamap_unload(txr->txtag,
3773                             tx_buffer->map);
3774                         bus_dmamap_destroy(txr->txtag,
3775                             tx_buffer->map);
3776                         tx_buffer->map = NULL;
3777                 }
3778         }
3779 #ifndef IGB_LEGACY_TX
3780         if (txr->br != NULL)
3781                 buf_ring_free(txr->br, M_DEVBUF);
3782 #endif
3783         if (txr->tx_buffers != NULL) {
3784                 free(txr->tx_buffers, M_DEVBUF);
3785                 txr->tx_buffers = NULL;
3786         }
3787         if (txr->txtag != NULL) {
3788                 bus_dma_tag_destroy(txr->txtag);
3789                 txr->txtag = NULL;
3790         }
3791         return;
3792 }
3793
3794 /**********************************************************************
3795  *
3796  *  Setup work for hardware segmentation offload (TSO) on
3797  *  adapters using advanced tx descriptors
3798  *
3799  **********************************************************************/
3800 static int
3801 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3802     u32 *cmd_type_len, u32 *olinfo_status)
3803 {
3804         struct adapter *adapter = txr->adapter;
3805         struct e1000_adv_tx_context_desc *TXD;
3806         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3807         u32 mss_l4len_idx = 0, paylen;
3808         u16 vtag = 0, eh_type;
3809         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3810         struct ether_vlan_header *eh;
3811 #ifdef INET6
3812         struct ip6_hdr *ip6;
3813 #endif
3814 #ifdef INET
3815         struct ip *ip;
3816 #endif
3817         struct tcphdr *th;
3818
3819
3820         /*
3821          * Determine where frame payload starts.
3822          * Jump over vlan headers if already present
3823          */
3824         eh = mtod(mp, struct ether_vlan_header *);
3825         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3826                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3827                 eh_type = eh->evl_proto;
3828         } else {
3829                 ehdrlen = ETHER_HDR_LEN;
3830                 eh_type = eh->evl_encap_proto;
3831         }
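        /*
         * e.g. an untagged frame gives ehdrlen = ETHER_HDR_LEN (14);
         * a VLAN-tagged frame gives 14 + ETHER_VLAN_ENCAP_LEN = 18.
         */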
3832
3833         switch (ntohs(eh_type)) {
3834 #ifdef INET6
3835         case ETHERTYPE_IPV6:
3836                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3837                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3838                 if (ip6->ip6_nxt != IPPROTO_TCP)
3839                         return (ENXIO);
3840                 ip_hlen = sizeof(struct ip6_hdr);
3842                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3843                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3844                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3845                 break;
3846 #endif
3847 #ifdef INET
3848         case ETHERTYPE_IP:
3849                 ip = (struct ip *)(mp->m_data + ehdrlen);
3850                 if (ip->ip_p != IPPROTO_TCP)
3851                         return (ENXIO);
3852                 ip->ip_sum = 0;
3853                 ip_hlen = ip->ip_hl << 2;
3854                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3855                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3856                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3857                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3858                 /* Tell transmit desc to also do IPv4 checksum. */
3859                 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3860                 break;
3861 #endif
3862         default:
3863                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3864                     __func__, ntohs(eh_type));
3865                 break;
3866         }
3867
3868         ctxd = txr->next_avail_desc;
3869         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3870
3871         tcp_hlen = th->th_off << 2;
3872
3873         /* This is used in the transmit desc in encap */
3874         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3875
3876         /* VLAN MACLEN IPLEN */
3877         if (mp->m_flags & M_VLANTAG) {
3878                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3879                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3880         }
3881
3882         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3883         vlan_macip_lens |= ip_hlen;
3884         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3885
3886         /* ADV DTYPE TUCMD */
3887         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3888         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3889         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3890
3891         /* MSS L4LEN IDX */
3892         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3893         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3894         /* 82575 needs the queue index added */
3895         if (adapter->hw.mac.type == e1000_82575)
3896                 mss_l4len_idx |= txr->me << 4;
3897         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3898
3899         TXD->seqnum_seed = htole32(0);
3900
3901         if (++ctxd == txr->num_desc)
3902                 ctxd = 0;
3903
3904         txr->tx_avail--;
3905         txr->next_avail_desc = ctxd;
3906         *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3907         *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3908         *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3909         ++txr->tso_tx;
3910         return (0);
3911 }
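/*
 * Packing sketch for the context descriptor built above: for an IPv4
 * TSO with a 1448-byte MSS and a 20-byte TCP header, mss_l4len_idx
 * carries 1448 in the MSS field and 20 in the L4LEN field, while
 * paylen excludes all headers, so olinfo_status advertises only the
 * TCP payload bytes.
 */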
3912
3913 /*********************************************************************
3914  *
3915  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3916  *
3917  **********************************************************************/
3918
3919 static int
3920 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3921     u32 *cmd_type_len, u32 *olinfo_status)
3922 {
3923         struct e1000_adv_tx_context_desc *TXD;
3924         struct adapter *adapter = txr->adapter;
3925         struct ether_vlan_header *eh;
3926         struct ip *ip;
3927         struct ip6_hdr *ip6;
3928         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3929         int     ehdrlen, ip_hlen = 0;
3930         u16     etype;
3931         u8      ipproto = 0;
3932         int     offload = TRUE;
3933         int     ctxd = txr->next_avail_desc;
3934         u16     vtag = 0;
3935
3936         /* First check if TSO is to be used */
3937         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3938                 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3939
3940         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3941                 offload = FALSE;
3942
3943         /* Indicate the whole packet as payload when not doing TSO */
3944         *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3945
3946         /* Now ready a context descriptor */
3947         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3948
3949         /*
3950         ** In advanced descriptors the vlan tag must 
3951         ** be placed into the context descriptor. Hence
3952         ** we need to make one even if not doing offloads.
3953         */
3954         if (mp->m_flags & M_VLANTAG) {
3955                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3956                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3957         } else if (offload == FALSE) /* ... no offload to do */
3958                 return (0);
3959
3960         /*
3961          * Determine where frame payload starts.
3962          * Jump over vlan headers if already present,
3963          * helpful for QinQ too.
3964          */
3965         eh = mtod(mp, struct ether_vlan_header *);
3966         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3967                 etype = ntohs(eh->evl_proto);
3968                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3969         } else {
3970                 etype = ntohs(eh->evl_encap_proto);
3971                 ehdrlen = ETHER_HDR_LEN;
3972         }
3973
3974         /* Set the ether header length */
3975         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3976
3977         switch (etype) {
3978                 case ETHERTYPE_IP:
3979                         ip = (struct ip *)(mp->m_data + ehdrlen);
3980                         ip_hlen = ip->ip_hl << 2;
3981                         ipproto = ip->ip_p;
3982                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3983                         break;
3984                 case ETHERTYPE_IPV6:
3985                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3986                         ip_hlen = sizeof(struct ip6_hdr);
3987                         /* XXX-BZ this will go badly in case of ext hdrs. */
3988                         ipproto = ip6->ip6_nxt;
3989                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3990                         break;
3991                 default:
3992                         offload = FALSE;
3993                         break;
3994         }
3995
3996         vlan_macip_lens |= ip_hlen;
3997         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3998
3999         switch (ipproto) {
4000                 case IPPROTO_TCP:
4001                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
4002                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
4003                         break;
4004                 case IPPROTO_UDP:
4005                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
4006                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
4007                         break;
4008
4009 #if __FreeBSD_version >= 800000
4010                 case IPPROTO_SCTP:
4011                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
4012                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4013                         break;
4014 #endif
4015                 default:
4016                         offload = FALSE;
4017                         break;
4018         }
4019
4020         if (offload) /* For the TX descriptor setup */
4021                 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4022
4023         /* 82575 needs the queue index added */
4024         if (adapter->hw.mac.type == e1000_82575)
4025                 mss_l4len_idx = txr->me << 4;
4026
4027         /* Now copy bits into descriptor */
4028         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
4029         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
4030         TXD->seqnum_seed = htole32(0);
4031         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
4032
4033         /* We've consumed the first desc, adjust counters */
4034         if (++ctxd == txr->num_desc)
4035                 ctxd = 0;
4036         txr->next_avail_desc = ctxd;
4037         --txr->tx_avail;
4038
4039         return (0);
4040 }
4041
4042 /**********************************************************************
4043  *
4044  *  Examine each tx_buffer in the used queue. If the hardware is done
4045  *  processing the packet then free associated resources. The
4046  *  tx_buffer is put back on the free queue.
4047  *
4048  *  A TRUE return means there is work in the ring to clean; FALSE means it is empty.
4049  **********************************************************************/
4050 static bool
4051 igb_txeof(struct tx_ring *txr)
4052 {
4053         struct adapter          *adapter = txr->adapter;
4054         struct ifnet            *ifp = adapter->ifp;
4055         u32                     work, processed = 0;
4056         u16                     limit = txr->process_limit;
4057         struct igb_tx_buf       *buf;
4058         union e1000_adv_tx_desc *txd;
4059
4060         mtx_assert(&txr->tx_mtx, MA_OWNED);
4061
4062 #ifdef DEV_NETMAP
4063         if (netmap_tx_irq(ifp, txr->me))
4064                 return (FALSE);
4065 #endif /* DEV_NETMAP */
4066
4067         if (txr->tx_avail == txr->num_desc) {
4068                 txr->queue_status = IGB_QUEUE_IDLE;
4069                 return FALSE;
4070         }
4071
4072         /* Get work starting point */
4073         work = txr->next_to_clean;
4074         buf = &txr->tx_buffers[work];
4075         txd = &txr->tx_base[work];
4076         work -= txr->num_desc; /* The distance to ring end */
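        /*
         * 'work' is unsigned: subtracting num_desc yields a value
         * that increments back to exactly zero at the end of the
         * ring, so the '!work' tests below detect the wrap without
         * comparing against num_desc for every descriptor.
         */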
4077         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4078             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4079         do {
4080                 union e1000_adv_tx_desc *eop = buf->eop;
4081                 if (eop == NULL) /* No work */
4082                         break;
4083
4084                 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4085                         break;  /* I/O not complete */
4086
4087                 if (buf->m_head) {
4088                         txr->bytes +=
4089                             buf->m_head->m_pkthdr.len;
4090                         bus_dmamap_sync(txr->txtag,
4091                             buf->map,
4092                             BUS_DMASYNC_POSTWRITE);
4093                         bus_dmamap_unload(txr->txtag,
4094                             buf->map);
4095                         m_freem(buf->m_head);
4096                         buf->m_head = NULL;
4097                 }
4098                 buf->eop = NULL;
4099                 ++txr->tx_avail;
4100
4101                 /* We clean the range if multi segment */
4102                 while (txd != eop) {
4103                         ++txd;
4104                         ++buf;
4105                         ++work;
4106                         /* wrap the ring? */
4107                         if (__predict_false(!work)) {
4108                                 work -= txr->num_desc;
4109                                 buf = txr->tx_buffers;
4110                                 txd = txr->tx_base;
4111                         }
4112                         if (buf->m_head) {
4113                                 txr->bytes +=
4114                                     buf->m_head->m_pkthdr.len;
4115                                 bus_dmamap_sync(txr->txtag,
4116                                     buf->map,
4117                                     BUS_DMASYNC_POSTWRITE);
4118                                 bus_dmamap_unload(txr->txtag,
4119                                     buf->map);
4120                                 m_freem(buf->m_head);
4121                                 buf->m_head = NULL;
4122                         }
4123                         ++txr->tx_avail;
4124                         buf->eop = NULL;
4125
4126                 }
4127                 ++txr->packets;
4128                 ++processed;
4129                 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4130                 txr->watchdog_time = ticks;
4131
4132                 /* Try the next packet */
4133                 ++txd;
4134                 ++buf;
4135                 ++work;
4136                 /* reset with a wrap */
4137                 if (__predict_false(!work)) {
4138                         work -= txr->num_desc;
4139                         buf = txr->tx_buffers;
4140                         txd = txr->tx_base;
4141                 }
4142                 prefetch(txd);
4143         } while (__predict_true(--limit));
4144
4145         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4146             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4147
4148         work += txr->num_desc;
4149         txr->next_to_clean = work;
4150
4151         /*
4152         ** Watchdog calculation: we know there is work
4153         ** outstanding or the first return would have been
4154         ** taken, so nothing processed for too long
4155         ** indicates a hang.
4156         */
4157         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4158                 txr->queue_status |= IGB_QUEUE_HUNG;
4159
4160         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4161                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;       
4162
4163         if (txr->tx_avail == txr->num_desc) {
4164                 txr->queue_status = IGB_QUEUE_IDLE;
4165                 return (FALSE);
4166         }
4167
4168         return (TRUE);
4169 }
4170
4171 /*********************************************************************
4172  *
4173  *  Refresh mbuf buffers for RX descriptor rings
4174  *   - now keeps its own state, so discards due to resource
4175  *     exhaustion are unnecessary; if an mbuf cannot be obtained
4176  *     it just returns, keeping its placeholder, and can simply
4177  *     be called again later to retry.
4178  *
4179  **********************************************************************/
4180 static void
4181 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4182 {
4183         struct adapter          *adapter = rxr->adapter;
4184         bus_dma_segment_t       hseg[1];
4185         bus_dma_segment_t       pseg[1];
4186         struct igb_rx_buf       *rxbuf;
4187         struct mbuf             *mh, *mp;
4188         int                     i, j, nsegs, error;
4189         bool                    refreshed = FALSE;
4190
4191         i = j = rxr->next_to_refresh;
4192         /*
4193         ** Get one descriptor beyond
4194         ** our work mark to control
4195         ** the loop.
4196         */
4197         if (++j == adapter->num_rx_desc)
4198                 j = 0;
4199
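        /*
         * Example: with num_rx_desc == 1024 and next_to_refresh == 1023,
         * i starts at 1023 while j wraps to 0; buffers are refreshed
         * until j reaches 'limit', with i always trailing j by one so
         * it finishes on the last slot actually refreshed.
         */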
4200         while (j != limit) {
4201                 rxbuf = &rxr->rx_buffers[i];
4202                 /* No hdr mbuf is used when header split is off */
4203                 if (rxr->hdr_split == FALSE)
4204                         goto no_split;
4205                 if (rxbuf->m_head == NULL) {
4206                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4207                         if (mh == NULL)
4208                                 goto update;
4209                 } else
4210                         mh = rxbuf->m_head;
4211
4212                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4214                 mh->m_flags |= M_PKTHDR;
4215                 /* Get the memory mapping */
4216                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4217                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4218                 if (error != 0) {
4219                         printf("Refresh mbufs: hdr dmamap load"
4220                             " failure - %d\n", error);
4221                         m_free(mh);
4222                         rxbuf->m_head = NULL;
4223                         goto update;
4224                 }
4225                 rxbuf->m_head = mh;
4226                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4227                     BUS_DMASYNC_PREREAD);
4228                 rxr->rx_base[i].read.hdr_addr =
4229                     htole64(hseg[0].ds_addr);
4230 no_split:
4231                 if (rxbuf->m_pack == NULL) {
4232                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4233                             M_PKTHDR, adapter->rx_mbuf_sz);
4234                         if (mp == NULL)
4235                                 goto update;
4236                 } else
4237                         mp = rxbuf->m_pack;
4238
4239                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4240                 /* Get the memory mapping */
4241                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4242                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4243                 if (error != 0) {
4244                         printf("Refresh mbufs: payload dmamap load"
4245                             " failure - %d\n", error);
4246                         m_free(mp);
4247                         rxbuf->m_pack = NULL;
4248                         goto update;
4249                 }
4250                 rxbuf->m_pack = mp;
4251                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4252                     BUS_DMASYNC_PREREAD);
4253                 rxr->rx_base[i].read.pkt_addr =
4254                     htole64(pseg[0].ds_addr);
4255                 refreshed = TRUE; /* at least one buffer was refreshed */
4256
4257                 i = j; /* our next is precalculated */
4258                 rxr->next_to_refresh = i;
4259                 if (++j == adapter->num_rx_desc)
4260                         j = 0;
4261         }
4262 update:
4263         if (refreshed) /* update tail */
4264                 E1000_WRITE_REG(&adapter->hw,
4265                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4266         return;
4267 }
4268
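/*
** A minimal usage sketch (assuming an rx ring and the current clean
** index are in scope): because the routine keeps its own
** next_to_refresh state and leaves placeholders on allocation
** failure, a caller that sees a short refresh can simply call it
** again later, exactly as the clean loop does.
*/
#ifdef notyet
        if (igb_rx_unrefreshed(rxr))            /* anything left stale? */
                igb_refresh_mbufs(rxr, rxr->next_to_check);
#endif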
4269
4270 /*********************************************************************
4271  *
4272  *  Allocate memory for rx_buffer structures. Since we use one
4273  *  rx_buffer per received packet, the maximum number of rx_buffers
4274  *  that we'll need is equal to the number of receive descriptors
4275  *  that we've allocated.
4276  *
4277  **********************************************************************/
4278 static int
4279 igb_allocate_receive_buffers(struct rx_ring *rxr)
4280 {
4281         struct  adapter         *adapter = rxr->adapter;
4282         device_t                dev = adapter->dev;
4283         struct igb_rx_buf       *rxbuf;
4284         int                     i, bsize, error;
4285
4286         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4287         if (!(rxr->rx_buffers =
4288             (struct igb_rx_buf *) malloc(bsize,
4289             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4290                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4291                 error = ENOMEM;
4292                 goto fail;
4293         }
4294
4295         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4296                                    1, 0,                /* alignment, bounds */
4297                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4298                                    BUS_SPACE_MAXADDR,   /* highaddr */
4299                                    NULL, NULL,          /* filter, filterarg */
4300                                    MSIZE,               /* maxsize */
4301                                    1,                   /* nsegments */
4302                                    MSIZE,               /* maxsegsize */
4303                                    0,                   /* flags */
4304                                    NULL,                /* lockfunc */
4305                                    NULL,                /* lockfuncarg */
4306                                    &rxr->htag))) {
4307                 device_printf(dev, "Unable to create RX DMA tag\n");
4308                 goto fail;
4309         }
4310
4311         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4312                                    1, 0,                /* alignment, bounds */
4313                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4314                                    BUS_SPACE_MAXADDR,   /* highaddr */
4315                                    NULL, NULL,          /* filter, filterarg */
4316                                    MJUM9BYTES,          /* maxsize */
4317                                    1,                   /* nsegments */
4318                                    MJUM9BYTES,          /* maxsegsize */
4319                                    0,                   /* flags */
4320                                    NULL,                /* lockfunc */
4321                                    NULL,                /* lockfuncarg */
4322                                    &rxr->ptag))) {
4323                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4324                 goto fail;
4325         }
4326
4327         for (i = 0; i < adapter->num_rx_desc; i++) {
4328                 rxbuf = &rxr->rx_buffers[i];
4329                 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4330                 if (error) {
4331                         device_printf(dev,
4332                             "Unable to create RX head DMA maps\n");
4333                         goto fail;
4334                 }
4335                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4336                 if (error) {
4337                         device_printf(dev,
4338                             "Unable to create RX packet DMA maps\n");
4339                         goto fail;
4340                 }
4341         }
4342
4343         return (0);
4344
4345 fail:
4346         /* Frees all, but can handle partial completion */
4347         igb_free_receive_structures(adapter);
4348         return (error);
4349 }
4350
4351
4352 static void
4353 igb_free_receive_ring(struct rx_ring *rxr)
4354 {
4355         struct  adapter         *adapter = rxr->adapter;
4356         struct igb_rx_buf       *rxbuf;
4357
4358
4359         for (int i = 0; i < adapter->num_rx_desc; i++) {
4360                 rxbuf = &rxr->rx_buffers[i];
4361                 if (rxbuf->m_head != NULL) {
4362                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4363                             BUS_DMASYNC_POSTREAD);
4364                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4365                         rxbuf->m_head->m_flags |= M_PKTHDR;
4366                         m_freem(rxbuf->m_head);
4367                 }
4368                 if (rxbuf->m_pack != NULL) {
4369                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4370                             BUS_DMASYNC_POSTREAD);
4371                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4372                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4373                         m_freem(rxbuf->m_pack);
4374                 }
4375                 rxbuf->m_head = NULL;
4376                 rxbuf->m_pack = NULL;
4377         }
4378 }
4379
4380
4381 /*********************************************************************
4382  *
4383  *  Initialize a receive ring and its buffers.
4384  *
4385  **********************************************************************/
4386 static int
4387 igb_setup_receive_ring(struct rx_ring *rxr)
4388 {
4389         struct  adapter         *adapter;
4390         struct  ifnet           *ifp;
4391         device_t                dev;
4392         struct igb_rx_buf       *rxbuf;
4393         bus_dma_segment_t       pseg[1], hseg[1];
4394         struct lro_ctrl         *lro = &rxr->lro;
4395         int                     rsize, nsegs, error = 0;
4396 #ifdef DEV_NETMAP
4397         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4398         struct netmap_slot *slot;
4399 #endif /* DEV_NETMAP */
4400
4401         adapter = rxr->adapter;
4402         dev = adapter->dev;
4403         ifp = adapter->ifp;
4404
4405         /* Clear the ring contents */
4406         IGB_RX_LOCK(rxr);
4407 #ifdef DEV_NETMAP
4408         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4409 #endif /* DEV_NETMAP */
4410         rsize = roundup2(adapter->num_rx_desc *
4411             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4412         bzero((void *)rxr->rx_base, rsize);
4413
4414         /*
4415         ** Free current RX buffer structures and their mbufs
4416         */
4417         igb_free_receive_ring(rxr);
4418
4419         /* Configure for header split? */
4420         if (igb_header_split)
4421                 rxr->hdr_split = TRUE;
4422
4423         /* Now replenish the ring mbufs */
4424         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4425                 struct mbuf     *mh, *mp;
4426
4427                 rxbuf = &rxr->rx_buffers[j];
4428 #ifdef DEV_NETMAP
4429                 if (slot) {
4430                         /* slot sj is mapped to the j-th NIC-ring entry */
4431                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4432                         uint64_t paddr;
4433                         void *addr;
4434
4435                         addr = PNMB(na, slot + sj, &paddr);
4436                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4437                         /* Update descriptor */
4438                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4439                         continue;
4440                 }
4441 #endif /* DEV_NETMAP */
4442                 if (rxr->hdr_split == FALSE)
4443                         goto skip_head;
4444
4445                 /* First the header */
4446                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4447                 if (rxbuf->m_head == NULL) {
4448                         error = ENOBUFS;
4449                         goto fail;
4450                 }
4451                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4452                 mh = rxbuf->m_head;
4453                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4454                 mh->m_flags |= M_PKTHDR;
4455                 /* Get the memory mapping */
4456                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4457                     rxbuf->hmap, rxbuf->m_head, hseg,
4458                     &nsegs, BUS_DMA_NOWAIT);
4459                 if (error != 0) /* Nothing elegant to do here */
4460                         goto fail;
4461                 bus_dmamap_sync(rxr->htag,
4462                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4463                 /* Update descriptor */
4464                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4465
4466 skip_head:
4467                 /* Now the payload cluster */
4468                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4469                     M_PKTHDR, adapter->rx_mbuf_sz);
4470                 if (rxbuf->m_pack == NULL) {
4471                         error = ENOBUFS;
4472                         goto fail;
4473                 }
4474                 mp = rxbuf->m_pack;
4475                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4476                 /* Get the memory mapping */
4477                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4478                     rxbuf->pmap, mp, pseg,
4479                     &nsegs, BUS_DMA_NOWAIT);
4480                 if (error != 0)
4481                         goto fail;
4482                 bus_dmamap_sync(rxr->ptag,
4483                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4484                 /* Update descriptor */
4485                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4486         }
4487
4488         /* Setup our descriptor indices */
4489         rxr->next_to_check = 0;
4490         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4491         rxr->lro_enabled = FALSE;
4492         rxr->rx_split_packets = 0;
4493         rxr->rx_bytes = 0;
4494
4495         rxr->fmp = NULL;
4496         rxr->lmp = NULL;
4497
4498         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4499             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4500
4501         /*
4502         ** Now set up the LRO interface; we
4503         ** also only do header split when LRO
4504         ** is enabled, since split headers are
4505         ** often undesirable without it.
4506         */
4507         if (ifp->if_capenable & IFCAP_LRO) {
4508                 error = tcp_lro_init(lro);
4509                 if (error) {
4510                         device_printf(dev, "LRO Initialization failed!\n");
4511                         goto fail;
4512                 }
4513                 INIT_DEBUGOUT("RX LRO Initialized\n");
4514                 rxr->lro_enabled = TRUE;
4515                 lro->ifp = adapter->ifp;
4516         }
4517
4518         IGB_RX_UNLOCK(rxr);
4519         return (0);
4520
4521 fail:
4522         igb_free_receive_ring(rxr);
4523         IGB_RX_UNLOCK(rxr);
4524         return (error);
4525 }
4526
4527
4528 /*********************************************************************
4529  *
4530  *  Initialize all receive rings.
4531  *
4532  **********************************************************************/
4533 static int
4534 igb_setup_receive_structures(struct adapter *adapter)
4535 {
4536         struct rx_ring *rxr = adapter->rx_rings;
4537         int i;
4538
4539         for (i = 0; i < adapter->num_queues; i++, rxr++)
4540                 if (igb_setup_receive_ring(rxr))
4541                         goto fail;
4542
4543         return (0);
4544 fail:
4545         /*
4546          * Free RX buffers allocated so far; we only handle the
4547          * rings that completed, since the failing case cleaned
4548          * up after itself. 'i' is the endpoint.
4549          */
4550         for (int j = 0; j < i; ++j) {
4551                 rxr = &adapter->rx_rings[j];
4552                 IGB_RX_LOCK(rxr);
4553                 igb_free_receive_ring(rxr);
4554                 IGB_RX_UNLOCK(rxr);
4555         }
4556
4557         return (ENOBUFS);
4558 }
4559
4560 /*
4561  * Initialise the RSS mapping for NICs that support multiple transmit/
4562  * receive rings.
4563  */
4564 static void
4565 igb_initialise_rss_mapping(struct adapter *adapter)
4566 {
4567         struct e1000_hw *hw = &adapter->hw;
4568         int i;
4569         int queue_id;
4570         u32 reta;
4571         u32 rss_key[10], mrqc, shift = 0;
4572
4573         /* XXX? */
4574         if (adapter->hw.mac.type == e1000_82575)
4575                 shift = 6;
4576
4577         /*
4578          * The redirection table controls which destination
4579          * queue each bucket redirects traffic to.
4580          * Each DWORD represents four queues, with the LSB
4581          * being the first queue in the DWORD.
4582          *
4583          * This just allocates buckets to queues using round-robin
4584          * allocation.
4585          *
4586          * NOTE: It Just Happens to line up with the default
4587          * RSS allocation method.
4588          */
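        /*
         * Worked example of the packing below, assuming 4 queues and
         * shift == 0: iterations i = 0..3 yield queue ids 0,1,2,3;
         * each is OR'd into the top byte after the dword is shifted
         * right by 8, so at i == 3 reta == 0x03020100 and is written
         * to RETA(0).  The low byte thus maps hash bucket n+0, the
         * next byte bucket n+1, and so on.
         */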
4589
4590         /* Warning FM follows */
4591         reta = 0;
4592         for (i = 0; i < 128; i++) {
4593 #ifdef  RSS
4594                 queue_id = rss_get_indirection_to_bucket(i);
4595                 /*
4596                  * If we have more queues than buckets, we'll
4597                  * end up mapping buckets to a subset of the
4598                  * queues.
4599                  *
4600                  * If we have more buckets than queues, we'll
4601                  * end up instead assigning multiple buckets
4602                  * to queues.
4603                  *
4604                  * Both are suboptimal, but we need to handle
4605                  * the case so we don't go out of bounds
4606                  * indexing arrays and such.
4607                  */
4608                 queue_id = queue_id % adapter->num_queues;
4609 #else
4610                 queue_id = (i % adapter->num_queues);
4611 #endif
4612                 /* Adjust if required */
4613                 queue_id = queue_id << shift;
4614
4615                 /*
4616                  * The low 8 bits are for hash value (n+0);
4617                  * The next 8 bits are for hash value (n+1), etc.
4618                  */
4619                 reta = reta >> 8;
4620                 reta = reta | ( ((uint32_t) queue_id) << 24);
4621                 if ((i & 3) == 3) {
4622                         E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4623                         reta = 0;
4624                 }
4625         }
4626
4627         /* Now fill in hash table */
4628
4629         /* XXX This means RSS enable plus 8 queues on an 82580. */
4630         mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4631
4632 #ifdef  RSS
4633         /* XXX ew typecasting */
4634         rss_getkey((uint8_t *) &rss_key);
4635 #else
4636         arc4rand(&rss_key, sizeof(rss_key), 0);
4637 #endif
4638         for (i = 0; i < 10; i++)
4639                 E1000_WRITE_REG_ARRAY(hw,
4640                     E1000_RSSRK(0), i, rss_key[i]);
4641
4642         /*
4643          * Configure the RSS fields to hash upon.
4644          */
4645         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4646             E1000_MRQC_RSS_FIELD_IPV4_TCP);
4647         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4648             E1000_MRQC_RSS_FIELD_IPV6_TCP);
4649         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4650             E1000_MRQC_RSS_FIELD_IPV6_UDP);
4651         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4652             E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4653
4654         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4655 }
4656
4657 /*********************************************************************
4658  *
4659  *  Enable receive unit.
4660  *
4661  **********************************************************************/
4662 static void
4663 igb_initialize_receive_units(struct adapter *adapter)
4664 {
4665         struct rx_ring  *rxr = adapter->rx_rings;
4666         struct ifnet    *ifp = adapter->ifp;
4667         struct e1000_hw *hw = &adapter->hw;
4668         u32             rctl, rxcsum, psize, srrctl = 0;
4669
4670         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4671
4672         /*
4673          * Make sure receives are disabled while setting
4674          * up the descriptor ring
4675          */
4676         rctl = E1000_READ_REG(hw, E1000_RCTL);
4677         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4678
4679         /*
4680         ** Set up for header split
4681         */
4682         if (igb_header_split) {
4683                 /* Use a standard mbuf for the header */
4684                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4685                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4686         } else
4687                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4688
4689         /*
4690         ** Set up for jumbo frames
4691         */
4692         if (ifp->if_mtu > ETHERMTU) {
4693                 rctl |= E1000_RCTL_LPE;
4694                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4695                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4696                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4697                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4698                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4699                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4700                 }
4701                 /* Set maximum packet len */
4702                 psize = adapter->max_frame_size;
4703                 /* are we on a vlan? */
4704                 if (adapter->ifp->if_vlantrunk != NULL)
4705                         psize += VLAN_TAG_SIZE;
4706                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4707         } else {
4708                 rctl &= ~E1000_RCTL_LPE;
4709                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4710                 rctl |= E1000_RCTL_SZ_2048;
4711         }
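
        /*
         * Worked example, assuming the usual 1 KB granularity of
         * SRRCTL.BSIZEPKT (a right shift of 10): 4096 >> 10 programs
         * a buffer size of 4 (4 KB), 8192 >> 10 programs 8 (8 KB),
         * and 2048 >> 10 programs 2 (2 KB).
         */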
4712
4713         /*
4714          * If TX flow control is disabled and there's >1 queue defined,
4715          * enable DROP.
4716          *
4717          * This drops frames rather than hanging the RX MAC for all queues.
4718          */
4719         if ((adapter->num_queues > 1) &&
4720             (adapter->fc == e1000_fc_none ||
4721              adapter->fc == e1000_fc_rx_pause)) {
4722                 srrctl |= E1000_SRRCTL_DROP_EN;
4723         }
4724
4725         /* Setup the Base and Length of the Rx Descriptor Rings */
4726         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4727                 u64 bus_addr = rxr->rxdma.dma_paddr;
4728                 u32 rxdctl;
4729
4730                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4731                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4732                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4733                     (uint32_t)(bus_addr >> 32));
4734                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4735                     (uint32_t)bus_addr);
4736                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4737                 /* Enable this Queue */
4738                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4739                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4740                 rxdctl &= 0xFFF00000;
4741                 rxdctl |= IGB_RX_PTHRESH;
4742                 rxdctl |= IGB_RX_HTHRESH << 8;
4743                 rxdctl |= IGB_RX_WTHRESH << 16;
4744                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4745         }
4746
4747         /*
4748         ** Setup for RX MultiQueue
4749         */
4750         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4751         if (adapter->num_queues > 1) {
4752
4753                 /* rss setup */
4754                 igb_initialise_rss_mapping(adapter);
4755
4756                 /*
4757                 ** NOTE: Receive Full-Packet Checksum Offload
4758                 ** is mutually exclusive with Multiqueue; it is
4759                 ** not the same as TCP/IP checksum offload,
4760                 ** which still works.
4761                 */
4762                 rxcsum |= E1000_RXCSUM_PCSD;
4763 #if __FreeBSD_version >= 800000
4764                 /* For SCTP Offload */
4765                 if ((hw->mac.type == e1000_82576)
4766                     && (ifp->if_capenable & IFCAP_RXCSUM))
4767                         rxcsum |= E1000_RXCSUM_CRCOFL;
4768 #endif
4769         } else {
4770                 /* Non RSS setup */
4771                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4772                         rxcsum |= E1000_RXCSUM_IPPCSE;
4773 #if __FreeBSD_version >= 800000
4774                         if (adapter->hw.mac.type == e1000_82576)
4775                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4776 #endif
4777                 } else
4778                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4779         }
4780         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4781
4782         /* Setup the Receive Control Register */
4783         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4784         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4785                    E1000_RCTL_RDMTS_HALF |
4786                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4787         /* Strip CRC bytes. */
4788         rctl |= E1000_RCTL_SECRC;
4789         /* Make sure VLAN Filters are off */
4790         rctl &= ~E1000_RCTL_VFE;
4791         /* Don't store bad packets */
4792         rctl &= ~E1000_RCTL_SBP;
4793
4794         /* Enable Receives */
4795         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4796
4797         /*
4798          * Setup the HW Rx Head and Tail Descriptor Pointers
4799          *   - needs to be after enable
4800          */
4801         for (int i = 0; i < adapter->num_queues; i++) {
4802                 rxr = &adapter->rx_rings[i];
4803                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4804 #ifdef DEV_NETMAP
4805                 /*
4806                  * an init() while a netmap client is active must
4807                  * preserve the rx buffers passed to userspace.
4808                  * In this driver it means we adjust RDT to
4809                  * something different from next_to_refresh
4810                  * (which is not used in netmap mode).
4811                  */
4812                 if (ifp->if_capenable & IFCAP_NETMAP) {
4813                         struct netmap_adapter *na = NA(adapter->ifp);
4814                         struct netmap_kring *kring = &na->rx_rings[i];
4815                         int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4816
4817                         if (t >= adapter->num_rx_desc)
4818                                 t -= adapter->num_rx_desc;
4819                         else if (t < 0)
4820                                 t += adapter->num_rx_desc;
4821                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4822                 } else
4823 #endif /* DEV_NETMAP */
4824                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4825         }
4826         return;
4827 }
4828
4829 /*********************************************************************
4830  *
4831  *  Free receive rings.
4832  *
4833  **********************************************************************/
4834 static void
4835 igb_free_receive_structures(struct adapter *adapter)
4836 {
4837         struct rx_ring *rxr = adapter->rx_rings;
4838
4839         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4840                 struct lro_ctrl *lro = &rxr->lro;
4841                 igb_free_receive_buffers(rxr);
4842                 tcp_lro_free(lro);
4843                 igb_dma_free(adapter, &rxr->rxdma);
4844         }
4845
4846         free(adapter->rx_rings, M_DEVBUF);
4847 }
4848
4849 /*********************************************************************
4850  *
4851  *  Free receive ring data structures.
4852  *
4853  **********************************************************************/
4854 static void
4855 igb_free_receive_buffers(struct rx_ring *rxr)
4856 {
4857         struct adapter          *adapter = rxr->adapter;
4858         struct igb_rx_buf       *rxbuf;
4859         int i;
4860
4861         INIT_DEBUGOUT("free_receive_structures: begin");
4862
4863         /* Cleanup any existing buffers */
4864         if (rxr->rx_buffers != NULL) {
4865                 for (i = 0; i < adapter->num_rx_desc; i++) {
4866                         rxbuf = &rxr->rx_buffers[i];
4867                         if (rxbuf->m_head != NULL) {
4868                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4869                                     BUS_DMASYNC_POSTREAD);
4870                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4871                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4872                                 m_freem(rxbuf->m_head);
4873                         }
4874                         if (rxbuf->m_pack != NULL) {
4875                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4876                                     BUS_DMASYNC_POSTREAD);
4877                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4878                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4879                                 m_freem(rxbuf->m_pack);
4880                         }
4881                         rxbuf->m_head = NULL;
4882                         rxbuf->m_pack = NULL;
4883                         if (rxbuf->hmap != NULL) {
4884                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4885                                 rxbuf->hmap = NULL;
4886                         }
4887                         if (rxbuf->pmap != NULL) {
4888                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4889                                 rxbuf->pmap = NULL;
4890                         }
4891                 }
4892                 if (rxr->rx_buffers != NULL) {
4893                         free(rxr->rx_buffers, M_DEVBUF);
4894                         rxr->rx_buffers = NULL;
4895                 }
4896         }
4897
4898         if (rxr->htag != NULL) {
4899                 bus_dma_tag_destroy(rxr->htag);
4900                 rxr->htag = NULL;
4901         }
4902         if (rxr->ptag != NULL) {
4903                 bus_dma_tag_destroy(rxr->ptag);
4904                 rxr->ptag = NULL;
4905         }
4906 }
4907
4908 static __inline void
4909 igb_rx_discard(struct rx_ring *rxr, int i)
4910 {
4911         struct igb_rx_buf       *rbuf;
4912
4913         rbuf = &rxr->rx_buffers[i];
4914
4915         /* Partially received? Free the chain */
4916         if (rxr->fmp != NULL) {
4917                 rxr->fmp->m_flags |= M_PKTHDR;
4918                 m_freem(rxr->fmp);
4919                 rxr->fmp = NULL;
4920                 rxr->lmp = NULL;
4921         }
4922
4923         /*
4924         ** With advanced descriptors the writeback
4925         ** clobbers the buffer addrs, so it's easier
4926         ** to just free the existing mbufs and take
4927         ** the normal refresh path to get new buffers
4928         ** and mappings.
4929         */
4930         if (rbuf->m_head) {
4931                 m_free(rbuf->m_head);
4932                 rbuf->m_head = NULL;
4933                 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4934         }
4935
4936         if (rbuf->m_pack) {
4937                 m_free(rbuf->m_pack);
4938                 rbuf->m_pack = NULL;
4939                 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4940         }
4941
4942         return;
4943 }
4944
4945 static __inline void
4946 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4947 {
4948
4949         /*
4950          * At the moment LRO is only for IPv4/TCP packets whose TCP
4951          * checksum has been verified by hardware, and the frame must
4952          * not carry a VLAN tag in its ethernet header.
4953          */
4954         if (rxr->lro_enabled &&
4955             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4956             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4957             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4958             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4959             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4960             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4961                 /*
4962                  * Send to the stack if:
4963                  *  - LRO is not enabled, or
4964                  *  - there are no LRO resources, or
4965                  *  - the LRO enqueue fails
4966                  */
4967                 if (rxr->lro.lro_cnt != 0)
4968                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4969                                 return;
4970         }
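        /*
         * Drop the RX lock around if_input(); the stack may re-enter
         * the driver (e.g. to transmit) and must not find it held.
         */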
4971         IGB_RX_UNLOCK(rxr);
4972         (*ifp->if_input)(ifp, m);
4973         IGB_RX_LOCK(rxr);
4974 }
4975
4976 /*********************************************************************
4977  *
4978  *  This routine executes in interrupt context. It replenishes
4979  *  the mbufs in the descriptor ring and sends data which has
4980  *  been dma'ed into host memory to the upper layer.
4981  *
4982  *  We loop at most count times if count is > 0, or until done if
4983  *  count < 0.
4984  *
4985  *  Return TRUE if more to clean, FALSE otherwise
4986  *********************************************************************/
4987 static bool
4988 igb_rxeof(struct igb_queue *que, int count, int *done)
4989 {
4990         struct adapter          *adapter = que->adapter;
4991         struct rx_ring          *rxr = que->rxr;
4992         struct ifnet            *ifp = adapter->ifp;
4993         struct lro_ctrl         *lro = &rxr->lro;
4994         struct lro_entry        *queued;
4995         int                     i, processed = 0, rxdone = 0;
4996         u32                     ptype, staterr = 0;
4997         union e1000_adv_rx_desc *cur;
4998
4999         IGB_RX_LOCK(rxr);
5000         /* Sync the ring. */
5001         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5002             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
5003
5004 #ifdef DEV_NETMAP
5005         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
5006                 IGB_RX_UNLOCK(rxr);
5007                 return (FALSE);
5008         }
5009 #endif /* DEV_NETMAP */
5010
5011         /* Main clean loop */
5012         for (i = rxr->next_to_check; count != 0;) {
5013                 struct mbuf             *sendmp, *mh, *mp;
5014                 struct igb_rx_buf       *rxbuf;
5015                 u16                     hlen, plen, hdr, vtag, pkt_info;
5016                 bool                    eop = FALSE;
5017  
5018                 cur = &rxr->rx_base[i];
5019                 staterr = le32toh(cur->wb.upper.status_error);
5020                 if ((staterr & E1000_RXD_STAT_DD) == 0)
5021                         break;
5022                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
5023                         break;
5024                 count--;
5025                 sendmp = mh = mp = NULL;
5026                 cur->wb.upper.status_error = 0;
5027                 rxbuf = &rxr->rx_buffers[i];
5028                 plen = le16toh(cur->wb.upper.length);
5029                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
5030                 if (((adapter->hw.mac.type == e1000_i350) ||
5031                     (adapter->hw.mac.type == e1000_i354)) &&
5032                     (staterr & E1000_RXDEXT_STATERR_LB))
5033                         vtag = be16toh(cur->wb.upper.vlan);
5034                 else
5035                         vtag = le16toh(cur->wb.upper.vlan);
5036                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
5037                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
5038                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
5039
5040                 /*
5041                  * Free the frame (all segments) if we're at EOP and
5042                  * it's an error.
5043                  *
5044                  * The datasheet states that EOP + status is only valid for
5045                  * the final segment in a multi-segment frame.
5046                  */
5047                 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
5048                         adapter->dropped_pkts++;
5049                         ++rxr->rx_discarded;
5050                         igb_rx_discard(rxr, i);
5051                         goto next_desc;
5052                 }
5053
5054                 /*
5055                 ** The way the hardware is configured to
5056                 ** split, it will ONLY use the header buffer
5057                 ** when header split is enabled, otherwise we
5058                 ** get normal behavior, i.e., both header and
5059                 ** payload are DMA'd into the payload buffer.
5060                 **
5061                 ** The fmp test catches the case where a packet
5062                 ** spans multiple descriptors; in that case only
5063                 ** the first header is valid.
5064                 */
5065                 if (rxr->hdr_split && rxr->fmp == NULL) {
5066                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5067                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5068                             E1000_RXDADV_HDRBUFLEN_SHIFT;
5069                         if (hlen > IGB_HDR_BUF)
5070                                 hlen = IGB_HDR_BUF;
5071                         mh = rxr->rx_buffers[i].m_head;
5072                         mh->m_len = hlen;
5073                         /* clear buf pointer for refresh */
5074                         rxbuf->m_head = NULL;
5075                         /*
5076                         ** Get the payload length, this
5077                         ** could be zero if it's a small
5078                         ** packet.
5079                         */
5080                         if (plen > 0) {
5081                                 mp = rxr->rx_buffers[i].m_pack;
5082                                 mp->m_len = plen;
5083                                 mh->m_next = mp;
5084                                 /* clear buf pointer */
5085                                 rxbuf->m_pack = NULL;
5086                                 rxr->rx_split_packets++;
5087                         }
5088                 } else {
5089                         /*
5090                         ** Either no header split, or a
5091                         ** secondary piece of a fragmented
5092                         ** split packet.
5093                         */
5094                         mh = rxr->rx_buffers[i].m_pack;
5095                         mh->m_len = plen;
5096                         /* clear buf info for refresh */
5097                         rxbuf->m_pack = NULL;
5098                 }
5099                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5100
5101                 ++processed; /* So we know when to refresh */
5102
5103                 /* Initial frame - setup */
5104                 if (rxr->fmp == NULL) {
5105                         mh->m_pkthdr.len = mh->m_len;
5106                         /* Save the head of the chain */
5107                         rxr->fmp = mh;
5108                         rxr->lmp = mh;
5109                         if (mp != NULL) {
5110                                 /* Add payload if split */
5111                                 mh->m_pkthdr.len += mp->m_len;
5112                                 rxr->lmp = mh->m_next;
5113                         }
5114                 } else {
5115                         /* Chain mbufs together */
5116                         rxr->lmp->m_next = mh;
5117                         rxr->lmp = rxr->lmp->m_next;
5118                         rxr->fmp->m_pkthdr.len += mh->m_len;
5119                 }
5120
5121                 if (eop) {
5122                         rxr->fmp->m_pkthdr.rcvif = ifp;
5123                         if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
5124                         rxr->rx_packets++;
5125                         /* capture data for AIM */
5126                         rxr->packets++;
5127                         rxr->bytes += rxr->fmp->m_pkthdr.len;
5128                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5129
5130                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5131                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
5132
5133                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5134                             (staterr & E1000_RXD_STAT_VP) != 0) {
5135                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5136                                 rxr->fmp->m_flags |= M_VLANTAG;
5137                         }
5138 #ifdef  RSS
5139                         /* XXX set flowtype once this works right */
5140                         rxr->fmp->m_pkthdr.flowid = 
5141                             le32toh(cur->wb.lower.hi_dword.rss);
5142                         switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5143                         case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5144                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV4);
5145                                 break;
5146                         case E1000_RXDADV_RSSTYPE_IPV4:
5147                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV4);
5148                                 break;
5149                         case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5150                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV6);
5151                                 break;
5152                         case E1000_RXDADV_RSSTYPE_IPV6_EX:
5153                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV6_EX);
5154                                 break;
5155                         case E1000_RXDADV_RSSTYPE_IPV6:
5156                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV6);
5157                                 break;
5158                         case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5159                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
5160                                 break;
5161
5162                         /* XXX no UDP support in RSS just yet */
5163 #ifdef notyet
5164                         case E1000_RXDADV_RSSTYPE_IPV4_UDP:
5165                         case E1000_RXDADV_RSSTYPE_IPV6_UDP:
5166                         case E1000_RXDADV_RSSTYPE_IPV6_UDP_EX:
5167 #endif
5168
5169                         default:
5170                                 /* XXX fallthrough */
5171                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5172                         }
5173 #elif !defined(IGB_LEGACY_TX)
5174                         rxr->fmp->m_pkthdr.flowid = que->msix;
5175                         M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5176 #endif
5177                         sendmp = rxr->fmp;
5178                         /* Make sure to set M_PKTHDR. */
5179                         sendmp->m_flags |= M_PKTHDR;
5180                         rxr->fmp = NULL;
5181                         rxr->lmp = NULL;
5182                 }
5183
5184 next_desc:
5185                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5186                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5187
5188                 /* Advance our pointers to the next descriptor. */
5189                 if (++i == adapter->num_rx_desc)
5190                         i = 0;
5191                 /*
5192                 ** Send to the stack or LRO
5193                 */
5194                 if (sendmp != NULL) {
5195                         rxr->next_to_check = i;
5196                         igb_rx_input(rxr, ifp, sendmp, ptype);
5197                         i = rxr->next_to_check;
5198                         rxdone++;
5199                 }
5200
5201                 /* Every 8 descriptors we go to refresh mbufs */
5202                 if (processed == 8) {
5203                         igb_refresh_mbufs(rxr, i);
5204                         processed = 0;
5205                 }
5206         }
5207
5208         /* Catch any remainders */
5209         if (igb_rx_unrefreshed(rxr))
5210                 igb_refresh_mbufs(rxr, i);
5211
5212         rxr->next_to_check = i;
5213
5214         /*
5215          * Flush any outstanding LRO work
5216          */
5217         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5218                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5219                 tcp_lro_flush(lro, queued);
5220         }
5221
5222         if (done != NULL)
5223                 *done += rxdone;
5224
5225         IGB_RX_UNLOCK(rxr);
5226         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5227 }
5228
5229 /*********************************************************************
5230  *
5231  *  Verify that the hardware indicated that the checksum is valid.
5232  *  Inform the stack about the checksum status so that the stack
5233  *  doesn't spend time verifying it again.
5234  *
5235  *********************************************************************/
5236 static void
5237 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5238 {
5239         u16 status = (u16)staterr;
5240         u8  errors = (u8) (staterr >> 24);
5241         int sctp;
5242
5243         /* Ignore Checksum bit is set */
5244         if (status & E1000_RXD_STAT_IXSM) {
5245                 mp->m_pkthdr.csum_flags = 0;
5246                 return;
5247         }
5248
5249         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5250             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5251                 sctp = 1;
5252         else
5253                 sctp = 0;
5254         if (status & E1000_RXD_STAT_IPCS) {
5255                 /* Did it pass? */
5256                 if (!(errors & E1000_RXD_ERR_IPE)) {
5257                         /* IP Checksum Good */
5258                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5259                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5260                 } else
5261                         mp->m_pkthdr.csum_flags = 0;
5262         }
5263
5264         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5265                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5266 #if __FreeBSD_version >= 800000
5267                 if (sctp) /* reassign */
5268                         type = CSUM_SCTP_VALID;
5269 #endif
5270                 /* Did it pass? */
5271                 if (!(errors & E1000_RXD_ERR_TCPE)) {
5272                         mp->m_pkthdr.csum_flags |= type;
5273                         if (sctp == 0)
5274                                 mp->m_pkthdr.csum_data = htons(0xffff);
5275                 }
5276         }
5277         return;
5278 }
5279
5280 /*
5281  * This routine is run via a vlan
5282  * config EVENT.
5283  */
5284 static void
5285 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5286 {
5287         struct adapter  *adapter = ifp->if_softc;
5288         u32             index, bit;
5289
5290         if (ifp->if_softc !=  arg)   /* Not our event */
5291                 return;
5292
5293         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5294                 return;
5295
5296         IGB_CORE_LOCK(adapter);
5297         index = (vtag >> 5) & 0x7F;
5298         bit = vtag & 0x1F;
5299         adapter->shadow_vfta[index] |= (1 << bit);
5300         ++adapter->num_vlans;
5301         /* Change hw filter setting */
5302         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5303                 igb_setup_vlan_hw_support(adapter);
5304         IGB_CORE_UNLOCK(adapter);
5305 }
5306
5307 /*
5308  * This routine is run via a vlan
5309  * unconfig EVENT.
5310  */
5311 static void
5312 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5313 {
5314         struct adapter  *adapter = ifp->if_softc;
5315         u32             index, bit;
5316
5317         if (ifp->if_softc !=  arg)
5318                 return;
5319
5320         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5321                 return;
5322
5323         IGB_CORE_LOCK(adapter);
5324         index = (vtag >> 5) & 0x7F;
5325         bit = vtag & 0x1F;
5326         adapter->shadow_vfta[index] &= ~(1 << bit);
5327         --adapter->num_vlans;
5328         /* Change hw filter setting */
5329         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5330                 igb_setup_vlan_hw_support(adapter);
5331         IGB_CORE_UNLOCK(adapter);
5332 }
5333
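/*
** A minimal sketch, plain C, of the shadow VFTA indexing used by the
** two event handlers above: each of the 4096 VLAN ids maps to one bit
** across 128 32-bit words.
*/
#ifdef notyet
static void
vfta_set_demo(u32 *vfta, u16 vtag)
{
        u32 index = (vtag >> 5) & 0x7F; /* which 32-bit word */
        u32 bit = vtag & 0x1F;          /* which bit within it */

        vfta[index] |= (1 << bit);      /* e.g. vtag 100 -> word 3, bit 4 */
}
#endif
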
5334 static void
5335 igb_setup_vlan_hw_support(struct adapter *adapter)
5336 {
5337         struct e1000_hw *hw = &adapter->hw;
5338         struct ifnet    *ifp = adapter->ifp;
5339         u32             reg;
5340
5341         if (adapter->vf_ifp) {
5342                 e1000_rlpml_set_vf(hw,
5343                     adapter->max_frame_size + VLAN_TAG_SIZE);
5344                 return;
5345         }
5346
5347         reg = E1000_READ_REG(hw, E1000_CTRL);
5348         reg |= E1000_CTRL_VME;
5349         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5350
5351         /* Enable the Filter Table */
5352         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5353                 reg = E1000_READ_REG(hw, E1000_RCTL);
5354                 reg &= ~E1000_RCTL_CFIEN;
5355                 reg |= E1000_RCTL_VFE;
5356                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5357         }
5358
5359         /* Update the frame size */
5360         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5361             adapter->max_frame_size + VLAN_TAG_SIZE);
5362
5363         /* Don't bother with table if no vlans */
5364         if ((adapter->num_vlans == 0) ||
5365             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5366                 return;
5367         /*
5368         ** A soft reset zeroes out the VFTA, so
5369         ** we need to repopulate it now.
5370         */
5371         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5372                 if (adapter->shadow_vfta[i] != 0) {
5373                         if (adapter->vf_ifp)
5374                                 e1000_vfta_set_vf(hw,
5375                                     adapter->shadow_vfta[i], TRUE);
5376                         else
5377                                 e1000_write_vfta(hw,
5378                                     i, adapter->shadow_vfta[i]);
5379                 }
5380 }
5381
5382 static void
5383 igb_enable_intr(struct adapter *adapter)
5384 {
5385         /* With MSIX, set up which interrupts to auto-clear */
5386         if (adapter->msix_mem) {
5387                 u32 mask = (adapter->que_mask | adapter->link_mask);
5388                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5389                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5390                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5391                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5392                     E1000_IMS_LSC);
5393         } else {
5394                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5395                     IMS_ENABLE_MASK);
5396         }
5397         E1000_WRITE_FLUSH(&adapter->hw);
5398
5399         return;
5400 }
5401
5402 static void
5403 igb_disable_intr(struct adapter *adapter)
5404 {
5405         if (adapter->msix_mem) {
5406                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5407                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5408         } 
5409         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5410         E1000_WRITE_FLUSH(&adapter->hw);
5411         return;
5412 }
5413
5414 /*
5415  * Bit of a misnomer: what this really means is
5416  * to enable OS management of the system, i.e.
5417  * to disable the special hardware management features.
5418  */
5419 static void
5420 igb_init_manageability(struct adapter *adapter)
5421 {
5422         if (adapter->has_manage) {
5423                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5424                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5425
5426                 /* disable hardware interception of ARP */
5427                 manc &= ~(E1000_MANC_ARP_EN);
5428
5429                 /* enable receiving management packets to the host */
5430                 manc |= E1000_MANC_EN_MNG2HOST;
5431                 manc2h |= 1 << 5;  /* Mng Port 623 */
5432                 manc2h |= 1 << 6;  /* Mng Port 664 */
5433                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5434                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5435         }
5436 }
5437
5438 /*
5439  * Give control back to hardware management
5440  * controller if there is one.
5441  */
5442 static void
5443 igb_release_manageability(struct adapter *adapter)
5444 {
5445         if (adapter->has_manage) {
5446                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5447
5448                 /* re-enable hardware interception of ARP */
5449                 manc |= E1000_MANC_ARP_EN;
5450                 manc &= ~E1000_MANC_EN_MNG2HOST;
5451
5452                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5453         }
5454 }
5455
5456 /*
5457  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5458  * For ASF and Pass Through versions of f/w this means that
5459  * the driver is loaded. 
5460  *
5461  */
5462 static void
5463 igb_get_hw_control(struct adapter *adapter)
5464 {
5465         u32 ctrl_ext;
5466
5467         if (adapter->vf_ifp)
5468                 return;
5469
5470         /* Let firmware know the driver has taken over */
5471         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5472         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5473             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5474 }
5475
5476 /*
5477  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5478  * For ASF and Pass Through versions of f/w this means that the
5479  * driver is no longer loaded.
5480  *
5481  */
5482 static void
5483 igb_release_hw_control(struct adapter *adapter)
5484 {
5485         u32 ctrl_ext;
5486
5487         if (adapter->vf_ifp)
5488                 return;
5489
5490         /* Let firmware take over control of h/w */
5491         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5492         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5493             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5494 }
5495
5496 static int
5497 igb_is_valid_ether_addr(uint8_t *addr)
5498 {
5499         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5500
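        /* Reject multicast (low bit of the first octet) and all-zero addrs */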
5501         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5502                 return (FALSE);
5503         }
5504
5505         return (TRUE);
5506 }
5507
5508
5509 /*
5510  * Enable PCI Wake On LAN capability
5511  */
5512 static void
5513 igb_enable_wakeup(device_t dev)
5514 {
5515         u16     cap, status;
5516         u8      id;
5517
5518         /* First find the capabilities pointer */
5519         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5520         /* Read the PM Capabilities */
5521         id = pci_read_config(dev, cap, 1);
5522         if (id != PCIY_PMG)     /* Something wrong */
5523                 return;
5524         /* OK, we have the power capabilities, so
5525            now get the status register */
5526         cap += PCIR_POWER_STATUS;
5527         status = pci_read_config(dev, cap, 2);
5528         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5529         pci_write_config(dev, cap, status, 2);
5530         return;
5531 }
5532
5533 static void
5534 igb_led_func(void *arg, int onoff)
5535 {
5536         struct adapter  *adapter = arg;
5537
5538         IGB_CORE_LOCK(adapter);
5539         if (onoff) {
5540                 e1000_setup_led(&adapter->hw);
5541                 e1000_led_on(&adapter->hw);
5542         } else {
5543                 e1000_led_off(&adapter->hw);
5544                 e1000_cleanup_led(&adapter->hw);
5545         }
5546         IGB_CORE_UNLOCK(adapter);
5547 }
5548
5549 static uint64_t
5550 igb_get_counter(if_t ifp, ift_counter cnt)
5551 {
5552         struct adapter *adapter;
5553         struct e1000_hw_stats *stats;
5554
5555         adapter = if_getsoftc(ifp);
5556         stats = (struct e1000_hw_stats *)adapter->stats;
5557
5558         switch (cnt) {
5559         case IFCOUNTER_IERRORS:
5560                 return (adapter->dropped_pkts + stats->rxerrc +
5561                     stats->crcerrs + stats->algnerrc +
5562                     stats->ruc + stats->roc + stats->mpc + stats->cexterr);
5563         case IFCOUNTER_OERRORS:
5564                 return (stats->ecol + stats->latecol +
5565                     adapter->watchdog_events);
5566         case IFCOUNTER_COLLISIONS:
5567                 return (stats->colc);
5568         default:
5569                 return (if_get_counter_default(ifp, cnt));
5570         }
5571 }
5572
5573 /**********************************************************************
5574  *
5575  *  Update the board statistics counters.
5576  *
5577  **********************************************************************/
5578 static void
5579 igb_update_stats_counters(struct adapter *adapter)
5580 {
5581         struct e1000_hw         *hw = &adapter->hw;
5582         struct e1000_hw_stats   *stats;
5583
5584         /*
5585         ** The virtual function adapter has only a
5586         ** small, controlled set of stats, so update
5587         ** only those and return.
5588         */
5589         if (adapter->vf_ifp) {
5590                 igb_update_vf_stats_counters(adapter);
5591                 return;
5592         }
5593
5594         stats = (struct e1000_hw_stats  *)adapter->stats;
5595
5596         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5597            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5598                 stats->symerrs +=
5599             E1000_READ_REG(hw, E1000_SYMERRS);
5600                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5601         }
5602
5603         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5604         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5605         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5606         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5607
5608         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5609         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5610         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5611         stats->dc += E1000_READ_REG(hw, E1000_DC);
5612         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5613         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5614         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5615         /*
5616         ** For watchdog management we need to know if we have been
5617         ** paused during the last interval, so capture that here.
5618         */ 
5619         adapter->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC);
5620         stats->xoffrxc += adapter->pause_frames;
5621         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5622         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5623         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5624         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5625         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5626         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5627         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5628         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5629         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5630         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5631         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5632         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5633
5634         /* For the 64-bit byte counters the low dword must be read first. */
5635         /* Both registers clear on the read of the high dword */
5636
5637         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5638             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5639         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5640             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5641
5642         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5643         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5644         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5645         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5646         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5647
5648         stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5649         stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5650         stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5651
5652         stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5653             ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5654         stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5655             ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5656
5657         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5658         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5659         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5660         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5661         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5662         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5663         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5664         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5665         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5666         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5667
5668         /* Interrupt Counts */
5669
5670         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5671         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5672         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5673         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5674         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5675         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5676         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5677         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5678         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5679
5680         /* Host to Card Statistics */
5681
5682         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5683         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5684         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5685         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5686         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5687         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5688         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5689         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5690             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5691         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5692             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5693         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5694         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5695         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5696
5697         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5698         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5699         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5700         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5701         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5702         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5703
5704         /* Driver specific counters */
5705         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5706         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5707         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5708         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5709         adapter->packet_buf_alloc_tx =
5710             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5711         adapter->packet_buf_alloc_rx =
5712             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5713 }
5714
5715
5716 /**********************************************************************
5717  *
5718  *  Initialize the VF board statistics counters.
5719  *
5720  **********************************************************************/
5721 static void
5722 igb_vf_init_stats(struct adapter *adapter)
5723 {
5724         struct e1000_hw *hw = &adapter->hw;
5725         struct e1000_vf_stats   *stats;
5726
5727         stats = (struct e1000_vf_stats  *)adapter->stats;
5728         if (stats == NULL)
5729                 return;
5730         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5731         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5732         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5733         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5734         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5735 }
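
/*
 * Seeding the last_* values above keeps the first delta taken by
 * igb_update_vf_stats_counters() from absorbing whatever the
 * free-running hardware counters held before attach.
 */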
5736  
5737 /**********************************************************************
5738  *
5739  *  Update the VF board statistics counters.
5740  *
5741  **********************************************************************/
5742 static void
5743 igb_update_vf_stats_counters(struct adapter *adapter)
5744 {
5745         struct e1000_hw *hw = &adapter->hw;
5746         struct e1000_vf_stats   *stats;
5747
5748         if (adapter->link_speed == 0)
5749                 return;
5750
5751         stats = (struct e1000_vf_stats  *)adapter->stats;
5752
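        /*
         * The UPDATE_VF_REG macro folds each free-running 32-bit VF
         * register into its 64-bit software counter, using the last
         * sampled value to detect wrap.
         */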
5753         UPDATE_VF_REG(E1000_VFGPRC,
5754             stats->last_gprc, stats->gprc);
5755         UPDATE_VF_REG(E1000_VFGORC,
5756             stats->last_gorc, stats->gorc);
5757         UPDATE_VF_REG(E1000_VFGPTC,
5758             stats->last_gptc, stats->gptc);
5759         UPDATE_VF_REG(E1000_VFGOTC,
5760             stats->last_gotc, stats->gotc);
5761         UPDATE_VF_REG(E1000_VFMPRC,
5762             stats->last_mprc, stats->mprc);
5763 }
5764
5765 /* Export a single 32-bit register via a read-only sysctl. */
5766 static int
5767 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5768 {
5769         struct adapter *adapter;
5770         u_int val;
5771
5772         adapter = oidp->oid_arg1;
5773         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5774         return (sysctl_handle_int(oidp, &val, 0, req));
5775 }
5776
5777 /*
5778 **  Tuneable interrupt rate handler
5779 */
5780 static int
5781 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5782 {
5783         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5784         int                     error;
5785         u32                     reg, usec, rate;
5786                         
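        /*
         * The interval lives in bits 14:2 of EITR; the code treats it
         * as microseconds, so e.g. an interval of 125 usec reads back
         * as a ceiling of 1000000 / 125 = 8000 interrupts/s.
         */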
5787         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5788         usec = ((reg & 0x7FFC) >> 2);
5789         if (usec > 0)
5790                 rate = 1000000 / usec;
5791         else
5792                 rate = 0;
5793         error = sysctl_handle_int(oidp, &rate, 0, req);
5794         if (error || !req->newptr)
5795                 return (error);
5796         return (0);
5797 }
5798
5799 /*
5800  * Add sysctl variables, one per statistic, to the system.
5801  */
5802 static void
5803 igb_add_hw_stats(struct adapter *adapter)
5804 {
5805         device_t dev = adapter->dev;
5806
5807         struct tx_ring *txr = adapter->tx_rings;
5808         struct rx_ring *rxr = adapter->rx_rings;
5809
5810         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5811         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5812         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5813         struct e1000_hw_stats *stats = adapter->stats;
5814
5815         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5816         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5817
5818 #define QUEUE_NAME_LEN 32
5819         char namebuf[QUEUE_NAME_LEN];
5820
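        /*
         * Everything registered below hangs off the device's sysctl
         * tree, so for unit 0 it can be inspected with e.g.:
         *
         *      sysctl dev.igb.0.dropped
         *      sysctl dev.igb.0.queue0.rx_packets
         *      sysctl dev.igb.0.mac_stats.good_pkts_recvd
         */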
5821         /* Driver Statistics */
5822         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 
5823                         CTLFLAG_RD, &adapter->link_irq,
5824                         "Link MSIX IRQ Handled");
5825         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5826                         CTLFLAG_RD, &adapter->dropped_pkts,
5827                         "Driver dropped packets");
5828         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5829                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5830                         "Driver tx dma failure in xmit");
5831         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5832                         CTLFLAG_RD, &adapter->rx_overruns,
5833                         "RX overruns");
5834         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5835                         CTLFLAG_RD, &adapter->watchdog_events,
5836                         "Watchdog timeouts");
5837
5838         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5839                         CTLFLAG_RD, &adapter->device_control,
5840                         "Device Control Register");
5841         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5842                         CTLFLAG_RD, &adapter->rx_control,
5843                         "Receiver Control Register");
5844         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5845                         CTLFLAG_RD, &adapter->int_mask,
5846                         "Interrupt Mask");
5847         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5848                         CTLFLAG_RD, &adapter->eint_mask,
5849                         "Extended Interrupt Mask");
5850         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5851                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5852                         "Transmit Buffer Packet Allocation");
5853         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5854                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5855                         "Receive Buffer Packet Allocation");
5856         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5857                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5858                         "Flow Control High Watermark");
5859         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5860                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5861                         "Flow Control Low Watermark");
5862
5863         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5864                 struct lro_ctrl *lro = &rxr->lro;
5865
5866                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5867                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5868                                             CTLFLAG_RD, NULL, "Queue Name");
5869                 queue_list = SYSCTL_CHILDREN(queue_node);
5870
5871                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5872                                 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5873                                 sizeof(adapter->queues[i]),
5874                                 igb_sysctl_interrupt_rate_handler,
5875                                 "IU", "Interrupt Rate");
5876
5877                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5878                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5879                                 igb_sysctl_reg_handler, "IU",
5880                                 "Transmit Descriptor Head");
5881                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5882                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5883                                 igb_sysctl_reg_handler, "IU",
5884                                 "Transmit Descriptor Tail");
5885                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5886                                 CTLFLAG_RD, &txr->no_desc_avail,
5887                                 "Queue Descriptors Unavailable");
5888                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5889                                 CTLFLAG_RD, &txr->total_packets,
5890                                 "Queue Packets Transmitted");
5891
5892                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5893                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5894                                 igb_sysctl_reg_handler, "IU",
5895                                 "Receive Descriptor Head");
5896                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5897                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5898                                 igb_sysctl_reg_handler, "IU",
5899                                 "Receive Descriptor Tail");
5900                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5901                                 CTLFLAG_RD, &rxr->rx_packets,
5902                                 "Queue Packets Received");
5903                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5904                                 CTLFLAG_RD, &rxr->rx_bytes,
5905                                 "Queue Bytes Received");
5906                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5907                                 CTLFLAG_RD, &lro->lro_queued, 0,
5908                                 "LRO Queued");
5909                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5910                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5911                                 "LRO Flushed");
5912         }
5913
5914         /* MAC stats get their own sub node */
5915
5916         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5917                                     CTLFLAG_RD, NULL, "MAC Statistics");
5918         stat_list = SYSCTL_CHILDREN(stat_node);
5919
5920         /*
5921         ** VF adapter has a very limited set of stats
5922         ** since it's not managing the metal, so to speak.
5923         */
5924         if (adapter->vf_ifp) {
5925                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5926                                 CTLFLAG_RD, &stats->gprc,
5927                                 "Good Packets Received");
5928                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5929                                 CTLFLAG_RD, &stats->gptc,
5930                                 "Good Packets Transmitted");
5931                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5932                                 CTLFLAG_RD, &stats->gorc,
5933                                 "Good Octets Received");
5934                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5935                                 CTLFLAG_RD, &stats->gotc,
5936                                 "Good Octets Transmitted");
5937                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5938                                 CTLFLAG_RD, &stats->mprc,
5939                                 "Multicast Packets Received");
5940                 return;
5941         }
5942
5943         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5944                         CTLFLAG_RD, &stats->ecol,
5945                         "Excessive collisions");
5946         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5947                         CTLFLAG_RD, &stats->scc,
5948                         "Single collisions");
5949         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5950                         CTLFLAG_RD, &stats->mcc,
5951                         "Multiple collisions");
5952         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5953                         CTLFLAG_RD, &stats->latecol,
5954                         "Late collisions");
5955         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5956                         CTLFLAG_RD, &stats->colc,
5957                         "Collision Count");
5958         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5959                         CTLFLAG_RD, &stats->symerrs,
5960                         "Symbol Errors");
5961         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5962                         CTLFLAG_RD, &stats->sec,
5963                         "Sequence Errors");
5964         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5965                         CTLFLAG_RD, &stats->dc,
5966                         "Defer Count");
5967         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5968                         CTLFLAG_RD, &stats->mpc,
5969                         "Missed Packets");
5970         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
5971                         CTLFLAG_RD, &stats->rlec,
5972                         "Receive Length Errors");
5973         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5974                         CTLFLAG_RD, &stats->rnbc,
5975                         "Receive No Buffers");
5976         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5977                         CTLFLAG_RD, &stats->ruc,
5978                         "Receive Undersize");
5979         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5980                         CTLFLAG_RD, &stats->rfc,
5981                         "Fragmented Packets Received");
5982         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5983                         CTLFLAG_RD, &stats->roc,
5984                         "Oversized Packets Received");
5985         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5986                         CTLFLAG_RD, &stats->rjc,
5987                         "Received Jabber");
5988         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5989                         CTLFLAG_RD, &stats->rxerrc,
5990                         "Receive Errors");
5991         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5992                         CTLFLAG_RD, &stats->crcerrs,
5993                         "CRC errors");
5994         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5995                         CTLFLAG_RD, &stats->algnerrc,
5996                         "Alignment Errors");
5997         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
5998                         CTLFLAG_RD, &stats->tncrs,
5999                         "Transmit with No CRS");
6000         /* On 82575 these are collision counts */
6001         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6002                         CTLFLAG_RD, &stats->cexterr,
6003                         "Collision/Carrier extension errors");
6004         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6005                         CTLFLAG_RD, &stats->xonrxc,
6006                         "XON Received");
6007         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6008                         CTLFLAG_RD, &stats->xontxc,
6009                         "XON Transmitted");
6010         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6011                         CTLFLAG_RD, &stats->xoffrxc,
6012                         "XOFF Received");
6013         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6014                         CTLFLAG_RD, &stats->xofftxc,
6015                         "XOFF Transmitted");
6016         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6017                         CTLFLAG_RD, &stats->fcruc,
6018                         "Unsupported Flow Control Received");
6019         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6020                         CTLFLAG_RD, &stats->mgprc,
6021                         "Management Packets Received");
6022         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6023                         CTLFLAG_RD, &stats->mgpdc,
6024                         "Management Packets Dropped");
6025         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6026                         CTLFLAG_RD, &stats->mgptc,
6027                         "Management Packets Transmitted");
6028         /* Packet Reception Stats */
6029         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6030                         CTLFLAG_RD, &stats->tpr,
6031                         "Total Packets Received");
6032         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6033                         CTLFLAG_RD, &stats->gprc,
6034                         "Good Packets Received");
6035         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6036                         CTLFLAG_RD, &stats->bprc,
6037                         "Broadcast Packets Received");
6038         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6039                         CTLFLAG_RD, &stats->mprc,
6040                         "Multicast Packets Received");
6041         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6042                         CTLFLAG_RD, &stats->prc64,
6043                         "64 byte frames received");
6044         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6045                         CTLFLAG_RD, &stats->prc127,
6046                         "65-127 byte frames received");
6047         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6048                         CTLFLAG_RD, &stats->prc255,
6049                         "128-255 byte frames received");
6050         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6051                         CTLFLAG_RD, &stats->prc511,
6052                         "256-511 byte frames received");
6053         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6054                         CTLFLAG_RD, &stats->prc1023,
6055                         "512-1023 byte frames received");
6056         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6057                         CTLFLAG_RD, &stats->prc1522,
6058                         "1024-1522 byte frames received");
6059         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
6060                         CTLFLAG_RD, &stats->gorc, 
6061                         "Good Octets Received");
6062         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", 
6063                         CTLFLAG_RD, &stats->tor, 
6064                         "Total Octets Received");
6065
6066         /* Packet Transmission Stats */
6067         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
6068                         CTLFLAG_RD, &stats->gotc, 
6069                         "Good Octets Transmitted"); 
6070         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", 
6071                         CTLFLAG_RD, &stats->tot, 
6072                         "Total Octets Transmitted");
6073         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6074                         CTLFLAG_RD, &stats->tpt,
6075                         "Total Packets Transmitted");
6076         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6077                         CTLFLAG_RD, &stats->gptc,
6078                         "Good Packets Transmitted");
6079         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6080                         CTLFLAG_RD, &stats->bptc,
6081                         "Broadcast Packets Transmitted");
6082         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6083                         CTLFLAG_RD, &stats->mptc,
6084                         "Multicast Packets Transmitted");
6085         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6086                         CTLFLAG_RD, &stats->ptc64,
6087                         "64 byte frames transmitted");
6088         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6089                         CTLFLAG_RD, &stats->ptc127,
6090                         "65-127 byte frames transmitted");
6091         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6092                         CTLFLAG_RD, &stats->ptc255,
6093                         "128-255 byte frames transmitted");
6094         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6095                         CTLFLAG_RD, &stats->ptc511,
6096                         "256-511 byte frames transmitted");
6097         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6098                         CTLFLAG_RD, &stats->ptc1023,
6099                         "512-1023 byte frames transmitted");
6100         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6101                         CTLFLAG_RD, &stats->ptc1522,
6102                         "1024-1522 byte frames transmitted");
6103         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6104                         CTLFLAG_RD, &stats->tsctc,
6105                         "TSO Contexts Transmitted");
6106         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6107                         CTLFLAG_RD, &stats->tsctfc,
6108                         "TSO Contexts Failed");
6109
6110
6111         /* Interrupt Stats */
6112
6113         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
6114                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
6115         int_list = SYSCTL_CHILDREN(int_node);
6116
6117         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6118                         CTLFLAG_RD, &stats->iac,
6119                         "Interrupt Assertion Count");
6120
6121         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6122                         CTLFLAG_RD, &stats->icrxptc,
6123                         "Interrupt Cause Rx Pkt Timer Expire Count");
6124
6125         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6126                         CTLFLAG_RD, &stats->icrxatc,
6127                         "Interrupt Cause Rx Abs Timer Expire Count");
6128
6129         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6130                         CTLFLAG_RD, &stats->ictxptc,
6131                         "Interrupt Cause Tx Pkt Timer Expire Count");
6132
6133         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6134                         CTLFLAG_RD, &stats->ictxatc,
6135                         "Interrupt Cause Tx Abs Timer Expire Count");
6136
6137         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6138                         CTLFLAG_RD, &stats->ictxqec,
6139                         "Interrupt Cause Tx Queue Empty Count");
6140
6141         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6142                         CTLFLAG_RD, &stats->ictxqmtc,
6143                         "Interrupt Cause Tx Queue Min Thresh Count");
6144
6145         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6146                         CTLFLAG_RD, &stats->icrxdmtc,
6147                         "Interrupt Cause Rx Desc Min Thresh Count");
6148
6149         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6150                         CTLFLAG_RD, &stats->icrxoc,
6151                         "Interrupt Cause Receiver Overrun Count");
6152
6153         /* Host to Card Stats */
6154
6155         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
6156                                     CTLFLAG_RD, NULL, 
6157                                     "Host to Card Statistics");
6158
6159         host_list = SYSCTL_CHILDREN(host_node);
6160
6161         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6162                         CTLFLAG_RD, &stats->cbtmpc,
6163                         "Circuit Breaker Tx Packet Count");
6164
6165         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6166                         CTLFLAG_RD, &stats->htdpmc,
6167                         "Host Transmit Discarded Packets");
6168
6169         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6170                         CTLFLAG_RD, &stats->rpthc,
6171                         "Rx Packets To Host");
6172
6173         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6174                         CTLFLAG_RD, &stats->cbrmpc,
6175                         "Circuit Breaker Rx Packet Count");
6176
6177         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6178                         CTLFLAG_RD, &stats->cbrdpc,
6179                         "Circuit Breaker Rx Dropped Count");
6180
6181         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6182                         CTLFLAG_RD, &stats->hgptc,
6183                         "Host Good Packets Tx Count");
6184
6185         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6186                         CTLFLAG_RD, &stats->htcbdpc,
6187                         "Host Tx Circuit Breaker Dropped Count");
6188
6189         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6190                         CTLFLAG_RD, &stats->hgorc,
6191                         "Host Good Octets Received Count");
6192
6193         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6194                         CTLFLAG_RD, &stats->hgotc,
6195                         "Host Good Octets Transmit Count");
6196
6197         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6198                         CTLFLAG_RD, &stats->lenerrs,
6199                         "Length Errors");
6200
6201         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6202                         CTLFLAG_RD, &stats->scvpc,
6203                         "SerDes/SGMII Code Violation Pkt Count");
6204
6205         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6206                         CTLFLAG_RD, &stats->hrmpc,
6207                         "Header Redirection Missed Packet Count");
6208 }
6209
6210
6211 /**********************************************************************
6212  *
6213  *  This routine provides a way to dump out the adapter EEPROM,
6214  *  often a useful debug/service tool. Only the first 32 words are
6215  *  dumped; the data that matters lives within that extent.
6216  *
6217  **********************************************************************/
6218 static int
6219 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6220 {
6221         struct adapter *adapter;
6222         int error;
6223         int result;
6224
6225         result = -1;
6226         error = sysctl_handle_int(oidp, &result, 0, req);
6227
6228         if (error || !req->newptr)
6229                 return (error);
6230
6231         /*
6232          * This value will cause a hex dump of the
6233          * first 32 16-bit words of the EEPROM to
6234          * the screen.
6235          */
6236         if (result == 1) {
6237                 adapter = (struct adapter *)arg1;
6238                 igb_print_nvm_info(adapter);
6239         }
6240
6241         return (error);
6242 }
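
/*
 * The OID for this handler is registered at attach time; assuming the
 * usual "nvm" name under the device tree, a dump is triggered with:
 *
 *      sysctl dev.igb.0.nvm=1
 */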
6243
6244 static void
6245 igb_print_nvm_info(struct adapter *adapter)
6246 {
6247         u16     eeprom_data;
6248         int     i, j, row = 0;
6249
6250         /* It's a bit crude, but it gets the job done */
6251         printf("\nInterface EEPROM Dump:\n");
6252         printf("Offset\n0x0000  ");
6253         for (i = 0, j = 0; i < 32; i++, j++) {
6254                 if (j == 8) { /* Make the offset block */
6255                         j = 0; ++row;
6256                         printf("\n0x00%x0  ", row);
6257                 }
6258                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6259                 printf("%04x ", eeprom_data);
6260         }
6261         printf("\n");
6262 }
6263
6264 static void
6265 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6266         const char *description, int *limit, int value)
6267 {
6268         *limit = value;
6269         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6270             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6271             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6272 }
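
/*
 * Typical use at attach time (a sketch; names follow the driver's
 * existing tunables):
 *
 *      igb_set_sysctl_value(adapter, "rx_processing_limit",
 *          "max number of rx packets to process",
 *          &adapter->rx_process_limit, igb_rx_process_limit);
 */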
6273
6274 /*
6275 ** Set flow control using sysctl:
6276 ** Flow control values:
6277 **      0 - off
6278 **      1 - rx pause
6279 **      2 - tx pause
6280 **      3 - full
6281 */
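/*
 * For example, requesting full flow control on unit 0 (assuming the
 * OID is registered as "fc" at attach):
 *
 *      sysctl dev.igb.0.fc=3
 */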
6282 static int
6283 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6284 {
6285         int             error;
6286         static int      input = 3; /* default is full */
6287         struct adapter  *adapter = (struct adapter *) arg1;
6288
6289         error = sysctl_handle_int(oidp, &input, 0, req);
6290
6291         if ((error) || (req->newptr == NULL))
6292                 return (error);
6293
6294         switch (input) {
6295                 case e1000_fc_rx_pause:
6296                 case e1000_fc_tx_pause:
6297                 case e1000_fc_full:
6298                 case e1000_fc_none:
6299                         adapter->hw.fc.requested_mode = input;
6300                         adapter->fc = input;
6301                         break;
6302                 default:
6303                         /* Do nothing */
6304                         return (error);
6305         }
6306
6307         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6308         e1000_force_mac_fc(&adapter->hw);
6309         /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6310         return (error);
6311 }
6312
6313 /*
6314 ** Manage DMA Coalesce:
6315 ** Control values:
6316 **      0/1 - off/on
6317 **      Legal timer values are:
6318 **      250, 500, and 1000-10000 in steps of 1000
6319 */
6320 static int
6321 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6322 {
6323         struct adapter *adapter = (struct adapter *) arg1;
6324         int             error;
6325
6326         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6327
6328         if ((error) || (req->newptr == NULL))
6329                 return (error);
6330
6331         switch (adapter->dmac) {
6332                 case 0:
6333                         /* Disabling */
6334                         break;
6335                 case 1: /* Just enable and use default */
6336                         adapter->dmac = 1000;
6337                         break;
6338                 case 250:
6339                 case 500:
6340                 case 1000:
6341                 case 2000:
6342                 case 3000:
6343                 case 4000:
6344                 case 5000:
6345                 case 6000:
6346                 case 7000:
6347                 case 8000:
6348                 case 9000:
6349                 case 10000:
6350                         /* Legal values - allow */
6351                         break;
6352                 default:
6353                         /* Do nothing, illegal value */
6354                         adapter->dmac = 0;
6355                         return (EINVAL);
6356         }
6357         /* Reinit the interface */
6358         igb_init(adapter);
6359         return (error);
6360 }
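
/*
 * Example (assuming the OID is registered as "dmac" at attach):
 *
 *      sysctl dev.igb.0.dmac=1         # enable; coerced to 1000
 *      sysctl dev.igb.0.dmac=250       # explicit timer value
 */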
6361
6362 /*
6363 ** Manage Energy Efficient Ethernet:
6364 ** Control values:
6365 **     0/1 - enabled/disabled
6366 */
6367 static int
6368 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6369 {
6370         struct adapter  *adapter = (struct adapter *) arg1;
6371         int             error, value;
6372
6373         value = adapter->hw.dev_spec._82575.eee_disable;
6374         error = sysctl_handle_int(oidp, &value, 0, req);
6375         if (error || req->newptr == NULL)
6376                 return (error);
6377         IGB_CORE_LOCK(adapter);
6378         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6379         igb_init_locked(adapter);
6380         IGB_CORE_UNLOCK(adapter);
6381         return (0);
6382 }
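
/*
 * Example (assuming the OID is registered as "eee_disabled" at
 * attach): re-enable EEE on unit 0 with:
 *
 *      sysctl dev.igb.0.eee_disabled=0
 */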