/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifndef IGB_LEGACY_TX
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.3.10";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to
 *  Last field stores an index into igb_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SFP,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_VF,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_IT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I211_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static bool     igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, struct tcphdr *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

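/*
 * The SYSCTL_NODE below creates the hw.igb node; all of the
 * tunables that follow attach beneath it.
 */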
static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");

/*
** AIM: Adaptive Interrupt Moderation;
** the interrupt rate is varied over time
** based on the traffic seen on that
** interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#if __FreeBSD_version >= 800000
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be DMA'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf, avoiding a cluster allocation. It is a
** very workload-dependent feature.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a CPU.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  Returns BUS_PROBE_DEFAULT on success, positive on failure.
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

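        /*
         * Walk the table; PCI_ANY_ID in the subvendor/subdevice
         * fields acts as a wildcard, so most entries match on
         * vendor/device alone.
         */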
        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

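        /*
         * The local timer callout shares the core mutex, so the
         * timer handler always runs with the core lock held.
         */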
        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. It
         * must not exceed the hardware maximum, and must be a multiple
         * of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

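        /*
         * Link defaults: autonegotiate, without waiting for completion;
         * AUTONEG_ADV_DEFAULT normally advertises the full set of
         * supported speed/duplex settings.
         */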
        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats =
                    (struct e1000_vf_stats *)malloc(sizeof \
                    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats =
                    (struct e1000_hw_stats *)malloc(sizeof \
                    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper)
                        e1000_set_eee_i350(&adapter->hw);
        }

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again,
                ** and if it fails a second time it is a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

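        /* Arm Wake-on-LAN: enable PME assertion and program the wake filters. */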
        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

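        /* Restart any transmits that were queued up while suspended. */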
        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack; always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;

        /*
        ** Which queue to use: if the mbuf carries a valid flow ID
        ** (typically set by RSS hashing on receive), use it so a
        ** flow stays on one ring; otherwise spread by current CPU.
        */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IGB_TX_TRYLOCK(txr)) {
                err = igb_mq_start_locked(ifp, txr);
                IGB_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        enq = 0;

        /*
         * Process the queue: drbr_peek() leaves the mbuf in the ring
         * until drbr_advance() commits it, so on a transmit failure
         * drbr_putback() can restore it without a re-enqueue.
         */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
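                /* 9234 bytes is the largest frame the driver will accept. */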
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
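                /* FALLTHROUGH to the shared media handler */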
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: the stack calls it through the
 *  init entry point in the network interface structure, and the
 *  driver calls it as a hw/sw initialization routine to get to a
 *  consistent state.
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

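        /* Set the VLAN EtherType register to the standard 802.1Q TPID (0x8100). */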
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo frames / packet split
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling; make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper)
                e1000_set_eee_i350(&adapter->hw);
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
        struct igb_queue *que = context;
        struct adapter *adapter = que->adapter;
        struct tx_ring *txr = que->txr;
        struct ifnet    *ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                bool    more;

                more = igb_rxeof(que, adapter->rx_process_limit, NULL);

                IGB_TX_LOCK(txr);
                igb_txeof(txr);
#ifndef IGB_LEGACY_TX
                /* Process the stack queue only if not depleted */
                if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                    !drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
                /* Do we need another? */
                if (more) {
                        taskqueue_enqueue(que->tq, &que->que_task);
                        return;
                }
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                return;
#endif
1419         /* Re-enable this interrupt */
1420         if (que->eims)
1421                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1422         else
1423                 igb_enable_intr(adapter);
1424 }
1425
1426 /* Deal with link in a sleepable context */
1427 static void
1428 igb_handle_link(void *context, int pending)
1429 {
1430         struct adapter *adapter = context;
1431
1432         IGB_CORE_LOCK(adapter);
1433         igb_handle_link_locked(adapter);
1434         IGB_CORE_UNLOCK(adapter);
1435 }
1436
1437 static void
1438 igb_handle_link_locked(struct adapter *adapter)
1439 {
1440         struct tx_ring  *txr = adapter->tx_rings;
1441         struct ifnet *ifp = adapter->ifp;
1442
1443         IGB_CORE_LOCK_ASSERT(adapter);
1444         adapter->hw.mac.get_link_status = 1;
1445         igb_update_link_status(adapter);
1446         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1447                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1448                         IGB_TX_LOCK(txr);
1449 #ifndef IGB_LEGACY_TX
1450                         /* Process the stack queue only if not depleted */
1451                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1452                             !drbr_empty(ifp, txr->br))
1453                                 igb_mq_start_locked(ifp, txr);
1454 #else
1455                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1456                                 igb_start_locked(txr, ifp);
1457 #endif
1458                         IGB_TX_UNLOCK(txr);
1459                 }
1460         }
1461 }
1462
1463 /*********************************************************************
1464  *
1465  *  MSI/Legacy Deferred
1466  *  Interrupt Service routine  
1467  *
1468  *********************************************************************/
1469 static int
1470 igb_irq_fast(void *arg)
1471 {
1472         struct adapter          *adapter = arg;
1473         struct igb_queue        *que = adapter->queues;
1474         u32                     reg_icr;
1475
1476
1477         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1478
1479         /* Hot eject?  */
1480         if (reg_icr == 0xffffffff)
1481                 return FILTER_STRAY;
1482
1483         /* Definitely not our interrupt.  */
1484         if (reg_icr == 0x0)
1485                 return FILTER_STRAY;
1486
1487         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1488                 return FILTER_STRAY;
1489
1490         /*
1491          * Mask interrupts until the taskqueue is finished running.  This is
1492          * cheap; just assume that it is needed.  This also works around the
1493          * MSI message reordering errata on certain systems.
1494          */
1495         igb_disable_intr(adapter);
1496         taskqueue_enqueue(que->tq, &que->que_task);
1497
1498         /* Link status change */
1499         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1500                 taskqueue_enqueue(que->tq, &adapter->link_task);
1501
1502         if (reg_icr & E1000_ICR_RXO)
1503                 adapter->rx_overruns++;
1504         return FILTER_HANDLED;
1505 }
1506
1507 #ifdef DEVICE_POLLING
1508 #if __FreeBSD_version >= 800000
1509 #define POLL_RETURN_COUNT(a) (a)
1510 static int
1511 #else
1512 #define POLL_RETURN_COUNT(a)
1513 static void
1514 #endif
1515 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1516 {
1517         struct adapter          *adapter = ifp->if_softc;
1518         struct igb_queue        *que;
1519         struct tx_ring          *txr;
1520         u32                     reg_icr, rx_done = 0;
1521         u32                     loop = IGB_MAX_LOOP;
1522         bool                    more;
1523
1524         IGB_CORE_LOCK(adapter);
1525         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1526                 IGB_CORE_UNLOCK(adapter);
1527                 return POLL_RETURN_COUNT(rx_done);
1528         }
1529
1530         if (cmd == POLL_AND_CHECK_STATUS) {
1531                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1532                 /* Link status change */
1533                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1534                         igb_handle_link_locked(adapter);
1535
1536                 if (reg_icr & E1000_ICR_RXO)
1537                         adapter->rx_overruns++;
1538         }
1539         IGB_CORE_UNLOCK(adapter);
1540
1541         for (int i = 0; i < adapter->num_queues; i++) {
1542                 que = &adapter->queues[i];
1543                 txr = que->txr;
1544
1545                 igb_rxeof(que, count, &rx_done);
1546
1547                 IGB_TX_LOCK(txr);
1548                 do {
1549                         more = igb_txeof(txr);
1550                 } while (loop-- && more);
1551 #ifndef IGB_LEGACY_TX
1552                 if (!drbr_empty(ifp, txr->br))
1553                         igb_mq_start_locked(ifp, txr);
1554 #else
1555                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1556                         igb_start_locked(txr, ifp);
1557 #endif
1558                 IGB_TX_UNLOCK(txr);
1559         }
1560
1561         return POLL_RETURN_COUNT(rx_done);
1562 }
1563 #endif /* DEVICE_POLLING */
1564
1565 /*********************************************************************
1566  *
1567  *  MSIX Que Interrupt Service routine
1568  *
1569  **********************************************************************/
1570 static void
1571 igb_msix_que(void *arg)
1572 {
1573         struct igb_queue *que = arg;
1574         struct adapter *adapter = que->adapter;
1575         struct ifnet   *ifp = adapter->ifp;
1576         struct tx_ring *txr = que->txr;
1577         struct rx_ring *rxr = que->rxr;
1578         u32             newitr = 0;
1579         bool            more_rx;
1580
1581         /* Ignore spurious interrupts */
1582         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1583                 return;
1584
1585         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1586         ++que->irqs;
1587
1588         IGB_TX_LOCK(txr);
1589         igb_txeof(txr);
1590 #ifndef IGB_LEGACY_TX
1591         /* Process the stack queue only if not depleted */
1592         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1593             !drbr_empty(ifp, txr->br))
1594                 igb_mq_start_locked(ifp, txr);
1595 #else
1596         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1597                 igb_start_locked(txr, ifp);
1598 #endif
1599         IGB_TX_UNLOCK(txr);
1600
1601         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1602
1603         if (adapter->enable_aim == FALSE)
1604                 goto no_calc;
1605         /*
1606         ** Do Adaptive Interrupt Moderation:
1607         **  - Write out last calculated setting
1608         **  - Calculate based on average size over
1609         **    the last interval.
1610         */
1611         if (que->eitr_setting)
1612                 E1000_WRITE_REG(&adapter->hw,
1613                     E1000_EITR(que->msix), que->eitr_setting);
1614  
1615         que->eitr_setting = 0;
1616
1617         /* Idle, do nothing */
1618         if ((txr->bytes == 0) && (rxr->bytes == 0))
1619                 goto no_calc;
1620                                 
1621         /* Use half the default if sub-gig */
1622         if (adapter->link_speed != 1000)
1623                 newitr = IGB_DEFAULT_ITR / 2;
1624         else {
1625                 if ((txr->bytes) && (txr->packets))
1626                         newitr = txr->bytes/txr->packets;
1627                 if ((rxr->bytes) && (rxr->packets))
1628                         newitr = max(newitr,
1629                             (rxr->bytes / rxr->packets));
1630                 newitr += 24; /* account for hardware frame, crc */
1631                 /* set an upper boundary */
1632                 newitr = min(newitr, 3000);
1633                 /* Be nice to the mid range */
1634                 if ((newitr > 300) && (newitr < 1200))
1635                         newitr = (newitr / 3);
1636                 else
1637                         newitr = (newitr / 2);
1638         }
1639         newitr &= 0x7FFC;  /* Mask invalid bits */
1640         if (adapter->hw.mac.type == e1000_82575)
1641                 newitr |= newitr << 16;
1642         else
1643                 newitr |= E1000_EITR_CNT_IGNR;
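             /*
             ** e.g. an average of 1024 bytes/packet at 1Gb gives
             ** 1024 + 24 = 1048; that is in the (300,1200) mid range,
             ** so 1048 / 3 = 349, and clearing the two low bits with
             ** the 0x7FFC mask yields 348.
             */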
1644                  
1645         /* save for next interrupt */
1646         que->eitr_setting = newitr;
1647
1648         /* Reset state */
1649         txr->bytes = 0;
1650         txr->packets = 0;
1651         rxr->bytes = 0;
1652         rxr->packets = 0;
1653
1654 no_calc:
1655         /* Schedule a clean task if needed */
1656         if (more_rx)
1657                 taskqueue_enqueue(que->tq, &que->que_task);
1658         else
1659                 /* Re-enable this interrupt */
1660                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1661         return;
1662 }
1663
1664
1665 /*********************************************************************
1666  *
1667  *  MSIX Link Interrupt Service routine
1668  *
1669  **********************************************************************/
1670
1671 static void
1672 igb_msix_link(void *arg)
1673 {
1674         struct adapter  *adapter = arg;
1675         u32             icr;
1676
1677         ++adapter->link_irq;
1678         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1679         if (icr & E1000_ICR_LSC)
1680                 igb_handle_link(adapter, 0);
1681
1684         /* Rearm */
1685         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1686         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1687         return;
1688 }
1689
1690
1691 /*********************************************************************
1692  *
1693  *  Media Ioctl callback
1694  *
1695  *  This routine is called whenever the user queries the status of
1696  *  the interface using ifconfig.
1697  *
1698  **********************************************************************/
1699 static void
1700 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1701 {
1702         struct adapter *adapter = ifp->if_softc;
1703
1704         INIT_DEBUGOUT("igb_media_status: begin");
1705
1706         IGB_CORE_LOCK(adapter);
1707         igb_update_link_status(adapter);
1708
1709         ifmr->ifm_status = IFM_AVALID;
1710         ifmr->ifm_active = IFM_ETHER;
1711
1712         if (!adapter->link_active) {
1713                 IGB_CORE_UNLOCK(adapter);
1714                 return;
1715         }
1716
1717         ifmr->ifm_status |= IFM_ACTIVE;
1718
1719         switch (adapter->link_speed) {
1720         case 10:
1721                 ifmr->ifm_active |= IFM_10_T;
1722                 break;
1723         case 100:
1724                 /*
1725                 ** Support for 100Mb SFPs - these are fiber,
1726                 ** but the media type appears as serdes
1727                 */
1728                 if (adapter->hw.phy.media_type ==
1729                     e1000_media_type_internal_serdes)
1730                         ifmr->ifm_active |= IFM_100_FX;
1731                 else
1732                         ifmr->ifm_active |= IFM_100_TX;
1733                 break;
1734         case 1000:
1735                 ifmr->ifm_active |= IFM_1000_T;
1736                 break;
1737         }
1738
1739         if (adapter->link_duplex == FULL_DUPLEX)
1740                 ifmr->ifm_active |= IFM_FDX;
1741         else
1742                 ifmr->ifm_active |= IFM_HDX;
1743
1744         IGB_CORE_UNLOCK(adapter);
1745 }
1746
1747 /*********************************************************************
1748  *
1749  *  Media Ioctl callback
1750  *
1751  *  This routine is called when the user changes speed/duplex using
1752  *  the media/mediaopt options with ifconfig.
1753  *
1754  **********************************************************************/
1755 static int
1756 igb_media_change(struct ifnet *ifp)
1757 {
1758         struct adapter *adapter = ifp->if_softc;
1759         struct ifmedia  *ifm = &adapter->media;
1760
1761         INIT_DEBUGOUT("igb_media_change: begin");
1762
1763         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1764                 return (EINVAL);
1765
1766         IGB_CORE_LOCK(adapter);
1767         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1768         case IFM_AUTO:
1769                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1770                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1771                 break;
1772         case IFM_1000_LX:
1773         case IFM_1000_SX:
1774         case IFM_1000_T:
1775                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1776                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1777                 break;
1778         case IFM_100_TX:
1779                 adapter->hw.mac.autoneg = FALSE;
1780                 adapter->hw.phy.autoneg_advertised = 0;
1781                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1782                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1783                 else
1784                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1785                 break;
1786         case IFM_10_T:
1787                 adapter->hw.mac.autoneg = FALSE;
1788                 adapter->hw.phy.autoneg_advertised = 0;
1789                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1790                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1791                 else
1792                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1793                 break;
1794         default:
1795                 device_printf(adapter->dev, "Unsupported media type\n");
1796         }
1797
1798         igb_init_locked(adapter);
1799         IGB_CORE_UNLOCK(adapter);
1800
1801         return (0);
1802 }
1803
1804
1805 /*********************************************************************
1806  *
1807  *  This routine maps the mbufs to Advanced TX descriptors.
1808  *  
1809  **********************************************************************/
1810 static int
1811 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1812 {
1813         struct adapter          *adapter = txr->adapter;
1814         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1815         bus_dmamap_t            map;
1816         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1817         union e1000_adv_tx_desc *txd = NULL;
1818         struct mbuf             *m_head = *m_headp;
1819         struct ether_vlan_header *eh = NULL;
1820         struct ip               *ip = NULL;
1821         struct tcphdr           *th = NULL;
1822         u32                     hdrlen, cmd_type_len, olinfo_status = 0;
1823         int                     ehdrlen, poff;
1824         int                     nsegs, i, first, last = 0;
1825         int                     error, do_tso, remap = 1;
1826
1827         /* Set basic descriptor constants */
1828         cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1829         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1830         if (m_head->m_flags & M_VLANTAG)
1831                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1832
1833 retry:
1834         m_head = *m_headp;
1835         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1836         hdrlen = ehdrlen = poff = 0;
1837
1838         /*
1839          * Intel recommends that the entire IP/TCP header reside in a single
1840          * buffer. If multiple descriptors are used to describe the IP and
1841          * TCP header, each descriptor should describe one or more
1842          * complete headers; descriptors referencing only parts of headers
1843          * are not supported. If all layer headers are not coalesced into
1844          * a single buffer, each buffer should not cross a 4KB boundary,
1845          * or be larger than the maximum read request size.
1846          * The controller also requires modifying the IP/TCP header to
1847          * make TSO work, so we first get a writable mbuf chain and then
1848          * coalesce the ethernet/IP/TCP headers into a single buffer to
1849          * meet the controller's requirements. This also simplifies
1850          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1851          */
1852         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1853                 if (do_tso || (m_head->m_next != NULL && 
1854                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1855                         if (M_WRITABLE(*m_headp) == 0) {
1856                                 m_head = m_dup(*m_headp, M_NOWAIT);
1857                                 m_freem(*m_headp);
1858                                 if (m_head == NULL) {
1859                                         *m_headp = NULL;
1860                                         return (ENOBUFS);
1861                                 }
1862                                 *m_headp = m_head;
1863                         }
1864                 }
1865                 /*
1866                  * Assume IPv4; we don't have TSO/checksum offload support
1867                  * for IPv6 yet.
1868                  */
1869                 ehdrlen = sizeof(struct ether_header);
1870                 m_head = m_pullup(m_head, ehdrlen);
1871                 if (m_head == NULL) {
1872                         *m_headp = NULL;
1873                         return (ENOBUFS);
1874                 }
1875                 eh = mtod(m_head, struct ether_vlan_header *);
1876                 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1877                         ehdrlen = sizeof(struct ether_vlan_header);
1878                         m_head = m_pullup(m_head, ehdrlen);
1879                         if (m_head == NULL) {
1880                                 *m_headp = NULL;
1881                                 return (ENOBUFS);
1882                         }
1883                 }
1884                 m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1885                 if (m_head == NULL) {
1886                         *m_headp = NULL;
1887                         return (ENOBUFS);
1888                 }
1889                 ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1890                 poff = ehdrlen + (ip->ip_hl << 2);
1891                 if (do_tso) {
1892                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1893                         if (m_head == NULL) {
1894                                 *m_headp = NULL;
1895                                 return (ENOBUFS);
1896                         }
1897                         /*
1898                          * The TCP pseudo-header checksum must not include the
1899                          * TCP payload length, so recompute the checksum here to
1900                          * match what the hardware expects, per Microsoft's
1901                          * Large Send specification.
1902                          */
1903                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1904                         th->th_sum = in_pseudo(ip->ip_src.s_addr,
1905                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1906                         /* Keep track of the full header length */
1907                         hdrlen = poff + (th->th_off << 2);
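                             /*
                             ** e.g. with no VLAN tag and no IP/TCP options:
                             ** 14 (ether) + 20 (IP) + 20 (TCP) = 54 bytes.
                             */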
1908                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1909                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1910                         if (m_head == NULL) {
1911                                 *m_headp = NULL;
1912                                 return (ENOBUFS);
1913                         }
1914                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1915                         m_head = m_pullup(m_head, poff + (th->th_off << 2));
1916                         if (m_head == NULL) {
1917                                 *m_headp = NULL;
1918                                 return (ENOBUFS);
1919                         }
1920                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1921                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1922                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1923                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1924                         if (m_head == NULL) {
1925                                 *m_headp = NULL;
1926                                 return (ENOBUFS);
1927                         }
1928                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1929                 }
1930                 *m_headp = m_head;
1931         }
1932
1933         /*
1934          * Map the packet for DMA
1935          *
1936          * Capture the first descriptor index,
1937          * this descriptor will have the index
1938          * of the EOP which is the only one that
1939          * now gets a DONE bit writeback.
1940          */
1941         first = txr->next_avail_desc;
1942         tx_buffer = &txr->tx_buffers[first];
1943         tx_buffer_mapped = tx_buffer;
1944         map = tx_buffer->map;
1945
1946         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1947             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1948
1949         /*
1950          * There are two types of errors we can (try) to handle:
1951          * - EFBIG means the mbuf chain was too long and bus_dma ran
1952          *   out of segments.  Defragment the mbuf chain and try again.
1953          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1954          *   at this point in time.  Defer sending and try again later.
1955          * All other errors, in particular EINVAL, are fatal and prevent the
1956          * mbuf chain from ever going through.  Drop it and report error.
1957          */
1958         if (error == EFBIG && remap) {
1959                 struct mbuf *m;
1960
1961                 m = m_defrag(*m_headp, M_NOWAIT);
1962                 if (m == NULL) {
1963                         adapter->mbuf_defrag_failed++;
1964                         m_freem(*m_headp);
1965                         *m_headp = NULL;
1966                         return (ENOBUFS);
1967                 }
1968                 *m_headp = m;
1969
1970                 /* Try it again, but only once */
1971                 remap = 0;
1972                 goto retry;
1973         } else if (error == ENOMEM) {
1974                 adapter->no_tx_dma_setup++;
1975                 return (error);
1976         } else if (error != 0) {
1977                 adapter->no_tx_dma_setup++;
1978                 m_freem(*m_headp);
1979                 *m_headp = NULL;
1980                 return (error);
1981         }
1982
1983         /*
1984         ** Make sure we don't overrun the ring;
1985         ** we need nsegs descriptors plus one
1986         ** for the context descriptor used for
1987         ** offloads.
1988         */
1989         if ((nsegs + 1) > (txr->tx_avail - 2)) {
1990                 txr->no_desc_avail++;
1991                 bus_dmamap_unload(txr->txtag, map);
1992                 return (ENOBUFS);
1993         }
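             /*
             ** e.g. with tx_avail = 6 and nsegs = 3: 3 + 1 = 4 is not
             ** greater than 6 - 2 = 4, so the send proceeds and two
             ** spare descriptors remain.
             */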
1994         m_head = *m_headp;
1995
1996         /* Do hardware assists:
1997          * Set up the context descriptor, used
1998          * when any hardware offload is done.
1999          * This includes CSUM, VLAN, and TSO.
2000          * It will use the first descriptor.
2001          */
2002
2003         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2004                 if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
2005                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
2006                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2007                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2008                 } else
2009                         return (ENXIO);
2010         } else if (igb_tx_ctx_setup(txr, m_head))
2011                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2012
2013         /* Calculate payload length */
2014         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
2015             << E1000_ADVTXD_PAYLEN_SHIFT);
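             /*
             ** With TSO, hdrlen covers the coalesced headers (e.g. 54
             ** bytes above) so PAYLEN is the TCP payload only; in the
             ** non-TSO case hdrlen is 0 and PAYLEN is the whole frame.
             */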
2016
2017         /* 82575 needs the queue index added */
2018         if (adapter->hw.mac.type == e1000_82575)
2019                 olinfo_status |= txr->me << 4;
2020
2021         /* Set up our transmit descriptors */
2022         i = txr->next_avail_desc;
2023         for (int j = 0; j < nsegs; j++) {
2024                 bus_size_t seg_len;
2025                 bus_addr_t seg_addr;
2026
2027                 tx_buffer = &txr->tx_buffers[i];
2028                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2029                 seg_addr = segs[j].ds_addr;
2030                 seg_len  = segs[j].ds_len;
2031
2032                 txd->read.buffer_addr = htole64(seg_addr);
2033                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2034                 txd->read.olinfo_status = htole32(olinfo_status);
2035                 last = i;
2036                 if (++i == adapter->num_tx_desc)
2037                         i = 0;
2038                 tx_buffer->m_head = NULL;
2039                 tx_buffer->next_eop = -1;
2040         }
2041
2042         txr->next_avail_desc = i;
2043         txr->tx_avail -= nsegs;
2044         tx_buffer->m_head = m_head;
2045
2046         /*
2047         ** Here we swap the map so the last descriptor,
2048         ** which gets the completion interrupt has the
2049         ** real map, and the first descriptor gets the
2050         ** unused map from this descriptor.
2051         */
2052         tx_buffer_mapped->map = tx_buffer->map;
2053         tx_buffer->map = map;
2054         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2055
2056         /*
2057          * Last Descriptor of Packet
2058          * needs End Of Packet (EOP)
2059          * and Report Status (RS)
2060          */
2061         txd->read.cmd_type_len |=
2062             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2063         /*
2064          * Keep track in the first buffer which
2065          * descriptor will be written back
2066          */
2067         tx_buffer = &txr->tx_buffers[first];
2068         tx_buffer->next_eop = last;
2069         /* Update the watchdog time early and often */
2070         txr->watchdog_time = ticks;
2071
2072         /*
2073          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2074          * that this frame is available to transmit.
2075          */
2076         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2077             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2078         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2079         ++txr->tx_packets;
2080
2081         return (0);
2082 }

2083 static void
2084 igb_set_promisc(struct adapter *adapter)
2085 {
2086         struct ifnet    *ifp = adapter->ifp;
2087         struct e1000_hw *hw = &adapter->hw;
2088         u32             reg;
2089
2090         if (adapter->vf_ifp) {
2091                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2092                 return;
2093         }
2094
2095         reg = E1000_READ_REG(hw, E1000_RCTL);
2096         if (ifp->if_flags & IFF_PROMISC) {
2097                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2098                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2099         } else if (ifp->if_flags & IFF_ALLMULTI) {
2100                 reg |= E1000_RCTL_MPE;
2101                 reg &= ~E1000_RCTL_UPE;
2102                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2103         }
2104 }
2105
2106 static void
2107 igb_disable_promisc(struct adapter *adapter)
2108 {
2109         struct e1000_hw *hw = &adapter->hw;
2110         struct ifnet    *ifp = adapter->ifp;
2111         u32             reg;
2112         int             mcnt = 0;
2113
2114         if (adapter->vf_ifp) {
2115                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2116                 return;
2117         }
2118         reg = E1000_READ_REG(hw, E1000_RCTL);
2119         reg &=  (~E1000_RCTL_UPE);
2120         if (ifp->if_flags & IFF_ALLMULTI)
2121                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2122         else {
2123                 struct  ifmultiaddr *ifma;
2124 #if __FreeBSD_version < 800000
2125                 IF_ADDR_LOCK(ifp);
2126 #else   
2127                 if_maddr_rlock(ifp);
2128 #endif
2129                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2130                         if (ifma->ifma_addr->sa_family != AF_LINK)
2131                                 continue;
2132                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2133                                 break;
2134                         mcnt++;
2135                 }
2136 #if __FreeBSD_version < 800000
2137                 IF_ADDR_UNLOCK(ifp);
2138 #else
2139                 if_maddr_runlock(ifp);
2140 #endif
2141         }
2142         /* Don't clear MPE if we are at the multicast group limit */
2143         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2144                 reg &=  (~E1000_RCTL_MPE);
2145         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2146 }
2147
2148
2149 /*********************************************************************
2150  *  Multicast Update
2151  *
2152  *  This routine is called whenever the multicast address list is updated.
2153  *
2154  **********************************************************************/
2155
2156 static void
2157 igb_set_multi(struct adapter *adapter)
2158 {
2159         struct ifnet    *ifp = adapter->ifp;
2160         struct ifmultiaddr *ifma;
2161         u32 reg_rctl = 0;
2162         u8  *mta;
2163
2164         int mcnt = 0;
2165
2166         IOCTL_DEBUGOUT("igb_set_multi: begin");
2167
2168         mta = adapter->mta;
2169         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2170             MAX_NUM_MULTICAST_ADDRESSES);
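             /*
             ** mta is a flat array of 6-byte (ETH_ADDR_LEN) entries;
             ** entry i starts at &mta[i * ETH_ADDR_LEN].
             */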
2171
2172 #if __FreeBSD_version < 800000
2173         IF_ADDR_LOCK(ifp);
2174 #else
2175         if_maddr_rlock(ifp);
2176 #endif
2177         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2178                 if (ifma->ifma_addr->sa_family != AF_LINK)
2179                         continue;
2180
2181                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2182                         break;
2183
2184                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2185                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2186                 mcnt++;
2187         }
2188 #if __FreeBSD_version < 800000
2189         IF_ADDR_UNLOCK(ifp);
2190 #else
2191         if_maddr_runlock(ifp);
2192 #endif
2193
2194         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2195                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2196                 reg_rctl |= E1000_RCTL_MPE;
2197                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2198         } else
2199                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2200 }
2201
2202
2203 /*********************************************************************
2204  *  Timer routine:
2205  *      This routine checks for link status,
2206  *      updates statistics, and does the watchdog.
2207  *
2208  **********************************************************************/
2209
2210 static void
2211 igb_local_timer(void *arg)
2212 {
2213         struct adapter          *adapter = arg;
2214         device_t                dev = adapter->dev;
2215         struct ifnet            *ifp = adapter->ifp;
2216         struct tx_ring          *txr = adapter->tx_rings;
2217         struct igb_queue        *que = adapter->queues;
2218         int                     hung = 0, busy = 0;
2219
2220
2221         IGB_CORE_LOCK_ASSERT(adapter);
2222
2223         igb_update_link_status(adapter);
2224         igb_update_stats_counters(adapter);
2225
2226         /*
2227         ** Check the status of the TX queues:
2228         **      - central, locked handling of OACTIVE
2229         **      - watchdog only if all queues show hung
2230         */
2231         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2232                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2233                     (adapter->pause_frames == 0))
2234                         ++hung;
2235                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2236                         ++busy;
2237                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2238                         taskqueue_enqueue(que->tq, &que->que_task);
2239         }
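             /*
             ** e.g. with num_queues = 4: the watchdog fires only when
             ** all four rings report HUNG, and OACTIVE is set only
             ** while all four are DEPLETED.
             */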
2240         if (hung == adapter->num_queues)
2241                 goto timeout;
2242         if (busy == adapter->num_queues)
2243                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2244         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2245             (busy < adapter->num_queues))
2246                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2247
2248         adapter->pause_frames = 0;
2249         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2250 #ifndef DEVICE_POLLING
2251         /* Schedule all queue interrupts - deadlock protection */
2252         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2253 #endif
2254         return;
2255
2256 timeout:
2257         device_printf(dev, "Watchdog timeout -- resetting\n");
2258         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2259             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2260             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2261         device_printf(dev, "TX(%d) desc avail = %d, "
2262             "Next TX to Clean = %d\n",
2263             txr->me, txr->tx_avail, txr->next_to_clean);
2264         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2265         adapter->watchdog_events++;
2266         igb_init_locked(adapter);
2267 }
2268
2269 static void
2270 igb_update_link_status(struct adapter *adapter)
2271 {
2272         struct e1000_hw         *hw = &adapter->hw;
2273         struct e1000_fc_info    *fc = &hw->fc;
2274         struct ifnet            *ifp = adapter->ifp;
2275         device_t                dev = adapter->dev;
2276         struct tx_ring          *txr = adapter->tx_rings;
2277         u32                     link_check, thstat, ctrl;
2278         char                    *flowctl = NULL;
2279
2280         link_check = thstat = ctrl = 0;
2281
2282         /* Get the cached link value or read for real */
2283         switch (hw->phy.media_type) {
2284         case e1000_media_type_copper:
2285                 if (hw->mac.get_link_status) {
2286                         /* Do the work to read phy */
2287                         e1000_check_for_link(hw);
2288                         link_check = !hw->mac.get_link_status;
2289                 } else
2290                         link_check = TRUE;
2291                 break;
2292         case e1000_media_type_fiber:
2293                 e1000_check_for_link(hw);
2294                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2295                                  E1000_STATUS_LU);
2296                 break;
2297         case e1000_media_type_internal_serdes:
2298                 e1000_check_for_link(hw);
2299                 link_check = adapter->hw.mac.serdes_has_link;
2300                 break;
2301         /* VF device is type_unknown */
2302         case e1000_media_type_unknown:
2303                 e1000_check_for_link(hw);
2304                 link_check = !hw->mac.get_link_status;
2305                 /* Fall thru */
2306         default:
2307                 break;
2308         }
2309
2310         /* Check for thermal downshift or shutdown */
2311         if (hw->mac.type == e1000_i350) {
2312                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2313                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2314         }
2315
2316         /* Get the flow control for display */
2317         switch (fc->current_mode) {
2318         case e1000_fc_rx_pause:
2319                 flowctl = "RX";
2320                 break;  
2321         case e1000_fc_tx_pause:
2322                 flowctl = "TX";
2323                 break;  
2324         case e1000_fc_full:
2325                 flowctl = "Full";
2326                 break;  
2327         case e1000_fc_none:
2328         default:
2329                 flowctl = "None";
2330                 break;  
2331         }
2332
2333         /* Now we check if a transition has happened */
2334         if (link_check && (adapter->link_active == 0)) {
2335                 e1000_get_speed_and_duplex(&adapter->hw, 
2336                     &adapter->link_speed, &adapter->link_duplex);
2337                 if (bootverbose)
2338                         device_printf(dev, "Link is up %d Mbps %s,"
2339                             " Flow Control: %s\n",
2340                             adapter->link_speed,
2341                             ((adapter->link_duplex == FULL_DUPLEX) ?
2342                             "Full Duplex" : "Half Duplex"), flowctl);
2343                 adapter->link_active = 1;
2344                 ifp->if_baudrate = adapter->link_speed * 1000000;
2345                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2346                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2347                         device_printf(dev, "Link: thermal downshift\n");
2348                 /* This can sleep */
2349                 if_link_state_change(ifp, LINK_STATE_UP);
2350         } else if (!link_check && (adapter->link_active == 1)) {
2351                 ifp->if_baudrate = adapter->link_speed = 0;
2352                 adapter->link_duplex = 0;
2353                 if (bootverbose)
2354                         device_printf(dev, "Link is Down\n");
2355                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2356                     (thstat & E1000_THSTAT_PWR_DOWN))
2357                         device_printf(dev, "Link: thermal shutdown\n");
2358                 adapter->link_active = 0;
2359                 /* This can sleep */
2360                 if_link_state_change(ifp, LINK_STATE_DOWN);
2361                 /* Reset queue state */
2362                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2363                         txr->queue_status = IGB_QUEUE_IDLE;
2364         }
2365 }
2366
2367 /*********************************************************************
2368  *
2369  *  This routine disables all traffic on the adapter by issuing a
2370  *  global reset on the MAC and deallocates TX/RX buffers.
2371  *
2372  **********************************************************************/
2373
2374 static void
2375 igb_stop(void *arg)
2376 {
2377         struct adapter  *adapter = arg;
2378         struct ifnet    *ifp = adapter->ifp;
2379         struct tx_ring *txr = adapter->tx_rings;
2380
2381         IGB_CORE_LOCK_ASSERT(adapter);
2382
2383         INIT_DEBUGOUT("igb_stop: begin");
2384
2385         igb_disable_intr(adapter);
2386
2387         callout_stop(&adapter->timer);
2388
2389         /* Tell the stack that the interface is no longer active */
2390         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2391         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2392
2393         /* Disarm watchdog timer. */
2394         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2395                 IGB_TX_LOCK(txr);
2396                 txr->queue_status = IGB_QUEUE_IDLE;
2397                 IGB_TX_UNLOCK(txr);
2398         }
2399
2400         e1000_reset_hw(&adapter->hw);
2401         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2402
2403         e1000_led_off(&adapter->hw);
2404         e1000_cleanup_led(&adapter->hw);
2405 }
2406
2407
2408 /*********************************************************************
2409  *
2410  *  Determine hardware revision.
2411  *
2412  **********************************************************************/
2413 static void
2414 igb_identify_hardware(struct adapter *adapter)
2415 {
2416         device_t dev = adapter->dev;
2417
2418         /* Make sure our PCI config space has the necessary stuff set */
2419         pci_enable_busmaster(dev);
2420         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2421
2422         /* Save off the information about this board */
2423         adapter->hw.vendor_id = pci_get_vendor(dev);
2424         adapter->hw.device_id = pci_get_device(dev);
2425         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2426         adapter->hw.subsystem_vendor_id =
2427             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2428         adapter->hw.subsystem_device_id =
2429             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2430
2431         /* Set MAC type early for PCI setup */
2432         e1000_set_mac_type(&adapter->hw);
2433
2434         /* Are we a VF device? */
2435         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2436             (adapter->hw.mac.type == e1000_vfadapt_i350))
2437                 adapter->vf_ifp = 1;
2438         else
2439                 adapter->vf_ifp = 0;
2440 }
2441
2442 static int
2443 igb_allocate_pci_resources(struct adapter *adapter)
2444 {
2445         device_t        dev = adapter->dev;
2446         int             rid;
2447
2448         rid = PCIR_BAR(0);
2449         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2450             &rid, RF_ACTIVE);
2451         if (adapter->pci_mem == NULL) {
2452                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2453                 return (ENXIO);
2454         }
2455         adapter->osdep.mem_bus_space_tag =
2456             rman_get_bustag(adapter->pci_mem);
2457         adapter->osdep.mem_bus_space_handle =
2458             rman_get_bushandle(adapter->pci_mem);
2459         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2460
2461         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2462
2463         /* This will setup either MSI/X or MSI */
2464         adapter->msix = igb_setup_msix(adapter);
2465         adapter->hw.back = &adapter->osdep;
2466
2467         return (0);
2468 }
2469
2470 /*********************************************************************
2471  *
2472  *  Setup the Legacy or MSI Interrupt handler
2473  *
2474  **********************************************************************/
2475 static int
2476 igb_allocate_legacy(struct adapter *adapter)
2477 {
2478         device_t                dev = adapter->dev;
2479         struct igb_queue        *que = adapter->queues;
2480         int                     error, rid = 0;
2481
2482         /* Turn off all interrupts */
2483         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2484
2485         /* MSI RID is 1 */
2486         if (adapter->msix == 1)
2487                 rid = 1;
2488
2489         /* We allocate a single interrupt resource */
2490         adapter->res = bus_alloc_resource_any(dev,
2491             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2492         if (adapter->res == NULL) {
2493                 device_printf(dev, "Unable to allocate bus resource: "
2494                     "interrupt\n");
2495                 return (ENXIO);
2496         }
2497
2498 #ifndef IGB_LEGACY_TX
2499         TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr);
2500 #endif
2501
2502         /*
2503          * Try allocating a fast interrupt and the associated deferred
2504          * processing contexts.
2505          */
2506         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2507         /* Make tasklet for deferred link handling */
2508         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2509         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2510             taskqueue_thread_enqueue, &que->tq);
2511         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2512             device_get_nameunit(adapter->dev));
2513         if ((error = bus_setup_intr(dev, adapter->res,
2514             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2515             adapter, &adapter->tag)) != 0) {
2516                 device_printf(dev, "Failed to register fast interrupt "
2517                             "handler: %d\n", error);
2518                 taskqueue_free(que->tq);
2519                 que->tq = NULL;
2520                 return (error);
2521         }
2522
2523         return (0);
2524 }
2525
2526
2527 /*********************************************************************
2528  *
2529  *  Setup the MSIX Queue Interrupt handlers: 
2530  *
2531  **********************************************************************/
2532 static int
2533 igb_allocate_msix(struct adapter *adapter)
2534 {
2535         device_t                dev = adapter->dev;
2536         struct igb_queue        *que = adapter->queues;
2537         int                     error, rid, vector = 0;
2538
2539         /* Be sure to start with all interrupts disabled */
2540         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2541         E1000_WRITE_FLUSH(&adapter->hw);
2542
2543         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2544                 rid = vector + 1;
2545                 que->res = bus_alloc_resource_any(dev,
2546                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2547                 if (que->res == NULL) {
2548                         device_printf(dev,
2549                             "Unable to allocate bus resource: "
2550                             "MSIX Queue Interrupt\n");
2551                         return (ENXIO);
2552                 }
2553                 error = bus_setup_intr(dev, que->res,
2554                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2555                     igb_msix_que, que, &que->tag);
2556                 if (error) {
2557                         que->res = NULL;
2558                         device_printf(dev, "Failed to register Queue handler\n");
2559                         return (error);
2560                 }
2561 #if __FreeBSD_version >= 800504
2562                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2563 #endif
2564                 que->msix = vector;
2565                 if (adapter->hw.mac.type == e1000_82575)
2566                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2567                 else
2568                         que->eims = 1 << vector;
2569                 /*
2570                 ** Bind the msix vector, and thus the
2571                 ** rings to the corresponding cpu.
2572                 */
2573                 if (adapter->num_queues > 1) {
2574                         if (igb_last_bind_cpu < 0)
2575                                 igb_last_bind_cpu = CPU_FIRST();
2576                         bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2577                         device_printf(dev,
2578                                 "Bound queue %d to cpu %d\n",
2579                                 i, igb_last_bind_cpu);
2580                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2581                 }
2582 #ifndef IGB_LEGACY_TX
2583                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2584                     que->txr);
2585 #endif
2586                 /* Make tasklet for deferred handling */
2587                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2588                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2589                     taskqueue_thread_enqueue, &que->tq);
2590                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2591                     device_get_nameunit(adapter->dev));
2592         }
2593
2594         /* And Link */
2595         rid = vector + 1;
2596         adapter->res = bus_alloc_resource_any(dev,
2597             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2598         if (adapter->res == NULL) {
2599                 device_printf(dev,
2600                     "Unable to allocate bus resource: "
2601                     "MSIX Link Interrupt\n");
2602                 return (ENXIO);
2603         }
2604         if ((error = bus_setup_intr(dev, adapter->res,
2605             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2606             igb_msix_link, adapter, &adapter->tag)) != 0) {
2607                 device_printf(dev, "Failed to register Link handler\n");
2608                 return (error);
2609         }
2610 #if __FreeBSD_version >= 800504
2611         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2612 #endif
2613         adapter->linkvec = vector;
2614
2615         return (0);
2616 }
2617
2618
2619 static void
2620 igb_configure_queues(struct adapter *adapter)
2621 {
2622         struct  e1000_hw        *hw = &adapter->hw;
2623         struct  igb_queue       *que;
2624         u32                     tmp, ivar = 0, newitr = 0;
2625
2626         /* First turn on RSS capability */
2627         if (adapter->hw.mac.type != e1000_82575)
2628                 E1000_WRITE_REG(hw, E1000_GPIE,
2629                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2630                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2631
2632         /* Turn on MSIX */
2633         switch (adapter->hw.mac.type) {
2634         case e1000_82580:
2635         case e1000_i350:
2636         case e1000_i210:
2637         case e1000_i211:
2638         case e1000_vfadapt:
2639         case e1000_vfadapt_i350:
2640                 /* RX entries */
2641                 for (int i = 0; i < adapter->num_queues; i++) {
2642                         u32 index = i >> 1;
2643                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2644                         que = &adapter->queues[i];
2645                         if (i & 1) {
2646                                 ivar &= 0xFF00FFFF;
2647                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2648                         } else {
2649                                 ivar &= 0xFFFFFF00;
2650                                 ivar |= que->msix | E1000_IVAR_VALID;
2651                         }
2652                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2653                 }
2654                 /* TX entries */
2655                 for (int i = 0; i < adapter->num_queues; i++) {
2656                         u32 index = i >> 1;
2657                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2658                         que = &adapter->queues[i];
2659                         if (i & 1) {
2660                                 ivar &= 0x00FFFFFF;
2661                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2662                         } else {
2663                                 ivar &= 0xFFFF00FF;
2664                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2665                         }
2666                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2667                         adapter->que_mask |= que->eims;
2668                 }
2669
2670                 /* And for the link interrupt */
2671                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2672                 adapter->link_mask = 1 << adapter->linkvec;
2673                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2674                 break;
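             /*
             ** For the 82580-class mapping above: each 32-bit IVAR
             ** holds four 8-bit entries and queue i uses register
             ** index i >> 1, with RX in byte 0 (even i) or byte 2
             ** (odd i) and TX in byte 1 (even) or byte 3 (odd), so
             ** e.g. queue 3 programs IVAR(1) with its RX vector in
             ** bits 23:16 and its TX vector in bits 31:24.
             */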
2675         case e1000_82576:
2676                 /* RX entries */
2677                 for (int i = 0; i < adapter->num_queues; i++) {
2678                         u32 index = i & 0x7; /* Each IVAR has two entries */
2679                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2680                         que = &adapter->queues[i];
2681                         if (i < 8) {
2682                                 ivar &= 0xFFFFFF00;
2683                                 ivar |= que->msix | E1000_IVAR_VALID;
2684                         } else {
2685                                 ivar &= 0xFF00FFFF;
2686                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2687                         }
2688                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2689                         adapter->que_mask |= que->eims;
2690                 }
2691                 /* TX entries */
2692                 for (int i = 0; i < adapter->num_queues; i++) {
2693                         u32 index = i & 0x7; /* Each IVAR has two entries */
2694                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2695                         que = &adapter->queues[i];
2696                         if (i < 8) {
2697                                 ivar &= 0xFFFF00FF;
2698                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2699                         } else {
2700                                 ivar &= 0x00FFFFFF;
2701                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2702                         }
2703                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2704                         adapter->que_mask |= que->eims;
2705                 }
2706
2707                 /* And for the link interrupt */
2708                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2709                 adapter->link_mask = 1 << adapter->linkvec;
2710                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2711                 break;
2712
2713         case e1000_82575:
2714                 /* Enable MSI-X support */
2715                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2716                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2717                 /* Auto-Mask interrupts upon ICR read. */
2718                 tmp |= E1000_CTRL_EXT_EIAME;
2719                 tmp |= E1000_CTRL_EXT_IRCA;
2720                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2721
2722                 /* Queues */
2723                 for (int i = 0; i < adapter->num_queues; i++) {
2724                         que = &adapter->queues[i];
2725                         tmp = E1000_EICR_RX_QUEUE0 << i;
2726                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2727                         que->eims = tmp;
2728                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2729                             i, que->eims);
2730                         adapter->que_mask |= que->eims;
2731                 }
2732
2733                 /* Link */
2734                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2735                     E1000_EIMS_OTHER);
2736                 adapter->link_mask |= E1000_EIMS_OTHER;
                break;
2737         default:
2738                 break;
2739         }
2740
2741         /* Set the starting interrupt rate */
2742         if (igb_max_interrupt_rate > 0)
2743                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
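             /*
             ** e.g. a cap of 8000 interrupts/sec yields
             ** 4000000 / 8000 = 500, which the 0x7FFC mask leaves
             ** unchanged.
             */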
2744
2745         if (hw->mac.type == e1000_82575)
2746                 newitr |= newitr << 16;
2747         else
2748                 newitr |= E1000_EITR_CNT_IGNR;
2749
2750         for (int i = 0; i < adapter->num_queues; i++) {
2751                 que = &adapter->queues[i];
2752                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2753         }
2754
2755         return;
2756 }
2757
2758
2759 static void
2760 igb_free_pci_resources(struct adapter *adapter)
2761 {
2762         struct          igb_queue *que = adapter->queues;
2763         device_t        dev = adapter->dev;
2764         int             rid;
2765
2766         /*
2767         ** There is a slight possibility of a failure mode
2768         ** in attach that will result in entering this function
2769         ** before interrupt resources have been initialized, and
2770         ** in that case we do not want to execute the loops below.
2771         ** We can detect this reliably by the state of the adapter's
2772         ** res pointer.
2773         */
2774         if (adapter->res == NULL)
2775                 goto mem;
2776
2777         /*
2778          * First release all the interrupt resources:
2779          */
2780         for (int i = 0; i < adapter->num_queues; i++, que++) {
2781                 rid = que->msix + 1;
2782                 if (que->tag != NULL) {
2783                         bus_teardown_intr(dev, que->res, que->tag);
2784                         que->tag = NULL;
2785                 }
2786                 if (que->res != NULL)
2787                         bus_release_resource(dev,
2788                             SYS_RES_IRQ, rid, que->res);
2789         }
2790
2791         /* Clean the Legacy or Link interrupt last */
2792         if (adapter->linkvec) /* we are doing MSIX */
2793                 rid = adapter->linkvec + 1;
2794         else
2795                 rid = (adapter->msix != 0) ? 1 : 0;
2796
2797         que = adapter->queues;
2798         if (adapter->tag != NULL) {
2799                 taskqueue_drain(que->tq, &adapter->link_task);
2800                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2801                 adapter->tag = NULL;
2802         }
2803         if (adapter->res != NULL)
2804                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2805
2806         for (int i = 0; i < adapter->num_queues; i++, que++) {
2807                 if (que->tq != NULL) {
2808 #ifndef IGB_LEGACY_TX
2809                         taskqueue_drain(que->tq, &que->txr->txq_task);
2810 #endif
2811                         taskqueue_drain(que->tq, &que->que_task);
2812                         taskqueue_free(que->tq);
2813                 }
2814         }
2815 mem:
2816         if (adapter->msix)
2817                 pci_release_msi(dev);
2818
2819         if (adapter->msix_mem != NULL)
2820                 bus_release_resource(dev, SYS_RES_MEMORY,
2821                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2822
2823         if (adapter->pci_mem != NULL)
2824                 bus_release_resource(dev, SYS_RES_MEMORY,
2825                     PCIR_BAR(0), adapter->pci_mem);
2826
2827 }
2828
2829 /*
2830  * Set up either MSI-X or MSI
2831  */
2832 static int
2833 igb_setup_msix(struct adapter *adapter)
2834 {
2835         device_t dev = adapter->dev;
2836         int rid, want, queues, msgs, maxqueues;
2837
2838         /* tuneable override */
2839         if (igb_enable_msix == 0)
2840                 goto msi;
2841
2842         /* First try MSI/X */
2843         msgs = pci_msix_count(dev); 
2844         if (msgs == 0)
2845                 goto msi;
2846         rid = PCIR_BAR(IGB_MSIX_BAR);
2847         adapter->msix_mem = bus_alloc_resource_any(dev,
2848             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2849         if (adapter->msix_mem == NULL) {
2850                 /* May not be enabled */
2851                 device_printf(adapter->dev,
2852                     "Unable to map MSIX table \n");
2853                 goto msi;
2854         }
2855
2856         /* Figure out a reasonable auto config value */
2857         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2858
2859         /* Manual override */
2860         if (igb_num_queues != 0)
2861                 queues = igb_num_queues;
2862
2863         /* Sanity check based on HW */
2864         switch (adapter->hw.mac.type) {
2865                 case e1000_82575:
2866                         maxqueues = 4;
2867                         break;
2868                 case e1000_82576:
2869                 case e1000_82580:
2870                 case e1000_i350:
2871                         maxqueues = 8;
2872                         break;
2873                 case e1000_i210:
2874                         maxqueues = 4;
2875                         break;
2876                 case e1000_i211:
2877                         maxqueues = 2;
2878                         break;
2879                 default:  /* VF interfaces */
2880                         maxqueues = 1;
2881                         break;
2882         }
2883         if (queues > maxqueues)
2884                 queues = maxqueues;
2885
2886         /* reflect correct sysctl value */
2887         igb_num_queues = queues;
2888
2889         /*
2890         ** One vector (RX/TX pair) per queue
2891         ** plus an additional one for the link interrupt
2892         */
2893         want = queues + 1;
2894         if (msgs >= want)
2895                 msgs = want;
2896         else {
2897                 device_printf(adapter->dev,
2898                     "MSIX Configuration Problem, "
2899                     "%d vectors configured, but %d queues wanted!\n",
2900                     msgs, want);
2901                 goto msi;
2902         }
2903         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2904                 device_printf(adapter->dev,
2905                     "Using MSIX interrupts with %d vectors\n", msgs);
2906                 adapter->num_queues = queues;
2907                 return (msgs);
2908         }
2909         /*
2910         ** If MSIX alloc failed or provided us with
2911         ** less than needed, free and fall through to MSI
2912         */
2913         pci_release_msi(dev);
2914
2915 msi:
2916         if (adapter->msix_mem != NULL) {
2917                 bus_release_resource(dev, SYS_RES_MEMORY,
2918                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2919                 adapter->msix_mem = NULL;
2920         }
2921         msgs = 1;
2922         if (pci_alloc_msi(dev, &msgs) == 0) {
2923                 device_printf(adapter->dev," Using an MSI interrupt\n");
2924                 return (msgs);
2925         }
2926         device_printf(adapter->dev," Using a Legacy interrupt\n");
2927         return (0);
2928 }
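/*
 * The vector budget in igb_setup_msix() is one RX/TX queue pair per
 * vector plus one extra vector for the link interrupt.  A condensed
 * sketch of the sizing rule above (illustrative only, using the same
 * names as the function):
 *
 *	queues = (mp_ncpus > (msgs - 1)) ? (msgs - 1) : mp_ncpus;
 *	if (igb_num_queues != 0)	/* manual override */
 *		queues = igb_num_queues;
 *	if (queues > maxqueues)		/* per-MAC hardware limit */
 *		queues = maxqueues;
 *	want = queues + 1;		/* + 1 for the link vector */
 *
 * E.g. a 4-core machine on an 82576 (maxqueues = 8) with enough MSI-X
 * messages available asks for 4 + 1 = 5 vectors.
 */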
2929
2930 /*********************************************************************
2931  *
2932  *  Set up a fresh starting state
2933  *
2934  **********************************************************************/
2935 static void
2936 igb_reset(struct adapter *adapter)
2937 {
2938         device_t        dev = adapter->dev;
2939         struct e1000_hw *hw = &adapter->hw;
2940         struct e1000_fc_info *fc = &hw->fc;
2941         struct ifnet    *ifp = adapter->ifp;
2942         u32             pba = 0;
2943         u16             hwm;
2944
2945         INIT_DEBUGOUT("igb_reset: begin");
2946
2947         /* Let the firmware know the OS is in control */
2948         igb_get_hw_control(adapter);
2949
2950         /*
2951          * Packet Buffer Allocation (PBA)
2952          * Writing PBA sets the receive portion of the buffer;
2953          * the remainder is used for the transmit buffer.
2954          */
2955         switch (hw->mac.type) {
2956         case e1000_82575:
2957                 pba = E1000_PBA_32K;
2958                 break;
2959         case e1000_82576:
2960         case e1000_vfadapt:
2961                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2962                 pba &= E1000_RXPBS_SIZE_MASK_82576;
2963                 break;
2964         case e1000_82580:
2965         case e1000_i350:
2966         case e1000_vfadapt_i350:
2967                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2968                 pba = e1000_rxpbs_adjust_82580(pba);
2969                 break;
2970         case e1000_i210:
2971         case e1000_i211:
2972                 pba = E1000_PBA_34K;
2973                 break;
2974         default: break;
2975         }
2976
2977         /* Special needs in case of Jumbo frames */
2978         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2979                 u32 tx_space, min_tx, min_rx;
2980                 pba = E1000_READ_REG(hw, E1000_PBA);
2981                 tx_space = pba >> 16;
2982                 pba &= 0xffff;
2983                 min_tx = (adapter->max_frame_size +
2984                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2985                 min_tx = roundup2(min_tx, 1024);
2986                 min_tx >>= 10;
2987                 min_rx = adapter->max_frame_size;
2988                 min_rx = roundup2(min_rx, 1024);
2989                 min_rx >>= 10;
2990                 if (tx_space < min_tx &&
2991                     ((min_tx - tx_space) < pba)) {
2992                         pba = pba - (min_tx - tx_space);
2993                         /*
2994                          * if short on rx space, rx wins
2995                          * and must trump tx adjustment
2996                          */
2997                         if (pba < min_rx)
2998                                 pba = min_rx;
2999                 }
3000                 E1000_WRITE_REG(hw, E1000_PBA, pba);
3001         }
3002
3003         INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3004
3005         /*
3006          * These parameters control the automatic generation (Tx) and
3007          * response (Rx) to Ethernet PAUSE frames.
3008          * - High water mark should allow for at least two frames to be
3009          *   received after sending an XOFF.
3010          * - Low water mark works best when it is very near the high water mark.
3011          *   This allows the receiver to restart by sending XON when it has
3012          *   drained a bit.
3013          */
3014         hwm = min(((pba << 10) * 9 / 10),
3015             ((pba << 10) - 2 * adapter->max_frame_size));
3016
3017         if (hw->mac.type < e1000_82576) {
3018                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3019                 fc->low_water = fc->high_water - 8;
3020         } else {
3021                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3022                 fc->low_water = fc->high_water - 16;
3023         }
3024
3025         fc->pause_time = IGB_FC_PAUSE_TIME;
3026         fc->send_xon = TRUE;
3027         if (adapter->fc)
3028                 fc->requested_mode = adapter->fc;
3029         else
3030                 fc->requested_mode = e1000_fc_default;
3031
3032         /* Issue a global reset */
3033         e1000_reset_hw(hw);
3034         E1000_WRITE_REG(hw, E1000_WUC, 0);
3035
3036         if (e1000_init_hw(hw) < 0)
3037                 device_printf(dev, "Hardware Initialization Failed\n");
3038
3039         /* Setup DMA Coalescing */
3040         if ((hw->mac.type > e1000_82580) &&
3041             (hw->mac.type != e1000_i211)) {
3042                 u32 dmac;
3043                 u32 reg = ~E1000_DMACR_DMAC_EN;
3044
3045                 if (adapter->dmac == 0) { /* Disabling it */
3046                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
3047                         goto reset_out;
3048                 }
3049
3050                 /* Set starting thresholds */
3051                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
3052                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3053
3054                 hwm = 64 * pba - adapter->max_frame_size / 16;
3055                 if (hwm < 64 * (pba - 6))
3056                         hwm = 64 * (pba - 6);
3057                 reg = E1000_READ_REG(hw, E1000_FCRTC);
3058                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
3059                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
3060                     & E1000_FCRTC_RTH_COAL_MASK);
3061                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3062
3063
3064                 dmac = pba - adapter->max_frame_size / 512;
3065                 if (dmac < pba - 10)
3066                         dmac = pba - 10;
3067                 reg = E1000_READ_REG(hw, E1000_DMACR);
3068                 reg &= ~E1000_DMACR_DMACTHR_MASK;
3069                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3070                     & E1000_DMACR_DMACTHR_MASK);
3071                 /* transition to L0s or L1 if available */
3072                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3073                 /* timer = value in adapter->dmac in 32usec intervals */
3074                 reg |= (adapter->dmac >> 5);
3075                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
3076
3077                 /* Set the interval before transition */
3078                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
3079                 reg |= 0x80000004;
3080                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3081
3082                 /* free space in tx packet buffer to wake from DMA coal */
3083                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
3084                     (20480 - (2 * adapter->max_frame_size)) >> 6);
3085
3086                 /* make low power state decision controlled by DMA coal */
3087                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3088                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3089                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3090                 device_printf(dev, "DMA Coalescing enabled\n");
3091
3092         } else if (hw->mac.type == e1000_82580) {
3093                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3094                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3095                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3096                     reg & ~E1000_PCIEMISC_LX_DECISION);
3097         }
3098
3099 reset_out:
3100         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3101         e1000_get_phy_info(hw);
3102         e1000_check_for_link(hw);
3103         return;
3104 }
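/*
 * A worked example of the flow-control watermarks set in igb_reset()
 * (illustrative arithmetic only): with pba = 34 (KB) and a 1518-byte
 * max frame,
 *
 *	hwm = min(34816 * 9 / 10, 34816 - 2 * 1518)
 *	    = min(31334, 31780) = 31334
 *
 * so on 82576 and later fc->high_water = 31334 & 0xFFF0 = 31328 and
 * fc->low_water = 31328 - 16 = 31312: XOFF goes out while at least two
 * more full frames still fit, and XON follows once a little room has
 * drained, per the comment above the calculation.
 */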
3105
3106 /*********************************************************************
3107  *
3108  *  Set up the network device structure and register an interface.
3109  *
3110  **********************************************************************/
3111 static int
3112 igb_setup_interface(device_t dev, struct adapter *adapter)
3113 {
3114         struct ifnet   *ifp;
3115
3116         INIT_DEBUGOUT("igb_setup_interface: begin");
3117
3118         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3119         if (ifp == NULL) {
3120                 device_printf(dev, "can not allocate ifnet structure\n");
3121                 return (-1);
3122         }
3123         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3124         ifp->if_init =  igb_init;
3125         ifp->if_softc = adapter;
3126         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3127         ifp->if_ioctl = igb_ioctl;
3128 #ifndef IGB_LEGACY_TX
3129         ifp->if_transmit = igb_mq_start;
3130         ifp->if_qflush = igb_qflush;
3131 #else
3132         ifp->if_start = igb_start;
3133         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3134         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3135         IFQ_SET_READY(&ifp->if_snd);
3136 #endif
3137
3138         ether_ifattach(ifp, adapter->hw.mac.addr);
3139
3140         ifp->if_capabilities = ifp->if_capenable = 0;
3141
3142         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3143         ifp->if_capabilities |= IFCAP_TSO4;
3144         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3145         ifp->if_capenable = ifp->if_capabilities;
3146
3147         /* Advertise LRO capability, but leave it disabled by default */
3148         ifp->if_capabilities |= IFCAP_LRO;
3149
3150 #ifdef DEVICE_POLLING
3151         ifp->if_capabilities |= IFCAP_POLLING;
3152 #endif
3153
3154         /*
3155          * Tell the upper layer(s) we
3156          * support full VLAN capability.
3157          */
3158         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3159         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3160                              |  IFCAP_VLAN_HWTSO
3161                              |  IFCAP_VLAN_MTU;
3162         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3163                           |  IFCAP_VLAN_HWTSO
3164                           |  IFCAP_VLAN_MTU;
3165
3166         /*
3167         ** Don't turn this on by default: if vlans are
3168         ** created on another pseudo device (e.g. lagg)
3169         ** then vlan events are not passed thru, breaking
3170         ** operation, whereas with HW FILTER off it works.
3171         ** If using vlans directly on the igb driver you
3172         ** can enable this and get full hardware tag filtering.
3173         */
3174         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3175
3176         /*
3177          * Specify the media types supported by this adapter and register
3178          * callbacks to update media and link information
3179          */
3180         ifmedia_init(&adapter->media, IFM_IMASK,
3181             igb_media_change, igb_media_status);
3182         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3183             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3184                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3185                             0, NULL);
3186                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3187         } else {
3188                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3189                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3190                             0, NULL);
3191                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3192                             0, NULL);
3193                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3194                             0, NULL);
3195                 if (adapter->hw.phy.type != e1000_phy_ife) {
3196                         ifmedia_add(&adapter->media,
3197                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3198                         ifmedia_add(&adapter->media,
3199                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3200                 }
3201         }
3202         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3203         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3204         return (0);
3205 }
3206
3207
3208 /*
3209  * Manage DMA'able memory.
3210  */
3211 static void
3212 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3213 {
3214         if (error)
3215                 return;
3216         *(bus_addr_t *) arg = segs[0].ds_addr;
3217 }
3218
3219 static int
3220 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3221         struct igb_dma_alloc *dma, int mapflags)
3222 {
3223         int error;
3224
3225         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3226                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3227                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3228                                 BUS_SPACE_MAXADDR,      /* highaddr */
3229                                 NULL, NULL,             /* filter, filterarg */
3230                                 size,                   /* maxsize */
3231                                 1,                      /* nsegments */
3232                                 size,                   /* maxsegsize */
3233                                 0,                      /* flags */
3234                                 NULL,                   /* lockfunc */
3235                                 NULL,                   /* lockarg */
3236                                 &dma->dma_tag);
3237         if (error) {
3238                 device_printf(adapter->dev,
3239                     "%s: bus_dma_tag_create failed: %d\n",
3240                     __func__, error);
3241                 goto fail_0;
3242         }
3243
3244         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3245             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3246         if (error) {
3247                 device_printf(adapter->dev,
3248                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3249                     __func__, (uintmax_t)size, error);
3250                 goto fail_2;
3251         }
3252
3253         dma->dma_paddr = 0;
3254         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3255             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3256         if (error || dma->dma_paddr == 0) {
3257                 device_printf(adapter->dev,
3258                     "%s: bus_dmamap_load failed: %d\n",
3259                     __func__, error);
3260                 goto fail_3;
3261         }
3262
3263         return (0);
3264
3265 fail_3:
3266         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3267         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3268 fail_2:
3269         bus_dma_tag_destroy(dma->dma_tag);
3270 fail_0:
3271         dma->dma_map = NULL;
3272         dma->dma_tag = NULL;
3273
3274         return (error);
3275 }
3276
3277 static void
3278 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3279 {
3280         if (dma->dma_tag == NULL)
3281                 return;
3282         if (dma->dma_map != NULL) {
3283                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3284                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3285                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3286                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3287                 dma->dma_map = NULL;
3288         }
3289         bus_dma_tag_destroy(dma->dma_tag);
3290         dma->dma_tag = NULL;
3291 }
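/*
 * Typical use of the pair above (a minimal sketch; the 4096 size is
 * illustrative, not something the driver allocates).  Note the bus
 * address comes back through igb_dmamap_cb, since bus_dmamap_load()
 * reports the segment list via callback rather than a return value:
 *
 *	struct igb_dma_alloc dma;
 *
 *	if (igb_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT) == 0) {
 *		/* dma.dma_vaddr = kernel VA, dma.dma_paddr = bus addr */
 *		...
 *		igb_dma_free(adapter, &dma);
 *	}
 */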
3292
3293
3294 /*********************************************************************
3295  *
3296  *  Allocate memory for the transmit and receive rings, and then
3297  *  the descriptors associated with each, called only once at attach.
3298  *
3299  **********************************************************************/
3300 static int
3301 igb_allocate_queues(struct adapter *adapter)
3302 {
3303         device_t dev = adapter->dev;
3304         struct igb_queue        *que = NULL;
3305         struct tx_ring          *txr = NULL;
3306         struct rx_ring          *rxr = NULL;
3307         int rsize, tsize, error = E1000_SUCCESS;
3308         int txconf = 0, rxconf = 0;
3309
3310         /* First allocate the top level queue structs */
3311         if (!(adapter->queues =
3312             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3313             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3314                 device_printf(dev, "Unable to allocate queue memory\n");
3315                 error = ENOMEM;
3316                 goto fail;
3317         }
3318
3319         /* Next allocate the TX ring struct memory */
3320         if (!(adapter->tx_rings =
3321             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3322             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3323                 device_printf(dev, "Unable to allocate TX ring memory\n");
3324                 error = ENOMEM;
3325                 goto tx_fail;
3326         }
3327
3328         /* Now allocate the RX */
3329         if (!(adapter->rx_rings =
3330             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3331             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3332                 device_printf(dev, "Unable to allocate RX ring memory\n");
3333                 error = ENOMEM;
3334                 goto rx_fail;
3335         }
3336
3337         tsize = roundup2(adapter->num_tx_desc *
3338             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3339         /*
3340          * Now set up the TX queues; txconf is needed to handle the
3341          * possibility that things fail midcourse and we need to
3342          * undo the memory allocations gracefully.
3343          */
3344         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3345                 /* Set up some basics */
3346                 txr = &adapter->tx_rings[i];
3347                 txr->adapter = adapter;
3348                 txr->me = i;
3349
3350                 /* Initialize the TX lock */
3351                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3352                     device_get_nameunit(dev), txr->me);
3353                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3354
3355                 if (igb_dma_malloc(adapter, tsize,
3356                         &txr->txdma, BUS_DMA_NOWAIT)) {
3357                         device_printf(dev,
3358                             "Unable to allocate TX Descriptor memory\n");
3359                         error = ENOMEM;
3360                         goto err_tx_desc;
3361                 }
3362                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3363                 bzero((void *)txr->tx_base, tsize);
3364
3365                 /* Now allocate transmit buffers for the ring */
3366                 if (igb_allocate_transmit_buffers(txr)) {
3367                         device_printf(dev,
3368                             "Critical Failure setting up transmit buffers\n");
3369                         error = ENOMEM;
3370                         goto err_tx_desc;
3371                 }
3372 #ifndef IGB_LEGACY_TX
3373                 /* Allocate a buf ring */
3374                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3375                     M_WAITOK, &txr->tx_mtx);
3376 #endif
3377         }
3378
3379         /*
3380          * Next the RX queues...
3381          */ 
3382         rsize = roundup2(adapter->num_rx_desc *
3383             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3384         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3385                 rxr = &adapter->rx_rings[i];
3386                 rxr->adapter = adapter;
3387                 rxr->me = i;
3388
3389                 /* Initialize the RX lock */
3390                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3391                     device_get_nameunit(dev), rxr->me);
3392                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3393
3394                 if (igb_dma_malloc(adapter, rsize,
3395                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3396                         device_printf(dev,
3397                             "Unable to allocate RxDescriptor memory\n");
3398                         error = ENOMEM;
3399                         goto err_rx_desc;
3400                 }
3401                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3402                 bzero((void *)rxr->rx_base, rsize);
3403
3404                 /* Allocate receive buffers for the ring */
3405                 if (igb_allocate_receive_buffers(rxr)) {
3406                         device_printf(dev,
3407                             "Critical Failure setting up receive buffers\n");
3408                         error = ENOMEM;
3409                         goto err_rx_desc;
3410                 }
3411         }
3412
3413         /*
3414         ** Finally set up the queue holding structs
3415         */
3416         for (int i = 0; i < adapter->num_queues; i++) {
3417                 que = &adapter->queues[i];
3418                 que->adapter = adapter;
3419                 que->txr = &adapter->tx_rings[i];
3420                 que->rxr = &adapter->rx_rings[i];
3421         }
3422
3423         return (0);
3424
3425 err_rx_desc:
3426         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3427                 igb_dma_free(adapter, &rxr->rxdma);
3428 err_tx_desc:
3429         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3430                 igb_dma_free(adapter, &txr->txdma);
3431         free(adapter->rx_rings, M_DEVBUF);
3432 rx_fail:
3433 #ifndef IGB_LEGACY_TX
3434         buf_ring_free(txr->br, M_DEVBUF);
3435 #endif
3436         free(adapter->tx_rings, M_DEVBUF);
3437 tx_fail:
3438         free(adapter->queues, M_DEVBUF);
3439 fail:
3440         return (error);
3441 }
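/*
 * The txconf/rxconf counters above drive a classic cascading unwind:
 * each error label frees only what was fully set up before the failure
 * point and then falls through into the earlier labels.  The generic
 * shape of the idiom (illustrative sketch; setup/teardown are
 * hypothetical stand-ins):
 *
 *	for (i = 0, conf = 0; i < n; i++, conf++)
 *		if (setup(i) != 0)
 *			goto err;
 *	return (0);
 * err:
 *	while (conf-- > 0)
 *		teardown(conf);
 *	return (ENOMEM);
 */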
3442
3443 /*********************************************************************
3444  *
3445  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3446  *  the information needed to transmit a packet on the wire. This is
3447  *  called only once at attach; setup is done on every reset.
3448  *
3449  **********************************************************************/
3450 static int
3451 igb_allocate_transmit_buffers(struct tx_ring *txr)
3452 {
3453         struct adapter *adapter = txr->adapter;
3454         device_t dev = adapter->dev;
3455         struct igb_tx_buffer *txbuf;
3456         int error, i;
3457
3458         /*
3459          * Setup DMA descriptor areas.
3460          */
3461         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3462                                1, 0,                    /* alignment, bounds */
3463                                BUS_SPACE_MAXADDR,       /* lowaddr */
3464                                BUS_SPACE_MAXADDR,       /* highaddr */
3465                                NULL, NULL,              /* filter, filterarg */
3466                                IGB_TSO_SIZE,            /* maxsize */
3467                                IGB_MAX_SCATTER,         /* nsegments */
3468                                PAGE_SIZE,               /* maxsegsize */
3469                                0,                       /* flags */
3470                                NULL,                    /* lockfunc */
3471                                NULL,                    /* lockfuncarg */
3472                                &txr->txtag))) {
3473                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3474                 goto fail;
3475         }
3476
3477         if (!(txr->tx_buffers =
3478             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3479             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3480                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3481                 error = ENOMEM;
3482                 goto fail;
3483         }
3484
3485         /* Create the descriptor buffer dma maps */
3486         txbuf = txr->tx_buffers;
3487         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3488                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3489                 if (error != 0) {
3490                         device_printf(dev, "Unable to create TX DMA map\n");
3491                         goto fail;
3492                 }
3493         }
3494
3495         return 0;
3496 fail:
3497         /* We free everything; this handles the case where we fail midway */
3498         igb_free_transmit_structures(adapter);
3499         return (error);
3500 }
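/*
 * The TX tag created above bounds a single mapping to IGB_TSO_SIZE
 * bytes in at most IGB_MAX_SCATTER segments of up to PAGE_SIZE each;
 * each outgoing packet is later mapped against it.  A minimal sketch,
 * assuming an already-built mbuf chain 'm' (illustrative only):
 *
 *	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
 *	int			nsegs, err;
 *
 *	err = bus_dmamap_load_mbuf_sg(txr->txtag, txbuf->map, m,
 *	    segs, &nsegs, BUS_DMA_NOWAIT);
 *	if (err == EFBIG)			/* too fragmented */
 *		m = m_defrag(m, M_NOWAIT);	/* then retry the load */
 */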
3501
3502 /*********************************************************************
3503  *
3504  *  Initialize a transmit ring.
3505  *
3506  **********************************************************************/
3507 static void
3508 igb_setup_transmit_ring(struct tx_ring *txr)
3509 {
3510         struct adapter *adapter = txr->adapter;
3511         struct igb_tx_buffer *txbuf;
3512         int i;
3513 #ifdef DEV_NETMAP
3514         struct netmap_adapter *na = NA(adapter->ifp);
3515         struct netmap_slot *slot;
3516 #endif /* DEV_NETMAP */
3517
3518         /* Clear the old descriptor contents */
3519         IGB_TX_LOCK(txr);
3520 #ifdef DEV_NETMAP
3521         slot = netmap_reset(na, NR_TX, txr->me, 0);
3522 #endif /* DEV_NETMAP */
3523         bzero((void *)txr->tx_base,
3524               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3525         /* Reset indices */
3526         txr->next_avail_desc = 0;
3527         txr->next_to_clean = 0;
3528
3529         /* Free any existing tx buffers. */
3530         txbuf = txr->tx_buffers;
3531         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3532                 if (txbuf->m_head != NULL) {
3533                         bus_dmamap_sync(txr->txtag, txbuf->map,
3534                             BUS_DMASYNC_POSTWRITE);
3535                         bus_dmamap_unload(txr->txtag, txbuf->map);
3536                         m_freem(txbuf->m_head);
3537                         txbuf->m_head = NULL;
3538                 }
3539 #ifdef DEV_NETMAP
3540                 if (slot) {
3541                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3542                         /* no need to set the address */
3543                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3544                 }
3545 #endif /* DEV_NETMAP */
3546                 /* clear the watch index */
3547                 txbuf->next_eop = -1;
3548         }
3549
3550         /* Set number of descriptors available */
3551         txr->tx_avail = adapter->num_tx_desc;
3552
3553         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3554             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3555         IGB_TX_UNLOCK(txr);
3556 }
3557
3558 /*********************************************************************
3559  *
3560  *  Initialize all transmit rings.
3561  *
3562  **********************************************************************/
3563 static void
3564 igb_setup_transmit_structures(struct adapter *adapter)
3565 {
3566         struct tx_ring *txr = adapter->tx_rings;
3567
3568         for (int i = 0; i < adapter->num_queues; i++, txr++)
3569                 igb_setup_transmit_ring(txr);
3570
3571         return;
3572 }
3573
3574 /*********************************************************************
3575  *
3576  *  Enable transmit unit.
3577  *
3578  **********************************************************************/
3579 static void
3580 igb_initialize_transmit_units(struct adapter *adapter)
3581 {
3582         struct tx_ring  *txr = adapter->tx_rings;
3583         struct e1000_hw *hw = &adapter->hw;
3584         u32             tctl, txdctl;
3585
3586         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3587         tctl = txdctl = 0;
3588
3589         /* Setup the Tx Descriptor Rings */
3590         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3591                 u64 bus_addr = txr->txdma.dma_paddr;
3592
3593                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3594                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3595                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3596                     (uint32_t)(bus_addr >> 32));
3597                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3598                     (uint32_t)bus_addr);
3599
3600                 /* Setup the HW Tx Head and Tail descriptor pointers */
3601                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3602                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3603
3604                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3605                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3606                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3607
3608                 txr->queue_status = IGB_QUEUE_IDLE;
3609
3610                 txdctl |= IGB_TX_PTHRESH;
3611                 txdctl |= IGB_TX_HTHRESH << 8;
3612                 txdctl |= IGB_TX_WTHRESH << 16;
3613                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3614                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3615         }
3616
3617         if (adapter->vf_ifp)
3618                 return;
3619
3620         e1000_config_collision_dist(hw);
3621
3622         /* Program the Transmit Control Register */
3623         tctl = E1000_READ_REG(hw, E1000_TCTL);
3624         tctl &= ~E1000_TCTL_CT;
3625         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3626                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3627
3628         /* This write will effectively turn on the transmit unit. */
3629         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3630 }
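/*
 * Note how the 64-bit descriptor base above is programmed as two
 * 32-bit halves; the split is plain shift/truncate arithmetic
 * (illustrative sketch):
 *
 *	u64 bus_addr = txr->txdma.dma_paddr;
 *	u32 hi = (u32)(bus_addr >> 32);		/* -> TDBAH(i) */
 *	u32 lo = (u32)bus_addr;			/* -> TDBAL(i) */
 *
 * while TDLEN(i) takes the ring length in bytes, hence the descriptor
 * count times the descriptor size.
 */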
3631
3632 /*********************************************************************
3633  *
3634  *  Free all transmit rings.
3635  *
3636  **********************************************************************/
3637 static void
3638 igb_free_transmit_structures(struct adapter *adapter)
3639 {
3640         struct tx_ring *txr = adapter->tx_rings;
3641
3642         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3643                 IGB_TX_LOCK(txr);
3644                 igb_free_transmit_buffers(txr);
3645                 igb_dma_free(adapter, &txr->txdma);
3646                 IGB_TX_UNLOCK(txr);
3647                 IGB_TX_LOCK_DESTROY(txr);
3648         }
3649         free(adapter->tx_rings, M_DEVBUF);
3650 }
3651
3652 /*********************************************************************
3653  *
3654  *  Free transmit ring related data structures.
3655  *
3656  **********************************************************************/
3657 static void
3658 igb_free_transmit_buffers(struct tx_ring *txr)
3659 {
3660         struct adapter *adapter = txr->adapter;
3661         struct igb_tx_buffer *tx_buffer;
3662         int             i;
3663
3664         INIT_DEBUGOUT("free_transmit_ring: begin");
3665
3666         if (txr->tx_buffers == NULL)
3667                 return;
3668
3669         tx_buffer = txr->tx_buffers;
3670         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3671                 if (tx_buffer->m_head != NULL) {
3672                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3673                             BUS_DMASYNC_POSTWRITE);
3674                         bus_dmamap_unload(txr->txtag,
3675                             tx_buffer->map);
3676                         m_freem(tx_buffer->m_head);
3677                         tx_buffer->m_head = NULL;
3678                         if (tx_buffer->map != NULL) {
3679                                 bus_dmamap_destroy(txr->txtag,
3680                                     tx_buffer->map);
3681                                 tx_buffer->map = NULL;
3682                         }
3683                 } else if (tx_buffer->map != NULL) {
3684                         bus_dmamap_unload(txr->txtag,
3685                             tx_buffer->map);
3686                         bus_dmamap_destroy(txr->txtag,
3687                             tx_buffer->map);
3688                         tx_buffer->map = NULL;
3689                 }
3690         }
3691 #ifndef IGB_LEGACY_TX
3692         if (txr->br != NULL)
3693                 buf_ring_free(txr->br, M_DEVBUF);
3694 #endif
3695         if (txr->tx_buffers != NULL) {
3696                 free(txr->tx_buffers, M_DEVBUF);
3697                 txr->tx_buffers = NULL;
3698         }
3699         if (txr->txtag != NULL) {
3700                 bus_dma_tag_destroy(txr->txtag);
3701                 txr->txtag = NULL;
3702         }
3703         return;
3704 }
3705
3706 /**********************************************************************
3707  *
3708  *  Setup work for hardware segmentation offload (TSO)
3709  *
3710  **********************************************************************/
3711 static bool
3712 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3713         struct ip *ip, struct tcphdr *th)
3714 {
3715         struct adapter *adapter = txr->adapter;
3716         struct e1000_adv_tx_context_desc *TXD;
3717         struct igb_tx_buffer        *tx_buffer;
3718         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3719         u32 mss_l4len_idx = 0;
3720         u16 vtag = 0;
3721         int ctxd, ip_hlen, tcp_hlen;
3722
3723         ctxd = txr->next_avail_desc;
3724         tx_buffer = &txr->tx_buffers[ctxd];
3725         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3726
3727         ip->ip_sum = 0;
3728         ip_hlen = ip->ip_hl << 2;
3729         tcp_hlen = th->th_off << 2;
3730
3731         /* VLAN MACLEN IPLEN */
3732         if (mp->m_flags & M_VLANTAG) {
3733                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3734                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3735         }
3736
3737         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3738         vlan_macip_lens |= ip_hlen;
3739         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3740
3741         /* ADV DTYPE TUCMD */
3742         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3743         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3744         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3745         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3746
3747         /* MSS L4LEN IDX */
3748         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3749         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3750         /* 82575 needs the queue index added */
3751         if (adapter->hw.mac.type == e1000_82575)
3752                 mss_l4len_idx |= txr->me << 4;
3753         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3754
3755         TXD->seqnum_seed = htole32(0);
3756         tx_buffer->m_head = NULL;
3757         tx_buffer->next_eop = -1;
3758
3759         if (++ctxd == adapter->num_tx_desc)
3760                 ctxd = 0;
3761
3762         txr->tx_avail--;
3763         txr->next_avail_desc = ctxd;
3764         return TRUE;
3765 }
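/*
 * A worked example of the TSO context packing above (illustrative
 * only): a TCP segment with a 20-byte IP header, a 32-byte TCP header
 * (th_off = 8) and tso_segsz = 1448 gives ip_hlen = 20, tcp_hlen = 32
 * and
 *
 *	mss_l4len_idx = (1448 << E1000_ADVTXD_MSS_SHIFT)
 *	              | (32 << E1000_ADVTXD_L4LEN_SHIFT);
 *
 * The context descriptor occupies a ring slot just like a data
 * descriptor, which is why tx_avail is decremented and
 * next_avail_desc advances (with wrap) at the end.
 */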
3766
3767
3768 /*********************************************************************
3769  *
3770  *  Context Descriptor setup for VLAN or CSUM
3771  *
3772  **********************************************************************/
3773
3774 static bool
3775 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3776 {
3777         struct adapter *adapter = txr->adapter;
3778         struct e1000_adv_tx_context_desc *TXD;
3779         struct igb_tx_buffer        *tx_buffer;
3780         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3781         struct ether_vlan_header *eh;
3782         struct ip *ip = NULL;
3783         struct ip6_hdr *ip6;
3784         int  ehdrlen, ctxd, ip_hlen = 0;
3785         u16     etype, vtag = 0;
3786         u8      ipproto = 0;
3787         bool    offload = TRUE;
3788
3789         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3790                 offload = FALSE;
3791
3792         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3793         ctxd = txr->next_avail_desc;
3794         tx_buffer = &txr->tx_buffers[ctxd];
3795         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3796
3797         /*
3798         ** In advanced descriptors the vlan tag must
3799         ** be placed into the context descriptor, so we
3800         ** need to be here just for that setup.
3801         */
3802         if (mp->m_flags & M_VLANTAG) {
3803                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3804                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3805         } else if (offload == FALSE)
3806                 return FALSE;
3807
3808         /*
3809          * Determine where frame payload starts.
3810          * Jump over vlan headers if already present,
3811          * helpful for QinQ too.
3812          */
3813         eh = mtod(mp, struct ether_vlan_header *);
3814         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3815                 etype = ntohs(eh->evl_proto);
3816                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3817         } else {
3818                 etype = ntohs(eh->evl_encap_proto);
3819                 ehdrlen = ETHER_HDR_LEN;
3820         }
3821
3822         /* Set the ether header length */
3823         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3824
3825         switch (etype) {
3826                 case ETHERTYPE_IP:
3827                         ip = (struct ip *)(mp->m_data + ehdrlen);
3828                         ip_hlen = ip->ip_hl << 2;
3829                         if (mp->m_len < ehdrlen + ip_hlen) {
3830                                 offload = FALSE;
3831                                 break;
3832                         }
3833                         ipproto = ip->ip_p;
3834                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3835                         break;
3836                 case ETHERTYPE_IPV6:
3837                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3838                         ip_hlen = sizeof(struct ip6_hdr);
3839                         ipproto = ip6->ip6_nxt;
3840                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3841                         break;
3842                 default:
3843                         offload = FALSE;
3844                         break;
3845         }
3846
3847         vlan_macip_lens |= ip_hlen;
3848         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3849
3850         switch (ipproto) {
3851                 case IPPROTO_TCP:
3852                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3853                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3854                         break;
3855                 case IPPROTO_UDP:
3856                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3857                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3858                         break;
3859 #if __FreeBSD_version >= 800000
3860                 case IPPROTO_SCTP:
3861                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3862                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3863                         break;
3864 #endif
3865                 default:
3866                         offload = FALSE;
3867                         break;
3868         }
3869
3870         /* 82575 needs the queue index added */
3871         if (adapter->hw.mac.type == e1000_82575)
3872                 mss_l4len_idx = txr->me << 4;
3873
3874         /* Now copy bits into descriptor */
3875         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3876         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3877         TXD->seqnum_seed = htole32(0);
3878         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3879
3880         tx_buffer->m_head = NULL;
3881         tx_buffer->next_eop = -1;
3882
3883         /* We've consumed the first desc, adjust counters */
3884         if (++ctxd == adapter->num_tx_desc)
3885                 ctxd = 0;
3886         txr->next_avail_desc = ctxd;
3887         --txr->tx_avail;
3888
3889         return (offload);
3890 }
3891
3892
3893 /**********************************************************************
3894  *
3895  *  Examine each tx_buffer in the used queue. If the hardware is done
3896  *  processing the packet then free associated resources. The
3897  *  tx_buffer is put back on the free queue.
3898  *
3899  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3900  **********************************************************************/
3901 static bool
3902 igb_txeof(struct tx_ring *txr)
3903 {
3904         struct adapter  *adapter = txr->adapter;
3905         int first, last, done, processed;
3906         struct igb_tx_buffer *tx_buffer;
3907         struct e1000_tx_desc   *tx_desc, *eop_desc;
3908         struct ifnet   *ifp = adapter->ifp;
3909
3910         IGB_TX_LOCK_ASSERT(txr);
3911
3912 #ifdef DEV_NETMAP
3913         if (netmap_tx_irq(ifp, txr->me |
3914             (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
3915                 return (FALSE);
3916 #endif /* DEV_NETMAP */
3917         if (txr->tx_avail == adapter->num_tx_desc) {
3918                 txr->queue_status = IGB_QUEUE_IDLE;
3919                 return FALSE;
3920         }
3921
3922         processed = 0;
3923         first = txr->next_to_clean;
3924         tx_desc = &txr->tx_base[first];
3925         tx_buffer = &txr->tx_buffers[first];
3926         last = tx_buffer->next_eop;
3927         eop_desc = &txr->tx_base[last];
3928
3929         /*
3930          * Get the index of the first descriptor
3931          * AFTER the EOP of the first packet, so
3932          * that the inner while loop can use a
3933          * simple comparison to terminate.
3934          */
3935         if (++last == adapter->num_tx_desc)
3936                 last = 0;
3937         done = last;
3938
3939         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3940             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3941
3942         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3943                 /* We clean the range of the packet */
3944                 while (first != done) {
3945                         tx_desc->upper.data = 0;
3946                         tx_desc->lower.data = 0;
3947                         tx_desc->buffer_addr = 0;
3948                         ++txr->tx_avail;
3949                         ++processed;
3950
3951                         if (tx_buffer->m_head) {
3952                                 txr->bytes +=
3953                                     tx_buffer->m_head->m_pkthdr.len;
3954                                 bus_dmamap_sync(txr->txtag,
3955                                     tx_buffer->map,
3956                                     BUS_DMASYNC_POSTWRITE);
3957                                 bus_dmamap_unload(txr->txtag,
3958                                     tx_buffer->map);
3959
3960                                 m_freem(tx_buffer->m_head);
3961                                 tx_buffer->m_head = NULL;
3962                         }
3963                         tx_buffer->next_eop = -1;
3964                         txr->watchdog_time = ticks;
3965
3966                         if (++first == adapter->num_tx_desc)
3967                                 first = 0;
3968
3969                         tx_buffer = &txr->tx_buffers[first];
3970                         tx_desc = &txr->tx_base[first];
3971                 }
3972                 ++txr->packets;
3973                 ++ifp->if_opackets;
3974                 /* See if we can continue to the next packet */
3975                 last = tx_buffer->next_eop;
3976                 if (last != -1) {
3977                         eop_desc = &txr->tx_base[last];
3978                         /* Get new done point */
3979                         if (++last == adapter->num_tx_desc) last = 0;
3980                         done = last;
3981                 } else
3982                         break;
3983         }
3984         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3985             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3986
3987         txr->next_to_clean = first;
3988
3989         /*
3990         ** Watchdog calculation: we know there's
3991         ** work outstanding or the first return
3992         ** would have been taken, so no descriptors
3993         ** processed for too long indicates a hang.
3994         */
3995         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3996                 txr->queue_status |= IGB_QUEUE_HUNG;
3997         /*
3998          * If we have a minimum free,
3999          * clear depleted state bit
4000          */
4001         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4002                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4003
4004         /* All clean, turn off the watchdog */
4005         if (txr->tx_avail == adapter->num_tx_desc) {
4006                 txr->queue_status = IGB_QUEUE_IDLE;
4007                 return (FALSE);
4008         }
4009
4010         return (TRUE);
4011 }
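/*
 * A worked example of the [first, done) wrap arithmetic in igb_txeof()
 * (illustrative only): in a 1024-descriptor ring, a packet occupying
 * slots 1022, 1023 and 0 with its EOP in slot 0 gives first = 1022 and
 * done = 1, so the inner loop cleans 1022, 1023 and 0, then stops once
 * first wraps around to 1 and equals done.
 */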
4012
4013 /*********************************************************************
4014  *
4015  *  Refresh mbuf buffers for RX descriptor rings
4016  *   - now keeps its own state so discards due to resource
4017  *     exhaustion are unnecessary; if an mbuf cannot be obtained
4018  *     it just returns, keeping its placeholder, and can simply
4019  *     be recalled later to try again.
4020  *
4021  **********************************************************************/
4022 static void
4023 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4024 {
4025         struct adapter          *adapter = rxr->adapter;
4026         bus_dma_segment_t       hseg[1];
4027         bus_dma_segment_t       pseg[1];
4028         struct igb_rx_buf       *rxbuf;
4029         struct mbuf             *mh, *mp;
4030         int                     i, j, nsegs, error;
4031         bool                    refreshed = FALSE;
4032
4033         i = j = rxr->next_to_refresh;
4034         /*
4035         ** Get one descriptor beyond
4036         ** our work mark to control
4037         ** the loop.
4038         */
4039         if (++j == adapter->num_rx_desc)
4040                 j = 0;
4041
4042         while (j != limit) {
4043                 rxbuf = &rxr->rx_buffers[i];
4044                 /* No hdr mbuf used with header split off */
4045                 if (rxr->hdr_split == FALSE)
4046                         goto no_split;
4047                 if (rxbuf->m_head == NULL) {
4048                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4049                         if (mh == NULL)
4050                                 goto update;
4051                 } else
4052                         mh = rxbuf->m_head;
4053
4054                 mh->m_pkthdr.len = MHLEN;
4055                 mh->m_len = MHLEN;
4056                 mh->m_flags |= M_PKTHDR;
4057                 /* Get the memory mapping */
4058                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4059                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4060                 if (error != 0) {
4061                         printf("Refresh mbufs: hdr dmamap load"
4062                             " failure - %d\n", error);
4063                         m_free(mh);
4064                         rxbuf->m_head = NULL;
4065                         goto update;
4066                 }
4067                 rxbuf->m_head = mh;
4068                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4069                     BUS_DMASYNC_PREREAD);
4070                 rxr->rx_base[i].read.hdr_addr =
4071                     htole64(hseg[0].ds_addr);
4072 no_split:
4073                 if (rxbuf->m_pack == NULL) {
4074                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4075                             M_PKTHDR, adapter->rx_mbuf_sz);
4076                         if (mp == NULL)
4077                                 goto update;
4078                 } else
4079                         mp = rxbuf->m_pack;
4080
4081                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4082                 /* Get the memory mapping */
4083                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4084                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4085                 if (error != 0) {
4086                         printf("Refresh mbufs: payload dmamap load"
4087                             " failure - %d\n", error);
4088                         m_free(mp);
4089                         rxbuf->m_pack = NULL;
4090                         goto update;
4091                 }
4092                 rxbuf->m_pack = mp;
4093                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4094                     BUS_DMASYNC_PREREAD);
4095                 rxr->rx_base[i].read.pkt_addr =
4096                     htole64(pseg[0].ds_addr);
4097                 refreshed = TRUE; /* I feel wefreshed :) */
4098
4099                 i = j; /* our next is precalculated */
4100                 rxr->next_to_refresh = i;
4101                 if (++j == adapter->num_rx_desc)
4102                         j = 0;
4103         }
4104 update:
4105         if (refreshed) /* update tail */
4106                 E1000_WRITE_REG(&adapter->hw,
4107                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4108         return;
4109 }
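/*
 * The i/j pair above is a "stop one short" sentinel: j always runs one
 * slot ahead of i and the loop ends when j reaches 'limit', so
 * next_to_refresh is never advanced onto a descriptor the hardware may
 * still own.  A minimal sketch of the same idiom (illustrative;
 * refill() is a hypothetical stand-in):
 *
 *	i = j = start;
 *	if (++j == ring_size)
 *		j = 0;
 *	while (j != limit) {
 *		refill(i);
 *		i = j;
 *		if (++j == ring_size)
 *			j = 0;
 *	}
 */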
4110
4111
4112 /*********************************************************************
4113  *
4114  *  Allocate memory for rx_buffer structures. Since we use one
4115  *  rx_buffer per received packet, the maximum number of rx_buffer's
4116  *  that we'll need is equal to the number of receive descriptors
4117  *  that we've allocated.
4118  *
4119  **********************************************************************/
4120 static int
4121 igb_allocate_receive_buffers(struct rx_ring *rxr)
4122 {
4123         struct  adapter         *adapter = rxr->adapter;
4124         device_t                dev = adapter->dev;
4125         struct igb_rx_buf       *rxbuf;
4126         int                     i, bsize, error;
4127
4128         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4129         if (!(rxr->rx_buffers =
4130             (struct igb_rx_buf *) malloc(bsize,
4131             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4132                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4133                 error = ENOMEM;
4134                 goto fail;
4135         }
4136
4137         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4138                                    1, 0,                /* alignment, bounds */
4139                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4140                                    BUS_SPACE_MAXADDR,   /* highaddr */
4141                                    NULL, NULL,          /* filter, filterarg */
4142                                    MSIZE,               /* maxsize */
4143                                    1,                   /* nsegments */
4144                                    MSIZE,               /* maxsegsize */
4145                                    0,                   /* flags */
4146                                    NULL,                /* lockfunc */
4147                                    NULL,                /* lockfuncarg */
4148                                    &rxr->htag))) {
4149                 device_printf(dev, "Unable to create RX DMA tag\n");
4150                 goto fail;
4151         }
4152
4153         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4154                                    1, 0,                /* alignment, bounds */
4155                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4156                                    BUS_SPACE_MAXADDR,   /* highaddr */
4157                                    NULL, NULL,          /* filter, filterarg */
4158                                    MJUM9BYTES,          /* maxsize */
4159                                    1,                   /* nsegments */
4160                                    MJUM9BYTES,          /* maxsegsize */
4161                                    0,                   /* flags */
4162                                    NULL,                /* lockfunc */
4163                                    NULL,                /* lockfuncarg */
4164                                    &rxr->ptag))) {
4165                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4166                 goto fail;
4167         }
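        /*
        ** Editor's note: the two tags are sized to match the mbufs
        ** loaded into them later: htag (maxsize MSIZE) carries the
        ** m_gethdr() header mbufs, while ptag (maxsize MJUM9BYTES)
        ** carries the m_getjcl() payload clusters of up to
        ** adapter->rx_mbuf_sz bytes.
        */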
4168
4169         for (i = 0; i < adapter->num_rx_desc; i++) {
4170                 rxbuf = &rxr->rx_buffers[i];
4171                 error = bus_dmamap_create(rxr->htag,
4172                     BUS_DMA_NOWAIT, &rxbuf->hmap);
4173                 if (error) {
4174                         device_printf(dev,
4175                             "Unable to create RX head DMA maps\n");
4176                         goto fail;
4177                 }
4178                 error = bus_dmamap_create(rxr->ptag,
4179                     BUS_DMA_NOWAIT, &rxbuf->pmap);
4180                 if (error) {
4181                         device_printf(dev,
4182                             "Unable to create RX packet DMA maps\n");
4183                         goto fail;
4184                 }
4185         }
4186
4187         return (0);
4188
4189 fail:
4190         /* Frees all, but can handle partial completion */
4191         igb_free_receive_structures(adapter);
4192         return (error);
4193 }
4194
4195
4196 static void
4197 igb_free_receive_ring(struct rx_ring *rxr)
4198 {
4199         struct  adapter         *adapter = rxr->adapter;
4200         struct igb_rx_buf       *rxbuf;
4201
4202
4203         for (int i = 0; i < adapter->num_rx_desc; i++) {
4204                 rxbuf = &rxr->rx_buffers[i];
4205                 if (rxbuf->m_head != NULL) {
4206                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4207                             BUS_DMASYNC_POSTREAD);
4208                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4209                         rxbuf->m_head->m_flags |= M_PKTHDR;
4210                         m_freem(rxbuf->m_head);
4211                 }
4212                 if (rxbuf->m_pack != NULL) {
4213                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4214                             BUS_DMASYNC_POSTREAD);
4215                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4216                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4217                         m_freem(rxbuf->m_pack);
4218                 }
4219                 rxbuf->m_head = NULL;
4220                 rxbuf->m_pack = NULL;
4221         }
4222 }
4223
4224
4225 /*********************************************************************
4226  *
4227  *  Initialize a receive ring and its buffers.
4228  *
4229  **********************************************************************/
4230 static int
4231 igb_setup_receive_ring(struct rx_ring *rxr)
4232 {
4233         struct  adapter         *adapter;
4234         struct  ifnet           *ifp;
4235         device_t                dev;
4236         struct igb_rx_buf       *rxbuf;
4237         bus_dma_segment_t       pseg[1], hseg[1];
4238         struct lro_ctrl         *lro = &rxr->lro;
4239         int                     rsize, nsegs, error = 0;
4240 #ifdef DEV_NETMAP
4241         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4242         struct netmap_slot *slot;
4243 #endif /* DEV_NETMAP */
4244
4245         adapter = rxr->adapter;
4246         dev = adapter->dev;
4247         ifp = adapter->ifp;
4248
4249         /* Clear the ring contents */
4250         IGB_RX_LOCK(rxr);
4251 #ifdef DEV_NETMAP
4252         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4253 #endif /* DEV_NETMAP */
4254         rsize = roundup2(adapter->num_rx_desc *
4255             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4256         bzero((void *)rxr->rx_base, rsize);
4257
4258         /*
4259         ** Free current RX buffer structures and their mbufs
4260         */
4261         igb_free_receive_ring(rxr);
4262
4263         /* Configure for header split? */
4264         if (igb_header_split)
4265                 rxr->hdr_split = TRUE;
4266
4267         /* Now replenish the ring mbufs */
4268         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4269                 struct mbuf     *mh, *mp;
4270
4271                 rxbuf = &rxr->rx_buffers[j];
4272 #ifdef DEV_NETMAP
4273                 if (slot) {
4274                         /* slot sj is mapped to the j-th NIC-ring entry */
4275                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4276                         uint64_t paddr;
4277                         void *addr;
4278
4279                         addr = PNMB(slot + sj, &paddr);
4280                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4281                         /* Update descriptor */
4282                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4283                         continue;
4284                 }
4285 #endif /* DEV_NETMAP */
4286                 if (rxr->hdr_split == FALSE)
4287                         goto skip_head;
4288
4289                 /* First the header */
4290                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4291                 if (rxbuf->m_head == NULL) {
4292                         error = ENOBUFS;
4293                         goto fail;
4294                 }
4295                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4296                 mh = rxbuf->m_head;
4297                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4298                 mh->m_flags |= M_PKTHDR;
4299                 /* Get the memory mapping */
4300                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4301                     rxbuf->hmap, rxbuf->m_head, hseg,
4302                     &nsegs, BUS_DMA_NOWAIT);
4303                 if (error != 0) /* Nothing elegant to do here */
4304                         goto fail;
4305                 bus_dmamap_sync(rxr->htag,
4306                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4307                 /* Update descriptor */
4308                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4309
4310 skip_head:
4311                 /* Now the payload cluster */
4312                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4313                     M_PKTHDR, adapter->rx_mbuf_sz);
4314                 if (rxbuf->m_pack == NULL) {
4315                         error = ENOBUFS;
4316                         goto fail;
4317                 }
4318                 mp = rxbuf->m_pack;
4319                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4320                 /* Get the memory mapping */
4321                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4322                     rxbuf->pmap, mp, pseg,
4323                     &nsegs, BUS_DMA_NOWAIT);
4324                 if (error != 0)
4325                         goto fail;
4326                 bus_dmamap_sync(rxr->ptag,
4327                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4328                 /* Update descriptor */
4329                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4330         }
4331
4332         /* Setup our descriptor indices */
4333         rxr->next_to_check = 0;
4334         rxr->next_to_refresh = adapter->num_rx_desc - 1;
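        /*
        ** Editor's note: next_to_refresh is the first slot software
        ** still owns; starting it at the last ring index leaves the
        ** customary one-descriptor gap when it is written to RDT in
        ** igb_initialize_receive_units().
        */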
4335         rxr->lro_enabled = FALSE;
4336         rxr->rx_split_packets = 0;
4337         rxr->rx_bytes = 0;
4338
4339         rxr->fmp = NULL;
4340         rxr->lmp = NULL;
4341         rxr->discard = FALSE;
4342
4343         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4344             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4345
4346         /*
4347         ** Now set up the LRO interface: we also
4348         ** only do header split when LRO is enabled,
4349         ** since header split alone is rarely
4350         ** desirable in setups like these.
4351         */
4352         if (ifp->if_capenable & IFCAP_LRO) {
4353                 error = tcp_lro_init(lro);
4354                 if (error) {
4355                         device_printf(dev, "LRO Initialization failed!\n");
4356                         goto fail;
4357                 }
4358                 INIT_DEBUGOUT("RX LRO Initialized\n");
4359                 rxr->lro_enabled = TRUE;
4360                 lro->ifp = adapter->ifp;
4361         }
4362
4363         IGB_RX_UNLOCK(rxr);
4364         return (0);
4365
4366 fail:
4367         igb_free_receive_ring(rxr);
4368         IGB_RX_UNLOCK(rxr);
4369         return (error);
4370 }
4371
4372
4373 /*********************************************************************
4374  *
4375  *  Initialize all receive rings.
4376  *
4377  **********************************************************************/
4378 static int
4379 igb_setup_receive_structures(struct adapter *adapter)
4380 {
4381         struct rx_ring *rxr = adapter->rx_rings;
4382         int i;
4383
4384         for (i = 0; i < adapter->num_queues; i++, rxr++)
4385                 if (igb_setup_receive_ring(rxr))
4386                         goto fail;
4387
4388         return (0);
4389 fail:
4390         /*
4391          * Free RX buffers allocated so far, we will only handle
4392          * the rings that completed, the failing case will have
4393          * cleaned up for itself. 'i' is the endpoint.
4394          */
4395         for (int j = 0; j < i; ++j) {
4396                 rxr = &adapter->rx_rings[j];
4397                 IGB_RX_LOCK(rxr);
4398                 igb_free_receive_ring(rxr);
4399                 IGB_RX_UNLOCK(rxr);
4400         }
4401
4402         return (ENOBUFS);
4403 }
4404
4405 /*********************************************************************
4406  *
4407  *  Enable receive unit.
4408  *
4409  **********************************************************************/
4410 static void
4411 igb_initialize_receive_units(struct adapter *adapter)
4412 {
4413         struct rx_ring  *rxr = adapter->rx_rings;
4414         struct ifnet    *ifp = adapter->ifp;
4415         struct e1000_hw *hw = &adapter->hw;
4416         u32             rctl, rxcsum, psize, srrctl = 0;
4417
4418         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4419
4420         /*
4421          * Make sure receives are disabled while setting
4422          * up the descriptor ring
4423          */
4424         rctl = E1000_READ_REG(hw, E1000_RCTL);
4425         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4426
4427         /*
4428         ** Set up for header split
4429         */
4430         if (igb_header_split) {
4431                 /* Use a standard mbuf for the header */
4432                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4433                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4434         } else
4435                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4436
4437         /*
4438         ** Set up for jumbo frames
4439         */
4440         if (ifp->if_mtu > ETHERMTU) {
4441                 rctl |= E1000_RCTL_LPE;
4442                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4443                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4444                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4445                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4446                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4447                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4448                 }
4449                 /* Set maximum packet len */
4450                 psize = adapter->max_frame_size;
4451                 /* are we on a vlan? */
4452                 if (adapter->ifp->if_vlantrunk != NULL)
4453                         psize += VLAN_TAG_SIZE;
4454                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4455         } else {
4456                 rctl &= ~E1000_RCTL_LPE;
4457                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4458                 rctl |= E1000_RCTL_SZ_2048;
4459         }
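        /*
        ** Editor's note: E1000_SRRCTL_BSIZEPKT_SHIFT converts the
        ** buffer size into the register's 1 KB granularity (the shift
        ** is 10 on this family), e.g. 2048 >> 10 = 2 and 8192 >> 10 = 8.
        */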
4460
4461         /* Setup the Base and Length of the Rx Descriptor Rings */
4462         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4463                 u64 bus_addr = rxr->rxdma.dma_paddr;
4464                 u32 rxdctl;
4465
4466                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4467                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4468                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4469                     (uint32_t)(bus_addr >> 32));
4470                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4471                     (uint32_t)bus_addr);
4472                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4473                 /* Enable this Queue */
4474                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4475                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4476                 rxdctl &= 0xFFF00000;
4477                 rxdctl |= IGB_RX_PTHRESH;
4478                 rxdctl |= IGB_RX_HTHRESH << 8;
4479                 rxdctl |= IGB_RX_WTHRESH << 16;
4480                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4481         }
4482
4483         /*
4484         ** Setup for RX MultiQueue
4485         */
4486         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4487         if (adapter->num_queues > 1) {
4488                 u32 random[10], mrqc, shift = 0;
4489                 union igb_reta {
4490                         u32 dword;
4491                         u8  bytes[4];
4492                 } reta;
4493
4494                 arc4rand(&random, sizeof(random), 0);
4495                 if (adapter->hw.mac.type == e1000_82575)
4496                         shift = 6;
4497                 /* Populate the RSS redirection table (RETA) */
4498                 for (int i = 0; i < 128; i++) {
4499                         reta.bytes[i & 3] =
4500                             (i % adapter->num_queues) << shift;
4501                         if ((i & 3) == 3)
4502                                 E1000_WRITE_REG(hw,
4503                                     E1000_RETA(i >> 2), reta.dword);
4504                 }
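                /*
                ** Editor's note: the union packs four 8-bit redirection
                ** entries per 32-bit RETA register.  With num_queues = 4
                ** and shift = 0, entries 0..3 become bytes {0,1,2,3} and
                ** are flushed to E1000_RETA(0) on the fourth pass; the
                ** 128 entries therefore take 32 register writes.
                */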
4505                 /* Now fill in hash table */
4506                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4507                 for (int i = 0; i < 10; i++)
4508                         E1000_WRITE_REG_ARRAY(hw,
4509                             E1000_RSSRK(0), i, random[i]);
4510
4511                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4512                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4513                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4514                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4515                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4516                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4517                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4518                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4519
4520                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4521
4522                 /*
4523                 ** NOTE: Receive Full-Packet Checksum Offload 
4524                 ** is mutually exclusive with Multiqueue. However
4525                 ** this is not the same as TCP/IP checksums which
4526                 ** still work.
4527                 */
4528                 rxcsum |= E1000_RXCSUM_PCSD;
4529 #if __FreeBSD_version >= 800000
4530                 /* For SCTP Offload */
4531                 if ((hw->mac.type == e1000_82576)
4532                     && (ifp->if_capenable & IFCAP_RXCSUM))
4533                         rxcsum |= E1000_RXCSUM_CRCOFL;
4534 #endif
4535         } else {
4536                 /* Non RSS setup */
4537                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4538                         rxcsum |= E1000_RXCSUM_IPPCSE;
4539 #if __FreeBSD_version >= 800000
4540                         if (adapter->hw.mac.type == e1000_82576)
4541                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4542 #endif
4543                 } else
4544                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4545         }
4546         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4547
4548         /* Setup the Receive Control Register */
4549         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4550         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4551                    E1000_RCTL_RDMTS_HALF |
4552                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4553         /* Strip CRC bytes. */
4554         rctl |= E1000_RCTL_SECRC;
4555         /* Make sure VLAN Filters are off */
4556         rctl &= ~E1000_RCTL_VFE;
4557         /* Don't store bad packets */
4558         rctl &= ~E1000_RCTL_SBP;
4559
4560         /* Enable Receives */
4561         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4562
4563         /*
4564          * Setup the HW Rx Head and Tail Descriptor Pointers
4565          *   - needs to be after enable
4566          */
4567         for (int i = 0; i < adapter->num_queues; i++) {
4568                 rxr = &adapter->rx_rings[i];
4569                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4570 #ifdef DEV_NETMAP
4571                 /*
4572                  * An init() while a netmap client is active must
4573                  * preserve the rx buffers passed to userspace.
4574                  * In this driver it means we adjust RDT to
4575                  * something different from next_to_refresh
4576                  * (which is not used in netmap mode).
4577                  */
4578                 if (ifp->if_capenable & IFCAP_NETMAP) {
4579                         struct netmap_adapter *na = NA(adapter->ifp);
4580                         struct netmap_kring *kring = &na->rx_rings[i];
4581                         int t = rxr->next_to_refresh - kring->nr_hwavail;
4582
4583                         if (t >= adapter->num_rx_desc)
4584                                 t -= adapter->num_rx_desc;
4585                         else if (t < 0)
4586                                 t += adapter->num_rx_desc;
4587                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4588                 } else
4589 #endif /* DEV_NETMAP */
4590                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4591         }
4592         return;
4593 }
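/*
** Editor's note: the netmap RDT computation above is modular ring
** arithmetic; e.g. with num_rx_desc = 1024, next_to_refresh = 10 and
** nr_hwavail = 50, t = -40 wraps to 984, keeping the 50 buffers still
** held by userspace out of the hardware's reach.
*/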
4594
4595 /*********************************************************************
4596  *
4597  *  Free receive rings.
4598  *
4599  **********************************************************************/
4600 static void
4601 igb_free_receive_structures(struct adapter *adapter)
4602 {
4603         struct rx_ring *rxr = adapter->rx_rings;
4604
4605         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4606                 struct lro_ctrl *lro = &rxr->lro;
4607                 igb_free_receive_buffers(rxr);
4608                 tcp_lro_free(lro);
4609                 igb_dma_free(adapter, &rxr->rxdma);
4610         }
4611
4612         free(adapter->rx_rings, M_DEVBUF);
4613 }
4614
4615 /*********************************************************************
4616  *
4617  *  Free receive ring data structures.
4618  *
4619  **********************************************************************/
4620 static void
4621 igb_free_receive_buffers(struct rx_ring *rxr)
4622 {
4623         struct adapter          *adapter = rxr->adapter;
4624         struct igb_rx_buf       *rxbuf;
4625         int i;
4626
4627         INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4628
4629         /* Cleanup any existing buffers */
4630         if (rxr->rx_buffers != NULL) {
4631                 for (i = 0; i < adapter->num_rx_desc; i++) {
4632                         rxbuf = &rxr->rx_buffers[i];
4633                         if (rxbuf->m_head != NULL) {
4634                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4635                                     BUS_DMASYNC_POSTREAD);
4636                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4637                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4638                                 m_freem(rxbuf->m_head);
4639                         }
4640                         if (rxbuf->m_pack != NULL) {
4641                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4642                                     BUS_DMASYNC_POSTREAD);
4643                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4644                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4645                                 m_freem(rxbuf->m_pack);
4646                         }
4647                         rxbuf->m_head = NULL;
4648                         rxbuf->m_pack = NULL;
4649                         if (rxbuf->hmap != NULL) {
4650                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4651                                 rxbuf->hmap = NULL;
4652                         }
4653                         if (rxbuf->pmap != NULL) {
4654                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4655                                 rxbuf->pmap = NULL;
4656                         }
4657                 }
4658                 if (rxr->rx_buffers != NULL) {
4659                         free(rxr->rx_buffers, M_DEVBUF);
4660                         rxr->rx_buffers = NULL;
4661                 }
4662         }
4663
4664         if (rxr->htag != NULL) {
4665                 bus_dma_tag_destroy(rxr->htag);
4666                 rxr->htag = NULL;
4667         }
4668         if (rxr->ptag != NULL) {
4669                 bus_dma_tag_destroy(rxr->ptag);
4670                 rxr->ptag = NULL;
4671         }
4672 }
4673
4674 static __inline void
4675 igb_rx_discard(struct rx_ring *rxr, int i)
4676 {
4677         struct igb_rx_buf       *rbuf;
4678
4679         rbuf = &rxr->rx_buffers[i];
4680
4681         /* Partially received? Free the chain */
4682         if (rxr->fmp != NULL) {
4683                 rxr->fmp->m_flags |= M_PKTHDR;
4684                 m_freem(rxr->fmp);
4685                 rxr->fmp = NULL;
4686                 rxr->lmp = NULL;
4687         }
4688
4689         /*
4690         ** With advanced descriptors the writeback
4691         ** clobbers the buffer addrs, so it's easier
4692         ** to just free the existing mbufs and take
4693         ** the normal refresh path to get new buffers
4694         ** and mapping.
4695         */
4696         if (rbuf->m_head) {
4697                 m_free(rbuf->m_head);
4698                 rbuf->m_head = NULL;
4699         }
4700
4701         if (rbuf->m_pack) {
4702                 m_free(rbuf->m_pack);
4703                 rbuf->m_pack = NULL;
4704         }
4705
4706         return;
4707 }
4708
4709 static __inline void
4710 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4711 {
4712
4713         /*
4714          * At the moment LRO is only done for IPv4/TCP packets whose TCP
4715          * checksum has been computed by hardware, and which carry no VLAN
4716          * tag in the Ethernet header.
4717          */
4718         if (rxr->lro_enabled &&
4719             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4720             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4721             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4722             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4723             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4724             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4725                 /*
4726                  * Send to the stack if:
4727                  *  - LRO not enabled, or
4728                  *  - no LRO resources, or
4729                  *  - lro enqueue fails
4730                  */
4731                 if (rxr->lro.lro_cnt != 0)
4732                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4733                                 return;
4734         }
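        /*
        ** Editor's note: the RX lock is dropped around if_input() so
        ** the stack can process the packet (and potentially re-enter
        ** the driver) without holding this ring's lock.
        */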
4735         IGB_RX_UNLOCK(rxr);
4736         (*ifp->if_input)(ifp, m);
4737         IGB_RX_LOCK(rxr);
4738 }
4739
4740 /*********************************************************************
4741  *
4742  *  This routine executes in interrupt context. It replenishes
4743  *  the mbufs in the descriptor ring and passes data which has
4744  *  been DMA'd into host memory up to the upper layer.
4745  *
4746  *  We loop at most count times if count is > 0, or until done if
4747  *  count < 0.
4748  *
4749  *  Return TRUE if more to clean, FALSE otherwise
4750  *********************************************************************/
4751 static bool
4752 igb_rxeof(struct igb_queue *que, int count, int *done)
4753 {
4754         struct adapter          *adapter = que->adapter;
4755         struct rx_ring          *rxr = que->rxr;
4756         struct ifnet            *ifp = adapter->ifp;
4757         struct lro_ctrl         *lro = &rxr->lro;
4758         struct lro_entry        *queued;
4759         int                     i, processed = 0, rxdone = 0;
4760         u32                     ptype, staterr = 0;
4761         union e1000_adv_rx_desc *cur;
4762
4763         IGB_RX_LOCK(rxr);
4764         /* Sync the ring. */
4765         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4766             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4767
4768 #ifdef DEV_NETMAP
4769         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4770                 return (FALSE);
4771 #endif /* DEV_NETMAP */
4772
4773         /* Main clean loop */
4774         for (i = rxr->next_to_check; count != 0;) {
4775                 struct mbuf             *sendmp, *mh, *mp;
4776                 struct igb_rx_buf       *rxbuf;
4777                 u16                     hlen, plen, hdr, vtag;
4778                 bool                    eop = FALSE;
4779  
4780                 cur = &rxr->rx_base[i];
4781                 staterr = le32toh(cur->wb.upper.status_error);
4782                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4783                         break;
4784                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4785                         break;
4786                 count--;
4787                 sendmp = mh = mp = NULL;
4788                 cur->wb.upper.status_error = 0;
4789                 rxbuf = &rxr->rx_buffers[i];
4790                 plen = le16toh(cur->wb.upper.length);
4791                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4792                 if ((adapter->hw.mac.type == e1000_i350) &&
4793                     (staterr & E1000_RXDEXT_STATERR_LB))
4794                         vtag = be16toh(cur->wb.upper.vlan);
4795                 else
4796                         vtag = le16toh(cur->wb.upper.vlan);
4797                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4798                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4799
4800                 /* Make sure all segments of a bad packet are discarded */
4801                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4802                     (rxr->discard)) {
4803                         adapter->dropped_pkts++;
4804                         ++rxr->rx_discarded;
4805                         if (!eop) /* Catch subsequent segs */
4806                                 rxr->discard = TRUE;
4807                         else
4808                                 rxr->discard = FALSE;
4809                         igb_rx_discard(rxr, i);
4810                         goto next_desc;
4811                 }
4812
4813                 /*
4814                 ** The way the hardware is configured to
4815                 ** split, it will ONLY use the header buffer
4816                 ** when header split is enabled, otherwise we
4817                 ** get normal behavior, i.e., both header and
4818                 ** payload are DMA'd into the payload buffer.
4819                 **
4820                 ** The fmp test is to catch the case where a
4821                 ** packet spans multiple descriptors, in that
4822                 ** case only the first header is valid.
4823                 */
4824                 if (rxr->hdr_split && rxr->fmp == NULL) {
4825                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4826                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4827                         if (hlen > IGB_HDR_BUF)
4828                                 hlen = IGB_HDR_BUF;
4829                         mh = rxr->rx_buffers[i].m_head;
4830                         mh->m_len = hlen;
4831                         /* clear buf pointer for refresh */
4832                         rxbuf->m_head = NULL;
4833                         /*
4834                         ** Get the payload length, this
4835                         ** could be zero if it's a small
4836                         ** packet.
4837                         */
4838                         if (plen > 0) {
4839                                 mp = rxr->rx_buffers[i].m_pack;
4840                                 mp->m_len = plen;
4841                                 mh->m_next = mp;
4842                                 /* clear buf pointer */
4843                                 rxbuf->m_pack = NULL;
4844                                 rxr->rx_split_packets++;
4845                         }
4846                 } else {
4847                         /*
4848                         ** Either no header split, or a
4849                         ** secondary piece of a fragmented
4850                         ** split packet.
4851                         */
4852                         mh = rxr->rx_buffers[i].m_pack;
4853                         mh->m_len = plen;
4854                         /* clear buf info for refresh */
4855                         rxbuf->m_pack = NULL;
4856                 }
4857
4858                 ++processed; /* So we know when to refresh */
4859
4860                 /* Initial frame - setup */
4861                 if (rxr->fmp == NULL) {
4862                         mh->m_pkthdr.len = mh->m_len;
4863                         /* Save the head of the chain */
4864                         rxr->fmp = mh;
4865                         rxr->lmp = mh;
4866                         if (mp != NULL) {
4867                                 /* Add payload if split */
4868                                 mh->m_pkthdr.len += mp->m_len;
4869                                 rxr->lmp = mh->m_next;
4870                         }
4871                 } else {
4872                         /* Chain mbufs together */
4873                         rxr->lmp->m_next = mh;
4874                         rxr->lmp = rxr->lmp->m_next;
4875                         rxr->fmp->m_pkthdr.len += mh->m_len;
4876                 }
4877
4878                 if (eop) {
4879                         rxr->fmp->m_pkthdr.rcvif = ifp;
4880                         ifp->if_ipackets++;
4881                         rxr->rx_packets++;
4882                         /* capture data for AIM */
4883                         rxr->packets++;
4884                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4885                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4886
4887                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4888                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4889
4890                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4891                             (staterr & E1000_RXD_STAT_VP) != 0) {
4892                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4893                                 rxr->fmp->m_flags |= M_VLANTAG;
4894                         }
4895 #ifndef IGB_LEGACY_TX
4896                         rxr->fmp->m_pkthdr.flowid = que->msix;
4897                         rxr->fmp->m_flags |= M_FLOWID;
4898 #endif
4899                         sendmp = rxr->fmp;
4900                         /* Make sure to set M_PKTHDR. */
4901                         sendmp->m_flags |= M_PKTHDR;
4902                         rxr->fmp = NULL;
4903                         rxr->lmp = NULL;
4904                 }
4905
4906 next_desc:
4907                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4908                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4909
4910                 /* Advance our pointers to the next descriptor. */
4911                 if (++i == adapter->num_rx_desc)
4912                         i = 0;
4913                 /*
4914                 ** Send to the stack or LRO
4915                 */
4916                 if (sendmp != NULL) {
4917                         rxr->next_to_check = i;
4918                         igb_rx_input(rxr, ifp, sendmp, ptype);
4919                         i = rxr->next_to_check;
4920                         rxdone++;
4921                 }
4922
4923                 /* Every 8 descriptors we go to refresh mbufs */
4924                 if (processed == 8) {
4925                         igb_refresh_mbufs(rxr, i);
4926                         processed = 0;
4927                 }
4928         }
4929
4930         /* Catch any remainders */
4931         if (igb_rx_unrefreshed(rxr))
4932                 igb_refresh_mbufs(rxr, i);
4933
4934         rxr->next_to_check = i;
4935
4936         /*
4937          * Flush any outstanding LRO work
4938          */
4939         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4940                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4941                 tcp_lro_flush(lro, queued);
4942         }
4943
4944         if (done != NULL)
4945                 *done += rxdone;
4946
4947         IGB_RX_UNLOCK(rxr);
4948         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4949 }
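/*
** Editor's note: a typical caller sketch (hypothetical shape, based on
** the header comment above -- the actual interrupt handlers live
** earlier in this file): the queue handler calls igb_rxeof() with a
** process limit and reschedules itself while more work remains.
*/
#if 0
static void
example_que_handler(struct igb_queue *que)
{
	bool more;

	more = igb_rxeof(que, 100 /* process limit */, NULL);
	if (more)
		taskqueue_enqueue(que->tq, &que->que_task);
}
#endif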
4950
4951 /*********************************************************************
4952  *
4953  *  Verify that the hardware indicated that the checksum is valid.
4954  *  Inform the stack about the status of checksum so that stack
4955  *  doesn't spend time verifying the checksum.
4956  *
4957  *********************************************************************/
4958 static void
4959 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4960 {
4961         u16 status = (u16)staterr;
4962         u8  errors = (u8) (staterr >> 24);
4963         int sctp;
4964
4965         /* Ignore Checksum bit is set */
4966         if (status & E1000_RXD_STAT_IXSM) {
4967                 mp->m_pkthdr.csum_flags = 0;
4968                 return;
4969         }
4970
4971         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4972             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4973                 sctp = 1;
4974         else
4975                 sctp = 0;
4976         if (status & E1000_RXD_STAT_IPCS) {
4977                 /* Did it pass? */
4978                 if (!(errors & E1000_RXD_ERR_IPE)) {
4979                         /* IP Checksum Good */
4980                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4981                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4982                 } else
4983                         mp->m_pkthdr.csum_flags = 0;
4984         }
4985
4986         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4987                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4988 #if __FreeBSD_version >= 800000
4989                 if (sctp) /* reassign */
4990                         type = CSUM_SCTP_VALID;
4991 #endif
4992                 /* Did it pass? */
4993                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4994                         mp->m_pkthdr.csum_flags |= type;
4995                         if (sctp == 0)
4996                                 mp->m_pkthdr.csum_data = htons(0xffff);
4997                 }
4998         }
4999         return;
5000 }
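/*
** Editor's note: staterr packs the descriptor status in its low bits
** and the error byte in bits 24-31.  Using the classic e1000 values
** (DD 0x01, EOP 0x02, TCPCS 0x20, IPCS 0x40), staterr = 0x00000063
** with a zero error byte marks both the IP and TCP/UDP checksums good,
** yielding CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|
** CSUM_PSEUDO_HDR and csum_data = 0xffff.
*/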
5001
5002 /*
5003  * This routine is run via a vlan
5004  * config EVENT
5005  */
5006 static void
5007 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5008 {
5009         struct adapter  *adapter = ifp->if_softc;
5010         u32             index, bit;
5011
5012         if (ifp->if_softc != arg)    /* Not our event */
5013                 return;
5014
5015         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5016                 return;
5017
5018         IGB_CORE_LOCK(adapter);
5019         index = (vtag >> 5) & 0x7F;
5020         bit = vtag & 0x1F;
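        /*
        ** Editor's note: each of the 128 shadow_vfta words covers 32
        ** VLAN IDs; e.g. vtag 100 maps to index 3 (100 >> 5) and
        ** bit 4 (100 & 0x1F).
        */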
5021         adapter->shadow_vfta[index] |= (1 << bit);
5022         ++adapter->num_vlans;
5023         /* Change hw filter setting */
5024         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5025                 igb_setup_vlan_hw_support(adapter);
5026         IGB_CORE_UNLOCK(adapter);
5027 }
5028
5029 /*
5030  * This routine is run via a vlan
5031  * unconfig EVENT
5032  */
5033 static void
5034 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5035 {
5036         struct adapter  *adapter = ifp->if_softc;
5037         u32             index, bit;
5038
5039         if (ifp->if_softc != arg)
5040                 return;
5041
5042         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5043                 return;
5044
5045         IGB_CORE_LOCK(adapter);
5046         index = (vtag >> 5) & 0x7F;
5047         bit = vtag & 0x1F;
5048         adapter->shadow_vfta[index] &= ~(1 << bit);
5049         --adapter->num_vlans;
5050         /* Change hw filter setting */
5051         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5052                 igb_setup_vlan_hw_support(adapter);
5053         IGB_CORE_UNLOCK(adapter);
5054 }
5055
5056 static void
5057 igb_setup_vlan_hw_support(struct adapter *adapter)
5058 {
5059         struct e1000_hw *hw = &adapter->hw;
5060         struct ifnet    *ifp = adapter->ifp;
5061         u32             reg;
5062
5063         if (adapter->vf_ifp) {
5064                 e1000_rlpml_set_vf(hw,
5065                     adapter->max_frame_size + VLAN_TAG_SIZE);
5066                 return;
5067         }
5068
5069         reg = E1000_READ_REG(hw, E1000_CTRL);
5070         reg |= E1000_CTRL_VME;
5071         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5072
5073         /* Enable the Filter Table */
5074         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5075                 reg = E1000_READ_REG(hw, E1000_RCTL);
5076                 reg &= ~E1000_RCTL_CFIEN;
5077                 reg |= E1000_RCTL_VFE;
5078                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5079         }
5080
5081         /* Update the frame size */
5082         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5083             adapter->max_frame_size + VLAN_TAG_SIZE);
5084
5085         /* Don't bother with table if no vlans */
5086         if ((adapter->num_vlans == 0) ||
5087             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5088                 return;
5089         /*
5090         ** A soft reset zeroes out the VFTA, so
5091         ** we need to repopulate it now.
5092         */
5093         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5094                 if (adapter->shadow_vfta[i] != 0) {
5095                         if (adapter->vf_ifp)
5096                                 e1000_vfta_set_vf(hw,
5097                                     adapter->shadow_vfta[i], TRUE);
5098                         else
5099                                 e1000_write_vfta(hw,
5100                                     i, adapter->shadow_vfta[i]);
5101                 }
5102 }
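/*
** Editor's note: 128 VFTA words * 32 bits cover the full 4096-ID VLAN
** space.  Note that the e1000_vfta_set_vf() branch above is
** unreachable here: the vf_ifp case already returned at the top of
** the function.
*/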
5103
5104 static void
5105 igb_enable_intr(struct adapter *adapter)
5106 {
5107         /* With RSS set up, program what to auto-clear */
5108         if (adapter->msix_mem) {
5109                 u32 mask = (adapter->que_mask | adapter->link_mask);
5110                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5111                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5112                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5113                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5114                     E1000_IMS_LSC);
5115         } else {
5116                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5117                     IMS_ENABLE_MASK);
5118         }
5119         E1000_WRITE_FLUSH(&adapter->hw);
5120
5121         return;
5122 }
5123
5124 static void
5125 igb_disable_intr(struct adapter *adapter)
5126 {
5127         if (adapter->msix_mem) {
5128                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5129                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5130         } 
5131         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5132         E1000_WRITE_FLUSH(&adapter->hw);
5133         return;
5134 }
5135
5136 /*
5137  * Bit of a misnomer, what this really means is
5138  * to enable OS management of the system... aka
5139  * to disable special hardware management features 
5140  */
5141 static void
5142 igb_init_manageability(struct adapter *adapter)
5143 {
5144         if (adapter->has_manage) {
5145                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5146                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5147
5148                 /* disable hardware interception of ARP */
5149                 manc &= ~(E1000_MANC_ARP_EN);
5150
5151                 /* enable receiving management packets to the host */
5152                 manc |= E1000_MANC_EN_MNG2HOST;
5153                 manc2h |= 1 << 5;  /* Mng Port 623 */
5154                 manc2h |= 1 << 6;  /* Mng Port 664 */
5155                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5156                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5157         }
5158 }
5159
5160 /*
5161  * Give control back to hardware management
5162  * controller if there is one.
5163  */
5164 static void
5165 igb_release_manageability(struct adapter *adapter)
5166 {
5167         if (adapter->has_manage) {
5168                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5169
5170                 /* re-enable hardware interception of ARP */
5171                 manc |= E1000_MANC_ARP_EN;
5172                 manc &= ~E1000_MANC_EN_MNG2HOST;
5173
5174                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5175         }
5176 }
5177
5178 /*
5179  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5180  * For ASF and Pass Through versions of f/w this means that
5181  * the driver is loaded. 
5182  *
5183  */
5184 static void
5185 igb_get_hw_control(struct adapter *adapter)
5186 {
5187         u32 ctrl_ext;
5188
5189         if (adapter->vf_ifp)
5190                 return;
5191
5192         /* Let firmware know the driver has taken over */
5193         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5194         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5195             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5196 }
5197
5198 /*
5199  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5200  * For ASF and Pass Through versions of f/w this means that the
5201  * driver is no longer loaded.
5202  *
5203  */
5204 static void
5205 igb_release_hw_control(struct adapter *adapter)
5206 {
5207         u32 ctrl_ext;
5208
5209         if (adapter->vf_ifp)
5210                 return;
5211
5212         /* Let firmware take over control of h/w */
5213         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5214         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5215             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5216 }
5217
5218 static int
5219 igb_is_valid_ether_addr(uint8_t *addr)
5220 {
5221         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5222
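        /* Reject group addresses (I/G bit set) and the all-zero address. */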
5223         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5224                 return (FALSE);
5225         }
5226
5227         return (TRUE);
5228 }
5229
5230
5231 /*
5232  * Enable PCI Wake On Lan capability
5233  */
5234 static void
5235 igb_enable_wakeup(device_t dev)
5236 {
5237         u16     cap, status;
5238         u8      id;
5239
5240         /* First find the capabilities pointer */
5241         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5242         /* Read the PM Capabilities */
5243         id = pci_read_config(dev, cap, 1);
5244         if (id != PCIY_PMG)     /* Something wrong */
5245                 return;
5246         /* OK, we have the power capabilities, so
5247            now get the status register */
5248         cap += PCIR_POWER_STATUS;
5249         status = pci_read_config(dev, cap, 2);
5250         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5251         pci_write_config(dev, cap, status, 2);
5252         return;
5253 }
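/*
** Editor's note: the routine above assumes the PM capability is the
** first entry in the capability list.  A more general form -- sketched
** here as an alternative, not what the driver does -- lets the bus
** code walk the list:
*/
#if 0
static void
example_enable_wakeup(device_t dev)
{
	int cap;
	u16 status;

	/* pci_find_cap() walks the capability list for us. */
	if (pci_find_cap(dev, PCIY_PMG, &cap) != 0)
		return;		/* no power management capability */
	status = pci_read_config(dev, cap + PCIR_POWER_STATUS, 2);
	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, cap + PCIR_POWER_STATUS, status, 2);
}
#endif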
5254
5255 static void
5256 igb_led_func(void *arg, int onoff)
5257 {
5258         struct adapter  *adapter = arg;
5259
5260         IGB_CORE_LOCK(adapter);
5261         if (onoff) {
5262                 e1000_setup_led(&adapter->hw);
5263                 e1000_led_on(&adapter->hw);
5264         } else {
5265                 e1000_led_off(&adapter->hw);
5266                 e1000_cleanup_led(&adapter->hw);
5267         }
5268         IGB_CORE_UNLOCK(adapter);
5269 }
5270
5271 /**********************************************************************
5272  *
5273  *  Update the board statistics counters.
5274  *
5275  **********************************************************************/
5276 static void
5277 igb_update_stats_counters(struct adapter *adapter)
5278 {
5279         struct ifnet            *ifp;
5280         struct e1000_hw         *hw = &adapter->hw;
5281         struct e1000_hw_stats   *stats;
5282
5283         /* 
5284         ** The virtual function adapter has only a
5285         ** small, controlled set of stats; update
5286         ** only those and return.
5287         */
5288         if (adapter->vf_ifp) {
5289                 igb_update_vf_stats_counters(adapter);
5290                 return;
5291         }
5292
5293         stats = (struct e1000_hw_stats  *)adapter->stats;
5294
5295         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5296             (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5297                 stats->symerrs +=
5298                     E1000_READ_REG(hw, E1000_SYMERRS);
5299                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5300         }
5301
5302         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5303         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5304         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5305         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5306
5307         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5308         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5309         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5310         stats->dc += E1000_READ_REG(hw, E1000_DC);
5311         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5312         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5313         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5314         /*
5315         ** For watchdog management we need to know if we have been
5316         ** paused during the last interval, so capture that here.
5317         */ 
5318         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5319         stats->xoffrxc += adapter->pause_frames;
5320         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5321         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5322         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5323         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5324         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5325         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5326         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5327         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5328         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5329         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5330         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5331         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5332
5333         /* For the 64-bit byte counters the low dword must be read first. */
5334         /* Both registers clear on the read of the high dword */
5335
5336         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5337             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5338         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5339             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5340
5341         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5342         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5343         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5344         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5345         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5346
5347         stats->tor += E1000_READ_REG(hw, E1000_TORH);
5348         stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5349
5350         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5351         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5352         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5353         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5354         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5355         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5356         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5357         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5358         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5359         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5360
5361         /* Interrupt Counts */
5362
5363         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5364         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5365         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5366         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5367         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5368         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5369         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5370         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5371         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5372
5373         /* Host to Card Statistics */
5374
5375         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5376         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5377         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5378         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5379         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5380         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5381         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5382         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5383             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5384         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5385             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5386         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5387         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5388         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5389
5390         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5391         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5392         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5393         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5394         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5395         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5396
5397         ifp = adapter->ifp;
5398         ifp->if_collisions = stats->colc;
5399
5400         /* Rx Errors */
5401         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5402             stats->crcerrs + stats->algnerrc +
5403             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5404
5405         /* Tx Errors */
5406         ifp->if_oerrors = stats->ecol +
5407             stats->latecol + adapter->watchdog_events;
5408
5409         /* Driver specific counters */
5410         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5411         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5412         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5413         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5414         adapter->packet_buf_alloc_tx =
5415             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5416         adapter->packet_buf_alloc_rx =
5417             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5418 }
5419
5420
5421 /**********************************************************************
5422  *
5423  *  Initialize the VF board statistics counters.
5424  *
5425  **********************************************************************/
5426 static void
5427 igb_vf_init_stats(struct adapter *adapter)
5428 {
5429         struct e1000_hw *hw = &adapter->hw;
5430         struct e1000_vf_stats   *stats;
5431
5432         stats = (struct e1000_vf_stats  *)adapter->stats;
5433         if (stats == NULL)
5434                 return;
5435         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5436         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5437         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5438         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5439         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5440 }
5441  
5442 /**********************************************************************
5443  *
5444  *  Update the VF board statistics counters.
5445  *
5446  **********************************************************************/
5447 static void
5448 igb_update_vf_stats_counters(struct adapter *adapter)
5449 {
5450         struct e1000_hw *hw = &adapter->hw;
5451         struct e1000_vf_stats   *stats;
5452
5453         if (adapter->link_speed == 0)
5454                 return;
5455
5456         stats = (struct e1000_vf_stats  *)adapter->stats;
5457
5458         UPDATE_VF_REG(E1000_VFGPRC,
5459             stats->last_gprc, stats->gprc);
5460         UPDATE_VF_REG(E1000_VFGORC,
5461             stats->last_gorc, stats->gorc);
5462         UPDATE_VF_REG(E1000_VFGPTC,
5463             stats->last_gptc, stats->gptc);
5464         UPDATE_VF_REG(E1000_VFGOTC,
5465             stats->last_gotc, stats->gotc);
5466         UPDATE_VF_REG(E1000_VFMPRC,
5467             stats->last_mprc, stats->mprc);
5468 }
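/*
** Editor's note: UPDATE_VF_REG (from if_igb.h) folds a free-running
** 32-bit hardware counter into a 64-bit software total by remembering
** the previous raw reading.  One wrap-safe formulation of that
** pattern, assuming only that behavior (sketch, not the macro itself):
*/
#if 0
#define UPDATE_VF_REG_SKETCH(reg, last, cur)			\
do {								\
	u32 now = E1000_READ_REG(hw, reg);			\
	(cur) += (u32)(now - (last));	/* mod-2^32 delta */	\
	(last) = now;						\
} while (0)
#endif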
5469
5470 /* Export a single 32-bit register via a read-only sysctl. */
5471 static int
5472 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5473 {
5474         struct adapter *adapter;
5475         u_int val;
5476
5477         adapter = oidp->oid_arg1;
5478         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5479         return (sysctl_handle_int(oidp, &val, 0, req));
5480 }
5481
5482 /*
5483 **  Tuneable interrupt rate handler
5484 */
5485 static int
5486 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5487 {
5488         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5489         int                     error;
5490         u32                     reg, usec, rate;
5491                         
5492         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5493         usec = ((reg & 0x7FFC) >> 2);
5494         if (usec > 0)
5495                 rate = 1000000 / usec;
5496         else
5497                 rate = 0;
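        /*
        ** Editor's note: as decoded above, a register value of 0x1F4
        ** gives usec = 125 and a reported rate of 1000000 / 125 =
        ** 8000 interrupts/sec.
        */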
5498         error = sysctl_handle_int(oidp, &rate, 0, req);
5499         if (error || !req->newptr)
5500                 return (error);
5501         return (0);
5502 }
5503
5504 /*
5505  * Add sysctl variables, one per statistic, to the system.
5506  */
5507 static void
5508 igb_add_hw_stats(struct adapter *adapter)
5509 {
5510         device_t dev = adapter->dev;
5511
5512         struct tx_ring *txr = adapter->tx_rings;
5513         struct rx_ring *rxr = adapter->rx_rings;
5514
5515         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5516         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5517         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5518         struct e1000_hw_stats *stats = adapter->stats;
5519
5520         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5521         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5522
5523 #define QUEUE_NAME_LEN 32
5524         char namebuf[QUEUE_NAME_LEN];
5525
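        /*
         * The oids added below land under this device's sysctl tree,
         * e.g. (assuming unit 0):
         *
         *   dev.igb.0.dropped, dev.igb.0.watchdog_timeouts, ...
         *   dev.igb.0.queue0.{interrupt_rate,txd_head,rx_packets,...}
         *   dev.igb.0.mac_stats.*, dev.igb.0.interrupts.*,
         *   dev.igb.0.host.*
         */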
5526         /* Driver Statistics */
5527         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5528                         CTLFLAG_RD, &adapter->link_irq, 0,
5529                         "Link MSIX IRQ Handled");
5530         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5531                         CTLFLAG_RD, &adapter->dropped_pkts,
5532                         "Driver dropped packets");
5533         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5534                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5535                         "Driver TX DMA setup failures in xmit");
5536         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5537                         CTLFLAG_RD, &adapter->rx_overruns,
5538                         "RX overruns");
5539         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5540                         CTLFLAG_RD, &adapter->watchdog_events,
5541                         "Watchdog timeouts");
5542
5543         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5544                         CTLFLAG_RD, &adapter->device_control,
5545                         "Device Control Register");
5546         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5547                         CTLFLAG_RD, &adapter->rx_control,
5548                         "Receiver Control Register");
5549         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5550                         CTLFLAG_RD, &adapter->int_mask,
5551                         "Interrupt Mask");
5552         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5553                         CTLFLAG_RD, &adapter->eint_mask,
5554                         "Extended Interrupt Mask");
5555         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5556                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5557                         "Transmit Buffer Packet Allocation");
5558         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5559                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5560                         "Receive Buffer Packet Allocation");
5561         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5562                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5563                         "Flow Control High Watermark");
5564         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5565                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5566                         "Flow Control Low Watermark");
5567
5568         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5569                 struct lro_ctrl *lro = &rxr->lro;
5570
5571                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5572                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5573                                             CTLFLAG_RD, NULL, "Queue Name");
5574                 queue_list = SYSCTL_CHILDREN(queue_node);
5575
5576                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5577                                 CTLFLAG_RD, &adapter->queues[i],
5578                                 sizeof(adapter->queues[i]),
5579                                 igb_sysctl_interrupt_rate_handler,
5580                                 "IU", "Interrupt Rate");
5581
5582                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5583                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5584                                 igb_sysctl_reg_handler, "IU",
5585                                 "Transmit Descriptor Head");
5586                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5587                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5588                                 igb_sysctl_reg_handler, "IU",
5589                                 "Transmit Descriptor Tail");
5590                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5591                                 CTLFLAG_RD, &txr->no_desc_avail,
5592                                 "Queue No Descriptor Available");
5593                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5594                                 CTLFLAG_RD, &txr->tx_packets,
5595                                 "Queue Packets Transmitted");
5596
5597                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5598                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5599                                 igb_sysctl_reg_handler, "IU",
5600                                 "Receive Descriptor Head");
5601                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5602                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5603                                 igb_sysctl_reg_handler, "IU",
5604                                 "Receive Descriptor Tail");
5605                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5606                                 CTLFLAG_RD, &rxr->rx_packets,
5607                                 "Queue Packets Received");
5608                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5609                                 CTLFLAG_RD, &rxr->rx_bytes,
5610                                 "Queue Bytes Received");
5611                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5612                                 CTLFLAG_RD, &lro->lro_queued, 0,
5613                                 "LRO Queued");
5614                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5615                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5616                                 "LRO Flushed");
5617         }
5618
5619         /* MAC stats get their own sub node */
5620
5621         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5622                                     CTLFLAG_RD, NULL, "MAC Statistics");
5623         stat_list = SYSCTL_CHILDREN(stat_node);
5624
5625         /*
5626         ** The VF adapter exposes only a small subset of stats,
5627         ** since it is not managing the physical hardware itself.
5628         */
5629         if (adapter->vf_ifp) {
5630                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5631                                 CTLFLAG_RD, &stats->gprc,
5632                                 "Good Packets Received");
5633                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5634                                 CTLFLAG_RD, &stats->gptc,
5635                                 "Good Packets Transmitted");
5636                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5637                                 CTLFLAG_RD, &stats->gorc,
5638                                 "Good Octets Received");
5639                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5640                                 CTLFLAG_RD, &stats->gotc,
5641                                 "Good Octets Transmitted");
5642                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5643                                 CTLFLAG_RD, &stats->mprc,
5644                                 "Multicast Packets Received");
5645                 return;
5646         }
5647
5648         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5649                         CTLFLAG_RD, &stats->ecol,
5650                         "Excessive collisions");
5651         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5652                         CTLFLAG_RD, &stats->scc,
5653                         "Single collisions");
5654         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5655                         CTLFLAG_RD, &stats->mcc,
5656                         "Multiple collisions");
5657         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5658                         CTLFLAG_RD, &stats->latecol,
5659                         "Late collisions");
5660         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5661                         CTLFLAG_RD, &stats->colc,
5662                         "Collision Count");
5663         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5664                         CTLFLAG_RD, &stats->symerrs,
5665                         "Symbol Errors");
5666         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5667                         CTLFLAG_RD, &stats->sec,
5668                         "Sequence Errors");
5669         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5670                         CTLFLAG_RD, &stats->dc,
5671                         "Defer Count");
5672         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5673                         CTLFLAG_RD, &stats->mpc,
5674                         "Missed Packets");
5675         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5676                         CTLFLAG_RD, &stats->rnbc,
5677                         "Receive No Buffers");
5678         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5679                         CTLFLAG_RD, &stats->ruc,
5680                         "Receive Undersize");
5681         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5682                         CTLFLAG_RD, &stats->rfc,
5683                         "Fragmented Packets Received");
5684         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5685                         CTLFLAG_RD, &stats->roc,
5686                         "Oversized Packets Received");
5687         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5688                         CTLFLAG_RD, &stats->rjc,
5689                         "Received Jabber");
5690         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5691                         CTLFLAG_RD, &stats->rxerrc,
5692                         "Receive Errors");
5693         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5694                         CTLFLAG_RD, &stats->crcerrs,
5695                         "CRC errors");
5696         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5697                         CTLFLAG_RD, &stats->algnerrc,
5698                         "Alignment Errors");
5699         /* On 82575 these are collision counts */
5700         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5701                         CTLFLAG_RD, &stats->cexterr,
5702                         "Collision/Carrier extension errors");
5703         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5704                         CTLFLAG_RD, &stats->xonrxc,
5705                         "XON Received");
5706         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5707                         CTLFLAG_RD, &stats->xontxc,
5708                         "XON Transmitted");
5709         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5710                         CTLFLAG_RD, &stats->xoffrxc,
5711                         "XOFF Received");
5712         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5713                         CTLFLAG_RD, &stats->xofftxc,
5714                         "XOFF Transmitted");
5715         /* Packet Reception Stats */
5716         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5717                         CTLFLAG_RD, &stats->tpr,
5718                         "Total Packets Received");
5719         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5720                         CTLFLAG_RD, &stats->gprc,
5721                         "Good Packets Received");
5722         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5723                         CTLFLAG_RD, &stats->bprc,
5724                         "Broadcast Packets Received");
5725         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5726                         CTLFLAG_RD, &stats->mprc,
5727                         "Multicast Packets Received");
5728         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5729                         CTLFLAG_RD, &stats->prc64,
5730                         "64 byte frames received");
5731         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5732                         CTLFLAG_RD, &stats->prc127,
5733                         "65-127 byte frames received");
5734         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5735                         CTLFLAG_RD, &stats->prc255,
5736                         "128-255 byte frames received");
5737         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5738                         CTLFLAG_RD, &stats->prc511,
5739                         "256-511 byte frames received");
5740         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5741                         CTLFLAG_RD, &stats->prc1023,
5742                         "512-1023 byte frames received");
5743         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5744                         CTLFLAG_RD, &stats->prc1522,
5745                         "1024-1522 byte frames received");
5746         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5747                         CTLFLAG_RD, &stats->gorc, 
5748                         "Good Octets Received"); 
5749
5750         /* Packet Transmission Stats */
5751         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5752                         CTLFLAG_RD, &stats->gotc, 
5753                         "Good Octets Transmitted"); 
5754         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5755                         CTLFLAG_RD, &stats->tpt,
5756                         "Total Packets Transmitted");
5757         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5758                         CTLFLAG_RD, &stats->gptc,
5759                         "Good Packets Transmitted");
5760         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5761                         CTLFLAG_RD, &stats->bptc,
5762                         "Broadcast Packets Transmitted");
5763         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5764                         CTLFLAG_RD, &stats->mptc,
5765                         "Multicast Packets Transmitted");
5766         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5767                         CTLFLAG_RD, &stats->ptc64,
5768                         "64 byte frames transmitted");
5769         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5770                         CTLFLAG_RD, &stats->ptc127,
5771                         "65-127 byte frames transmitted");
5772         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5773                         CTLFLAG_RD, &stats->ptc255,
5774                         "128-255 byte frames transmitted");
5775         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5776                         CTLFLAG_RD, &stats->ptc511,
5777                         "256-511 byte frames transmitted");
5778         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5779                         CTLFLAG_RD, &stats->ptc1023,
5780                         "512-1023 byte frames transmitted");
5781         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5782                         CTLFLAG_RD, &stats->ptc1522,
5783                         "1024-1522 byte frames transmitted");
5784         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5785                         CTLFLAG_RD, &stats->tsctc,
5786                         "TSO Contexts Transmitted");
5787         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5788                         CTLFLAG_RD, &stats->tsctfc,
5789                         "TSO Contexts Failed");
5790
5791
5792         /* Interrupt Stats */
5793
5794         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5795                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5796         int_list = SYSCTL_CHILDREN(int_node);
5797
5798         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5799                         CTLFLAG_RD, &stats->iac,
5800                         "Interrupt Assertion Count");
5801
5802         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5803                         CTLFLAG_RD, &stats->icrxptc,
5804                         "Interrupt Cause Rx Pkt Timer Expire Count");
5805
5806         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5807                         CTLFLAG_RD, &stats->icrxatc,
5808                         "Interrupt Cause Rx Abs Timer Expire Count");
5809
5810         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5811                         CTLFLAG_RD, &stats->ictxptc,
5812                         "Interrupt Cause Tx Pkt Timer Expire Count");
5813
5814         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5815                         CTLFLAG_RD, &stats->ictxatc,
5816                         "Interrupt Cause Tx Abs Timer Expire Count");
5817
5818         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5819                         CTLFLAG_RD, &stats->ictxqec,
5820                         "Interrupt Cause Tx Queue Empty Count");
5821
5822         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5823                         CTLFLAG_RD, &stats->ictxqmtc,
5824                         "Interrupt Cause Tx Queue Min Thresh Count");
5825
5826         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5827                         CTLFLAG_RD, &stats->icrxdmtc,
5828                         "Interrupt Cause Rx Desc Min Thresh Count");
5829
5830         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5831                         CTLFLAG_RD, &stats->icrxoc,
5832                         "Interrupt Cause Receiver Overrun Count");
5833
5834         /* Host to Card Stats */
5835
5836         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5837                                     CTLFLAG_RD, NULL, 
5838                                     "Host to Card Statistics");
5839
5840         host_list = SYSCTL_CHILDREN(host_node);
5841
5842         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5843                         CTLFLAG_RD, &stats->cbtmpc,
5844                         "Circuit Breaker Tx Packet Count");
5845
5846         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5847                         CTLFLAG_RD, &stats->htdpmc,
5848                         "Host Transmit Discarded Packets");
5849
5850         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5851                         CTLFLAG_RD, &stats->rpthc,
5852                         "Rx Packets To Host");
5853
5854         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5855                         CTLFLAG_RD, &stats->cbrmpc,
5856                         "Circuit Breaker Rx Packet Count");
5857
5858         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5859                         CTLFLAG_RD, &stats->cbrdpc,
5860                         "Circuit Breaker Rx Dropped Count");
5861
5862         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5863                         CTLFLAG_RD, &stats->hgptc,
5864                         "Host Good Packets Tx Count");
5865
5866         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5867                         CTLFLAG_RD, &stats->htcbdpc,
5868                         "Host Tx Circuit Breaker Dropped Count");
5869
5870         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5871                         CTLFLAG_RD, &stats->hgorc,
5872                         "Host Good Octets Received Count");
5873
5874         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5875                         CTLFLAG_RD, &stats->hgotc,
5876                         "Host Good Octets Transmit Count");
5877
5878         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5879                         CTLFLAG_RD, &stats->lenerrs,
5880                         "Length Errors");
5881
5882         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5883                         CTLFLAG_RD, &stats->scvpc,
5884                         "SerDes/SGMII Code Violation Pkt Count");
5885
5886         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5887                         CTLFLAG_RD, &stats->hrmpc,
5888                         "Header Redirection Missed Packet Count");
5889 }
5890
5891
5892 /**********************************************************************
5893  *
5894  *  This routine provides a way to dump out the adapter EEPROM,
5895  *  often a useful debug/service tool. Only the first 32 words
5896  *  are dumped; the fields that matter live within that range.
5897  *
5898  **********************************************************************/
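/*
 * Usage sketch, assuming the handler is registered as "nvm" under the
 * device's sysctl tree in igb_attach():
 *
 *   sysctl dev.igb.0.nvm=1
 *
 * dumps the first 32 EEPROM words to the console.
 */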
5899 static int
5900 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5901 {
5902         struct adapter *adapter;
5903         int error;
5904         int result;
5905
5906         result = -1;
5907         error = sysctl_handle_int(oidp, &result, 0, req);
5908
5909         if (error || !req->newptr)
5910                 return (error);
5911
5912         /*
5913          * This value will cause a hex dump of the
5914          * first 32 16-bit words of the EEPROM to
5915          * the screen.
5916          */
5917         if (result == 1) {
5918                 adapter = (struct adapter *)arg1;
5919                 igb_print_nvm_info(adapter);
5920         }
5921
5922         return (error);
5923 }
5924
5925 static void
5926 igb_print_nvm_info(struct adapter *adapter)
5927 {
5928         u16     eeprom_data;
5929         int     i, j, row = 0;
5930
5931         /* It's a bit crude, but it gets the job done */
5932         printf("\nInterface EEPROM Dump:\n");
5933         printf("Offset\n0x0000  ");
5934         for (i = 0, j = 0; i < 32; i++, j++) {
5935                 if (j == 8) { /* Make the offset block */
5936                         j = 0; ++row;
5937                         printf("\n0x00%x0  ", row);
5938                 }
5939                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5940                 printf("%04x ", eeprom_data);
5941         }
5942         printf("\n");
5943 }
5944
5945 static void
5946 igb_set_sysctl_value(struct adapter *adapter, const char *name,
5947         const char *description, int *limit, int value)
5948 {
5949         *limit = value;
5950         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5951             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5952             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5953 }
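/*
 * Typical use, a sketch of the kind of registration made in
 * igb_attach():
 *
 *   igb_set_sysctl_value(adapter, "rx_processing_limit",
 *       "max number of rx packets to process",
 *       &adapter->rx_process_limit, igb_rx_process_limit);
 */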
5954
5955 /*
5956 ** Set flow control using sysctl:
5957 ** Flow control values:
5958 **      0 - off (e1000_fc_none)
5959 **      1 - rx pause (e1000_fc_rx_pause)
5960 **      2 - tx pause (e1000_fc_tx_pause)
5961 **      3 - full (e1000_fc_full)
5962 */
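/*
 * Usage sketch, assuming the oid is registered as "fc" in igb_attach():
 *
 *   sysctl dev.igb.0.fc=3        (request full flow control)
 *   sysctl dev.igb.0.fc          (read back the current setting)
 */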
5963 static int
5964 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5965 {
5966         int             error, input;
5967         struct adapter  *adapter = (struct adapter *) arg1;
5968
5969         /* Report this unit's current mode, not a static shared by all units */
5970         input = adapter->hw.fc.requested_mode;
5971         error = sysctl_handle_int(oidp, &input, 0, req);
5972         if ((error) || (req->newptr == NULL))
5973                 return (error);
5974
5975         switch (input) {
5976                 case e1000_fc_rx_pause:
5977                 case e1000_fc_tx_pause:
5978                 case e1000_fc_full:
5979                 case e1000_fc_none:
5980                         adapter->hw.fc.requested_mode = input;
5981                         adapter->fc = input;
5982                         break;
5983                 default:
5984                         /* Do nothing */
5985                         return (error);
5986         }
5987
5988         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5989         e1000_force_mac_fc(&adapter->hw);
5990         return (error);
5991 }
5992
5993 /*
5994 ** Manage DMA Coalescing:
5995 ** Control values:
5996 **      0 - off, 1 - on (uses the 1000 default)
5997 ** Legal timer values are:
5998 **      250, 500, and 1000 through 10000 in steps of 1000
5999 */
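/*
 * Usage sketch, assuming the oid is registered as "dmac" in
 * igb_attach():
 *
 *   sysctl dev.igb.0.dmac=1000   (enable, 1000 timer value)
 *   sysctl dev.igb.0.dmac=0      (disable DMA coalescing)
 */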
6000 static int
6001 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6002 {
6003         struct adapter *adapter = (struct adapter *) arg1;
6004         int             error;
6005
6006         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6007
6008         if ((error) || (req->newptr == NULL))
6009                 return (error);
6010
6011         switch (adapter->dmac) {
6012                 case 0:
6013                         /* Disabling */
6014                         break;
6015                 case 1: /* Just enable and use default */
6016                         adapter->dmac = 1000;
6017                         break;
6018                 case 250:
6019                 case 500:
6020                 case 1000:
6021                 case 2000:
6022                 case 3000:
6023                 case 4000:
6024                 case 5000:
6025                 case 6000:
6026                 case 7000:
6027                 case 8000:
6028                 case 9000:
6029                 case 10000:
6030                         /* Legal values - allow */
6031                         break;
6032                 default:
6033                         /* Illegal value, disable DMA coalescing */
6034                         adapter->dmac = 0;
6035                         return (error);
6036         }
6037         /* Reinit the interface */
6038         igb_init(adapter);
6039         return (error);
6040 }
6041
6042 /*
6043 ** Manage Energy Efficient Ethernet (EEE):
6044 ** Control values:
6045 **     0 - EEE enabled, 1 - EEE disabled
6046 */
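/*
 * Usage sketch, assuming the oid is registered as "eee_disabled" in
 * igb_attach():
 *
 *   sysctl dev.igb.0.eee_disabled=1   (turn EEE off)
 */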
6047 static int
6048 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6049 {
6050         struct adapter  *adapter = (struct adapter *) arg1;
6051         int             error, value;
6052
6053         value = adapter->hw.dev_spec._82575.eee_disable;
6054         error = sysctl_handle_int(oidp, &value, 0, req);
6055         if (error || req->newptr == NULL)
6056                 return (error);
6057         IGB_CORE_LOCK(adapter);
6058         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6059         igb_init_locked(adapter);
6060         IGB_CORE_UNLOCK(adapter);
6061         return (0);
6062 }