/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifndef IGB_LEGACY_TX
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.3.10";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SFP,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_VF,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_IT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I211_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static bool     igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, struct tcphdr *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");
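/*
** For example, both are boot-time tunables (CTLFLAG_RDTUN), so a
** loader.conf entry such as:
**
**	hw.igb.rxd="4096"
**	hw.igb.txd="4096"
**
** sizes the rings at attach; igb_attach() falls back to
** IGB_DEFAULT_RXD/TXD when a value lies outside the IGB_MIN/IGB_MAX
** range or leaves the ring unaligned to IGB_DBA_ALIGN.
*/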

/*
** AIM: Adaptive Interrupt Moderation, which means that the
** interrupt rate is varied over time based on the traffic
** for that interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");
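/*
** e.g. since this one is CTLFLAG_RW it can also be flipped at
** runtime:  sysctl hw.igb.enable_aim=0
*/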

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
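/*
** Note: this only seeds the initial EITR interrupt throttle for
** each vector; with AIM enabled the effective rate then adapts
** per vector as traffic changes.
*/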

#if __FreeBSD_version >= 800000
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to be DMA'd to a separate
** mbuf from the payload. This can have memory alignment benefits,
** and small packets often fit entirely in the header, so no cluster
** is needed at all. It is a very workload-dependent feature.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");
/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");
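/*
** For example, hw.igb.num_queues="2" in loader.conf pins the
** driver to two queue pairs even on a machine with more CPUs;
** the default of 0 derives the count from mp_ncpus and the
** MSI-X vectors the device advertises.
*/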

/*
** Global variable to store the last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments
** when a queue is bound to a CPU.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines whether the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");
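        /*
        ** e.g. the handler accepts the e1000_fc_mode values:
        ** 0 (none), 1 (rx pause), 2 (tx pause), 3 (full), so
        ** "sysctl dev.igb.0.fc=0" turns flow control off.
        */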

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  They
         * must not exceed the hardware maximum and must be a multiple of
         * IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats = (struct e1000_vf_stats *)malloc(
                    sizeof(struct e1000_vf_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats = (struct e1000_hw_stats *)malloc(
                    sizeof(struct e1000_hw_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper)
                        e1000_set_eee_i350(&adapter->hw);
        }
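        /*
        ** e.g. on these i350-class parts EEE can then be turned off
        ** at runtime with:  sysctl dev.igb.0.eee_disabled=1
        */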

        /*
        ** Start from a known state; this is important for reading
        ** the NVM and MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-LAN
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "VLAN in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX routine: called from the stack, it always uses tx[0]
 * and spins for the lock.  Should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        taskqueue_enqueue(que->tq, &txr->txq_task);

        return (err);
}
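/*
** When the mbuf carries a flowid (M_FLOWID) it is typically the RSS
** hash the hardware computed on receive, so e.g. all segments of one
** TCP connection keep mapping to the same tx ring; without it the
** current CPU selects the ring.
*/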

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        enq = 0;

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
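                /* FALLTHROUGH */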
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as the
 *  init entry point in the network interface structure.  It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest MAC address; the user may have set a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
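        /*
        ** e.g. the default 1500 byte MTU yields a max_frame_size of
        ** 1518, selecting 2K (MCLBYTES) clusters, while a 9000 byte
        ** jumbo MTU (9018 with header and FCS) falls through to the
        ** 9K (MJUM9BYTES) pool.
        */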

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* This clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling; make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper)
                e1000_set_eee_i350(&adapter->hw);
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
        struct igb_queue *que = context;
        struct adapter *adapter = que->adapter;
        struct tx_ring *txr = que->txr;
        struct ifnet    *ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                bool    more;

                more = igb_rxeof(que, adapter->rx_process_limit, NULL);

                IGB_TX_LOCK(txr);
                igb_txeof(txr);
#ifndef IGB_LEGACY_TX
                /* Process the stack queue only if not depleted */
                if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                    !drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
                /* Do we need another? */
                if (more) {
                        taskqueue_enqueue(que->tq, &que->que_task);
                        return;
                }
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                return;
#endif
        /* Reenable this interrupt */
        if (que->eims)
                E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
        else
                igb_enable_intr(adapter);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
        struct adapter *adapter = context;

        IGB_CORE_LOCK(adapter);
        igb_handle_link_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_link_locked(struct adapter *adapter)
{
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK_ASSERT(adapter);
        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
}
1456
1457 /*********************************************************************
1458  *
1459  *  MSI/Legacy Deferred
1460  *  Interrupt Service routine  
1461  *
1462  *********************************************************************/
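/*
 * As a bus_setup_intr() filter handler this runs in primary interrupt
 * context: it only reads ICR, masks further interrupts, and defers the
 * real work to the taskqueue.  FILTER_STRAY tells the kernel the
 * interrupt was not ours, FILTER_HANDLED that it was consumed.
 */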
1463 static int
1464 igb_irq_fast(void *arg)
1465 {
1466         struct adapter          *adapter = arg;
1467         struct igb_queue        *que = adapter->queues;
1468         u32                     reg_icr;
1469
1470
1471         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1472
1473         /* Hot eject?  */
1474         if (reg_icr == 0xffffffff)
1475                 return FILTER_STRAY;
1476
1477         /* Definitely not our interrupt.  */
1478         if (reg_icr == 0x0)
1479                 return FILTER_STRAY;
1480
1481         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1482                 return FILTER_STRAY;
1483
1484         /*
1485          * Mask interrupts until the taskqueue is finished running.  This is
1486          * cheap, just assume that it is needed.  This also works around the
1487          * MSI message reordering errata on certain systems.
1488          */
1489         igb_disable_intr(adapter);
1490         taskqueue_enqueue(que->tq, &que->que_task);
1491
1492         /* Link status change */
1493         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1494                 taskqueue_enqueue(que->tq, &adapter->link_task);
1495
1496         if (reg_icr & E1000_ICR_RXO)
1497                 adapter->rx_overruns++;
1498         return FILTER_HANDLED;
1499 }
1500
1501 #ifdef DEVICE_POLLING
1502 #if __FreeBSD_version >= 800000
1503 #define POLL_RETURN_COUNT(a) (a)
1504 static int
1505 #else
1506 #define POLL_RETURN_COUNT(a)
1507 static void
1508 #endif
1509 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1510 {
1511         struct adapter          *adapter = ifp->if_softc;
1512         struct igb_queue        *que;
1513         struct tx_ring          *txr;
1514         u32                     reg_icr, rx_done = 0;
1515         u32                     loop = IGB_MAX_LOOP;
1516         bool                    more;
1517
1518         IGB_CORE_LOCK(adapter);
1519         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1520                 IGB_CORE_UNLOCK(adapter);
1521                 return POLL_RETURN_COUNT(rx_done);
1522         }
1523
1524         if (cmd == POLL_AND_CHECK_STATUS) {
1525                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1526                 /* Link status change */
1527                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1528                         igb_handle_link_locked(adapter);
1529
1530                 if (reg_icr & E1000_ICR_RXO)
1531                         adapter->rx_overruns++;
1532         }
1533         IGB_CORE_UNLOCK(adapter);
1534
1535         for (int i = 0; i < adapter->num_queues; i++) {
1536                 que = &adapter->queues[i];
1537                 txr = que->txr;
1538
1539                 igb_rxeof(que, count, &rx_done);
1540
1541                 IGB_TX_LOCK(txr);
1542                 do {
1543                         more = igb_txeof(txr);
1544                 } while (loop-- && more);
1545 #ifndef IGB_LEGACY_TX
1546                 if (!drbr_empty(ifp, txr->br))
1547                         igb_mq_start_locked(ifp, txr);
1548 #else
1549                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1550                         igb_start_locked(txr, ifp);
1551 #endif
1552                 IGB_TX_UNLOCK(txr);
1553         }
1554
1555         return POLL_RETURN_COUNT(rx_done);
1556 }
1557 #endif /* DEVICE_POLLING */
1558
1559 /*********************************************************************
1560  *
1561  *  MSIX Que Interrupt Service routine
1562  *
1563  **********************************************************************/
1564 static void
1565 igb_msix_que(void *arg)
1566 {
1567         struct igb_queue *que = arg;
1568         struct adapter *adapter = que->adapter;
1569         struct ifnet   *ifp = adapter->ifp;
1570         struct tx_ring *txr = que->txr;
1571         struct rx_ring *rxr = que->rxr;
1572         u32             newitr = 0;
1573         bool            more_rx;
1574
1575         /* Ignore spurious interrupts */
1576         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1577                 return;
1578
1579         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1580         ++que->irqs;
1581
1582         IGB_TX_LOCK(txr);
1583         igb_txeof(txr);
1584 #ifndef IGB_LEGACY_TX
1585         /* Process the stack queue only if not depleted */
1586         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1587             !drbr_empty(ifp, txr->br))
1588                 igb_mq_start_locked(ifp, txr);
1589 #else
1590         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1591                 igb_start_locked(txr, ifp);
1592 #endif
1593         IGB_TX_UNLOCK(txr);
1594
1595         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1596
1597         if (adapter->enable_aim == FALSE)
1598                 goto no_calc;
1599         /*
1600         ** Do Adaptive Interrupt Moderation:
1601         **  - Write out last calculated setting
1602         **  - Calculate based on average size over
1603         **    the last interval.
1604         */
1605         if (que->eitr_setting)
1606                 E1000_WRITE_REG(&adapter->hw,
1607                     E1000_EITR(que->msix), que->eitr_setting);
1608  
1609         que->eitr_setting = 0;
1610
1611         /* Idle, do nothing */
1612         if ((txr->bytes == 0) && (rxr->bytes == 0))
1613                 goto no_calc;
1614                                 
1615         /* Use half the default if sub-gigabit */
1616         if (adapter->link_speed != 1000)
1617                 newitr = IGB_DEFAULT_ITR / 2;
1618         else {
1619                 if ((txr->bytes) && (txr->packets))
1620                         newitr = txr->bytes/txr->packets;
1621                 if ((rxr->bytes) && (rxr->packets))
1622                         newitr = max(newitr,
1623                             (rxr->bytes / rxr->packets));
1624                 newitr += 24; /* account for hardware frame, crc */
1625                 /* set an upper boundary */
1626                 newitr = min(newitr, 3000);
1627                 /* Be nice to the mid range */
1628                 if ((newitr > 300) && (newitr < 1200))
1629                         newitr = (newitr / 3);
1630                 else
1631                         newitr = (newitr / 2);
1632         }
1633         newitr &= 0x7FFC;  /* Mask invalid bits */
1634         if (adapter->hw.mac.type == e1000_82575)
1635                 newitr |= newitr << 16;
1636         else
1637                 newitr |= E1000_EITR_CNT_IGNR;
1638                  
1639         /* save for next interrupt */
1640         que->eitr_setting = newitr;
1641
1642         /* Reset state */
1643         txr->bytes = 0;
1644         txr->packets = 0;
1645         rxr->bytes = 0;
1646         rxr->packets = 0;
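        /*
        ** Worked example (illustrative numbers): an interval averaging
        ** 1024-byte frames gives newitr = 1024 + 24 = 1048, which lands
        ** in the mid range and is scaled to 1048 / 3 = 349, then masked
        ** to 348 by the &= 0x7FFC above; the value is stored in
        ** eitr_setting and written to EITR at the top of the next
        ** interrupt.
        */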
1647
1648 no_calc:
1649         /* Schedule a clean task if needed */
1650         if (more_rx)
1651                 taskqueue_enqueue(que->tq, &que->que_task);
1652         else
1653                 /* Reenable this interrupt */
1654                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1655         return;
1656 }
1657
1658
1659 /*********************************************************************
1660  *
1661  *  MSIX Link Interrupt Service routine
1662  *
1663  **********************************************************************/
1664
1665 static void
1666 igb_msix_link(void *arg)
1667 {
1668         struct adapter  *adapter = arg;
1669         u32             icr;
1670
1671         ++adapter->link_irq;
1672         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1673         if (!(icr & E1000_ICR_LSC))
1674                 goto spurious;
1675         igb_handle_link(adapter, 0);
1676
1677 spurious:
1678         /* Rearm */
1679         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1680         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1681         return;
1682 }
1683
1684
1685 /*********************************************************************
1686  *
1687  *  Media Ioctl callback
1688  *
1689  *  This routine is called whenever the user queries the status of
1690  *  the interface using ifconfig.
1691  *
1692  **********************************************************************/
1693 static void
1694 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1695 {
1696         struct adapter *adapter = ifp->if_softc;
1697
1698         INIT_DEBUGOUT("igb_media_status: begin");
1699
1700         IGB_CORE_LOCK(adapter);
1701         igb_update_link_status(adapter);
1702
1703         ifmr->ifm_status = IFM_AVALID;
1704         ifmr->ifm_active = IFM_ETHER;
1705
1706         if (!adapter->link_active) {
1707                 IGB_CORE_UNLOCK(adapter);
1708                 return;
1709         }
1710
1711         ifmr->ifm_status |= IFM_ACTIVE;
1712
1713         switch (adapter->link_speed) {
1714         case 10:
1715                 ifmr->ifm_active |= IFM_10_T;
1716                 break;
1717         case 100:
1718                 /*
1719                 ** Support for 100Mb SFP - these are fiber
1720                 ** but the media type appears as serdes
1721                 */
1722                 if (adapter->hw.phy.media_type ==
1723                     e1000_media_type_internal_serdes)
1724                         ifmr->ifm_active |= IFM_100_FX;
1725                 else
1726                         ifmr->ifm_active |= IFM_100_TX;
1727                 break;
1728         case 1000:
1729                 ifmr->ifm_active |= IFM_1000_T;
1730                 break;
1731         }
1732
1733         if (adapter->link_duplex == FULL_DUPLEX)
1734                 ifmr->ifm_active |= IFM_FDX;
1735         else
1736                 ifmr->ifm_active |= IFM_HDX;
1737
1738         IGB_CORE_UNLOCK(adapter);
1739 }
1740
1741 /*********************************************************************
1742  *
1743  *  Media Ioctl callback
1744  *
1745  *  This routine is called when the user changes speed/duplex using
1746  *  media/mediaopt options with ifconfig.
1747  *
1748  **********************************************************************/
1749 static int
1750 igb_media_change(struct ifnet *ifp)
1751 {
1752         struct adapter *adapter = ifp->if_softc;
1753         struct ifmedia  *ifm = &adapter->media;
1754
1755         INIT_DEBUGOUT("igb_media_change: begin");
1756
1757         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1758                 return (EINVAL);
1759
1760         IGB_CORE_LOCK(adapter);
1761         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1762         case IFM_AUTO:
1763                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1764                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1765                 break;
1766         case IFM_1000_LX:
1767         case IFM_1000_SX:
1768         case IFM_1000_T:
1769                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1770                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1771                 break;
1772         case IFM_100_TX:
1773                 adapter->hw.mac.autoneg = FALSE;
1774                 adapter->hw.phy.autoneg_advertised = 0;
1775                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1776                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1777                 else
1778                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1779                 break;
1780         case IFM_10_T:
1781                 adapter->hw.mac.autoneg = FALSE;
1782                 adapter->hw.phy.autoneg_advertised = 0;
1783                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1784                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1785                 else
1786                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1787                 break;
1788         default:
1789                 device_printf(adapter->dev, "Unsupported media type\n");
1790         }
1791
1792         igb_init_locked(adapter);
1793         IGB_CORE_UNLOCK(adapter);
1794
1795         return (0);
1796 }
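/*
 * Example usage from userland (a sketch; the interface name is assumed):
 *
 *     ifconfig igb0 media autoselect
 *     ifconfig igb0 media 100baseTX mediaopt full-duplex
 *
 * The first request lands in the IFM_AUTO case above; the second forces
 * 100/full through the IFM_100_TX case with IFM_FDX set.
 */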
1797
1798
1799 /*********************************************************************
1800  *
1801  *  This routine maps the mbufs to Advanced TX descriptors.
1802  *  
1803  **********************************************************************/
1804 static int
1805 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1806 {
1807         struct adapter          *adapter = txr->adapter;
1808         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1809         bus_dmamap_t            map;
1810         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1811         union e1000_adv_tx_desc *txd = NULL;
1812         struct mbuf             *m_head = *m_headp;
1813         struct ether_vlan_header *eh = NULL;
1814         struct ip               *ip = NULL;
1815         struct tcphdr           *th = NULL;
1816         u32                     hdrlen, cmd_type_len, olinfo_status = 0;
1817         int                     ehdrlen, poff;
1818         int                     nsegs, i, first, last = 0;
1819         int                     error, do_tso, remap = 1;
1820
1821         /* Set basic descriptor constants */
1822         cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1823         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1824         if (m_head->m_flags & M_VLANTAG)
1825                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1826
1827 retry:
1828         m_head = *m_headp;
1829         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1830         hdrlen = ehdrlen = poff = 0;
1831
1832         /*
1833          * Intel recommends that the entire IP/TCP header length reside
1834          * in a single buffer. If multiple descriptors are used to
1835          * describe the IP and TCP header, each descriptor should
1836          * describe one or more complete headers; descriptors
1837          * referencing only parts of headers are not supported. If all
1838          * layer headers are not coalesced into a single buffer, each
1839          * buffer should not cross a 4KB boundary or be larger than the
1840          * maximum read request size. The controller also requires the
1841          * IP/TCP header to be modified for TSO to work, so we first get
1842          * a writable mbuf chain, then coalesce the ethernet/IP/TCP
1843          * headers into a single buffer. This also simplifies IP/TCP/UDP
1844          * checksum offloading, which has similar restrictions.
1845          */
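        /*
         * For instance (illustrative, option-less headers): ehdrlen = 14,
         * poff = 14 + 20 = 34 and hdrlen = 34 + 20 = 54, so the pullups
         * below guarantee the first 54 bytes of a TSO frame sit in one
         * contiguous buffer.
         */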
1846         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1847                 if (do_tso || (m_head->m_next != NULL && 
1848                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1849                         if (M_WRITABLE(*m_headp) == 0) {
1850                                 m_head = m_dup(*m_headp, M_NOWAIT);
1851                                 m_freem(*m_headp);
1852                                 if (m_head == NULL) {
1853                                         *m_headp = NULL;
1854                                         return (ENOBUFS);
1855                                 }
1856                                 *m_headp = m_head;
1857                         }
1858                 }
1859                 /*
1860                  * Assume IPv4, we don't have TSO/checksum offload support
1861                  * for IPv6 yet.
1862                  */
1863                 ehdrlen = sizeof(struct ether_header);
1864                 m_head = m_pullup(m_head, ehdrlen);
1865                 if (m_head == NULL) {
1866                         *m_headp = NULL;
1867                         return (ENOBUFS);
1868                 }
1869                 eh = mtod(m_head, struct ether_vlan_header *);
1870                 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1871                         ehdrlen = sizeof(struct ether_vlan_header);
1872                         m_head = m_pullup(m_head, ehdrlen);
1873                         if (m_head == NULL) {
1874                                 *m_headp = NULL;
1875                                 return (ENOBUFS);
1876                         }
1877                 }
1878                 m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1879                 if (m_head == NULL) {
1880                         *m_headp = NULL;
1881                         return (ENOBUFS);
1882                 }
1883                 ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1884                 poff = ehdrlen + (ip->ip_hl << 2);
1885                 if (do_tso) {
1886                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1887                         if (m_head == NULL) {
1888                                 *m_headp = NULL;
1889                                 return (ENOBUFS);
1890                         }
1891                         /*
1892                          * The TCP pseudo checksum must not include the
1893                          * TCP payload length, so the driver recomputes it
1894                          * here to match what the hardware expects to see,
1895                          * per Microsoft's Large Send specification.
1896                          */
1897                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1898                         th->th_sum = in_pseudo(ip->ip_src.s_addr,
1899                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1900                         /* Keep track of the full header length */
1901                         hdrlen = poff + (th->th_off << 2);
1902                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1903                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1904                         if (m_head == NULL) {
1905                                 *m_headp = NULL;
1906                                 return (ENOBUFS);
1907                         }
1908                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1909                         m_head = m_pullup(m_head, poff + (th->th_off << 2));
1910                         if (m_head == NULL) {
1911                                 *m_headp = NULL;
1912                                 return (ENOBUFS);
1913                         }
1914                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1915                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1916                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1917                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1918                         if (m_head == NULL) {
1919                                 *m_headp = NULL;
1920                                 return (ENOBUFS);
1921                         }
1922                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1923                 }
1924                 *m_headp = m_head;
1925         }
1926
1927         /*
1928          * Map the packet for DMA
1929          *
1930          * Capture the first descriptor index,
1931          * this descriptor will have the index
1932          * of the EOP which is the only one that
1933          * now gets a DONE bit writeback.
1934          */
1935         first = txr->next_avail_desc;
1936         tx_buffer = &txr->tx_buffers[first];
1937         tx_buffer_mapped = tx_buffer;
1938         map = tx_buffer->map;
1939
1940         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1941             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1942
1943         /*
1944          * There are two types of errors we can (try) to handle:
1945          * - EFBIG means the mbuf chain was too long and bus_dma ran
1946          *   out of segments.  Defragment the mbuf chain and try again.
1947          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1948          *   at this point in time.  Defer sending and try again later.
1949          * All other errors, in particular EINVAL, are fatal and prevent the
1950          * mbuf chain from ever going through.  Drop it and report error.
1951          */
1952         if (error == EFBIG && remap) {
1953                 struct mbuf *m;
1954
1955                 m = m_defrag(*m_headp, M_NOWAIT);
1956                 if (m == NULL) {
1957                         adapter->mbuf_defrag_failed++;
1958                         m_freem(*m_headp);
1959                         *m_headp = NULL;
1960                         return (ENOBUFS);
1961                 }
1962                 *m_headp = m;
1963
1964                 /* Try it again, but only once */
1965                 remap = 0;
1966                 goto retry;
1967         } else if (error == ENOMEM) {
1968                 adapter->no_tx_dma_setup++;
1969                 return (error);
1970         } else if (error != 0) {
1971                 adapter->no_tx_dma_setup++;
1972                 m_freem(*m_headp);
1973                 *m_headp = NULL;
1974                 return (error);
1975         }
1976
1977         /*
1978         ** Make sure we don't overrun the ring;
1979         ** we need nsegs descriptors plus one for
1980         ** the context descriptor used for the
1981         ** offloads.
1982         */
1983         if ((nsegs + 1) > (txr->tx_avail - 2)) {
1984                 txr->no_desc_avail++;
1985                 bus_dmamap_unload(txr->txtag, map);
1986                 return (ENOBUFS);
1987         }
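        /*
        ** e.g. (illustrative): a 10-segment chain needs 11 descriptors
        ** counting the context descriptor, and with the 2-descriptor
        ** cushion above the ring must have at least 13 free.
        */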
1988         m_head = *m_headp;
1989
1990         /* Do hardware assists:
1991          * Set up the context descriptor, used
1992          * when any hardware offload is done.
1993          * This includes CSUM, VLAN, and TSO.
1994          * It will use the first descriptor.
1995          */
1996
1997         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1998                 if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1999                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
2000                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2001                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2002                 } else
2003                         return (ENXIO);
2004         } else if (igb_tx_ctx_setup(txr, m_head))
2005                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2006
2007         /* Calculate payload length */
2008         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
2009             << E1000_ADVTXD_PAYLEN_SHIFT);
2010
2011         /* 82575 needs the queue index added */
2012         if (adapter->hw.mac.type == e1000_82575)
2013                 olinfo_status |= txr->me << 4;
2014
2015         /* Set up our transmit descriptors */
2016         i = txr->next_avail_desc;
2017         for (int j = 0; j < nsegs; j++) {
2018                 bus_size_t seg_len;
2019                 bus_addr_t seg_addr;
2020
2021                 tx_buffer = &txr->tx_buffers[i];
2022                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2023                 seg_addr = segs[j].ds_addr;
2024                 seg_len  = segs[j].ds_len;
2025
2026                 txd->read.buffer_addr = htole64(seg_addr);
2027                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2028                 txd->read.olinfo_status = htole32(olinfo_status);
2029                 last = i;
2030                 if (++i == adapter->num_tx_desc)
2031                         i = 0;
2032                 tx_buffer->m_head = NULL;
2033                 tx_buffer->next_eop = -1;
2034         }
2035
2036         txr->next_avail_desc = i;
2037         txr->tx_avail -= nsegs;
2038         tx_buffer->m_head = m_head;
2039
2040         /*
2041         ** Here we swap the map so the last descriptor,
2042         ** which gets the completion interrupt has the
2043         ** real map, and the first descriptor gets the
2044         ** unused map from this descriptor.
2045         */
2046         tx_buffer_mapped->map = tx_buffer->map;
2047         tx_buffer->map = map;
2048         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
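        /*
        ** (Rationale: the completion path unloads the dmamap found at
        ** the EOP buffer, so the map that actually carries this DMA
        ** load has to travel there.)
        */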
2049
2050         /*
2051          * Last Descriptor of Packet
2052          * needs End Of Packet (EOP)
2053          * and Report Status (RS)
2054          */
2055         txd->read.cmd_type_len |=
2056             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2057         /*
2058          * Keep track in the first buffer which
2059          * descriptor will be written back
2060          */
2061         tx_buffer = &txr->tx_buffers[first];
2062         tx_buffer->next_eop = last;
2063         /* Update the watchdog time early and often */
2064         txr->watchdog_time = ticks;
2065
2066         /*
2067          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2068          * that this frame is available to transmit.
2069          */
2070         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2071             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2072         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2073         ++txr->tx_packets;
2074
2075         return (0);
2076 }
2077 static void
2078 igb_set_promisc(struct adapter *adapter)
2079 {
2080         struct ifnet    *ifp = adapter->ifp;
2081         struct e1000_hw *hw = &adapter->hw;
2082         u32             reg;
2083
2084         if (adapter->vf_ifp) {
2085                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2086                 return;
2087         }
2088
2089         reg = E1000_READ_REG(hw, E1000_RCTL);
2090         if (ifp->if_flags & IFF_PROMISC) {
2091                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2092                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2093         } else if (ifp->if_flags & IFF_ALLMULTI) {
2094                 reg |= E1000_RCTL_MPE;
2095                 reg &= ~E1000_RCTL_UPE;
2096                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2097         }
2098 }
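/*
 * RCTL.UPE is unicast promiscuous enable and RCTL.MPE multicast
 * promiscuous enable, so "ifconfig igb0 promisc" (IFF_PROMISC) turns
 * on both, while IFF_ALLMULTI sets only MPE.
 */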
2099
2100 static void
2101 igb_disable_promisc(struct adapter *adapter)
2102 {
2103         struct e1000_hw *hw = &adapter->hw;
2104         struct ifnet    *ifp = adapter->ifp;
2105         u32             reg;
2106         int             mcnt = 0;
2107
2108         if (adapter->vf_ifp) {
2109                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2110                 return;
2111         }
2112         reg = E1000_READ_REG(hw, E1000_RCTL);
2113         reg &=  (~E1000_RCTL_UPE);
2114         if (ifp->if_flags & IFF_ALLMULTI)
2115                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2116         else {
2117                 struct  ifmultiaddr *ifma;
2118 #if __FreeBSD_version < 800000
2119                 IF_ADDR_LOCK(ifp);
2120 #else   
2121                 if_maddr_rlock(ifp);
2122 #endif
2123                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2124                         if (ifma->ifma_addr->sa_family != AF_LINK)
2125                                 continue;
2126                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2127                                 break;
2128                         mcnt++;
2129                 }
2130 #if __FreeBSD_version < 800000
2131                 IF_ADDR_UNLOCK(ifp);
2132 #else
2133                 if_maddr_runlock(ifp);
2134 #endif
2135         }
2136         /* Don't disable if in MAX groups */
2137         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2138                 reg &=  (~E1000_RCTL_MPE);
2139         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2140 }
2141
2142
2143 /*********************************************************************
2144  *  Multicast Update
2145  *
2146  *  This routine is called whenever multicast address list is updated.
2147  *
2148  **********************************************************************/
2149
2150 static void
2151 igb_set_multi(struct adapter *adapter)
2152 {
2153         struct ifnet    *ifp = adapter->ifp;
2154         struct ifmultiaddr *ifma;
2155         u32 reg_rctl = 0;
2156         u8  *mta;
2157
2158         int mcnt = 0;
2159
2160         IOCTL_DEBUGOUT("igb_set_multi: begin");
2161
2162         mta = adapter->mta;
2163         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2164             MAX_NUM_MULTICAST_ADDRESSES);
2165
2166 #if __FreeBSD_version < 800000
2167         IF_ADDR_LOCK(ifp);
2168 #else
2169         if_maddr_rlock(ifp);
2170 #endif
2171         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2172                 if (ifma->ifma_addr->sa_family != AF_LINK)
2173                         continue;
2174
2175                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2176                         break;
2177
2178                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2179                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2180                 mcnt++;
2181         }
2182 #if __FreeBSD_version < 800000
2183         IF_ADDR_UNLOCK(ifp);
2184 #else
2185         if_maddr_runlock(ifp);
2186 #endif
2187
2188         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2189                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2190                 reg_rctl |= E1000_RCTL_MPE;
2191                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2192         } else
2193                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2194 }
2195
2196
2197 /*********************************************************************
2198  *  Timer routine:
2199  *      This routine checks for link status,
2200  *      updates statistics, and does the watchdog.
2201  *
2202  **********************************************************************/
2203
2204 static void
2205 igb_local_timer(void *arg)
2206 {
2207         struct adapter          *adapter = arg;
2208         device_t                dev = adapter->dev;
2209         struct ifnet            *ifp = adapter->ifp;
2210         struct tx_ring          *txr = adapter->tx_rings;
2211         struct igb_queue        *que = adapter->queues;
2212         int                     hung = 0, busy = 0;
2213
2214
2215         IGB_CORE_LOCK_ASSERT(adapter);
2216
2217         igb_update_link_status(adapter);
2218         igb_update_stats_counters(adapter);
2219
2220         /*
2221         ** Check the TX queues status
2222         **      - central locked handling of OACTIVE
2223         **      - watchdog only if all queues show hung
2224         */
2225         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2226                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2227                     (adapter->pause_frames == 0))
2228                         ++hung;
2229                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2230                         ++busy;
2231                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2232                         taskqueue_enqueue(que->tq, &que->que_task);
2233         }
2234         if (hung == adapter->num_queues)
2235                 goto timeout;
2236         if (busy == adapter->num_queues)
2237                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2238         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2239             (busy < adapter->num_queues))
2240                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2241
2242         adapter->pause_frames = 0;
2243         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2244 #ifndef DEVICE_POLLING
2245         /* Schedule all queue interrupts - deadlock protection */
2246         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2247 #endif
2248         return;
2249
2250 timeout:
2251         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2252         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2253             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2254             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2255         device_printf(dev, "TX(%d) desc avail = %d, "
2256             "Next TX to Clean = %d\n",
2257             txr->me, txr->tx_avail, txr->next_to_clean);
2258         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2259         adapter->watchdog_events++;
2260         igb_init_locked(adapter);
2261 }
2262
2263 static void
2264 igb_update_link_status(struct adapter *adapter)
2265 {
2266         struct e1000_hw         *hw = &adapter->hw;
2267         struct e1000_fc_info    *fc = &hw->fc;
2268         struct ifnet            *ifp = adapter->ifp;
2269         device_t                dev = adapter->dev;
2270         struct tx_ring          *txr = adapter->tx_rings;
2271         u32                     link_check, thstat, ctrl;
2272         char                    *flowctl = NULL;
2273
2274         link_check = thstat = ctrl = 0;
2275
2276         /* Get the cached link value or read for real */
2277         switch (hw->phy.media_type) {
2278         case e1000_media_type_copper:
2279                 if (hw->mac.get_link_status) {
2280                         /* Do the work to read phy */
2281                         e1000_check_for_link(hw);
2282                         link_check = !hw->mac.get_link_status;
2283                 } else
2284                         link_check = TRUE;
2285                 break;
2286         case e1000_media_type_fiber:
2287                 e1000_check_for_link(hw);
2288                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2289                                  E1000_STATUS_LU);
2290                 break;
2291         case e1000_media_type_internal_serdes:
2292                 e1000_check_for_link(hw);
2293                 link_check = adapter->hw.mac.serdes_has_link;
2294                 break;
2295         /* VF device is type_unknown */
2296         case e1000_media_type_unknown:
2297                 e1000_check_for_link(hw);
2298                 link_check = !hw->mac.get_link_status;
2299                 /* Fall thru */
2300         default:
2301                 break;
2302         }
2303
2304         /* Check for thermal downshift or shutdown */
2305         if (hw->mac.type == e1000_i350) {
2306                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2307                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2308         }
2309
2310         /* Get the flow control for display */
2311         switch (fc->current_mode) {
2312         case e1000_fc_rx_pause:
2313                 flowctl = "RX";
2314                 break;  
2315         case e1000_fc_tx_pause:
2316                 flowctl = "TX";
2317                 break;  
2318         case e1000_fc_full:
2319                 flowctl = "Full";
2320                 break;  
2321         case e1000_fc_none:
2322         default:
2323                 flowctl = "None";
2324                 break;  
2325         }
2326
2327         /* Now we check if a transition has happened */
2328         if (link_check && (adapter->link_active == 0)) {
2329                 e1000_get_speed_and_duplex(&adapter->hw, 
2330                     &adapter->link_speed, &adapter->link_duplex);
2331                 if (bootverbose)
2332                         device_printf(dev, "Link is up %d Mbps %s,"
2333                             " Flow Control: %s\n",
2334                             adapter->link_speed,
2335                             ((adapter->link_duplex == FULL_DUPLEX) ?
2336                             "Full Duplex" : "Half Duplex"), flowctl);
2337                 adapter->link_active = 1;
2338                 ifp->if_baudrate = adapter->link_speed * 1000000;
2339                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2340                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2341                         device_printf(dev, "Link: thermal downshift\n");
2342                 /* This can sleep */
2343                 if_link_state_change(ifp, LINK_STATE_UP);
2344         } else if (!link_check && (adapter->link_active == 1)) {
2345                 ifp->if_baudrate = adapter->link_speed = 0;
2346                 adapter->link_duplex = 0;
2347                 if (bootverbose)
2348                         device_printf(dev, "Link is Down\n");
2349                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2350                     (thstat & E1000_THSTAT_PWR_DOWN))
2351                         device_printf(dev, "Link: thermal shutdown\n");
2352                 adapter->link_active = 0;
2353                 /* This can sleep */
2354                 if_link_state_change(ifp, LINK_STATE_DOWN);
2355                 /* Reset queue state */
2356                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2357                         txr->queue_status = IGB_QUEUE_IDLE;
2358         }
2359 }
2360
2361 /*********************************************************************
2362  *
2363  *  This routine disables all traffic on the adapter by issuing a
2364  *  global reset on the MAC and deallocates TX/RX buffers.
2365  *
2366  **********************************************************************/
2367
2368 static void
2369 igb_stop(void *arg)
2370 {
2371         struct adapter  *adapter = arg;
2372         struct ifnet    *ifp = adapter->ifp;
2373         struct tx_ring *txr = adapter->tx_rings;
2374
2375         IGB_CORE_LOCK_ASSERT(adapter);
2376
2377         INIT_DEBUGOUT("igb_stop: begin");
2378
2379         igb_disable_intr(adapter);
2380
2381         callout_stop(&adapter->timer);
2382
2383         /* Tell the stack that the interface is no longer active */
2384         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2385         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2386
2387         /* Disarm watchdog timer. */
2388         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2389                 IGB_TX_LOCK(txr);
2390                 txr->queue_status = IGB_QUEUE_IDLE;
2391                 IGB_TX_UNLOCK(txr);
2392         }
2393
2394         e1000_reset_hw(&adapter->hw);
2395         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2396
2397         e1000_led_off(&adapter->hw);
2398         e1000_cleanup_led(&adapter->hw);
2399 }
2400
2401
2402 /*********************************************************************
2403  *
2404  *  Determine hardware revision.
2405  *
2406  **********************************************************************/
2407 static void
2408 igb_identify_hardware(struct adapter *adapter)
2409 {
2410         device_t dev = adapter->dev;
2411
2412         /* Make sure our PCI config space has the necessary stuff set */
2413         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2414         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2415             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2416                 INIT_DEBUGOUT("Memory Access and/or Bus Master "
2417                     "bits were not set!\n");
2418                 adapter->hw.bus.pci_cmd_word |=
2419                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2420                 pci_write_config(dev, PCIR_COMMAND,
2421                     adapter->hw.bus.pci_cmd_word, 2);
2422         }
2423
2424         /* Save off the information about this board */
2425         adapter->hw.vendor_id = pci_get_vendor(dev);
2426         adapter->hw.device_id = pci_get_device(dev);
2427         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2428         adapter->hw.subsystem_vendor_id =
2429             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2430         adapter->hw.subsystem_device_id =
2431             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2432
2433         /* Set MAC type early for PCI setup */
2434         e1000_set_mac_type(&adapter->hw);
2435
2436         /* Are we a VF device? */
2437         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2438             (adapter->hw.mac.type == e1000_vfadapt_i350))
2439                 adapter->vf_ifp = 1;
2440         else
2441                 adapter->vf_ifp = 0;
2442 }
2443
2444 static int
2445 igb_allocate_pci_resources(struct adapter *adapter)
2446 {
2447         device_t        dev = adapter->dev;
2448         int             rid;
2449
2450         rid = PCIR_BAR(0);
2451         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2452             &rid, RF_ACTIVE);
2453         if (adapter->pci_mem == NULL) {
2454                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2455                 return (ENXIO);
2456         }
2457         adapter->osdep.mem_bus_space_tag =
2458             rman_get_bustag(adapter->pci_mem);
2459         adapter->osdep.mem_bus_space_handle =
2460             rman_get_bushandle(adapter->pci_mem);
2461         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2462
2463         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2464
2465         /* This will setup either MSI/X or MSI */
2466         adapter->msix = igb_setup_msix(adapter);
2467         adapter->hw.back = &adapter->osdep;
2468
2469         return (0);
2470 }
2471
2472 /*********************************************************************
2473  *
2474  *  Setup the Legacy or MSI Interrupt handler
2475  *
2476  **********************************************************************/
2477 static int
2478 igb_allocate_legacy(struct adapter *adapter)
2479 {
2480         device_t                dev = adapter->dev;
2481         struct igb_queue        *que = adapter->queues;
2482         int                     error, rid = 0;
2483
2484         /* Turn off all interrupts */
2485         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2486
2487         /* MSI RID is 1 */
2488         if (adapter->msix == 1)
2489                 rid = 1;
2490
2491         /* We allocate a single interrupt resource */
2492         adapter->res = bus_alloc_resource_any(dev,
2493             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2494         if (adapter->res == NULL) {
2495                 device_printf(dev, "Unable to allocate bus resource: "
2496                     "interrupt\n");
2497                 return (ENXIO);
2498         }
2499
2500 #ifndef IGB_LEGACY_TX
2501         TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr);
2502 #endif
2503
2504         /*
2505          * Try allocating a fast interrupt and the associated deferred
2506          * processing contexts.
2507          */
2508         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2509         /* Make tasklet for deferred link handling */
2510         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2511         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2512             taskqueue_thread_enqueue, &que->tq);
2513         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2514             device_get_nameunit(adapter->dev));
2515         if ((error = bus_setup_intr(dev, adapter->res,
2516             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2517             adapter, &adapter->tag)) != 0) {
2518                 device_printf(dev, "Failed to register fast interrupt "
2519                             "handler: %d\n", error);
2520                 taskqueue_free(que->tq);
2521                 que->tq = NULL;
2522                 return (error);
2523         }
2524
2525         return (0);
2526 }
2527
2528
2529 /*********************************************************************
2530  *
2531  *  Setup the MSIX Queue Interrupt handlers: 
2532  *
2533  **********************************************************************/
2534 static int
2535 igb_allocate_msix(struct adapter *adapter)
2536 {
2537         device_t                dev = adapter->dev;
2538         struct igb_queue        *que = adapter->queues;
2539         int                     error, rid, vector = 0;
2540
2541         /* Be sure to start with all interrupts disabled */
2542         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2543         E1000_WRITE_FLUSH(&adapter->hw);
2544
2545         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2546                 rid = vector + 1;
2547                 que->res = bus_alloc_resource_any(dev,
2548                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2549                 if (que->res == NULL) {
2550                         device_printf(dev,
2551                             "Unable to allocate bus resource: "
2552                             "MSIX Queue Interrupt\n");
2553                         return (ENXIO);
2554                 }
2555                 error = bus_setup_intr(dev, que->res,
2556                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2557                     igb_msix_que, que, &que->tag);
2558                 if (error) {
2559                         que->res = NULL;
2560                         device_printf(dev, "Failed to register Queue handler");
2561                         return (error);
2562                 }
2563 #if __FreeBSD_version >= 800504
2564                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2565 #endif
2566                 que->msix = vector;
2567                 if (adapter->hw.mac.type == e1000_82575)
2568                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2569                 else
2570                         que->eims = 1 << vector;
2571                 /*
2572                 ** Bind the msix vector, and thus the
2573                 ** rings to the corresponding cpu.
2574                 */
2575                 if (adapter->num_queues > 1) {
2576                         if (igb_last_bind_cpu < 0)
2577                                 igb_last_bind_cpu = CPU_FIRST();
2578                         bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2579                         device_printf(dev,
2580                                 "Bound queue %d to cpu %d\n",
2581                                 i, igb_last_bind_cpu);
2582                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2583                 }
2584 #ifndef IGB_LEGACY_TX
2585                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2586                     que->txr);
2587 #endif
2588                 /* Make tasklet for deferred handling */
2589                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2590                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2591                     taskqueue_thread_enqueue, &que->tq);
2592                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2593                     device_get_nameunit(adapter->dev));
2594         }
2595
2596         /* And Link */
2597         rid = vector + 1;
2598         adapter->res = bus_alloc_resource_any(dev,
2599             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2600         if (adapter->res == NULL) {
2601                 device_printf(dev,
2602                     "Unable to allocate bus resource: "
2603                     "MSIX Link Interrupt\n");
2604                 return (ENXIO);
2605         }
2606         if ((error = bus_setup_intr(dev, adapter->res,
2607             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2608             igb_msix_link, adapter, &adapter->tag)) != 0) {
2609                 device_printf(dev, "Failed to register Link handler");
2610                 return (error);
2611         }
2612 #if __FreeBSD_version >= 800504
2613         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2614 #endif
2615         adapter->linkvec = vector;
2616
2617         return (0);
2618 }
2619
2620
2621 static void
2622 igb_configure_queues(struct adapter *adapter)
2623 {
2624         struct  e1000_hw        *hw = &adapter->hw;
2625         struct  igb_queue       *que;
2626         u32                     tmp, ivar = 0, newitr = 0;
2627
2628         /* First turn on RSS capability */
2629         if (adapter->hw.mac.type != e1000_82575)
2630                 E1000_WRITE_REG(hw, E1000_GPIE,
2631                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2632                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2633
2634         /* Turn on MSIX */
2635         switch (adapter->hw.mac.type) {
2636         case e1000_82580:
2637         case e1000_i350:
2638         case e1000_i210:
2639         case e1000_i211:
2640         case e1000_vfadapt:
2641         case e1000_vfadapt_i350:
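                /*
                ** Each 32-bit IVAR0 array entry on these MACs maps two
                ** queue pairs: byte 0 = RX(2i), byte 1 = TX(2i),
                ** byte 2 = RX(2i+1), byte 3 = TX(2i+1), with bit 7 of
                ** each byte as the VALID flag -- hence the single-byte
                ** read/modify/write per queue below.
                */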
2642                 /* RX entries */
2643                 for (int i = 0; i < adapter->num_queues; i++) {
2644                         u32 index = i >> 1;
2645                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2646                         que = &adapter->queues[i];
2647                         if (i & 1) {
2648                                 ivar &= 0xFF00FFFF;
2649                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2650                         } else {
2651                                 ivar &= 0xFFFFFF00;
2652                                 ivar |= que->msix | E1000_IVAR_VALID;
2653                         }
2654                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2655                 }
2656                 /* TX entries */
2657                 for (int i = 0; i < adapter->num_queues; i++) {
2658                         u32 index = i >> 1;
2659                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2660                         que = &adapter->queues[i];
2661                         if (i & 1) {
2662                                 ivar &= 0x00FFFFFF;
2663                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2664                         } else {
2665                                 ivar &= 0xFFFF00FF;
2666                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2667                         }
2668                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2669                         adapter->que_mask |= que->eims;
2670                 }
2671
2672                 /* And for the link interrupt */
2673                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2674                 adapter->link_mask = 1 << adapter->linkvec;
2675                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2676                 break;
2677         case e1000_82576:
2678                 /* RX entries */
2679                 for (int i = 0; i < adapter->num_queues; i++) {
2680                         u32 index = i & 0x7; /* Each IVAR has two entries */
2681                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2682                         que = &adapter->queues[i];
2683                         if (i < 8) {
2684                                 ivar &= 0xFFFFFF00;
2685                                 ivar |= que->msix | E1000_IVAR_VALID;
2686                         } else {
2687                                 ivar &= 0xFF00FFFF;
2688                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2689                         }
2690                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2691                         adapter->que_mask |= que->eims;
2692                 }
2693                 /* TX entries */
2694                 for (int i = 0; i < adapter->num_queues; i++) {
2695                         u32 index = i & 0x7; /* Each IVAR has two entries */
2696                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2697                         que = &adapter->queues[i];
2698                         if (i < 8) {
2699                                 ivar &= 0xFFFF00FF;
2700                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2701                         } else {
2702                                 ivar &= 0x00FFFFFF;
2703                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2704                         }
2705                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2706                         adapter->que_mask |= que->eims;
2707                 }
2708
2709                 /* And for the link interrupt */
2710                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2711                 adapter->link_mask = 1 << adapter->linkvec;
2712                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2713                 break;
2714
2715         case e1000_82575:
2716                 /* Enable MSI-X support */
2717                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2718                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2719                 /* Auto-Mask interrupts upon ICR read. */
2720                 tmp |= E1000_CTRL_EXT_EIAME;
2721                 tmp |= E1000_CTRL_EXT_IRCA;
2722                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2723
2724                 /* Queues */
2725                 for (int i = 0; i < adapter->num_queues; i++) {
2726                         que = &adapter->queues[i];
2727                         tmp = E1000_EICR_RX_QUEUE0 << i;
2728                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2729                         que->eims = tmp;
2730                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2731                             i, que->eims);
2732                         adapter->que_mask |= que->eims;
2733                 }
2734
2735                 /* Link */
2736                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2737                     E1000_EIMS_OTHER);
2738                 adapter->link_mask |= E1000_EIMS_OTHER;
2739         default:
2740                 break;
2741         }
2742
2743         /* Set the starting interrupt rate */
2744         if (igb_max_interrupt_rate > 0)
2745                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2746
2747         if (hw->mac.type == e1000_82575)
2748                 newitr |= newitr << 16;
2749         else
2750                 newitr |= E1000_EITR_CNT_IGNR;
2751
2752         for (int i = 0; i < adapter->num_queues; i++) {
2753                 que = &adapter->queues[i];
2754                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2755         }
2756
2757         return;
2758 }
2759
2760
2761 static void
2762 igb_free_pci_resources(struct adapter *adapter)
2763 {
2764         struct          igb_queue *que = adapter->queues;
2765         device_t        dev = adapter->dev;
2766         int             rid;
2767
2768         /*
2769         ** There is a slight possibility of a failure mode
2770         ** in attach that will result in entering this function
2771         ** before interrupt resources have been initialized, and
2772         ** in that case we do not want to execute the loops below
2773         ** in that case we do not want to execute the loops below.
2774         ** We can detect this reliably by the state of the adapter's
2775         */
2776         if (adapter->res == NULL)
2777                 goto mem;
2778
2779         /*
2780          * First release all the interrupt resources:
2781          */
2782         for (int i = 0; i < adapter->num_queues; i++, que++) {
2783                 rid = que->msix + 1;
2784                 if (que->tag != NULL) {
2785                         bus_teardown_intr(dev, que->res, que->tag);
2786                         que->tag = NULL;
2787                 }
2788                 if (que->res != NULL)
2789                         bus_release_resource(dev,
2790                             SYS_RES_IRQ, rid, que->res);
2791         }
2792
2793         /* Clean the Legacy or Link interrupt last */
2794         if (adapter->linkvec) /* we are doing MSIX */
2795                 rid = adapter->linkvec + 1;
2796         else
2797                 rid = (adapter->msix != 0) ? 1 : 0;
2798
2799         que = adapter->queues;
2800         if (adapter->tag != NULL) {
2801                 taskqueue_drain(que->tq, &adapter->link_task);
2802                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2803                 adapter->tag = NULL;
2804         }
2805         if (adapter->res != NULL)
2806                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2807
2808         for (int i = 0; i < adapter->num_queues; i++, que++) {
2809                 if (que->tq != NULL) {
2810 #ifndef IGB_LEGACY_TX
2811                         taskqueue_drain(que->tq, &que->txr->txq_task);
2812 #endif
2813                         taskqueue_drain(que->tq, &que->que_task);
2814                         taskqueue_free(que->tq);
2815                 }
2816         }
2817 mem:
2818         if (adapter->msix)
2819                 pci_release_msi(dev);
2820
2821         if (adapter->msix_mem != NULL)
2822                 bus_release_resource(dev, SYS_RES_MEMORY,
2823                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2824
2825         if (adapter->pci_mem != NULL)
2826                 bus_release_resource(dev, SYS_RES_MEMORY,
2827                     PCIR_BAR(0), adapter->pci_mem);
2828
2829 }
2830
2831 /*
2832  * Setup Either MSI/X or MSI
2833  */
2834 static int
2835 igb_setup_msix(struct adapter *adapter)
2836 {
2837         device_t dev = adapter->dev;
2838         int rid, want, queues, msgs, maxqueues;
2839
2840         /* tuneable override */
2841         if (igb_enable_msix == 0)
2842                 goto msi;
2843
2844         /* First try MSI/X */
2845         rid = PCIR_BAR(IGB_MSIX_BAR);
2846         adapter->msix_mem = bus_alloc_resource_any(dev,
2847             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2848         if (!adapter->msix_mem) {
2849                 /* May not be enabled */
2850                 device_printf(adapter->dev,
2851                     "Unable to map MSIX table\n");
2852                 goto msi;
2853         }
2854
2855         msgs = pci_msix_count(dev); 
2856         if (msgs == 0) { /* system has msix disabled */
2857                 bus_release_resource(dev, SYS_RES_MEMORY,
2858                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2859                 adapter->msix_mem = NULL;
2860                 goto msi;
2861         }
2862
2863         /* Figure out a reasonable auto config value */
2864         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2865
2866         /* Manual override */
2867         if (igb_num_queues != 0)
2868                 queues = igb_num_queues;
2869
2870         /* Sanity check based on HW */
2871         switch (adapter->hw.mac.type) {
2872                 case e1000_82575:
2873                         maxqueues = 4;
2874                         break;
2875                 case e1000_82576:
2876                 case e1000_82580:
2877                 case e1000_i350:
2878                         maxqueues = 8;
2879                         break;
2880                 case e1000_i210:
2881                         maxqueues = 4;
2882                         break;
2883                 case e1000_i211:
2884                         maxqueues = 2;
2885                         break;
2886                 default:  /* VF interfaces */
2887                         maxqueues = 1;
2888                         break;
2889         }
2890         if (queues > maxqueues)
2891                 queues = maxqueues;
2892
2893         /*
2894         ** One vector (RX/TX pair) per queue
2895         ** plus an additional one for the link interrupt
2896         */
2897         want = queues + 1;
2898         if (msgs >= want)
2899                 msgs = want;
2900         else {
2901                 device_printf(adapter->dev,
2902                     "MSIX Configuration Problem, "
2903                     "%d vectors configured, but %d wanted!\n",
2904                     msgs, want);
2905                 return (0);
2906         }
2907         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2908                 device_printf(adapter->dev,
2909                     "Using MSIX interrupts with %d vectors\n", msgs);
2910                 adapter->num_queues = queues;
2911                 return (msgs);
2912         }
2913 msi:
2914         msgs = pci_msi_count(dev);
2915         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2916                 device_printf(adapter->dev, "Using MSI interrupt\n");
2917                 return (msgs);
2918         }
2919         return (0);
2920 }
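/*
 * Worked example of the sizing logic above (illustrative): on a
 * 4-core system whose adapter reports pci_msix_count() = 10, the
 * auto config picks queues = min(4, 10 - 1) = 4, well under the
 * hardware cap, so want = 4 + 1 = 5 and pci_alloc_msix() is asked
 * for 5 vectors: one RX/TX pair per queue plus the link vector.
 */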
2921
2922 /*********************************************************************
2923  *
2924  *  Set up a fresh starting state
2925  *
2926  **********************************************************************/
2927 static void
2928 igb_reset(struct adapter *adapter)
2929 {
2930         device_t        dev = adapter->dev;
2931         struct e1000_hw *hw = &adapter->hw;
2932         struct e1000_fc_info *fc = &hw->fc;
2933         struct ifnet    *ifp = adapter->ifp;
2934         u32             pba = 0;
2935         u16             hwm;
2936
2937         INIT_DEBUGOUT("igb_reset: begin");
2938
2939         /* Let the firmware know the OS is in control */
2940         igb_get_hw_control(adapter);
2941
2942         /*
2943          * Packet Buffer Allocation (PBA)
2944          * Writing PBA sets the receive portion of the buffer;
2945          * the remainder is used for the transmit buffer.
2946          */
2947         switch (hw->mac.type) {
2948         case e1000_82575:
2949                 pba = E1000_PBA_32K;
2950                 break;
2951         case e1000_82576:
2952         case e1000_vfadapt:
2953                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2954                 pba &= E1000_RXPBS_SIZE_MASK_82576;
2955                 break;
2956         case e1000_82580:
2957         case e1000_i350:
2958         case e1000_vfadapt_i350:
2959                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2960                 pba = e1000_rxpbs_adjust_82580(pba);
2961                 break;
2962         case e1000_i210:
2963         case e1000_i211:
2964                 pba = E1000_PBA_34K;
2965         default:
2966                 break;
2967         }
2968
2969         /* Special needs in case of Jumbo frames */
2970         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2971                 u32 tx_space, min_tx, min_rx;
2972                 pba = E1000_READ_REG(hw, E1000_PBA);
2973                 tx_space = pba >> 16;
2974                 pba &= 0xffff;
2975                 min_tx = (adapter->max_frame_size +
2976                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2977                 min_tx = roundup2(min_tx, 1024);
2978                 min_tx >>= 10;
2979                 min_rx = adapter->max_frame_size;
2980                 min_rx = roundup2(min_rx, 1024);
2981                 min_rx >>= 10;
2982                 if (tx_space < min_tx &&
2983                     ((min_tx - tx_space) < pba)) {
2984                         pba = pba - (min_tx - tx_space);
2985                         /*
2986                          * if short on rx space, rx wins
2987                          * and must trump tx adjustment
2988                          */
2989                         if (pba < min_rx)
2990                                 pba = min_rx;
2991                 }
2992                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2993         }
2994
2995         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
2996
2997         /*
2998          * These parameters control the automatic generation (Tx) and
2999          * response (Rx) to Ethernet PAUSE frames.
3000          * - High water mark should allow for at least two frames to be
3001          *   received after sending an XOFF.
3002          * - Low water mark works best when it is very near the high water mark.
3003          *   This allows the receiver to restart by sending XON when it has
3004          *   drained a bit.
3005          */
3006         hwm = min(((pba << 10) * 9 / 10),
3007             ((pba << 10) - 2 * adapter->max_frame_size));
3008
3009         if (hw->mac.type < e1000_82576) {
3010                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3011                 fc->low_water = fc->high_water - 8;
3012         } else {
3013                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3014                 fc->low_water = fc->high_water - 16;
3015         }
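        /*
        ** Worked example (illustrative, assuming E1000_PBA_34K
        ** denotes 34KB and max_frame_size = 1522): hwm =
        ** min(34816 * 9 / 10, 34816 - 2 * 1522) = 31334; on a
        ** post-82575 MAC that becomes high_water = 31334 & 0xFFF0
        ** = 31328 and low_water = 31312, leaving roughly two
        ** frames of headroom when XOFF is sent.
        */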
3016
3017         fc->pause_time = IGB_FC_PAUSE_TIME;
3018         fc->send_xon = TRUE;
3019         if (adapter->fc)
3020                 fc->requested_mode = adapter->fc;
3021         else
3022                 fc->requested_mode = e1000_fc_default;
3023
3024         /* Issue a global reset */
3025         e1000_reset_hw(hw);
3026         E1000_WRITE_REG(hw, E1000_WUC, 0);
3027
3028         if (e1000_init_hw(hw) < 0)
3029                 device_printf(dev, "Hardware Initialization Failed\n");
3030
3031         /* Setup DMA Coalescing */
3032         if ((hw->mac.type > e1000_82580) &&
3033             (hw->mac.type != e1000_i211)) {
3034                 u32 dmac;
3035                 u32 reg = ~E1000_DMACR_DMAC_EN;
3036
3037                 if (adapter->dmac == 0) { /* Disabling it */
3038                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
3039                         goto reset_out;
3040                 }
3041
3042                 /* Set starting thresholds */
3043                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
3044                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3045
3046                 hwm = 64 * pba - adapter->max_frame_size / 16;
3047                 if (hwm < 64 * (pba - 6))
3048                         hwm = 64 * (pba - 6);
3049                 reg = E1000_READ_REG(hw, E1000_FCRTC);
3050                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
3051                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
3052                     & E1000_FCRTC_RTH_COAL_MASK);
3053                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3054
3055
3056                 dmac = pba - adapter->max_frame_size / 512;
3057                 if (dmac < pba - 10)
3058                         dmac = pba - 10;
3059                 reg = E1000_READ_REG(hw, E1000_DMACR);
3060                 reg &= ~E1000_DMACR_DMACTHR_MASK;
3061                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3062                     & E1000_DMACR_DMACTHR_MASK);
3063                 /* transition to L0s or L1 if available... */
3064                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3065                 /* timer = value in adapter->dmac in 32usec intervals */
3066                 reg |= (adapter->dmac >> 5);
3067                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
3068
3069                 /* Set the interval before transition */
3070                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
3071                 reg |= 0x80000004;
3072                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3073
3074                 /* free space in tx packet buffer to wake from DMA coal */
3075                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
3076                     (20480 - (2 * adapter->max_frame_size)) >> 6);
3077
3078                 /* make low power state decision controlled by DMA coal */
3079                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3080                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3081                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3082                 device_printf(dev, "DMA Coalescing enabled\n");
3083
3084         } else if (hw->mac.type == e1000_82580) {
3085                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3086                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3087                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3088                     reg & ~E1000_PCIEMISC_LX_DECISION);
3089         }
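        /*
        ** Note on the timer conversion above: adapter->dmac is
        ** kept in microseconds and the >> 5 converts it to the
        ** 32-usec units the register field expects (per the
        ** inline comment), so e.g. dmac = 1000 programs
        ** 1000 >> 5 = 31 units, about 992 usec.
        */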
3090
3091 reset_out:
3092         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3093         e1000_get_phy_info(hw);
3094         e1000_check_for_link(hw);
3095         return;
3096 }
3097
3098 /*********************************************************************
3099  *
3100  *  Setup networking device structure and register an interface.
3101  *
3102  **********************************************************************/
3103 static int
3104 igb_setup_interface(device_t dev, struct adapter *adapter)
3105 {
3106         struct ifnet   *ifp;
3107
3108         INIT_DEBUGOUT("igb_setup_interface: begin");
3109
3110         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3111         if (ifp == NULL) {
3112                 device_printf(dev, "can not allocate ifnet structure\n");
3113                 return (-1);
3114         }
3115         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3116         ifp->if_init =  igb_init;
3117         ifp->if_softc = adapter;
3118         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3119         ifp->if_ioctl = igb_ioctl;
3120 #ifndef IGB_LEGACY_TX
3121         ifp->if_transmit = igb_mq_start;
3122         ifp->if_qflush = igb_qflush;
3123 #else
3124         ifp->if_start = igb_start;
3125         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3126         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3127         IFQ_SET_READY(&ifp->if_snd);
3128 #endif
3129
3130         ether_ifattach(ifp, adapter->hw.mac.addr);
3131
3132         ifp->if_capabilities = ifp->if_capenable = 0;
3133
3134         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3135         ifp->if_capabilities |= IFCAP_TSO4;
3136         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3137         ifp->if_capenable = ifp->if_capabilities;
3138
3139         /* Advertise LRO support, but leave it disabled by default */
3140         ifp->if_capabilities |= IFCAP_LRO;
3141
3142 #ifdef DEVICE_POLLING
3143         ifp->if_capabilities |= IFCAP_POLLING;
3144 #endif
3145
3146         /*
3147          * Tell the upper layer(s) we
3148          * support full VLAN capability.
3149          */
3150         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3151         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3152                              |  IFCAP_VLAN_HWTSO
3153                              |  IFCAP_VLAN_MTU;
3154         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3155                           |  IFCAP_VLAN_HWTSO
3156                           |  IFCAP_VLAN_MTU;
3157
3158         /*
3159         ** Don't turn this on by default: if vlans are
3160         ** created on another pseudo device (e.g. lagg),
3161         ** vlan events are not passed through, breaking
3162         ** operation, whereas with HW FILTER off it works.
3163         ** If using vlans directly on the igb driver you
3164         ** can enable this and get full hardware tag filtering.
3165         */
3166         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
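        /*
        ** The capability can still be toggled at run time, e.g.
        ** "ifconfig igb0 vlanhwfilter" (assuming the stock
        ** ifconfig(8) capability syntax), which is the intended
        ** way to opt in when vlans sit directly on igb.
        */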
3167
3168         /*
3169          * Specify the media types supported by this adapter and register
3170          * callbacks to update media and link information
3171          */
3172         ifmedia_init(&adapter->media, IFM_IMASK,
3173             igb_media_change, igb_media_status);
3174         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3175             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3176                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3177                             0, NULL);
3178                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3179         } else {
3180                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3181                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3182                             0, NULL);
3183                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3184                             0, NULL);
3185                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3186                             0, NULL);
3187                 if (adapter->hw.phy.type != e1000_phy_ife) {
3188                         ifmedia_add(&adapter->media,
3189                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3190                         ifmedia_add(&adapter->media,
3191                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3192                 }
3193         }
3194         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3195         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3196         return (0);
3197 }
3198
3199
3200 /*
3201  * Manage DMA'able memory.
3202  */
3203 static void
3204 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3205 {
3206         if (error)
3207                 return;
3208         *(bus_addr_t *) arg = segs[0].ds_addr;
3209 }
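/*
 * igb_dmamap_cb() above is handed to bus_dmamap_load() below; since
 * every tag created here uses nsegments = 1, a successful load
 * produces exactly one segment, so publishing segs[0].ds_addr through
 * the opaque arg pointer is sufficient.
 */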
3210
3211 static int
3212 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3213         struct igb_dma_alloc *dma, int mapflags)
3214 {
3215         int error;
3216
3217         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3218                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3219                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3220                                 BUS_SPACE_MAXADDR,      /* highaddr */
3221                                 NULL, NULL,             /* filter, filterarg */
3222                                 size,                   /* maxsize */
3223                                 1,                      /* nsegments */
3224                                 size,                   /* maxsegsize */
3225                                 0,                      /* flags */
3226                                 NULL,                   /* lockfunc */
3227                                 NULL,                   /* lockarg */
3228                                 &dma->dma_tag);
3229         if (error) {
3230                 device_printf(adapter->dev,
3231                     "%s: bus_dma_tag_create failed: %d\n",
3232                     __func__, error);
3233                 goto fail_0;
3234         }
3235
3236         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3237             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3238         if (error) {
3239                 device_printf(adapter->dev,
3240                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3241                     __func__, (uintmax_t)size, error);
3242                 goto fail_2;
3243         }
3244
3245         dma->dma_paddr = 0;
3246         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3247             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3248         if (error || dma->dma_paddr == 0) {
3249                 device_printf(adapter->dev,
3250                     "%s: bus_dmamap_load failed: %d\n",
3251                     __func__, error);
3252                 goto fail_3;
3253         }
3254
3255         return (0);
3256
3257 fail_3:
3258         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3259         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3260 fail_2:
3261         bus_dma_tag_destroy(dma->dma_tag);
3262 fail_0:
3263         dma->dma_map = NULL;
3264         dma->dma_tag = NULL;
3265
3266         return (error);
3267 }
3268
3269 static void
3270 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3271 {
3272         if (dma->dma_tag == NULL)
3273                 return;
3274         if (dma->dma_map != NULL) {
3275                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3276                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3277                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3278                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3279                 dma->dma_map = NULL;
3280         }
3281         bus_dma_tag_destroy(dma->dma_tag);
3282         dma->dma_tag = NULL;
3283 }
3284
3285
3286 /*********************************************************************
3287  *
3288  *  Allocate memory for the transmit and receive rings, and then
3289  *  the descriptors associated with each, called only once at attach.
3290  *
3291  **********************************************************************/
3292 static int
3293 igb_allocate_queues(struct adapter *adapter)
3294 {
3295         device_t dev = adapter->dev;
3296         struct igb_queue        *que = NULL;
3297         struct tx_ring          *txr = NULL;
3298         struct rx_ring          *rxr = NULL;
3299         int rsize, tsize, error = E1000_SUCCESS;
3300         int txconf = 0, rxconf = 0;
3301
3302         /* First allocate the top level queue structs */
3303         if (!(adapter->queues =
3304             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3305             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3306                 device_printf(dev, "Unable to allocate queue memory\n");
3307                 error = ENOMEM;
3308                 goto fail;
3309         }
3310
3311         /* Next allocate the TX ring struct memory */
3312         if (!(adapter->tx_rings =
3313             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3314             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3315                 device_printf(dev, "Unable to allocate TX ring memory\n");
3316                 error = ENOMEM;
3317                 goto tx_fail;
3318         }
3319
3320         /* Now allocate the RX */
3321         if (!(adapter->rx_rings =
3322             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3323             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3324                 device_printf(dev, "Unable to allocate RX ring memory\n");
3325                 error = ENOMEM;
3326                 goto rx_fail;
3327         }
3328
3329         tsize = roundup2(adapter->num_tx_desc *
3330             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3331         /*
3332          * Now set up the TX queues; txconf is needed to handle the
3333          * possibility that things fail midcourse, so we can
3334          * undo the memory allocations gracefully.
3335          */
3336         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3337                 /* Set up some basics */
3338                 txr = &adapter->tx_rings[i];
3339                 txr->adapter = adapter;
3340                 txr->me = i;
3341
3342                 /* Initialize the TX lock */
3343                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3344                     device_get_nameunit(dev), txr->me);
3345                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3346
3347                 if (igb_dma_malloc(adapter, tsize,
3348                         &txr->txdma, BUS_DMA_NOWAIT)) {
3349                         device_printf(dev,
3350                             "Unable to allocate TX Descriptor memory\n");
3351                         error = ENOMEM;
3352                         goto err_tx_desc;
3353                 }
3354                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3355                 bzero((void *)txr->tx_base, tsize);
3356
3357                 /* Now allocate transmit buffers for the ring */
3358                 if (igb_allocate_transmit_buffers(txr)) {
3359                         device_printf(dev,
3360                             "Critical Failure setting up transmit buffers\n");
3361                         error = ENOMEM;
3362                         goto err_tx_desc;
3363                 }
3364 #ifndef IGB_LEGACY_TX
3365                 /* Allocate a buf ring */
3366                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3367                     M_WAITOK, &txr->tx_mtx);
3368 #endif
3369         }
3370
3371         /*
3372          * Next the RX queues...
3373          */ 
3374         rsize = roundup2(adapter->num_rx_desc *
3375             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3376         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3377                 rxr = &adapter->rx_rings[i];
3378                 rxr->adapter = adapter;
3379                 rxr->me = i;
3380
3381                 /* Initialize the RX lock */
3382                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3383                     device_get_nameunit(dev), rxr->me);
3384                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3385
3386                 if (igb_dma_malloc(adapter, rsize,
3387                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3388                         device_printf(dev,
3389                             "Unable to allocate RX Descriptor memory\n");
3390                         error = ENOMEM;
3391                         goto err_rx_desc;
3392                 }
3393                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3394                 bzero((void *)rxr->rx_base, rsize);
3395
3396                 /* Allocate receive buffers for the ring */
3397                 if (igb_allocate_receive_buffers(rxr)) {
3398                         device_printf(dev,
3399                             "Critical Failure setting up receive buffers\n");
3400                         error = ENOMEM;
3401                         goto err_rx_desc;
3402                 }
3403         }
3404
3405         /*
3406         ** Finally set up the queue holding structs
3407         */
3408         for (int i = 0; i < adapter->num_queues; i++) {
3409                 que = &adapter->queues[i];
3410                 que->adapter = adapter;
3411                 que->txr = &adapter->tx_rings[i];
3412                 que->rxr = &adapter->rx_rings[i];
3413         }
3414
3415         return (0);
3416
3417 err_rx_desc:
3418         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3419                 igb_dma_free(adapter, &rxr->rxdma);
3420 err_tx_desc:
3421         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3422                 igb_dma_free(adapter, &txr->txdma);
3423         free(adapter->rx_rings, M_DEVBUF);
3424 rx_fail:
3425 #ifndef IGB_LEGACY_TX
3426         if (txr != NULL && txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF);
3427 #endif
3428         free(adapter->tx_rings, M_DEVBUF);
3429 tx_fail:
3430         free(adapter->queues, M_DEVBUF);
3431 fail:
3432         return (error);
3433 }
3434
3435 /*********************************************************************
3436  *
3437  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3438  *  the information needed to transmit a packet on the wire. This is
3439  *  called only once at attach, setup is done every reset.
3440  *
3441  **********************************************************************/
3442 static int
3443 igb_allocate_transmit_buffers(struct tx_ring *txr)
3444 {
3445         struct adapter *adapter = txr->adapter;
3446         device_t dev = adapter->dev;
3447         struct igb_tx_buffer *txbuf;
3448         int error, i;
3449
3450         /*
3451          * Setup DMA descriptor areas.
3452          */
3453         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3454                                1, 0,                    /* alignment, bounds */
3455                                BUS_SPACE_MAXADDR,       /* lowaddr */
3456                                BUS_SPACE_MAXADDR,       /* highaddr */
3457                                NULL, NULL,              /* filter, filterarg */
3458                                IGB_TSO_SIZE,            /* maxsize */
3459                                IGB_MAX_SCATTER,         /* nsegments */
3460                                PAGE_SIZE,               /* maxsegsize */
3461                                0,                       /* flags */
3462                                NULL,                    /* lockfunc */
3463                                NULL,                    /* lockfuncarg */
3464                                &txr->txtag))) {
3465                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3466                 goto fail;
3467         }
3468
3469         if (!(txr->tx_buffers =
3470             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3471             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3472                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3473                 error = ENOMEM;
3474                 goto fail;
3475         }
3476
3477         /* Create the descriptor buffer dma maps */
3478         txbuf = txr->tx_buffers;
3479         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3480                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3481                 if (error != 0) {
3482                         device_printf(dev, "Unable to create TX DMA map\n");
3483                         goto fail;
3484                 }
3485         }
3486
3487         return 0;
3488 fail:
3489         /* Free everything; this handles the case of a partial failure */
3490         igb_free_transmit_structures(adapter);
3491         return (error);
3492 }
3493
3494 /*********************************************************************
3495  *
3496  *  Initialize a transmit ring.
3497  *
3498  **********************************************************************/
3499 static void
3500 igb_setup_transmit_ring(struct tx_ring *txr)
3501 {
3502         struct adapter *adapter = txr->adapter;
3503         struct igb_tx_buffer *txbuf;
3504         int i;
3505 #ifdef DEV_NETMAP
3506         struct netmap_adapter *na = NA(adapter->ifp);
3507         struct netmap_slot *slot;
3508 #endif /* DEV_NETMAP */
3509
3510         /* Clear the old descriptor contents */
3511         IGB_TX_LOCK(txr);
3512 #ifdef DEV_NETMAP
3513         slot = netmap_reset(na, NR_TX, txr->me, 0);
3514 #endif /* DEV_NETMAP */
3515         bzero((void *)txr->tx_base,
3516               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3517         /* Reset indices */
3518         txr->next_avail_desc = 0;
3519         txr->next_to_clean = 0;
3520
3521         /* Free any existing tx buffers. */
3522         txbuf = txr->tx_buffers;
3523         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3524                 if (txbuf->m_head != NULL) {
3525                         bus_dmamap_sync(txr->txtag, txbuf->map,
3526                             BUS_DMASYNC_POSTWRITE);
3527                         bus_dmamap_unload(txr->txtag, txbuf->map);
3528                         m_freem(txbuf->m_head);
3529                         txbuf->m_head = NULL;
3530                 }
3531 #ifdef DEV_NETMAP
3532                 if (slot) {
3533                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3534                         /* no need to set the address */
3535                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3536                 }
3537 #endif /* DEV_NETMAP */
3538                 /* clear the watch index */
3539                 txbuf->next_eop = -1;
3540         }
3541
3542         /* Set number of descriptors available */
3543         txr->tx_avail = adapter->num_tx_desc;
3544
3545         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3546             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3547         IGB_TX_UNLOCK(txr);
3548 }
3549
3550 /*********************************************************************
3551  *
3552  *  Initialize all transmit rings.
3553  *
3554  **********************************************************************/
3555 static void
3556 igb_setup_transmit_structures(struct adapter *adapter)
3557 {
3558         struct tx_ring *txr = adapter->tx_rings;
3559
3560         for (int i = 0; i < adapter->num_queues; i++, txr++)
3561                 igb_setup_transmit_ring(txr);
3562
3563         return;
3564 }
3565
3566 /*********************************************************************
3567  *
3568  *  Enable transmit unit.
3569  *
3570  **********************************************************************/
3571 static void
3572 igb_initialize_transmit_units(struct adapter *adapter)
3573 {
3574         struct tx_ring  *txr = adapter->tx_rings;
3575         struct e1000_hw *hw = &adapter->hw;
3576         u32             tctl, txdctl;
3577
3578         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3579         tctl = txdctl = 0;
3580
3581         /* Setup the Tx Descriptor Rings */
3582         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3583                 u64 bus_addr = txr->txdma.dma_paddr;
3584
3585                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3586                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3587                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3588                     (uint32_t)(bus_addr >> 32));
3589                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3590                     (uint32_t)bus_addr);
3591
3592                 /* Setup the HW Tx Head and Tail descriptor pointers */
3593                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3594                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3595
3596                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3597                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3598                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3599
3600                 txr->queue_status = IGB_QUEUE_IDLE;
3601
3602                 txdctl |= IGB_TX_PTHRESH;
3603                 txdctl |= IGB_TX_HTHRESH << 8;
3604                 txdctl |= IGB_TX_WTHRESH << 16;
3605                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3606                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3607         }
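        /*
        ** As composed above, TXDCTL packs the prefetch, host and
        ** write-back thresholds at bit offsets 0, 8 and 16 plus
        ** the queue enable flag.  E.g. (illustrative values)
        ** PTHRESH = 8, HTHRESH = 1 and WTHRESH = 16 would yield
        ** 0x00100108 before E1000_TXDCTL_QUEUE_ENABLE is OR'd in.
        */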
3608
3609         if (adapter->vf_ifp)
3610                 return;
3611
3612         e1000_config_collision_dist(hw);
3613
3614         /* Program the Transmit Control Register */
3615         tctl = E1000_READ_REG(hw, E1000_TCTL);
3616         tctl &= ~E1000_TCTL_CT;
3617         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3618                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3619
3620         /* This write will effectively turn on the transmit unit. */
3621         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3622 }
3623
3624 /*********************************************************************
3625  *
3626  *  Free all transmit rings.
3627  *
3628  **********************************************************************/
3629 static void
3630 igb_free_transmit_structures(struct adapter *adapter)
3631 {
3632         struct tx_ring *txr = adapter->tx_rings;
3633
3634         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3635                 IGB_TX_LOCK(txr);
3636                 igb_free_transmit_buffers(txr);
3637                 igb_dma_free(adapter, &txr->txdma);
3638                 IGB_TX_UNLOCK(txr);
3639                 IGB_TX_LOCK_DESTROY(txr);
3640         }
3641         free(adapter->tx_rings, M_DEVBUF);
3642 }
3643
3644 /*********************************************************************
3645  *
3646  *  Free transmit ring related data structures.
3647  *
3648  **********************************************************************/
3649 static void
3650 igb_free_transmit_buffers(struct tx_ring *txr)
3651 {
3652         struct adapter *adapter = txr->adapter;
3653         struct igb_tx_buffer *tx_buffer;
3654         int             i;
3655
3656         INIT_DEBUGOUT("free_transmit_ring: begin");
3657
3658         if (txr->tx_buffers == NULL)
3659                 return;
3660
3661         tx_buffer = txr->tx_buffers;
3662         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3663                 if (tx_buffer->m_head != NULL) {
3664                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3665                             BUS_DMASYNC_POSTWRITE);
3666                         bus_dmamap_unload(txr->txtag,
3667                             tx_buffer->map);
3668                         m_freem(tx_buffer->m_head);
3669                         tx_buffer->m_head = NULL;
3670                         if (tx_buffer->map != NULL) {
3671                                 bus_dmamap_destroy(txr->txtag,
3672                                     tx_buffer->map);
3673                                 tx_buffer->map = NULL;
3674                         }
3675                 } else if (tx_buffer->map != NULL) {
3676                         bus_dmamap_unload(txr->txtag,
3677                             tx_buffer->map);
3678                         bus_dmamap_destroy(txr->txtag,
3679                             tx_buffer->map);
3680                         tx_buffer->map = NULL;
3681                 }
3682         }
3683 #ifndef IGB_LEGACY_TX
3684         if (txr->br != NULL)
3685                 buf_ring_free(txr->br, M_DEVBUF);
3686 #endif
3687         if (txr->tx_buffers != NULL) {
3688                 free(txr->tx_buffers, M_DEVBUF);
3689                 txr->tx_buffers = NULL;
3690         }
3691         if (txr->txtag != NULL) {
3692                 bus_dma_tag_destroy(txr->txtag);
3693                 txr->txtag = NULL;
3694         }
3695         return;
3696 }
3697
3698 /**********************************************************************
3699  *
3700  *  Setup work for hardware segmentation offload (TSO)
3701  *
3702  **********************************************************************/
3703 static bool
3704 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3705         struct ip *ip, struct tcphdr *th)
3706 {
3707         struct adapter *adapter = txr->adapter;
3708         struct e1000_adv_tx_context_desc *TXD;
3709         struct igb_tx_buffer        *tx_buffer;
3710         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3711         u32 mss_l4len_idx = 0;
3712         u16 vtag = 0;
3713         int ctxd, ip_hlen, tcp_hlen;
3714
3715         ctxd = txr->next_avail_desc;
3716         tx_buffer = &txr->tx_buffers[ctxd];
3717         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3718
3719         ip->ip_sum = 0;
3720         ip_hlen = ip->ip_hl << 2;
3721         tcp_hlen = th->th_off << 2;
3722
3723         /* VLAN MACLEN IPLEN */
3724         if (mp->m_flags & M_VLANTAG) {
3725                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3726                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3727         }
3728
3729         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3730         vlan_macip_lens |= ip_hlen;
3731         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3732
3733         /* ADV DTYPE TUCMD */
3734         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3735         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3736         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3737         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3738
3739         /* MSS L4LEN IDX */
3740         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3741         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3742         /* 82575 needs the queue index added */
3743         if (adapter->hw.mac.type == e1000_82575)
3744                 mss_l4len_idx |= txr->me << 4;
3745         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3746
3747         TXD->seqnum_seed = htole32(0);
3748         tx_buffer->m_head = NULL;
3749         tx_buffer->next_eop = -1;
3750
3751         if (++ctxd == adapter->num_tx_desc)
3752                 ctxd = 0;
3753
3754         txr->tx_avail--;
3755         txr->next_avail_desc = ctxd;
3756         return TRUE;
3757 }
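/*
 * Worked example (illustrative): a TSO send with tso_segsz = 1448
 * and a 20-byte TCP header leaves the context descriptor above
 * carrying
 *     mss_l4len_idx = (1448 << E1000_ADVTXD_MSS_SHIFT)
 *                   | (20 << E1000_ADVTXD_L4LEN_SHIFT)
 * (plus the queue index nibble on the 82575), which the hardware
 * uses to carve the payload into MSS-sized segments.
 */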
3758
3759
3760 /*********************************************************************
3761  *
3762  *  Context Descriptor setup for VLAN or CSUM
3763  *
3764  **********************************************************************/
3765
3766 static bool
3767 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3768 {
3769         struct adapter *adapter = txr->adapter;
3770         struct e1000_adv_tx_context_desc *TXD;
3771         struct igb_tx_buffer        *tx_buffer;
3772         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3773         struct ether_vlan_header *eh;
3774         struct ip *ip = NULL;
3775         struct ip6_hdr *ip6;
3776         int  ehdrlen, ctxd, ip_hlen = 0;
3777         u16     etype, vtag = 0;
3778         u8      ipproto = 0;
3779         bool    offload = TRUE;
3780
3781         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3782                 offload = FALSE;
3783
3784         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3785         ctxd = txr->next_avail_desc;
3786         tx_buffer = &txr->tx_buffers[ctxd];
3787         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3788
3789         /*
3790         ** In advanced descriptors the vlan tag must 
3791         ** be placed into the context descriptor, thus
3792         ** we need to be here just for that setup.
3793         */
3794         if (mp->m_flags & M_VLANTAG) {
3795                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3796                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3797         } else if (offload == FALSE)
3798                 return FALSE;
3799
3800         /*
3801          * Determine where frame payload starts.
3802          * Jump over vlan headers if already present,
3803          * helpful for QinQ too.
3804          */
3805         eh = mtod(mp, struct ether_vlan_header *);
3806         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3807                 etype = ntohs(eh->evl_proto);
3808                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3809         } else {
3810                 etype = ntohs(eh->evl_encap_proto);
3811                 ehdrlen = ETHER_HDR_LEN;
3812         }
3813
3814         /* Set the ether header length */
3815         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3816
3817         switch (etype) {
3818                 case ETHERTYPE_IP:
3819                         ip = (struct ip *)(mp->m_data + ehdrlen);
3820                         ip_hlen = ip->ip_hl << 2;
3821                         if (mp->m_len < ehdrlen + ip_hlen) {
3822                                 offload = FALSE;
3823                                 break;
3824                         }
3825                         ipproto = ip->ip_p;
3826                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3827                         break;
3828                 case ETHERTYPE_IPV6:
3829                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3830                         ip_hlen = sizeof(struct ip6_hdr);
3831                         ipproto = ip6->ip6_nxt;
3832                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3833                         break;
3834                 default:
3835                         offload = FALSE;
3836                         break;
3837         }
3838
3839         vlan_macip_lens |= ip_hlen;
3840         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3841
3842         switch (ipproto) {
3843                 case IPPROTO_TCP:
3844                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3845                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3846                         break;
3847                 case IPPROTO_UDP:
3848                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3849                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3850                         break;
3851 #if __FreeBSD_version >= 800000
3852                 case IPPROTO_SCTP:
3853                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3854                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3855                         break;
3856 #endif
3857                 default:
3858                         offload = FALSE;
3859                         break;
3860         }
3861
3862         /* 82575 needs the queue index added */
3863         if (adapter->hw.mac.type == e1000_82575)
3864                 mss_l4len_idx = txr->me << 4;
3865
3866         /* Now copy bits into descriptor */
3867         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3868         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3869         TXD->seqnum_seed = htole32(0);
3870         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3871
3872         tx_buffer->m_head = NULL;
3873         tx_buffer->next_eop = -1;
3874
3875         /* We've consumed the first desc, adjust counters */
3876         if (++ctxd == adapter->num_tx_desc)
3877                 ctxd = 0;
3878         txr->next_avail_desc = ctxd;
3879         --txr->tx_avail;
3880
3881         return (offload);
3882 }
3883
3884
3885 /**********************************************************************
3886  *
3887  *  Examine each tx_buffer in the used queue. If the hardware is done
3888  *  processing the packet then free associated resources. The
3889  *  tx_buffer is put back on the free queue.
3890  *
3891  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3892  **********************************************************************/
3893 static bool
3894 igb_txeof(struct tx_ring *txr)
3895 {
3896         struct adapter  *adapter = txr->adapter;
3897         int first, last, done, processed;
3898         struct igb_tx_buffer *tx_buffer;
3899         struct e1000_tx_desc   *tx_desc, *eop_desc;
3900         struct ifnet   *ifp = adapter->ifp;
3901
3902         IGB_TX_LOCK_ASSERT(txr);
3903
3904 #ifdef DEV_NETMAP
3905         if (netmap_tx_irq(ifp, txr->me |
3906             (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
3907                 return (FALSE);
3908 #endif /* DEV_NETMAP */
3909         if (txr->tx_avail == adapter->num_tx_desc) {
3910                 txr->queue_status = IGB_QUEUE_IDLE;
3911                 return FALSE;
3912         }
3913
3914         processed = 0;
3915         first = txr->next_to_clean;
3916         tx_desc = &txr->tx_base[first];
3917         tx_buffer = &txr->tx_buffers[first];
3918         last = tx_buffer->next_eop;
3919         eop_desc = &txr->tx_base[last];
3920
3921         /*
3922          * Grab the index of the first descriptor AFTER
3923          * the EOP of the first packet, so that the
3924          * inner while loop can use a simple comparison
3925          * to know when the whole packet is cleaned.
3926          */
3927         if (++last == adapter->num_tx_desc)
3928                 last = 0;
3929         done = last;
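        /*
        ** E.g. with next_to_clean = 5 and the first packet's EOP
        ** at descriptor 7: last = 7, done = 8, and the inner loop
        ** below reclaims descriptors 5, 6 and 7 before moving on
        ** to the next packet's EOP.
        */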
3930
3931         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3932             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3933
3934         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3935                 /* We clean the range of the packet */
3936                 while (first != done) {
3937                         tx_desc->upper.data = 0;
3938                         tx_desc->lower.data = 0;
3939                         tx_desc->buffer_addr = 0;
3940                         ++txr->tx_avail;
3941                         ++processed;
3942
3943                         if (tx_buffer->m_head) {
3944                                 txr->bytes +=
3945                                     tx_buffer->m_head->m_pkthdr.len;
3946                                 bus_dmamap_sync(txr->txtag,
3947                                     tx_buffer->map,
3948                                     BUS_DMASYNC_POSTWRITE);
3949                                 bus_dmamap_unload(txr->txtag,
3950                                     tx_buffer->map);
3951
3952                                 m_freem(tx_buffer->m_head);
3953                                 tx_buffer->m_head = NULL;
3954                         }
3955                         tx_buffer->next_eop = -1;
3956                         txr->watchdog_time = ticks;
3957
3958                         if (++first == adapter->num_tx_desc)
3959                                 first = 0;
3960
3961                         tx_buffer = &txr->tx_buffers[first];
3962                         tx_desc = &txr->tx_base[first];
3963                 }
3964                 ++txr->packets;
3965                 ++ifp->if_opackets;
3966                 /* See if we can continue to the next packet */
3967                 last = tx_buffer->next_eop;
3968                 if (last != -1) {
3969                         eop_desc = &txr->tx_base[last];
3970                         /* Get new done point */
3971                         if (++last == adapter->num_tx_desc) last = 0;
3972                         done = last;
3973                 } else
3974                         break;
3975         }
3976         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3977             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3978
3979         txr->next_to_clean = first;
3980
3981         /*
3982         ** Watchdog calculation: we know there's
3983         ** work outstanding or the first return
3984         ** would have been taken, so nothing processed
3985         ** for too long indicates a hang.
3986         */
3987         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3988                 txr->queue_status |= IGB_QUEUE_HUNG;
3989         /*
3990          * If we have a minimum free,
3991          * clear depleted state bit
3992          */
3993         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)          
3994                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3995
3996         /* All clean, turn off the watchdog */
3997         if (txr->tx_avail == adapter->num_tx_desc) {
3998                 txr->queue_status = IGB_QUEUE_IDLE;
3999                 return (FALSE);
4000         }
4001
4002         return (TRUE);
4003 }
4004
4005 /*********************************************************************
4006  *
4007  *  Refresh mbuf buffers for RX descriptor rings
4008  *   - now keeps its own state so discards due to resource
4009  *     exhaustion are unnecessary; if an mbuf cannot be obtained
4010  *     it just returns, keeping its placeholder, so it can simply
4011  *     be recalled to try again.
4012  *
4013  **********************************************************************/
4014 static void
4015 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4016 {
4017         struct adapter          *adapter = rxr->adapter;
4018         bus_dma_segment_t       hseg[1];
4019         bus_dma_segment_t       pseg[1];
4020         struct igb_rx_buf       *rxbuf;
4021         struct mbuf             *mh, *mp;
4022         int                     i, j, nsegs, error;
4023         bool                    refreshed = FALSE;
4024
4025         i = j = rxr->next_to_refresh;
4026         /*
4027         ** Get one descriptor beyond
4028         ** our work mark to control
4029         ** the loop.
4030         */
4031         if (++j == adapter->num_rx_desc)
4032                 j = 0;
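        /*
        ** The two cursors form a simple guard: i is the slot being
        ** refreshed while j runs one ahead, so the loop stops short
        ** of 'limit'.  E.g. with 8 descriptors, next_to_refresh = 6
        ** and limit = 2, slots 6, 7 and 0 are refreshed and
        ** next_to_refresh is left at 1.
        */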
4033
4034         while (j != limit) {
4035                 rxbuf = &rxr->rx_buffers[i];
4036                 /* No hdr mbuf used with header split off */
4037                 if (rxr->hdr_split == FALSE)
4038                         goto no_split;
4039                 if (rxbuf->m_head == NULL) {
4040                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4041                         if (mh == NULL)
4042                                 goto update;
4043                 } else
4044                         mh = rxbuf->m_head;
4045
4046                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4048                 mh->m_flags |= M_PKTHDR;
4049                 /* Get the memory mapping */
4050                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4051                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4052                 if (error != 0) {
4053                         printf("Refresh mbufs: hdr dmamap load"
4054                             " failure - %d\n", error);
4055                         m_free(mh);
4056                         rxbuf->m_head = NULL;
4057                         goto update;
4058                 }
4059                 rxbuf->m_head = mh;
4060                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4061                     BUS_DMASYNC_PREREAD);
4062                 rxr->rx_base[i].read.hdr_addr =
4063                     htole64(hseg[0].ds_addr);
4064 no_split:
4065                 if (rxbuf->m_pack == NULL) {
4066                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4067                             M_PKTHDR, adapter->rx_mbuf_sz);
4068                         if (mp == NULL)
4069                                 goto update;
4070                 } else
4071                         mp = rxbuf->m_pack;
4072
4073                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4074                 /* Get the memory mapping */
4075                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4076                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4077                 if (error != 0) {
4078                         printf("Refresh mbufs: payload dmamap load"
4079                             " failure - %d\n", error);
4080                         m_free(mp);
4081                         rxbuf->m_pack = NULL;
4082                         goto update;
4083                 }
4084                 rxbuf->m_pack = mp;
4085                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4086                     BUS_DMASYNC_PREREAD);
4087                 rxr->rx_base[i].read.pkt_addr =
4088                     htole64(pseg[0].ds_addr);
4089                 refreshed = TRUE; /* I feel wefreshed :) */
4090
4091                 i = j; /* our next is precalculated */
4092                 rxr->next_to_refresh = i;
4093                 if (++j == adapter->num_rx_desc)
4094                         j = 0;
4095         }
4096 update:
4097         if (refreshed) /* update tail */
4098                 E1000_WRITE_REG(&adapter->hw,
4099                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4100         return;
4101 }
4102
4103
4104 /*********************************************************************
4105  *
4106  *  Allocate memory for rx_buffer structures. Since we use one
4107  *  rx_buffer per received packet, the maximum number of rx_buffers
4108  *  that we'll need is equal to the number of receive descriptors
4109  *  that we've allocated.
4110  *
4111  **********************************************************************/
4112 static int
4113 igb_allocate_receive_buffers(struct rx_ring *rxr)
4114 {
4115         struct  adapter         *adapter = rxr->adapter;
4116         device_t                dev = adapter->dev;
4117         struct igb_rx_buf       *rxbuf;
4118         int                     i, bsize, error;
4119
4120         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4121         if (!(rxr->rx_buffers =
4122             (struct igb_rx_buf *) malloc(bsize,
4123             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4124                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4125                 error = ENOMEM;
4126                 goto fail;
4127         }
4128
4129         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4130                                    1, 0,                /* alignment, bounds */
4131                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4132                                    BUS_SPACE_MAXADDR,   /* highaddr */
4133                                    NULL, NULL,          /* filter, filterarg */
4134                                    MSIZE,               /* maxsize */
4135                                    1,                   /* nsegments */
4136                                    MSIZE,               /* maxsegsize */
4137                                    0,                   /* flags */
4138                                    NULL,                /* lockfunc */
4139                                    NULL,                /* lockfuncarg */
4140                                    &rxr->htag))) {
4141                 device_printf(dev, "Unable to create RX DMA tag\n");
4142                 goto fail;
4143         }
4144
4145         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4146                                    1, 0,                /* alignment, bounds */
4147                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4148                                    BUS_SPACE_MAXADDR,   /* highaddr */
4149                                    NULL, NULL,          /* filter, filterarg */
4150                                    MJUM9BYTES,          /* maxsize */
4151                                    1,                   /* nsegments */
4152                                    MJUM9BYTES,          /* maxsegsize */
4153                                    0,                   /* flags */
4154                                    NULL,                /* lockfunc */
4155                                    NULL,                /* lockfuncarg */
4156                                    &rxr->ptag))) {
4157                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4158                 goto fail;
4159         }
4160
4161         for (i = 0; i < adapter->num_rx_desc; i++) {
4162                 rxbuf = &rxr->rx_buffers[i];
4163                 error = bus_dmamap_create(rxr->htag,
4164                     BUS_DMA_NOWAIT, &rxbuf->hmap);
4165                 if (error) {
4166                         device_printf(dev,
4167                             "Unable to create RX head DMA maps\n");
4168                         goto fail;
4169                 }
4170                 error = bus_dmamap_create(rxr->ptag,
4171                     BUS_DMA_NOWAIT, &rxbuf->pmap);
4172                 if (error) {
4173                         device_printf(dev,
4174                             "Unable to create RX packet DMA maps\n");
4175                         goto fail;
4176                 }
4177         }
4178
4179         return (0);
4180
4181 fail:
4182         /* Frees all, but can handle partial completion */
4183         igb_free_receive_structures(adapter);
4184         return (error);
4185 }
4186
4187
4188 static void
4189 igb_free_receive_ring(struct rx_ring *rxr)
4190 {
4191         struct  adapter         *adapter = rxr->adapter;
4192         struct igb_rx_buf       *rxbuf;
4193
4194
4195         for (int i = 0; i < adapter->num_rx_desc; i++) {
4196                 rxbuf = &rxr->rx_buffers[i];
4197                 if (rxbuf->m_head != NULL) {
4198                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4199                             BUS_DMASYNC_POSTREAD);
4200                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4201                         rxbuf->m_head->m_flags |= M_PKTHDR;
4202                         m_freem(rxbuf->m_head);
4203                 }
4204                 if (rxbuf->m_pack != NULL) {
4205                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4206                             BUS_DMASYNC_POSTREAD);
4207                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4208                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4209                         m_freem(rxbuf->m_pack);
4210                 }
4211                 rxbuf->m_head = NULL;
4212                 rxbuf->m_pack = NULL;
4213         }
4214 }
4215
4216
4217 /*********************************************************************
4218  *
4219  *  Initialize a receive ring and its buffers.
4220  *
4221  **********************************************************************/
4222 static int
4223 igb_setup_receive_ring(struct rx_ring *rxr)
4224 {
4225         struct  adapter         *adapter;
4226         struct  ifnet           *ifp;
4227         device_t                dev;
4228         struct igb_rx_buf       *rxbuf;
4229         bus_dma_segment_t       pseg[1], hseg[1];
4230         struct lro_ctrl         *lro = &rxr->lro;
4231         int                     rsize, nsegs, error = 0;
4232 #ifdef DEV_NETMAP
4233         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4234         struct netmap_slot *slot;
4235 #endif /* DEV_NETMAP */
4236
4237         adapter = rxr->adapter;
4238         dev = adapter->dev;
4239         ifp = adapter->ifp;
4240
4241         /* Clear the ring contents */
4242         IGB_RX_LOCK(rxr);
4243 #ifdef DEV_NETMAP
4244         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4245 #endif /* DEV_NETMAP */
4246         rsize = roundup2(adapter->num_rx_desc *
4247             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4248         bzero((void *)rxr->rx_base, rsize);
4249
4250         /*
4251         ** Free current RX buffer structures and their mbufs
4252         */
4253         igb_free_receive_ring(rxr);
4254
4255         /* Configure for header split? */
4256         if (igb_header_split)
4257                 rxr->hdr_split = TRUE;
4258
4259         /* Now replenish the ring mbufs */
4260         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4261                 struct mbuf     *mh, *mp;
4262
4263                 rxbuf = &rxr->rx_buffers[j];
4264 #ifdef DEV_NETMAP
4265                 if (slot) {
4266                         /* slot sj is mapped to the j-th NIC-ring entry */
4267                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4268                         uint64_t paddr;
4269                         void *addr;
4270
4271                         addr = PNMB(slot + sj, &paddr);
4272                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4273                         /* Update descriptor */
4274                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4275                         continue;
4276                 }
4277 #endif /* DEV_NETMAP */
4278                 if (rxr->hdr_split == FALSE)
4279                         goto skip_head;
4280
4281                 /* First the header */
4282                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4283                 if (rxbuf->m_head == NULL) {
4284                         error = ENOBUFS;
4285                         goto fail;
4286                 }
4287                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4288                 mh = rxbuf->m_head;
4289                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4290                 mh->m_flags |= M_PKTHDR;
4291                 /* Get the memory mapping */
4292                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4293                     rxbuf->hmap, rxbuf->m_head, hseg,
4294                     &nsegs, BUS_DMA_NOWAIT);
4295                 if (error != 0) /* Nothing elegant to do here */
4296                         goto fail;
4297                 bus_dmamap_sync(rxr->htag,
4298                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4299                 /* Update descriptor */
4300                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4301
4302 skip_head:
4303                 /* Now the payload cluster */
4304                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4305                     M_PKTHDR, adapter->rx_mbuf_sz);
4306                 if (rxbuf->m_pack == NULL) {
4307                         error = ENOBUFS;
4308                         goto fail;
4309                 }
4310                 mp = rxbuf->m_pack;
4311                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4312                 /* Get the memory mapping */
4313                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4314                     rxbuf->pmap, mp, pseg,
4315                     &nsegs, BUS_DMA_NOWAIT);
4316                 if (error != 0)
4317                         goto fail;
4318                 bus_dmamap_sync(rxr->ptag,
4319                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4320                 /* Update descriptor */
4321                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4322         }
4323
4324         /* Setup our descriptor indices */
4325         rxr->next_to_check = 0;
4326         rxr->next_to_refresh = adapter->num_rx_desc - 1;
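        /*
        ** Note: next_to_refresh names the last descriptor already
        ** handed to the hardware; igb_refresh_mbufs() resumes at the
        ** entry after it, so a freshly initialized ring starts out
        ** fully armed.
        */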
4327         rxr->lro_enabled = FALSE;
4328         rxr->rx_split_packets = 0;
4329         rxr->rx_bytes = 0;
4330
4331         rxr->fmp = NULL;
4332         rxr->lmp = NULL;
4333         rxr->discard = FALSE;
4334
4335         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4336             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4337
4338         /*
4339         ** Now set up the LRO interface; we also
4340         ** only do header split when LRO is
4341         ** enabled, since it is often undesirable
4342         ** in other setups.
4343         */
4344         if (ifp->if_capenable & IFCAP_LRO) {
4345                 error = tcp_lro_init(lro);
4346                 if (error) {
4347                         device_printf(dev, "LRO Initialization failed!\n");
4348                         goto fail;
4349                 }
4350                 INIT_DEBUGOUT("RX LRO Initialized\n");
4351                 rxr->lro_enabled = TRUE;
4352                 lro->ifp = adapter->ifp;
4353         }
4354
4355         IGB_RX_UNLOCK(rxr);
4356         return (0);
4357
4358 fail:
4359         igb_free_receive_ring(rxr);
4360         IGB_RX_UNLOCK(rxr);
4361         return (error);
4362 }
4363
4364
4365 /*********************************************************************
4366  *
4367  *  Initialize all receive rings.
4368  *
4369  **********************************************************************/
4370 static int
4371 igb_setup_receive_structures(struct adapter *adapter)
4372 {
4373         struct rx_ring *rxr = adapter->rx_rings;
4374         int i;
4375
4376         for (i = 0; i < adapter->num_queues; i++, rxr++)
4377                 if (igb_setup_receive_ring(rxr))
4378                         goto fail;
4379
4380         return (0);
4381 fail:
4382         /*
4383          * Free RX buffers allocated so far, we will only handle
4384          * the rings that completed, the failing case will have
4385          * cleaned up for itself. 'i' is the endpoint.
4386          */
4387         for (int j = 0; j < i; ++j) {
4388                 rxr = &adapter->rx_rings[j];
4389                 IGB_RX_LOCK(rxr);
4390                 igb_free_receive_ring(rxr);
4391                 IGB_RX_UNLOCK(rxr);
4392         }
4393
4394         return (ENOBUFS);
4395 }
4396
4397 /*********************************************************************
4398  *
4399  *  Enable receive unit.
4400  *
4401  **********************************************************************/
4402 static void
4403 igb_initialize_receive_units(struct adapter *adapter)
4404 {
4405         struct rx_ring  *rxr = adapter->rx_rings;
4406         struct ifnet    *ifp = adapter->ifp;
4407         struct e1000_hw *hw = &adapter->hw;
4408         u32             rctl, rxcsum, psize, srrctl = 0;
4409
4410         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4411
4412         /*
4413          * Make sure receives are disabled while setting
4414          * up the descriptor ring
4415          */
4416         rctl = E1000_READ_REG(hw, E1000_RCTL);
4417         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4418
4419         /*
4420         ** Set up for header split
4421         */
4422         if (igb_header_split) {
4423                 /* Use a standard mbuf for the header */
4424                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4425                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4426         } else
4427                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4428
4429         /*
4430         ** Set up for jumbo frames
4431         */
4432         if (ifp->if_mtu > ETHERMTU) {
4433                 rctl |= E1000_RCTL_LPE;
4434                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4435                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4436                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4437                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4438                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4439                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4440                 }
4441                 /* Set maximum packet len */
4442                 psize = adapter->max_frame_size;
4443                 /* are we on a vlan? */
4444                 if (adapter->ifp->if_vlantrunk != NULL)
4445                         psize += VLAN_TAG_SIZE;
4446                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4447         } else {
4448                 rctl &= ~E1000_RCTL_LPE;
4449                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4450                 rctl |= E1000_RCTL_SZ_2048;
4451         }
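        /*
        ** SRRCTL's BSIZEPKT field counts 1KB units: with
        ** E1000_SRRCTL_BSIZEPKT_SHIFT being 10 in the shared e1000
        ** headers, e.g. 4096 >> 10 == 4 programs a 4KB buffer.
        */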
4452
4453         /* Setup the Base and Length of the Rx Descriptor Rings */
4454         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4455                 u64 bus_addr = rxr->rxdma.dma_paddr;
4456                 u32 rxdctl;
4457
4458                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4459                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
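                /*
                ** Note: the legacy sizeof(struct e1000_rx_desc) here
                ** still computes the right ring length, since the
                ** legacy and advanced descriptor formats are both
                ** 16 bytes.
                */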
4460                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4461                     (uint32_t)(bus_addr >> 32));
4462                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4463                     (uint32_t)bus_addr);
4464                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4465                 /* Enable this Queue */
4466                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4467                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4468                 rxdctl &= 0xFFF00000;
4469                 rxdctl |= IGB_RX_PTHRESH;
4470                 rxdctl |= IGB_RX_HTHRESH << 8;
4471                 rxdctl |= IGB_RX_WTHRESH << 16;
4472                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4473         }
4474
4475         /*
4476         ** Setup for RX MultiQueue
4477         */
4478         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4479         if (adapter->num_queues > 1) {
4480                 u32 random[10], mrqc, shift = 0;
4481                 union igb_reta {
4482                         u32 dword;
4483                         u8  bytes[4];
4484                 } reta;
4485
4486                 arc4rand(&random, sizeof(random), 0);
4487                 if (adapter->hw.mac.type == e1000_82575)
4488                         shift = 6;
4489                 /* Populate the 128-entry redirection table, 4 bytes per register */
4490                 for (int i = 0; i < 128; i++) {
4491                         reta.bytes[i & 3] =
4492                             (i % adapter->num_queues) << shift;
4493                         if ((i & 3) == 3)
4494                                 E1000_WRITE_REG(hw,
4495                                     E1000_RETA(i >> 2), reta.dword);
4496                 }
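                /*
                ** Worked example: with num_queues == 2 and shift == 0
                ** the loop above packs entries 0..3 into RETA(0) as
                ** bytes {0, 1, 0, 1}, entries 4..7 into RETA(1), and
                ** so on across all 32 registers (128 entries total).
                */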
4497                 /* Now fill in hash table */
4498                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4499                 for (int i = 0; i < 10; i++)
4500                         E1000_WRITE_REG_ARRAY(hw,
4501                             E1000_RSSRK(0), i, random[i]);
4502
4503                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4504                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4505                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4506                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4507                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4508                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4509                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4510                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4511
4512                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4513
4514                 /*
4515                 ** NOTE: Receive Full-Packet Checksum Offload 
4516                 ** is mutually exclusive with Multiqueue. However
4517                 ** this is not the same as TCP/IP checksums which
4518                 ** still work.
4519                 */
4520                 rxcsum |= E1000_RXCSUM_PCSD;
4521 #if __FreeBSD_version >= 800000
4522                 /* For SCTP Offload */
4523                 if ((hw->mac.type == e1000_82576)
4524                     && (ifp->if_capenable & IFCAP_RXCSUM))
4525                         rxcsum |= E1000_RXCSUM_CRCOFL;
4526 #endif
4527         } else {
4528                 /* Non RSS setup */
4529                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4530                         rxcsum |= E1000_RXCSUM_IPPCSE;
4531 #if __FreeBSD_version >= 800000
4532                         if (adapter->hw.mac.type == e1000_82576)
4533                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4534 #endif
4535                 } else
4536                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4537         }
4538         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4539
4540         /* Setup the Receive Control Register */
4541         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4542         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4543                    E1000_RCTL_RDMTS_HALF |
4544                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4545         /* Strip CRC bytes. */
4546         rctl |= E1000_RCTL_SECRC;
4547         /* Make sure VLAN Filters are off */
4548         rctl &= ~E1000_RCTL_VFE;
4549         /* Don't store bad packets */
4550         rctl &= ~E1000_RCTL_SBP;
4551
4552         /* Enable Receives */
4553         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4554
4555         /*
4556          * Setup the HW Rx Head and Tail Descriptor Pointers
4557          *   - needs to be after enable
4558          */
4559         for (int i = 0; i < adapter->num_queues; i++) {
4560                 rxr = &adapter->rx_rings[i];
4561                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4562 #ifdef DEV_NETMAP
4563                 /*
4564                  * An init() while a netmap client is active must
4565                  * preserve the rx buffers passed to userspace.
4566                  * In this driver it means we adjust RDT to
4567                  * something different from next_to_refresh
4568                  * (which is not used in netmap mode).
4569                  */
4570                 if (ifp->if_capenable & IFCAP_NETMAP) {
4571                         struct netmap_adapter *na = NA(adapter->ifp);
4572                         struct netmap_kring *kring = &na->rx_rings[i];
4573                         int t = rxr->next_to_refresh - kring->nr_hwavail;
4574
4575                         if (t >= adapter->num_rx_desc)
4576                                 t -= adapter->num_rx_desc;
4577                         else if (t < 0)
4578                                 t += adapter->num_rx_desc;
4579                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4580                 } else
4581 #endif /* DEV_NETMAP */
4582                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4583         }
4584         return;
4585 }
4586
4587 /*********************************************************************
4588  *
4589  *  Free receive rings.
4590  *
4591  **********************************************************************/
4592 static void
4593 igb_free_receive_structures(struct adapter *adapter)
4594 {
4595         struct rx_ring *rxr = adapter->rx_rings;
4596
4597         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4598                 struct lro_ctrl *lro = &rxr->lro;
4599                 igb_free_receive_buffers(rxr);
4600                 tcp_lro_free(lro);
4601                 igb_dma_free(adapter, &rxr->rxdma);
4602         }
4603
4604         free(adapter->rx_rings, M_DEVBUF);
4605 }
4606
4607 /*********************************************************************
4608  *
4609  *  Free receive ring data structures.
4610  *
4611  **********************************************************************/
4612 static void
4613 igb_free_receive_buffers(struct rx_ring *rxr)
4614 {
4615         struct adapter          *adapter = rxr->adapter;
4616         struct igb_rx_buf       *rxbuf;
4617         int i;
4618
4619         INIT_DEBUGOUT("free_receive_structures: begin");
4620
4621         /* Cleanup any existing buffers */
4622         if (rxr->rx_buffers != NULL) {
4623                 for (i = 0; i < adapter->num_rx_desc; i++) {
4624                         rxbuf = &rxr->rx_buffers[i];
4625                         if (rxbuf->m_head != NULL) {
4626                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4627                                     BUS_DMASYNC_POSTREAD);
4628                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4629                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4630                                 m_freem(rxbuf->m_head);
4631                         }
4632                         if (rxbuf->m_pack != NULL) {
4633                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4634                                     BUS_DMASYNC_POSTREAD);
4635                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4636                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4637                                 m_freem(rxbuf->m_pack);
4638                         }
4639                         rxbuf->m_head = NULL;
4640                         rxbuf->m_pack = NULL;
4641                         if (rxbuf->hmap != NULL) {
4642                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4643                                 rxbuf->hmap = NULL;
4644                         }
4645                         if (rxbuf->pmap != NULL) {
4646                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4647                                 rxbuf->pmap = NULL;
4648                         }
4649                 }
4650                 if (rxr->rx_buffers != NULL) {
4651                         free(rxr->rx_buffers, M_DEVBUF);
4652                         rxr->rx_buffers = NULL;
4653                 }
4654         }
4655
4656         if (rxr->htag != NULL) {
4657                 bus_dma_tag_destroy(rxr->htag);
4658                 rxr->htag = NULL;
4659         }
4660         if (rxr->ptag != NULL) {
4661                 bus_dma_tag_destroy(rxr->ptag);
4662                 rxr->ptag = NULL;
4663         }
4664 }
4665
4666 static __inline void
4667 igb_rx_discard(struct rx_ring *rxr, int i)
4668 {
4669         struct igb_rx_buf       *rbuf;
4670
4671         rbuf = &rxr->rx_buffers[i];
4672
4673         /* Partially received? Free the chain */
4674         if (rxr->fmp != NULL) {
4675                 rxr->fmp->m_flags |= M_PKTHDR;
4676                 m_freem(rxr->fmp);
4677                 rxr->fmp = NULL;
4678                 rxr->lmp = NULL;
4679         }
4680
4681         /*
4682         ** With advanced descriptors the writeback
4683         ** clobbers the buffer addrs, so it's easier
4684         ** to just free the existing mbufs and take
4685         ** the normal refresh path to get new buffers
4686         ** and mapping.
4687         */
4688         if (rbuf->m_head) {
4689                 m_free(rbuf->m_head);
4690                 rbuf->m_head = NULL;
4691         }
4692
4693         if (rbuf->m_pack) {
4694                 m_free(rbuf->m_pack);
4695                 rbuf->m_pack = NULL;
4696         }
4697
4698         return;
4699 }
4700
4701 static __inline void
4702 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4703 {
4704
4705         /*
4706          * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4707          * has been computed by hardware. The frame also must not have a VLAN
4708          * tag in its ethernet header.
4709          */
4710         if (rxr->lro_enabled &&
4711             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4712             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4713             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4714             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4715             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4716             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4717                 /*
4718                  * Send to the stack if:
4719                  *  - LRO is not enabled, or
4720                  *  - there are no LRO resources, or
4721                  *  - the LRO enqueue fails.
4722                  */
4723                 if (rxr->lro.lro_cnt != 0)
4724                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4725                                 return;
4726         }
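        /*
        ** Drop the RX lock around if_input(): the stack may do
        ** arbitrary work with the packet (it can even transmit),
        ** and holding the ring lock across that invites lock-order
        ** problems.
        */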
4727         IGB_RX_UNLOCK(rxr);
4728         (*ifp->if_input)(ifp, m);
4729         IGB_RX_LOCK(rxr);
4730 }
4731
4732 /*********************************************************************
4733  *
4734  *  This routine executes in interrupt context. It replenishes
4735  *  the mbufs in the descriptor ring and sends data that has been
4736  *  DMA'd into host memory up to the stack.
4737  *
4738  *  We loop at most count times if count is > 0, or until done if
4739  *  count < 0.
4740  *
4741  *  Return TRUE if more to clean, FALSE otherwise
4742  *********************************************************************/
4743 static bool
4744 igb_rxeof(struct igb_queue *que, int count, int *done)
4745 {
4746         struct adapter          *adapter = que->adapter;
4747         struct rx_ring          *rxr = que->rxr;
4748         struct ifnet            *ifp = adapter->ifp;
4749         struct lro_ctrl         *lro = &rxr->lro;
4750         struct lro_entry        *queued;
4751         int                     i, processed = 0, rxdone = 0;
4752         u32                     ptype, staterr = 0;
4753         union e1000_adv_rx_desc *cur;
4754
4755         IGB_RX_LOCK(rxr);
4756         /* Sync the ring. */
4757         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4758             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4759
4760 #ifdef DEV_NETMAP
4761         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4762                 return (FALSE);
4763 #endif /* DEV_NETMAP */
4764
4765         /* Main clean loop */
4766         for (i = rxr->next_to_check; count != 0;) {
4767                 struct mbuf             *sendmp, *mh, *mp;
4768                 struct igb_rx_buf       *rxbuf;
4769                 u16                     hlen, plen, hdr, vtag;
4770                 bool                    eop = FALSE;
4771  
4772                 cur = &rxr->rx_base[i];
4773                 staterr = le32toh(cur->wb.upper.status_error);
4774                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4775                         break;
4776                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4777                         break;
4778                 count--;
4779                 sendmp = mh = mp = NULL;
4780                 cur->wb.upper.status_error = 0;
4781                 rxbuf = &rxr->rx_buffers[i];
4782                 plen = le16toh(cur->wb.upper.length);
4783                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4784                 if ((adapter->hw.mac.type == e1000_i350) &&
4785                     (staterr & E1000_RXDEXT_STATERR_LB))
4786                         vtag = be16toh(cur->wb.upper.vlan);
4787                 else
4788                         vtag = le16toh(cur->wb.upper.vlan);
4789                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4790                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4791
4792                 /* Make sure all segments of a bad packet are discarded */
4793                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4794                     (rxr->discard)) {
4795                         adapter->dropped_pkts++;
4796                         ++rxr->rx_discarded;
4797                         if (!eop) /* Catch subsequent segs */
4798                                 rxr->discard = TRUE;
4799                         else
4800                                 rxr->discard = FALSE;
4801                         igb_rx_discard(rxr, i);
4802                         goto next_desc;
4803                 }
4804
4805                 /*
4806                 ** The way the hardware is configured to
4807                 ** split, it will ONLY use the header buffer
4808                 ** when header split is enabled, otherwise we
4809                 ** get normal behavior, ie, both header and
4810                 ** payload are DMA'd into the payload buffer.
4811                 **
4812                 ** The fmp test is to catch the case where a
4813                 ** packet spans multiple descriptors, in that
4814                 ** case only the first header is valid.
4815                 */
4816                 if (rxr->hdr_split && rxr->fmp == NULL) {
4817                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4818                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4819                         if (hlen > IGB_HDR_BUF)
4820                                 hlen = IGB_HDR_BUF;
4821                         mh = rxr->rx_buffers[i].m_head;
4822                         mh->m_len = hlen;
4823                         /* clear buf pointer for refresh */
4824                         rxbuf->m_head = NULL;
4825                         /*
4826                         ** Get the payload length; this
4827                         ** could be zero if it's a small
4828                         ** packet.
4829                         */
4830                         if (plen > 0) {
4831                                 mp = rxr->rx_buffers[i].m_pack;
4832                                 mp->m_len = plen;
4833                                 mh->m_next = mp;
4834                                 /* clear buf pointer */
4835                                 rxbuf->m_pack = NULL;
4836                                 rxr->rx_split_packets++;
4837                         }
4838                 } else {
4839                         /*
4840                         ** Either no header split, or a
4841                         ** secondary piece of a fragmented
4842                         ** split packet.
4843                         */
4844                         mh = rxr->rx_buffers[i].m_pack;
4845                         mh->m_len = plen;
4846                         /* clear buf info for refresh */
4847                         rxbuf->m_pack = NULL;
4848                 }
4849
4850                 ++processed; /* So we know when to refresh */
4851
4852                 /* Initial frame - setup */
4853                 if (rxr->fmp == NULL) {
4854                         mh->m_pkthdr.len = mh->m_len;
4855                         /* Save the head of the chain */
4856                         rxr->fmp = mh;
4857                         rxr->lmp = mh;
4858                         if (mp != NULL) {
4859                                 /* Add payload if split */
4860                                 mh->m_pkthdr.len += mp->m_len;
4861                                 rxr->lmp = mh->m_next;
4862                         }
4863                 } else {
4864                         /* Chain mbuf's together */
4865                         rxr->lmp->m_next = mh;
4866                         rxr->lmp = rxr->lmp->m_next;
4867                         rxr->fmp->m_pkthdr.len += mh->m_len;
4868                 }
4869
4870                 if (eop) {
4871                         rxr->fmp->m_pkthdr.rcvif = ifp;
4872                         ifp->if_ipackets++;
4873                         rxr->rx_packets++;
4874                         /* capture data for AIM */
4875                         rxr->packets++;
4876                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4877                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4878
4879                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4880                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4881
4882                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4883                             (staterr & E1000_RXD_STAT_VP) != 0) {
4884                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4885                                 rxr->fmp->m_flags |= M_VLANTAG;
4886                         }
4887 #ifndef IGB_LEGACY_TX
4888                         rxr->fmp->m_pkthdr.flowid = que->msix;
4889                         rxr->fmp->m_flags |= M_FLOWID;
4890 #endif
4891                         sendmp = rxr->fmp;
4892                         /* Make sure to set M_PKTHDR. */
4893                         sendmp->m_flags |= M_PKTHDR;
4894                         rxr->fmp = NULL;
4895                         rxr->lmp = NULL;
4896                 }
4897
4898 next_desc:
4899                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4900                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4901
4902                 /* Advance our pointers to the next descriptor. */
4903                 if (++i == adapter->num_rx_desc)
4904                         i = 0;
4905                 /*
4906                 ** Send to the stack or LRO
4907                 */
4908                 if (sendmp != NULL) {
4909                         rxr->next_to_check = i;
4910                         igb_rx_input(rxr, ifp, sendmp, ptype);
4911                         i = rxr->next_to_check;
4912                         rxdone++;
4913                 }
4914
4915                 /* Every 8 descriptors we go to refresh mbufs */
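                /* Batching amortizes the RDT tail write performed
                   inside igb_refresh_mbufs(). */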
4916                 if (processed == 8) {
4917                         igb_refresh_mbufs(rxr, i);
4918                         processed = 0;
4919                 }
4920         }
4921
4922         /* Catch any remainders */
4923         if (igb_rx_unrefreshed(rxr))
4924                 igb_refresh_mbufs(rxr, i);
4925
4926         rxr->next_to_check = i;
4927
4928         /*
4929          * Flush any outstanding LRO work
4930          */
4931         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4932                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4933                 tcp_lro_flush(lro, queued);
4934         }
4935
4936         if (done != NULL)
4937                 *done += rxdone;
4938
4939         IGB_RX_UNLOCK(rxr);
4940         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4941 }
4942
4943 /*********************************************************************
4944  *
4945  *  Verify that the hardware indicated that the checksum is valid.
4946  *  Inform the stack about the status of the checksum so that the
4947  *  stack doesn't spend time verifying it again.
4948  *
4949  *********************************************************************/
4950 static void
4951 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4952 {
4953         u16 status = (u16)staterr;
4954         u8  errors = (u8) (staterr >> 24);
4955         int sctp;
4956
4957         /* Ignore Checksum bit is set */
4958         if (status & E1000_RXD_STAT_IXSM) {
4959                 mp->m_pkthdr.csum_flags = 0;
4960                 return;
4961         }
4962
4963         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4964             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4965                 sctp = 1;
4966         else
4967                 sctp = 0;
4968         if (status & E1000_RXD_STAT_IPCS) {
4969                 /* Did it pass? */
4970                 if (!(errors & E1000_RXD_ERR_IPE)) {
4971                         /* IP Checksum Good */
4972                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4973                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4974                 } else
4975                         mp->m_pkthdr.csum_flags = 0;
4976         }
4977
4978         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4979                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4980 #if __FreeBSD_version >= 800000
4981                 if (sctp) /* reassign */
4982                         type = CSUM_SCTP_VALID;
4983 #endif
4984                 /* Did it pass? */
4985                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4986                         mp->m_pkthdr.csum_flags |= type;
4987                         if (sctp == 0)
4988                                 mp->m_pkthdr.csum_data = htons(0xffff);
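                        /*
                        ** csum_data of 0xffff together with
                        ** CSUM_PSEUDO_HDR tells the stack the
                        ** checksum was verified in full.
                        */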
4989                 }
4990         }
4991         return;
4992 }
4993
4994 /*
4995  * This routine is run via a vlan
4996  * config EVENT
4997  */
4998 static void
4999 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5000 {
5001         struct adapter  *adapter = ifp->if_softc;
5002         u32             index, bit;
5003
5004         if (ifp->if_softc !=  arg)   /* Not our event */
5005                 return;
5006
5007         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5008                 return;
5009
5010         IGB_CORE_LOCK(adapter);
5011         index = (vtag >> 5) & 0x7F;
5012         bit = vtag & 0x1F;
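        /* e.g. vtag 100: index = 3, bit = 4, i.e. bit 4 of VFTA word 3 */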
5013         adapter->shadow_vfta[index] |= (1 << bit);
5014         ++adapter->num_vlans;
5015         /* Change hw filter setting */
5016         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5017                 igb_setup_vlan_hw_support(adapter);
5018         IGB_CORE_UNLOCK(adapter);
5019 }
5020
5021 /*
5022  * This routine is run via a vlan
5023  * unconfig EVENT
5024  */
5025 static void
5026 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5027 {
5028         struct adapter  *adapter = ifp->if_softc;
5029         u32             index, bit;
5030
5031         if (ifp->if_softc !=  arg)
5032                 return;
5033
5034         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5035                 return;
5036
5037         IGB_CORE_LOCK(adapter);
5038         index = (vtag >> 5) & 0x7F;
5039         bit = vtag & 0x1F;
5040         adapter->shadow_vfta[index] &= ~(1 << bit);
5041         --adapter->num_vlans;
5042         /* Change hw filter setting */
5043         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5044                 igb_setup_vlan_hw_support(adapter);
5045         IGB_CORE_UNLOCK(adapter);
5046 }
5047
5048 static void
5049 igb_setup_vlan_hw_support(struct adapter *adapter)
5050 {
5051         struct e1000_hw *hw = &adapter->hw;
5052         struct ifnet    *ifp = adapter->ifp;
5053         u32             reg;
5054
5055         if (adapter->vf_ifp) {
5056                 e1000_rlpml_set_vf(hw,
5057                     adapter->max_frame_size + VLAN_TAG_SIZE);
5058                 return;
5059         }
5060
5061         reg = E1000_READ_REG(hw, E1000_CTRL);
5062         reg |= E1000_CTRL_VME;
5063         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5064
5065         /* Enable the Filter Table */
5066         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5067                 reg = E1000_READ_REG(hw, E1000_RCTL);
5068                 reg &= ~E1000_RCTL_CFIEN;
5069                 reg |= E1000_RCTL_VFE;
5070                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5071         }
5072
5073         /* Update the frame size */
5074         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5075             adapter->max_frame_size + VLAN_TAG_SIZE);
5076
5077         /* Don't bother with table if no vlans */
5078         if ((adapter->num_vlans == 0) ||
5079             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5080                 return;
5081         /*
5082         ** A soft reset zeroes out the VFTA, so
5083         ** we need to repopulate it now.
5084         */
5085         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5086                 if (adapter->shadow_vfta[i] != 0) {
5087                         if (adapter->vf_ifp)
5088                                 e1000_vfta_set_vf(hw,
5089                                     adapter->shadow_vfta[i], TRUE);
5090                         else
5091                                 e1000_write_vfta(hw,
5092                                     i, adapter->shadow_vfta[i]);
5093                 }
5094 }
5095
5096 static void
5097 igb_enable_intr(struct adapter *adapter)
5098 {
5099         /* With MSIX/RSS, set up which interrupt causes to auto-clear */
5100         if (adapter->msix_mem) {
5101                 u32 mask = (adapter->que_mask | adapter->link_mask);
5102                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5103                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5104                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5105                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5106                     E1000_IMS_LSC);
5107         } else {
5108                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5109                     IMS_ENABLE_MASK);
5110         }
5111         E1000_WRITE_FLUSH(&adapter->hw);
5112
5113         return;
5114 }
5115
5116 static void
5117 igb_disable_intr(struct adapter *adapter)
5118 {
5119         if (adapter->msix_mem) {
5120                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5121                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5122         } 
5123         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5124         E1000_WRITE_FLUSH(&adapter->hw);
5125         return;
5126 }
5127
5128 /*
5129  * Bit of a misnomer: what this really means is
5130  * to enable OS management of the system, i.e.
5131  * to disable special hardware management features.
5132  */
5133 static void
5134 igb_init_manageability(struct adapter *adapter)
5135 {
5136         if (adapter->has_manage) {
5137                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5138                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5139
5140                 /* disable hardware interception of ARP */
5141                 manc &= ~(E1000_MANC_ARP_EN);
5142
5143                 /* enable receiving management packets to the host */
5144                 manc |= E1000_MANC_EN_MNG2HOST;
5145                 manc2h |= 1 << 5;  /* Mng Port 623 */
5146                 manc2h |= 1 << 6;  /* Mng Port 664 */
5147                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5148                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5149         }
5150 }
5151
5152 /*
5153  * Give control back to hardware management
5154  * controller if there is one.
5155  */
5156 static void
5157 igb_release_manageability(struct adapter *adapter)
5158 {
5159         if (adapter->has_manage) {
5160                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5161
5162                 /* re-enable hardware interception of ARP */
5163                 manc |= E1000_MANC_ARP_EN;
5164                 manc &= ~E1000_MANC_EN_MNG2HOST;
5165
5166                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5167         }
5168 }
5169
5170 /*
5171  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5172  * For ASF and Pass Through versions of f/w this means that
5173  * the driver is loaded. 
5174  *
5175  */
5176 static void
5177 igb_get_hw_control(struct adapter *adapter)
5178 {
5179         u32 ctrl_ext;
5180
5181         if (adapter->vf_ifp)
5182                 return;
5183
5184         /* Let firmware know the driver has taken over */
5185         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5186         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5187             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5188 }
5189
5190 /*
5191  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5192  * For ASF and Pass Through versions of f/w this means that the
5193  * driver is no longer loaded.
5194  *
5195  */
5196 static void
5197 igb_release_hw_control(struct adapter *adapter)
5198 {
5199         u32 ctrl_ext;
5200
5201         if (adapter->vf_ifp)
5202                 return;
5203
5204         /* Let firmware take over control of h/w */
5205         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5206         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5207             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5208 }
5209
5210 static int
5211 igb_is_valid_ether_addr(uint8_t *addr)
5212 {
5213         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5214
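        /* Reject multicast (low bit of first octet set) and all-zero addresses */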
5215         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5216                 return (FALSE);
5217         }
5218
5219         return (TRUE);
5220 }
5221
5222
5223 /*
5224  * Enable PCI Wake On Lan capability
5225  */
5226 static void
5227 igb_enable_wakeup(device_t dev)
5228 {
5229         u16     cap, status;
5230         u8      id;
5231
5232         /* First find the capabilities pointer */
5233         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5234         /* Read the PM Capabilities */
5235         id = pci_read_config(dev, cap, 1);
5236         if (id != PCIY_PMG)     /* Something wrong */
5237                 return;
5238         /* OK, we have the power capabilities, so
5239            now get the status register */
5240         cap += PCIR_POWER_STATUS;
5241         status = pci_read_config(dev, cap, 2);
5242         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5243         pci_write_config(dev, cap, status, 2);
5244         return;
5245 }
5246
5247 static void
5248 igb_led_func(void *arg, int onoff)
5249 {
5250         struct adapter  *adapter = arg;
5251
5252         IGB_CORE_LOCK(adapter);
5253         if (onoff) {
5254                 e1000_setup_led(&adapter->hw);
5255                 e1000_led_on(&adapter->hw);
5256         } else {
5257                 e1000_led_off(&adapter->hw);
5258                 e1000_cleanup_led(&adapter->hw);
5259         }
5260         IGB_CORE_UNLOCK(adapter);
5261 }
5262
5263 /**********************************************************************
5264  *
5265  *  Update the board statistics counters.
5266  *
5267  **********************************************************************/
5268 static void
5269 igb_update_stats_counters(struct adapter *adapter)
5270 {
5271         struct ifnet            *ifp;
5272         struct e1000_hw         *hw = &adapter->hw;
5273         struct e1000_hw_stats   *stats;
5274
5275         /* 
5276         ** The virtual function adapter has only a
5277         ** small controlled set of stats, do only 
5278         ** those and return.
5279         */
5280         if (adapter->vf_ifp) {
5281                 igb_update_vf_stats_counters(adapter);
5282                 return;
5283         }
5284
5285         stats = (struct e1000_hw_stats  *)adapter->stats;
5286
5287         if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5288            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5289                 stats->symerrs +=
5290                     E1000_READ_REG(hw,E1000_SYMERRS);
5291                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5292         }
5293
5294         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5295         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5296         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5297         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5298
5299         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5300         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5301         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5302         stats->dc += E1000_READ_REG(hw, E1000_DC);
5303         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5304         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5305         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5306         /*
5307         ** For watchdog management we need to know if we have been
5308         ** paused during the last interval, so capture that here.
5309         */ 
5310         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5311         stats->xoffrxc += adapter->pause_frames;
5312         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5313         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5314         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5315         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5316         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5317         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5318         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5319         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5320         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5321         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5322         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5323         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5324
5325         /* For the 64-bit byte counters the low dword must be read first. */
5326         /* Both registers clear on the read of the high dword */
5327
5328         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5329             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5330         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5331             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5332
5333         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5334         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5335         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5336         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5337         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5338
5339         stats->tor += E1000_READ_REG(hw, E1000_TORH);
5340         stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5341
5342         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5343         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5344         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5345         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5346         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5347         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5348         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5349         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5350         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5351         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5352
5353         /* Interrupt Counts */
5354
5355         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5356         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5357         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5358         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5359         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5360         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5361         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5362         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5363         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5364
5365         /* Host to Card Statistics */
5366
5367         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5368         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5369         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5370         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5371         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5372         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5373         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5374         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5375             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5376         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5377             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5378         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5379         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5380         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5381
5382         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5383         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5384         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5385         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5386         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5387         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5388
5389         ifp = adapter->ifp;
5390         ifp->if_collisions = stats->colc;
5391
5392         /* Rx Errors */
5393         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5394             stats->crcerrs + stats->algnerrc +
5395             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5396
5397         /* Tx Errors */
5398         ifp->if_oerrors = stats->ecol +
5399             stats->latecol + adapter->watchdog_events;
5400
5401         /* Driver specific counters */
5402         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5403         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5404         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5405         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5406         adapter->packet_buf_alloc_tx =
5407             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5408         adapter->packet_buf_alloc_rx =
5409             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5410 }
5411
5412
5413 /**********************************************************************
5414  *
5415  *  Initialize the VF board statistics counters.
5416  *
5417  **********************************************************************/
5418 static void
5419 igb_vf_init_stats(struct adapter *adapter)
5420 {
5421         struct e1000_hw *hw = &adapter->hw;
5422         struct e1000_vf_stats   *stats;
5423
5424         stats = (struct e1000_vf_stats  *)adapter->stats;
5425         if (stats == NULL)
5426                 return;
5427         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5428         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5429         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5430         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5431         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5432 }
5433  
5434 /**********************************************************************
5435  *
5436  *  Update the VF board statistics counters.
5437  *
5438  **********************************************************************/
5439 static void
5440 igb_update_vf_stats_counters(struct adapter *adapter)
5441 {
5442         struct e1000_hw *hw = &adapter->hw;
5443         struct e1000_vf_stats   *stats;
5444
5445         if (adapter->link_speed == 0)
5446                 return;
5447
5448         stats = (struct e1000_vf_stats  *)adapter->stats;
5449
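        /*
        ** UPDATE_VF_REG (a macro in if_igb.h) presumably folds each
        ** 32-bit hardware counter into its 64-bit accumulator,
        ** accounting for rollover since the saved "last" snapshot.
        */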
5450         UPDATE_VF_REG(E1000_VFGPRC,
5451             stats->last_gprc, stats->gprc);
5452         UPDATE_VF_REG(E1000_VFGORC,
5453             stats->last_gorc, stats->gorc);
5454         UPDATE_VF_REG(E1000_VFGPTC,
5455             stats->last_gptc, stats->gptc);
5456         UPDATE_VF_REG(E1000_VFGOTC,
5457             stats->last_gotc, stats->gotc);
5458         UPDATE_VF_REG(E1000_VFMPRC,
5459             stats->last_mprc, stats->mprc);
5460 }
5461
5462 /* Export a single 32-bit register via a read-only sysctl. */
5463 static int
5464 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5465 {
5466         struct adapter *adapter;
5467         u_int val;
5468
5469         adapter = oidp->oid_arg1;
5470         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5471         return (sysctl_handle_int(oidp, &val, 0, req));
5472 }
5473
5474 /*
5475 **  Tuneable interrupt rate handler
5476 */
5477 static int
5478 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5479 {
5480         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5481         int                     error;
5482         u32                     reg, usec, rate;
5483                         
5484         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5485         usec = ((reg & 0x7FFC) >> 2);
5486         if (usec > 0)
5487                 rate = 1000000 / usec;
5488         else
5489                 rate = 0;
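        /* e.g. usec == 125 reports a rate of 1000000 / 125 == 8000 */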
5490         error = sysctl_handle_int(oidp, &rate, 0, req);
5491         if (error || !req->newptr)
5492                 return (error);
5493         return (0);
5494 }
5495
5496 /*
5497  * Add sysctl variables, one per statistic, to the system.
5498  */
5499 static void
5500 igb_add_hw_stats(struct adapter *adapter)
5501 {
5502         device_t dev = adapter->dev;
5503
5504         struct tx_ring *txr = adapter->tx_rings;
5505         struct rx_ring *rxr = adapter->rx_rings;
5506
5507         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5508         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5509         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5510         struct e1000_hw_stats *stats = adapter->stats;
5511
5512         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5513         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5514
5515 #define QUEUE_NAME_LEN 32
5516         char namebuf[QUEUE_NAME_LEN];
5517
5518         /* Driver Statistics */
5519         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5520                         CTLFLAG_RD, &adapter->link_irq, 0,
5521                         "Link MSIX IRQ Handled");
5522         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5523                         CTLFLAG_RD, &adapter->dropped_pkts,
5524                         "Driver dropped packets");
5525         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5526                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5527                         "Driver tx dma failure in xmit");
5528         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5529                         CTLFLAG_RD, &adapter->rx_overruns,
5530                         "RX overruns");
5531         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5532                         CTLFLAG_RD, &adapter->watchdog_events,
5533                         "Watchdog timeouts");
5534
5535         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5536                         CTLFLAG_RD, &adapter->device_control,
5537                         "Device Control Register");
5538         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5539                         CTLFLAG_RD, &adapter->rx_control,
5540                         "Receiver Control Register");
5541         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5542                         CTLFLAG_RD, &adapter->int_mask,
5543                         "Interrupt Mask");
5544         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5545                         CTLFLAG_RD, &adapter->eint_mask,
5546                         "Extended Interrupt Mask");
5547         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5548                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5549                         "Transmit Buffer Packet Allocation");
5550         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5551                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5552                         "Receive Buffer Packet Allocation");
5553         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5554                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5555                         "Flow Control High Watermark");
5556         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5557                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5558                         "Flow Control Low Watermark");
5559
5560         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5561                 struct lro_ctrl *lro = &rxr->lro;
5562
5563                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5564                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5565                                             CTLFLAG_RD, NULL, "Queue Name");
5566                 queue_list = SYSCTL_CHILDREN(queue_node);
5567
5568                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5569                                 CTLFLAG_RD, &adapter->queues[i],
5570                                 0, /* arg2 is unused by the handler */
5571                                 igb_sysctl_interrupt_rate_handler,
5572                                 "IU", "Interrupt Rate");
5573
5574                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5575                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5576                                 igb_sysctl_reg_handler, "IU",
5577                                 "Transmit Descriptor Head");
5578                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5579                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5580                                 igb_sysctl_reg_handler, "IU",
5581                                 "Transmit Descriptor Tail");
5582                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5583                                 CTLFLAG_RD, &txr->no_desc_avail,
5584                                 "Queue No Descriptor Available");
5585                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5586                                 CTLFLAG_RD, &txr->tx_packets,
5587                                 "Queue Packets Transmitted");
5588
5589                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5590                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5591                                 igb_sysctl_reg_handler, "IU",
5592                                 "Receive Descriptor Head");
5593                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5594                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5595                                 igb_sysctl_reg_handler, "IU",
5596                                 "Receive Descriptor Tail");
5597                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5598                                 CTLFLAG_RD, &rxr->rx_packets,
5599                                 "Queue Packets Received");
5600                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5601                                 CTLFLAG_RD, &rxr->rx_bytes,
5602                                 "Queue Bytes Received");
5603                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5604                                 CTLFLAG_RD, &lro->lro_queued, 0,
5605                                 "LRO Queued");
5606                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5607                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5608                                 "LRO Flushed");
5609         }
5610
5611         /* MAC stats get their own sub node */
5612
5613         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5614                                     CTLFLAG_RD, NULL, "MAC Statistics");
5615         stat_list = SYSCTL_CHILDREN(stat_node);
5616
5617         /*
5618         ** The VF adapter has a very limited set of stats
5619         ** since it's not managing the metal, so to speak.
5620         */
5621         if (adapter->vf_ifp) {
5622                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5623                                 CTLFLAG_RD, &stats->gprc,
5624                                 "Good Packets Received");
5625                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5626                                 CTLFLAG_RD, &stats->gptc,
5627                                 "Good Packets Transmitted");
5628                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5629                                 CTLFLAG_RD, &stats->gorc,
5630                                 "Good Octets Received");
5631                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5632                                 CTLFLAG_RD, &stats->gotc,
5633                                 "Good Octets Transmitted");
5634                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5635                                 CTLFLAG_RD, &stats->mprc,
5636                                 "Multicast Packets Received");
5637                 return;
5638         }
5639
5640         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5641                         CTLFLAG_RD, &stats->ecol,
5642                         "Excessive collisions");
5643         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5644                         CTLFLAG_RD, &stats->scc,
5645                         "Single collisions");
5646         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5647                         CTLFLAG_RD, &stats->mcc,
5648                         "Multiple collisions");
5649         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5650                         CTLFLAG_RD, &stats->latecol,
5651                         "Late collisions");
5652         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5653                         CTLFLAG_RD, &stats->colc,
5654                         "Collision Count");
5655         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5656                         CTLFLAG_RD, &stats->symerrs,
5657                         "Symbol Errors");
5658         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5659                         CTLFLAG_RD, &stats->sec,
5660                         "Sequence Errors");
5661         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5662                         CTLFLAG_RD, &stats->dc,
5663                         "Defer Count");
5664         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5665                         CTLFLAG_RD, &stats->mpc,
5666                         "Missed Packets");
5667         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5668                         CTLFLAG_RD, &stats->rnbc,
5669                         "Receive No Buffers");
5670         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5671                         CTLFLAG_RD, &stats->ruc,
5672                         "Receive Undersize");
5673         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5674                         CTLFLAG_RD, &stats->rfc,
5675                         "Fragmented Packets Received");
5676         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5677                         CTLFLAG_RD, &stats->roc,
5678                         "Oversized Packets Received");
5679         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5680                         CTLFLAG_RD, &stats->rjc,
5681                         "Received Jabber");
5682         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5683                         CTLFLAG_RD, &stats->rxerrc,
5684                         "Receive Errors");
5685         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5686                         CTLFLAG_RD, &stats->crcerrs,
5687                         "CRC errors");
5688         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5689                         CTLFLAG_RD, &stats->algnerrc,
5690                         "Alignment Errors");
5691         /* On 82575 these are collision counts */
5692         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5693                         CTLFLAG_RD, &stats->cexterr,
5694                         "Collision/Carrier extension errors");
5695         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5696                         CTLFLAG_RD, &stats->xonrxc,
5697                         "XON Received");
5698         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5699                         CTLFLAG_RD, &stats->xontxc,
5700                         "XON Transmitted");
5701         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5702                         CTLFLAG_RD, &stats->xoffrxc,
5703                         "XOFF Received");
5704         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5705                         CTLFLAG_RD, &stats->xofftxc,
5706                         "XOFF Transmitted");
5707         /* Packet Reception Stats */
5708         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5709                         CTLFLAG_RD, &stats->tpr,
5710                         "Total Packets Received");
5711         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5712                         CTLFLAG_RD, &stats->gprc,
5713                         "Good Packets Received");
5714         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5715                         CTLFLAG_RD, &stats->bprc,
5716                         "Broadcast Packets Received");
5717         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5718                         CTLFLAG_RD, &stats->mprc,
5719                         "Multicast Packets Received");
5720         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5721                         CTLFLAG_RD, &stats->prc64,
5722                         "64 byte frames received");
5723         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5724                         CTLFLAG_RD, &stats->prc127,
5725                         "65-127 byte frames received");
5726         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5727                         CTLFLAG_RD, &stats->prc255,
5728                         "128-255 byte frames received");
5729         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5730                         CTLFLAG_RD, &stats->prc511,
5731                         "256-511 byte frames received");
5732         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5733                         CTLFLAG_RD, &stats->prc1023,
5734                         "512-1023 byte frames received");
5735         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5736                         CTLFLAG_RD, &stats->prc1522,
5737                         "1024-1522 byte frames received");
5738         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5739                         CTLFLAG_RD, &stats->gorc, 
5740                         "Good Octets Received"); 
5741
5742         /* Packet Transmission Stats */
5743         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5744                         CTLFLAG_RD, &stats->gotc, 
5745                         "Good Octets Transmitted"); 
5746         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5747                         CTLFLAG_RD, &stats->tpt,
5748                         "Total Packets Transmitted");
5749         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5750                         CTLFLAG_RD, &stats->gptc,
5751                         "Good Packets Transmitted");
5752         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5753                         CTLFLAG_RD, &stats->bptc,
5754                         "Broadcast Packets Transmitted");
5755         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5756                         CTLFLAG_RD, &stats->mptc,
5757                         "Multicast Packets Transmitted");
5758         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5759                         CTLFLAG_RD, &stats->ptc64,
5760                         "64 byte frames transmitted");
5761         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5762                         CTLFLAG_RD, &stats->ptc127,
5763                         "65-127 byte frames transmitted");
5764         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5765                         CTLFLAG_RD, &stats->ptc255,
5766                         "128-255 byte frames transmitted");
5767         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5768                         CTLFLAG_RD, &stats->ptc511,
5769                         "256-511 byte frames transmitted");
5770         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5771                         CTLFLAG_RD, &stats->ptc1023,
5772                         "512-1023 byte frames transmitted");
5773         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5774                         CTLFLAG_RD, &stats->ptc1522,
5775                         "1024-1522 byte frames transmitted");
5776         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5777                         CTLFLAG_RD, &stats->tsctc,
5778                         "TSO Contexts Transmitted");
5779         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5780                         CTLFLAG_RD, &stats->tsctfc,
5781                         "TSO Contexts Failed");
5782
5783
5784         /* Interrupt Stats */
5785
5786         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5787                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5788         int_list = SYSCTL_CHILDREN(int_node);
5789
5790         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5791                         CTLFLAG_RD, &stats->iac,
5792                         "Interrupt Assertion Count");
5793
5794         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5795                         CTLFLAG_RD, &stats->icrxptc,
5796                         "Interrupt Cause Rx Pkt Timer Expire Count");
5797
5798         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5799                         CTLFLAG_RD, &stats->icrxatc,
5800                         "Interrupt Cause Rx Abs Timer Expire Count");
5801
5802         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5803                         CTLFLAG_RD, &stats->ictxptc,
5804                         "Interrupt Cause Tx Pkt Timer Expire Count");
5805
5806         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5807                         CTLFLAG_RD, &stats->ictxatc,
5808                         "Interrupt Cause Tx Abs Timer Expire Count");
5809
5810         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5811                         CTLFLAG_RD, &stats->ictxqec,
5812                         "Interrupt Cause Tx Queue Empty Count");
5813
5814         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5815                         CTLFLAG_RD, &stats->ictxqmtc,
5816                         "Interrupt Cause Tx Queue Min Thresh Count");
5817
5818         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5819                         CTLFLAG_RD, &stats->icrxdmtc,
5820                         "Interrupt Cause Rx Desc Min Thresh Count");
5821
5822         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5823                         CTLFLAG_RD, &stats->icrxoc,
5824                         "Interrupt Cause Receiver Overrun Count");
5825
5826         /* Host to Card Stats */
5827
5828         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5829                                     CTLFLAG_RD, NULL, 
5830                                     "Host to Card Statistics");
5831
5832         host_list = SYSCTL_CHILDREN(host_node);
5833
5834         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5835                         CTLFLAG_RD, &stats->cbtmpc,
5836                         "Circuit Breaker Tx Packet Count");
5837
5838         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5839                         CTLFLAG_RD, &stats->htdpmc,
5840                         "Host Transmit Discarded Packets");
5841
5842         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5843                         CTLFLAG_RD, &stats->rpthc,
5844                         "Rx Packets To Host");
5845
5846         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5847                         CTLFLAG_RD, &stats->cbrmpc,
5848                         "Circuit Breaker Rx Packet Count");
5849
5850         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5851                         CTLFLAG_RD, &stats->cbrdpc,
5852                         "Circuit Breaker Rx Dropped Count");
5853
5854         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5855                         CTLFLAG_RD, &stats->hgptc,
5856                         "Host Good Packets Tx Count");
5857
5858         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5859                         CTLFLAG_RD, &stats->htcbdpc,
5860                         "Host Tx Circuit Breaker Dropped Count");
5861
5862         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5863                         CTLFLAG_RD, &stats->hgorc,
5864                         "Host Good Octets Received Count");
5865
5866         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5867                         CTLFLAG_RD, &stats->hgotc,
5868                         "Host Good Octets Transmit Count");
5869
5870         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5871                         CTLFLAG_RD, &stats->lenerrs,
5872                         "Length Errors");
5873
5874         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5875                         CTLFLAG_RD, &stats->scvpc,
5876                         "SerDes/SGMII Code Violation Pkt Count");
5877
5878         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5879                         CTLFLAG_RD, &stats->hrmpc,
5880                         "Header Redirection Missed Packet Count");
5881 }
5882
5883
5884 /**********************************************************************
5885  *
5886  *  This routine provides a way to dump out the adapter EEPROM,
5887  *  often a useful debug/service tool; only the first 32 words
5888  *  are dumped, since the data that matters lives in that range.
5889  *
5890  **********************************************************************/
5891 static int
5892 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5893 {
5894         struct adapter *adapter;
5895         int error;
5896         int result;
5897
5898         result = -1;
5899         error = sysctl_handle_int(oidp, &result, 0, req);
5900
5901         if (error || !req->newptr)
5902                 return (error);
5903
5904         /*
5905          * This value will cause a hex dump of the
5906          * first 32 16-bit words of the EEPROM to
5907          * the screen.
5908          */
5909         if (result == 1) {
5910                 adapter = (struct adapter *)arg1;
5911                 igb_print_nvm_info(adapter);
5912         }
5913
5914         return (error);
5915 }
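
/*
** Assuming the handler is attached under the name "nvm" (as this
** driver does at attach time), the dump is triggered from userland
** with, e.g.:
**
**	# sysctl dev.igb.0.nvm=1
*/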
5916
5917 static void
5918 igb_print_nvm_info(struct adapter *adapter)
5919 {
5920         u16     eeprom_data;
5921         int     i, j, row = 0;
5922
5923         /* It's a bit crude, but it gets the job done */
5924         printf("\nInterface EEPROM Dump:\n");
5925         printf("Offset\n0x0000  ");
5926         for (i = 0, j = 0; i < 32; i++, j++) {
5927                 if (j == 8) { /* Make the offset block */
5928                         j = 0; ++row;
5929                         printf("\n0x00%x0  ", row);
5930                 }
5931                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5932                 printf("%04x ", eeprom_data);
5933         }
5934         printf("\n");
5935 }
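
/*
** Eight 16-bit words are printed per row, so the 32 words dumped
** appear under the row offsets 0x0000, 0x0010, 0x0020 and 0x0030
** (the word values themselves depend on the NVM contents).
*/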
5936
5937 static void
5938 igb_set_sysctl_value(struct adapter *adapter, const char *name,
5939         const char *description, int *limit, int value)
5940 {
5941         *limit = value;
5942         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5943             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5944             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5945 }
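
/*
** Convenience helper: seeds *limit and exposes it read/write.  A
** typical use from the attach path, e.g. for the receive processing
** limit (names as used elsewhere in this driver):
**
**	igb_set_sysctl_value(adapter, "rx_processing_limit",
**	    "max number of rx packets to process",
**	    &adapter->rx_process_limit, igb_rx_process_limit);
*/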
5946
5947 /*
5948 ** Set flow control using sysctl:
5949 ** Flow control values:
5950 **      0 - off
5951 **      1 - rx pause
5952 **      2 - tx pause
5953 **      3 - full
5954 */
5955 static int
5956 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5957 {
5958         int             error;
5959         static int      input = 3; /* default is full */
5960         struct adapter  *adapter = (struct adapter *) arg1;
5961
5962         error = sysctl_handle_int(oidp, &input, 0, req);
5963
5964         if ((error) || (req->newptr == NULL))
5965                 return (error);
5966
5967         switch (input) {
5968                 case e1000_fc_rx_pause:
5969                 case e1000_fc_tx_pause:
5970                 case e1000_fc_full:
5971                 case e1000_fc_none:
5972                         adapter->hw.fc.requested_mode = input;
5973                         adapter->fc = input;
5974                         break;
5975                 default:
5976                         /* Do nothing */
5977                         return (error);
5978         }
5979
5980         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5981         e1000_force_mac_fc(&adapter->hw);
5982         return (error);
5983 }
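
/*
** With the oid attached read/write (this driver uses the name "fc"),
** the mode can be forced at runtime, e.g.:
**
**	# sysctl dev.igb.0.fc=3		(full flow control)
**
** Note that 'input' is static: a read reports the last value written
** through any igb instance, not necessarily this adapter's mode.
*/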
5984
5985 /*
5986 ** Manage DMA Coalesce:
5987 ** Control values:
5988 **      0 - off, 1 - on (uses the 1000 default)
5989 **      Legal timer values are:
5990 **      250, 500, and 1000-10000 in steps of 1000
5991 */
5992 static int
5993 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5994 {
5995         struct adapter *adapter = (struct adapter *) arg1;
5996         int             error;
5997
5998         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5999
6000         if ((error) || (req->newptr == NULL))
6001                 return (error);
6002
6003         switch (adapter->dmac) {
6004                 case 0:
6005                         /* Disabling */
6006                         break;
6007                 case 1: /* Just enable and use default */
6008                         adapter->dmac = 1000;
6009                         break;
6010                 case 250:
6011                 case 500:
6012                 case 1000:
6013                 case 2000:
6014                 case 3000:
6015                 case 4000:
6016                 case 5000:
6017                 case 6000:
6018                 case 7000:
6019                 case 8000:
6020                 case 9000:
6021                 case 10000:
6022                         /* Legal values - allow */
6023                         break;
6024                 default:
6025                         /* Do nothing, illegal value */
6026                         adapter->dmac = 0;
6027                         return (error);
6028         }
6029         /* Reinit the interface */
6030         igb_init(adapter);
6031         return (error);
6032 }
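
/*
** igb_init() re-runs the reset path, which is where a nonzero
** adapter->dmac value is translated into the hardware DMA
** coalescing register setup on the devices that support it.
*/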
6033
6034 /*
6035 ** Manage Energy Efficient Ethernet:
6036 ** Control values:
6037 **     0 - EEE enabled, 1 - EEE disabled
6038 */
6039 static int
6040 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6041 {
6042         struct adapter  *adapter = (struct adapter *) arg1;
6043         int             error, value;
6044
6045         value = adapter->hw.dev_spec._82575.eee_disable;
6046         error = sysctl_handle_int(oidp, &value, 0, req);
6047         if (error || req->newptr == NULL)
6048                 return (error);
6049         IGB_CORE_LOCK(adapter);
6050         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6051         igb_init_locked(adapter);
6052         IGB_CORE_UNLOCK(adapter);
6053         return (0);
6054 }
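
/*
** Example, assuming the oid is attached as "eee_disabled" (the name
** this driver uses on EEE-capable parts):
**
**	# sysctl dev.igb.0.eee_disabled=1	(turn EEE off)
*/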